1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/APSInt.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/ADT/StringRef.h"
57#include "llvm/IR/CallingConv.h"
58#include "llvm/IR/Constant.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/DataLayout.h"
61#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/Function.h"
64#include "llvm/IR/GlobalValue.h"
65#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Intrinsics.h"
68#include "llvm/IR/IntrinsicsPowerPC.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCContext.h"
74#include "llvm/MC/MCExpr.h"
83#include "llvm/Support/Debug.h"
85#include "llvm/Support/Format.h"
91#include <algorithm>
92#include <cassert>
93#include <cstdint>
94#include <iterator>
95#include <list>
96#include <optional>
97#include <utility>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "ppc-lowering"
103
105 "disable-p10-store-forward",
106 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
107 cl::init(false));
108
109static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
110cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
113cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
116cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
117
118static cl::opt<bool> DisableSCO("disable-ppc-sco",
119cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
120
121static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
122cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
123
124static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
125cl::desc("use absolute jump tables on ppc"), cl::Hidden);
126
127static cl::opt<bool>
128 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
129 cl::desc("disable vector permute decomposition"),
130 cl::init(true), cl::Hidden);
131
133 "disable-auto-paired-vec-st",
134 cl::desc("disable automatically generated 32byte paired vector stores"),
135 cl::init(true), cl::Hidden);
136
138 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
139 cl::desc("Set minimum number of entries to use a jump table on PPC"));
140
142 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
143 cl::desc("max depth when checking alias info in GatherAllAliases()"));
144
146 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
147 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
148 "function to use initial-exec"));
149
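// Illustrative note (not from the original source): the cl::opt flags above are
// hidden backend options and can be toggled from the llc command line when
// experimenting, e.g. (assumed invocation; the input file name is arbitrary):
//   llc -mtriple=powerpc64le-unknown-linux-gnu -disable-p10-store-forward in.ll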
150STATISTIC(NumTailCalls, "Number of tail calls");
151STATISTIC(NumSiblingCalls, "Number of sibling calls");
152STATISTIC(ShufflesHandledWithVPERM,
153 "Number of shuffles lowered to a VPERM or XXPERM");
154STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
155
156static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
157
158static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
159
160// A faster local-[exec|dynamic] TLS access sequence (enabled with the
161// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
162// variables; consistent with the IBM XL compiler, we apply a max size of
163// slightly under 32KB.
165
166// FIXME: Remove this once the bug has been fixed!
168
170 const PPCSubtarget &STI)
171 : TargetLowering(TM), Subtarget(STI) {
172 // Initialize map that relates the PPC addressing modes to the computed flags
173 // of a load/store instruction. The map is used to determine the optimal
174 // addressing mode when selecting load and stores.
175 initializeAddrModeMap();
176 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
177 // arguments are at least 4/8 bytes aligned.
178 bool isPPC64 = Subtarget.isPPC64();
179 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
180 const MVT RegVT = Subtarget.getScalarIntVT();
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
184 if (!useSoftFloat()) {
185 if (hasSPE()) {
186 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
187 // EFPU2 APU only supports f32
188 if (!Subtarget.hasEFPU2())
189 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
190 } else {
191 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
192 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
193 }
194 }
195
198
199 // PowerPC uses addo_carry,subo_carry to propagate carry.
202
203 // On P10, the default lowering generates better code using the
204 // setbc instruction.
205 if (!Subtarget.hasP10Vector()) {
207 if (isPPC64)
209 }
210
211 // Match BITREVERSE to customized fast code sequence in the td file.
214
215 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
216 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
217
218 // Custom lower inline assembly to check for special registers.
219 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
220 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
221
222 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
223 for (MVT VT : MVT::integer_valuetypes()) {
226 }
227
228 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
229 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
230
231 if (Subtarget.isISA3_0()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
235 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
236 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
237 } else {
238 // No extending loads from f16 or HW conversions back and forth.
239 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
240 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
241 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
242 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
243 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
244 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
245 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
246 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
247 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
248 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
249 }
250
251 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
252
253 // PowerPC has pre-inc loads and stores.
264 if (!Subtarget.hasSPE()) {
269 }
270
271 if (Subtarget.useCRBits()) {
273
274 if (isPPC64 || Subtarget.hasFPCVT()) {
279
281 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
283 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
284
289
291 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
293 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
294 } else {
299 }
300
301 // PowerPC does not support direct load/store of condition registers.
302 setOperationAction(ISD::LOAD, MVT::i1, Custom);
303 setOperationAction(ISD::STORE, MVT::i1, Custom);
304
305 // FIXME: Remove this once the ANDI glue bug is fixed:
306 if (ANDIGlueBug)
308
309 for (MVT VT : MVT::integer_valuetypes()) {
312 setTruncStoreAction(VT, MVT::i1, Expand);
313 }
314
315 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
316 }
317
318 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
319 // PPC (the libcall is not available).
324
325 // We do not currently implement these libm ops for PowerPC.
326 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
327 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
328 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
329 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
330 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
331 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
332
333 // PowerPC has no SREM/UREM instructions unless we are on P9
334 // On P9 we may use a hardware instruction to compute the remainder.
335 // When the result of both the remainder and the division is required it is
336 // more efficient to compute the remainder from the result of the division
337 // rather than use the remainder instruction. The instructions are legalized
338 // directly because the DivRemPairsPass performs the transformation at the IR
339 // level.
340 if (Subtarget.isISA3_0()) {
345 } else {
350 }
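// Illustrative example (not from the original source): when both the quotient
// and the remainder of the same operands are needed, DivRemPairsPass rewrites
// the IR so that "r = a % b" becomes "r = a - (a / b) * b"; only the divide
// reaches the DAG, and the remainder settings above matter for lone remainders.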
351
352 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
361
362 // Handle constrained floating-point operations on scalars.
363 // TODO: Handle SPE-specific operations.
369
374
375 if (!Subtarget.hasSPE()) {
378 }
379
380 if (Subtarget.hasVSX()) {
383 }
384
385 if (Subtarget.hasFSQRT()) {
388 }
389
390 if (Subtarget.hasFPRND()) {
395
400 }
401
402 // We don't support sin/cos/sqrt/fmod/pow
403 setOperationAction(ISD::FSIN , MVT::f64, Expand);
404 setOperationAction(ISD::FCOS , MVT::f64, Expand);
405 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
407 setOperationAction(ISD::FPOW , MVT::f64, Expand);
408 setOperationAction(ISD::FSIN , MVT::f32, Expand);
409 setOperationAction(ISD::FCOS , MVT::f32, Expand);
410 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
412 setOperationAction(ISD::FPOW , MVT::f32, Expand);
413
414 // MASS transformation for LLVM intrinsics, replicating the fast-math flag
415 // to stay consistent with the PPCGenScalarMASSEntries pass.
416 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
417 setOperationAction(ISD::FSIN , MVT::f64, Custom);
418 setOperationAction(ISD::FCOS , MVT::f64, Custom);
419 setOperationAction(ISD::FPOW , MVT::f64, Custom);
420 setOperationAction(ISD::FLOG, MVT::f64, Custom);
421 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
422 setOperationAction(ISD::FEXP, MVT::f64, Custom);
423 setOperationAction(ISD::FSIN , MVT::f32, Custom);
424 setOperationAction(ISD::FCOS , MVT::f32, Custom);
425 setOperationAction(ISD::FPOW , MVT::f32, Custom);
426 setOperationAction(ISD::FLOG, MVT::f32, Custom);
427 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
428 setOperationAction(ISD::FEXP, MVT::f32, Custom);
429 }
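// Illustrative note (not from the original source): at this optimization level,
// a fast-math intrinsic such as llvm.pow.f64 that is created during DAG
// construction is redirected to the scalar MASS math library, mirroring the
// IR-level rewriting done by PPCGenScalarMASSEntries.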
430
431 if (Subtarget.hasSPE()) {
434 } else {
435 setOperationAction(ISD::FMA , MVT::f64, Legal);
436 setOperationAction(ISD::FMA , MVT::f32, Legal);
438 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
439 }
440
441 if (Subtarget.hasSPE())
442 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
443
444 // If we're enabling GP optimizations, use hardware square root
445 if (!Subtarget.hasFSQRT() &&
446 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
447 Subtarget.hasFRE()))
448 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
449
450 if (!Subtarget.hasFSQRT() &&
451 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
452 Subtarget.hasFRES()))
453 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
454
455 if (Subtarget.hasFCPSGN()) {
458 } else {
461 }
462
463 if (Subtarget.hasFPRND()) {
464 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
465 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
466 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
467 setOperationAction(ISD::FROUND, MVT::f64, Legal);
468
469 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
470 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
471 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
472 setOperationAction(ISD::FROUND, MVT::f32, Legal);
473 }
474
475 // Prior to P10, PowerPC does not have BSWAP, but we can use the vector
476 // BSWAP instruction xxbrd to speed up scalar BSWAP64.
477 if (Subtarget.isISA3_1()) {
480 } else {
483 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
484 }
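// Illustrative sketch (not from the original source): on a 64-bit P9 target the
// Custom lowering above is expected to move the GPR value into a vector
// register, byte-reverse it with xxbrd, and move it back, which is cheaper than
// the generic shift/rotate expansion of a 64-bit byte swap.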
485
486 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
487 if (Subtarget.isISA3_0()) {
488 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
489 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
490 } else {
491 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
492 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
493 }
494
495 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
498 } else {
501 }
502
503 // PowerPC does not have ROTR
506
507 if (!Subtarget.useCRBits()) {
508 // PowerPC does not have Select
513 }
514
515 // PowerPC wants to turn select_cc of FP into fsel when possible.
518
519 // PowerPC wants to optimize integer setcc a bit
520 if (!Subtarget.useCRBits())
522
523 if (Subtarget.hasFPU()) {
527
531 }
532
533 // PowerPC does not have BRCOND which requires SetCC
534 if (!Subtarget.useCRBits())
535 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
536
537 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
538
539 if (Subtarget.hasSPE()) {
540 // SPE has built-in conversions
547
548 // SPE supports signaling compare of f32/f64.
551 } else {
552 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
555
556 // PowerPC does not have [U|S]INT_TO_FP
561 }
562
563 if (Subtarget.hasDirectMove() && isPPC64) {
564 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
565 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
566 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
567 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
568 if (TM.Options.UnsafeFPMath) {
569 setOperationAction(ISD::LRINT, MVT::f64, Legal);
570 setOperationAction(ISD::LRINT, MVT::f32, Legal);
571 setOperationAction(ISD::LLRINT, MVT::f64, Legal);
572 setOperationAction(ISD::LLRINT, MVT::f32, Legal);
573 setOperationAction(ISD::LROUND, MVT::f64, Legal);
574 setOperationAction(ISD::LROUND, MVT::f32, Legal);
575 setOperationAction(ISD::LLROUND, MVT::f64, Legal);
576 setOperationAction(ISD::LLROUND, MVT::f32, Legal);
577 }
578 } else {
579 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
580 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
581 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
582 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
583 }
584
585 // We cannot sextinreg(i1). Expand to shifts.
587
588 // NOTE: EH_SJLJ_SETJMP/_LONGJMP support here is NOT intended to implement
589 // SjLj exception handling, but rather a light-weight setjmp/longjmp
590 // replacement to support continuations, user-level threading, and so on.
591 // As a result, no other SjLj exception interfaces are implemented; please
592 // don't build your own exception handling on top of them.
593 // LLVM/Clang supports zero-cost DWARF exception handling.
596
597 // We want to legalize GlobalAddress and ConstantPool nodes into the
598 // appropriate instructions to materialize the address.
609
610 // TRAP is legal.
611 setOperationAction(ISD::TRAP, MVT::Other, Legal);
612
613 // TRAMPOLINE is custom lowered.
614 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
615 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
616
617 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
618 setOperationAction(ISD::VASTART , MVT::Other, Custom);
619
620 if (Subtarget.is64BitELFABI()) {
621 // VAARG always uses double-word chunks, so promote anything smaller.
622 setOperationAction(ISD::VAARG, MVT::i1, Promote);
623 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
624 setOperationAction(ISD::VAARG, MVT::i8, Promote);
625 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
626 setOperationAction(ISD::VAARG, MVT::i16, Promote);
627 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
628 setOperationAction(ISD::VAARG, MVT::i32, Promote);
629 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
630 setOperationAction(ISD::VAARG, MVT::Other, Expand);
631 } else if (Subtarget.is32BitELFABI()) {
632 // VAARG is custom lowered with the 32-bit SVR4 ABI.
633 setOperationAction(ISD::VAARG, MVT::Other, Custom);
634 setOperationAction(ISD::VAARG, MVT::i64, Custom);
635 } else
636 setOperationAction(ISD::VAARG, MVT::Other, Expand);
637
638 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
639 if (Subtarget.is32BitELFABI())
640 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
641 else
642 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
643
644 // Use the default implementation.
645 setOperationAction(ISD::VAEND , MVT::Other, Expand);
646 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
647 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
648 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
649 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
650 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
651 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
654
655 // We want to custom lower some of our intrinsics.
661
662 // To handle counter-based loop conditions.
664
669
670 // Comparisons that require checking two conditions.
671 if (Subtarget.hasSPE()) {
676 }
689
692
693 if (Subtarget.has64BitSupport()) {
694 // They also have instructions for converting between i64 and fp.
703 // This is just the low 32 bits of a (signed) fp->i64 conversion.
704 // We cannot do this with Promote because i64 is not a legal type.
707
708 if (Subtarget.hasLFIWAX() || isPPC64) {
711 }
712 } else {
713 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
714 if (Subtarget.hasSPE()) {
717 } else {
720 }
721 }
722
723 // With the instructions enabled under FPCVT, we can do everything.
724 if (Subtarget.hasFPCVT()) {
725 if (Subtarget.has64BitSupport()) {
734 }
735
744 }
745
746 if (Subtarget.use64BitRegs()) {
747 // 64-bit PowerPC implementations can support i64 types directly
748 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
749 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
751 // 64-bit PowerPC wants to expand i128 shifts itself.
755 } else {
756 // 32-bit PowerPC wants to expand i64 shifts itself.
760 }
761
762 // PowerPC has better expansions for funnel shifts than the generic
763 // TargetLowering::expandFunnelShift.
764 if (Subtarget.has64BitSupport()) {
767 }
770
771 if (Subtarget.hasVSX()) {
772 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
773 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
774 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
775 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
778 }
779
780 if (Subtarget.hasAltivec()) {
781 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
786 }
787 // First set operation action for all vector types to expand. Then we
788 // will selectively turn on ones that can be effectively codegen'd.
790 // add/sub are legal for all supported vector VT's.
793
794 // For v2i64, these are only valid with P8Vector. This is corrected after
795 // the loop.
796 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
801 }
802 else {
807 }
808
809 if (Subtarget.hasVSX()) {
810 setOperationAction(ISD::FMAXNUM, VT, Legal);
811 setOperationAction(ISD::FMINNUM, VT, Legal);
812 }
813
814 // Vector instructions introduced in P8
815 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
818 }
819 else {
822 }
823
824 // Vector instructions introduced in P9
825 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
827 else
829
830 // We promote all shuffles to v16i8.
832 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
833
834 // We promote all non-typed operations to v4i32.
836 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
838 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
840 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
841 setOperationAction(ISD::LOAD , VT, Promote);
842 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
844 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
847 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
848 setOperationAction(ISD::STORE, VT, Promote);
849 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
850
851 // No other operations are legal.
859 setOperationAction(ISD::FNEG, VT, Expand);
860 setOperationAction(ISD::FSQRT, VT, Expand);
861 setOperationAction(ISD::FLOG, VT, Expand);
862 setOperationAction(ISD::FLOG10, VT, Expand);
863 setOperationAction(ISD::FLOG2, VT, Expand);
864 setOperationAction(ISD::FEXP, VT, Expand);
865 setOperationAction(ISD::FEXP2, VT, Expand);
866 setOperationAction(ISD::FSIN, VT, Expand);
867 setOperationAction(ISD::FCOS, VT, Expand);
868 setOperationAction(ISD::FABS, VT, Expand);
869 setOperationAction(ISD::FFLOOR, VT, Expand);
870 setOperationAction(ISD::FCEIL, VT, Expand);
871 setOperationAction(ISD::FTRUNC, VT, Expand);
872 setOperationAction(ISD::FRINT, VT, Expand);
873 setOperationAction(ISD::FLDEXP, VT, Expand);
874 setOperationAction(ISD::FNEARBYINT, VT, Expand);
885 setOperationAction(ISD::FPOW, VT, Expand);
890
891 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
892 setTruncStoreAction(VT, InnerVT, Expand);
895 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
896 }
897 }
899 if (!Subtarget.hasP8Vector()) {
900 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
901 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
902 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
903 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
904 }
905
906 // We can custom-expand all VECTOR_SHUFFLEs to VPERM; others we can handle
907 // with merges, splats, etc.
909
910 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
911 // are cheap, so handle them before they get expanded to scalars.
917
918 setOperationAction(ISD::AND , MVT::v4i32, Legal);
919 setOperationAction(ISD::OR , MVT::v4i32, Legal);
920 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
921 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
923 Subtarget.useCRBits() ? Legal : Expand);
924 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
933 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
934 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
935 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
936 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
937
938 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
939 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
940 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
941 if (Subtarget.hasAltivec())
942 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
944 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
945 if (Subtarget.hasP8Altivec())
946 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
947
948 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
949 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
950 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
951 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
952
953 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
954 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
955
956 if (Subtarget.hasVSX()) {
957 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
958 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
960 }
961
962 if (Subtarget.hasP8Altivec())
963 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
964 else
965 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
966
967 if (Subtarget.isISA3_1()) {
968 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
969 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
970 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
971 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
972 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
973 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
974 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
975 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
976 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
977 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
978 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
979 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
980 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
981 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
982 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
983 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
984 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
985 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
986 }
987
988 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
989 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
990
993 // LE is P8+/64-bit so direct moves are supported and these operations
994 // are legal. The custom transformation requires 64-bit since we need a
995 // pair of stores that will cover a 128-bit load for P10.
996 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1000 }
1001
1006
1007 // Altivec does not contain unordered floating-point compare instructions
1008 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1009 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1010 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1011 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1012
1013 if (Subtarget.hasVSX()) {
1016 if (Subtarget.hasP8Vector()) {
1019 }
1020 if (Subtarget.hasDirectMove() && isPPC64) {
1029 }
1031
1032 // The nearbyint variants are not allowed to raise the inexact exception
1033 // so we can only code-gen them with unsafe math.
1034 if (TM.Options.UnsafeFPMath) {
1035 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1036 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1037 }
1038
1039 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1041 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1044 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1045 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1046 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1047
1048 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1049 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1050 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1051 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1052 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1053
1054 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1055 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1056
1057 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1059
1060 // Share the Altivec comparison restrictions.
1061 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1062 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1063 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1064 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1065
1066 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1067 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1068
1070
1071 if (Subtarget.hasP8Vector())
1072 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1073
1074 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1075
1076 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1077 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1078 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1079
1080 if (Subtarget.hasP8Altivec()) {
1081 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1082 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1083 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1084
1085 // 128-bit shifts can be accomplished via 3 instructions for SHL and
1086 // SRL, but not for SRA because of the instructions available:
1087 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
1088 // worth doing.
1089 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1090 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1091 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1092
1093 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1094 }
1095 else {
1096 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1097 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1098 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1099
1100 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1101
1102 // VSX v2i64 only supports non-arithmetic operations.
1103 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1104 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1105 }
1106
1107 if (Subtarget.isISA3_1())
1108 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1109 else
1110 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1111
1112 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1113 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1114 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1115 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1116
1118
1127
1128 // Custom handling for partial vectors of integers converted to
1129 // floating point. We already have optimal handling for v2i32 through
1130 // the DAG combine, so those aren't necessary.
1147
1148 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1149 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1150 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1151 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1154
1157
1158 // Handle constrained floating-point operations on vectors.
1159 // The predicate is `hasVSX` because Altivec instructions do not raise
1160 // exceptions, but VSX vector instructions do.
1174
1188
1189 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1190 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1191
1192 for (MVT FPT : MVT::fp_valuetypes())
1193 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1194
1195 // Expand the SELECT to SELECT_CC
1197
1198 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1199 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1200
1201 // No implementation for these ops for PowerPC.
1202 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1203 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1204 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1205 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1206 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1207 setOperationAction(ISD::FREM, MVT::f128, Expand);
1208 }
1209
1210 if (Subtarget.hasP8Altivec()) {
1211 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1212 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1213 }
1214
1215 if (Subtarget.hasP9Vector()) {
1218
1219 // Test data class instructions store results in CR bits.
1220 if (Subtarget.useCRBits()) {
1225 }
1226
1227 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1228 // SRL, but not for SRA because of the instructions available:
1229 // VS{RL} and VS{RL}O.
1230 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1231 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1232 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1233
1234 setOperationAction(ISD::FADD, MVT::f128, Legal);
1235 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1236 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1237 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1238 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1239
1240 setOperationAction(ISD::FMA, MVT::f128, Legal);
1247
1248 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1249 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1250 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1251 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1252 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1253 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1254
1257 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1258
1259 // Handle constrained floating-point operations on fp128.
1275 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1276 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1277 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1278 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1279 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1280 } else if (Subtarget.hasVSX()) {
1281 setOperationAction(ISD::LOAD, MVT::f128, Promote);
1282 setOperationAction(ISD::STORE, MVT::f128, Promote);
1283
1284 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1285 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1286
1287 // Set FADD/FSUB as libcalls to avoid having the legalizer expand the
1288 // fp_to_uint and int_to_fp.
1291
1292 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1293 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1294 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1295 setOperationAction(ISD::FABS, MVT::f128, Expand);
1296 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1297 setOperationAction(ISD::FMA, MVT::f128, Expand);
1299
1300 // Expand the fp_extend if the target type is fp128.
1301 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1303
1304 // Expand the fp_round if the source type is fp128.
1305 for (MVT VT : {MVT::f32, MVT::f64}) {
1308 }
1309
1313 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1314
1315 // Lower the following f128 select_cc pattern:
1316 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1318
1319 // We need to handle f128 SELECT_CC with integer result type.
1321 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1322 }
1323
1324 if (Subtarget.hasP9Altivec()) {
1325 if (Subtarget.isISA3_1()) {
1330 } else {
1333 }
1341
1342 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1343 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1344 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1345 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1346 }
1347
1348 if (Subtarget.hasP10Vector()) {
1350 }
1351 }
1352
1353 if (Subtarget.pairedVectorMemops()) {
1354 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1355 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1356 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1357 }
1358 if (Subtarget.hasMMA()) {
1359 if (Subtarget.isISAFuture()) {
1360 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1361 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1362 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1363 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1364 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1365 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1366 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1367 } else {
1368 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1369 }
1370 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1371 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1373 }
1374
1375 if (Subtarget.has64BitSupport())
1376 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1377
1378 if (Subtarget.isISA3_1())
1379 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1380
1381 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1382
1383 if (!isPPC64) {
1384 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1385 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1386 }
1387
1389 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1390 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1392 }
1393
1395
1396 if (Subtarget.hasAltivec()) {
1397 // Altivec instructions set fields to all zeros or all ones.
1399 }
1400
1403 else if (isPPC64)
1405 else
1407
1408 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1409
1410 // We have target-specific dag combine patterns for the following nodes:
1413 if (Subtarget.hasFPCVT())
1415 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1416 if (Subtarget.useCRBits())
1417 setTargetDAGCombine(ISD::BRCOND);
1420
1422
1424
1425 if (Subtarget.useCRBits()) {
1427 }
1428
1429 // With 32 condition bits, we don't need to sink (and duplicate) compares
1430 // aggressively in CodeGenPrep.
1431 if (Subtarget.useCRBits()) {
1433 }
1434
1435 // TODO: The default entry number is set to 64. This stops most jump table
1436 // generation on PPC. But it is good for current PPC HWs because an indirect
1437 // branch via mtctr to the jump table may lead to bad branch prediction.
1438 // Re-evaluate this value on future HWs that can do better with mtctr.
1440
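// Illustrative example (not from the original source): with the limit at its
// default of 64 (see ppc-min-jump-table-entries above), a switch with fewer
// than 64 destinations is lowered as a compare/branch tree rather than an
// mtctr-based jump table.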
1442 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1443
1444 auto CPUDirective = Subtarget.getCPUDirective();
1445 switch (CPUDirective) {
1446 default: break;
1447 case PPC::DIR_970:
1448 case PPC::DIR_A2:
1449 case PPC::DIR_E500:
1450 case PPC::DIR_E500mc:
1451 case PPC::DIR_E5500:
1452 case PPC::DIR_PWR4:
1453 case PPC::DIR_PWR5:
1454 case PPC::DIR_PWR5X:
1455 case PPC::DIR_PWR6:
1456 case PPC::DIR_PWR6X:
1457 case PPC::DIR_PWR7:
1458 case PPC::DIR_PWR8:
1459 case PPC::DIR_PWR9:
1460 case PPC::DIR_PWR10:
1461 case PPC::DIR_PWR11:
1465 break;
1466 }
1467
1468 if (Subtarget.enableMachineScheduler())
1470 else
1472
1474
1475 // The Freescale cores do better with aggressive inlining of memcpy and
1476 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1477 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1478 MaxStoresPerMemset = 32;
1480 MaxStoresPerMemcpy = 32;
1484 } else if (CPUDirective == PPC::DIR_A2) {
1485 // The A2 also benefits from (very) aggressive inlining of memcpy and
1486 // friends. The overhead of the function call, even when warm, can be
1487 // over one hundred cycles.
1488 MaxStoresPerMemset = 128;
1489 MaxStoresPerMemcpy = 128;
1490 MaxStoresPerMemmove = 128;
1491 MaxLoadsPerMemcmp = 128;
1492 } else {
1495 }
1496
1497 // Enable generation of STXVP instructions by default for mcpu=future.
1498 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1499 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1500 DisableAutoPairedVecSt = false;
1501
1502 IsStrictFPEnabled = true;
1503
1504 // Let the subtarget (CPU) decide if a predictable select is more expensive
1505 // than the corresponding branch. This information is used in CGP to decide
1506 // when to convert selects into branches.
1507 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1508
1510}
1511
1512// *********************************** NOTE ************************************
1513// For selecting load and store instructions, the addressing modes are defined
1514// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1515// patterns to match the load and store instructions.
1516//
1517// The TD definitions for the addressing modes correspond to their respective
1518// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1519// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1520// address mode flags of a particular node. Afterwards, the computed address
1521// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1522// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1523// accordingly, based on the preferred addressing mode.
1524//
1525// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1526// MemOpFlags contains all the possible flags that can be used to compute the
1527// optimal addressing mode for load and store instructions.
1528// AddrMode contains all the possible load and store addressing modes available
1529// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1530//
1531// When adding new load and store instructions, it is possible that new address
1532// flags may need to be added into MemOpFlags, and a new addressing mode will
1533// need to be added to AddrMode. An entry of the new addressing mode (consisting
1534// of the minimal and main distinguishing address flags for the new load/store
1535// instructions) will need to be added into initializeAddrModeMap() below.
1536// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1537// need to be updated to account for selecting the optimal addressing mode.
1538// *****************************************************************************
1539/// Initialize the map that relates the different addressing modes of the load
1540/// and store instructions to a set of flags. This ensures the load/store
1541/// instruction is correctly matched during instruction selection.
1542void PPCTargetLowering::initializeAddrModeMap() {
1543 AddrModesMap[PPC::AM_DForm] = {
1544 // LWZ, STW
1549 // LBZ, LHZ, STB, STH
1554 // LHA
1559 // LFS, LFD, STFS, STFD
1564 };
1565 AddrModesMap[PPC::AM_DSForm] = {
1566 // LWA
1570 // LD, STD
1574 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1578 };
1579 AddrModesMap[PPC::AM_DQForm] = {
1580 // LXV, STXV
1584 };
1585 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1587 // TODO: Add mapping for quadword load/store.
1588}
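// Illustrative example (not from the original source; flag names assumed from
// the MemOpFlags enum in PPCISelLowering.h): a plain i32 load whose address is
// "register + 16-bit signed immediate" computes flags roughly like
//   MOF_WordInt | MOF_RPlusSImm16 | <subtarget flags>
// which getAddrModeForFlags() matches to the PPC::AM_DForm entry above, so
// SelectOptimalAddrMode() produces a D-Form (e.g. LWZ) base and displacement.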
1589
1590/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1591/// the desired ByVal argument alignment.
1592static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1593 if (MaxAlign == MaxMaxAlign)
1594 return;
1595 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1596 if (MaxMaxAlign >= 32 &&
1597 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1598 MaxAlign = Align(32);
1599 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1600 MaxAlign < 16)
1601 MaxAlign = Align(16);
1602 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1603 Align EltAlign;
1604 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1605 if (EltAlign > MaxAlign)
1606 MaxAlign = EltAlign;
1607 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1608 for (auto *EltTy : STy->elements()) {
1609 Align EltAlign;
1610 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1611 if (EltAlign > MaxAlign)
1612 MaxAlign = EltAlign;
1613 if (MaxAlign == MaxMaxAlign)
1614 break;
1615 }
1616 }
1617}
1618
1619/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1620/// function arguments in the caller parameter area.
1622 const DataLayout &DL) const {
1623 // 16-byte and wider vectors are passed on a 16-byte boundary.
1624 // The rest are aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
1625 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1626 if (Subtarget.hasAltivec())
1627 getMaxByValAlign(Ty, Alignment, Align(16));
1628 return Alignment;
1629}
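// Illustrative example (not from the original source): for a byval struct that
// contains a <4 x i32> member on an Altivec-capable target, getMaxByValAlign
// raises the alignment to 16, so the aggregate lands on a 16-byte boundary in
// the parameter area; a struct of plain integers keeps the default 8-byte
// (PPC64) or 4-byte (PPC32) alignment.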
1630
1632 return Subtarget.useSoftFloat();
1633}
1634
1636 return Subtarget.hasSPE();
1637}
1638
1640 return VT.isScalarInteger();
1641}
1642
1644 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1645 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1646 return false;
1647
1648 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1649 if (VTy->getScalarType()->isIntegerTy()) {
1650 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1651 if (ElemSizeInBits == 32) {
1652 Index = Subtarget.isLittleEndian() ? 2 : 1;
1653 return true;
1654 }
1655 if (ElemSizeInBits == 64) {
1656 Index = Subtarget.isLittleEndian() ? 1 : 0;
1657 return true;
1658 }
1659 }
1660 }
1661 return false;
1662}
1663
1664const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1665 switch ((PPCISD::NodeType)Opcode) {
1666 case PPCISD::FIRST_NUMBER: break;
1667 case PPCISD::FSEL: return "PPCISD::FSEL";
1668 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1669 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1670 case PPCISD::FCFID: return "PPCISD::FCFID";
1671 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1672 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1673 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1674 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1675 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1676 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1677 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1678 case PPCISD::FRE: return "PPCISD::FRE";
1679 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1680 case PPCISD::FTSQRT:
1681 return "PPCISD::FTSQRT";
1682 case PPCISD::FSQRT:
1683 return "PPCISD::FSQRT";
1684 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1685 case PPCISD::VPERM: return "PPCISD::VPERM";
1686 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1688 return "PPCISD::XXSPLTI_SP_TO_DP";
1690 return "PPCISD::XXSPLTI32DX";
1691 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1692 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1693 case PPCISD::XXPERM:
1694 return "PPCISD::XXPERM";
1695 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1696 case PPCISD::VSRQ:
1697 return "PPCISD::VSRQ";
1698 case PPCISD::CMPB: return "PPCISD::CMPB";
1699 case PPCISD::Hi: return "PPCISD::Hi";
1700 case PPCISD::Lo: return "PPCISD::Lo";
1701 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1702 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1703 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1704 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1705 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1706 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1707 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1708 case PPCISD::SRL: return "PPCISD::SRL";
1709 case PPCISD::SRA: return "PPCISD::SRA";
1710 case PPCISD::SHL: return "PPCISD::SHL";
1711 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1712 case PPCISD::CALL: return "PPCISD::CALL";
1713 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1714 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1715 case PPCISD::CALL_RM:
1716 return "PPCISD::CALL_RM";
1718 return "PPCISD::CALL_NOP_RM";
1720 return "PPCISD::CALL_NOTOC_RM";
1721 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1722 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1723 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1724 case PPCISD::BCTRL_RM:
1725 return "PPCISD::BCTRL_RM";
1727 return "PPCISD::BCTRL_LOAD_TOC_RM";
1728 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1729 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1730 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1731 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1732 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1733 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1734 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1735 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1736 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1737 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1739 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1741 return "PPCISD::ANDI_rec_1_EQ_BIT";
1743 return "PPCISD::ANDI_rec_1_GT_BIT";
1744 case PPCISD::VCMP: return "PPCISD::VCMP";
1745 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1746 case PPCISD::LBRX: return "PPCISD::LBRX";
1747 case PPCISD::STBRX: return "PPCISD::STBRX";
1748 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1749 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1750 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1751 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1752 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1753 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1754 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1755 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1756 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1758 return "PPCISD::ST_VSR_SCAL_INT";
1759 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1760 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1761 case PPCISD::BDZ: return "PPCISD::BDZ";
1762 case PPCISD::MFFS: return "PPCISD::MFFS";
1763 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1764 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1765 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1766 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1767 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1768 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1769 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1770 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1771 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1772 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1773 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1774 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1775 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1776 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1777 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1778 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1779 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1780 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1781 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1782 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1783 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1784 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1785 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1787 return "PPCISD::PADDI_DTPREL";
1788 case PPCISD::VADD_SPLAT:
1789 return "PPCISD::VADD_SPLAT";
1790 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1791 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1792 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1793 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1794 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1795 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1796 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1797 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1798 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1800 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1802 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1803 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1804 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1805 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1806 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1807 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1808 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1809 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1810 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1812 return "PPCISD::STRICT_FADDRTZ";
1814 return "PPCISD::STRICT_FCTIDZ";
1816 return "PPCISD::STRICT_FCTIWZ";
1818 return "PPCISD::STRICT_FCTIDUZ";
1820 return "PPCISD::STRICT_FCTIWUZ";
1822 return "PPCISD::STRICT_FCFID";
1824 return "PPCISD::STRICT_FCFIDU";
1826 return "PPCISD::STRICT_FCFIDS";
1828 return "PPCISD::STRICT_FCFIDUS";
1829 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1830 case PPCISD::STORE_COND:
1831 return "PPCISD::STORE_COND";
1832 case PPCISD::SETBC:
1833 return "PPCISD::SETBC";
1834 case PPCISD::SETBCR:
1835 return "PPCISD::SETBCR";
1836 case PPCISD::ADDC:
1837 return "PPCISD::ADDC";
1838 case PPCISD::ADDE:
1839 return "PPCISD::ADDE";
1840 case PPCISD::SUBC:
1841 return "PPCISD::SUBC";
1842 case PPCISD::SUBE:
1843 return "PPCISD::SUBE";
1844 }
1845 return nullptr;
1846}
1847
1849 EVT VT) const {
1850 if (!VT.isVector())
1851 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1852
1854}
1855
1857 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1858 return true;
1859}
1860
1861//===----------------------------------------------------------------------===//
1862// Node matching predicates, for use by the tblgen matching code.
1863//===----------------------------------------------------------------------===//
1864
1865/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1868 return CFP->getValueAPF().isZero();
1869 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1870 // Maybe this has already been legalized into the constant pool?
1871 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1872 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1873 return CFP->getValueAPF().isZero();
1874 }
1875 return false;
1876}
1877
1878/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1879/// true if Op is undef or if it matches the specified value.
1880static bool isConstantOrUndef(int Op, int Val) {
1881 return Op < 0 || Op == Val;
1882}
1883
1884/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1885/// VPKUHUM instruction.
1886/// The ShuffleKind distinguishes between big-endian operations with
1887/// two different inputs (0), either-endian operations with two identical
1888/// inputs (1), and little-endian operations with two different inputs (2).
1889/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1891 SelectionDAG &DAG) {
1892 bool IsLE = DAG.getDataLayout().isLittleEndian();
1893 if (ShuffleKind == 0) {
1894 if (IsLE)
1895 return false;
1896 for (unsigned i = 0; i != 16; ++i)
1897 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1898 return false;
1899 } else if (ShuffleKind == 2) {
1900 if (!IsLE)
1901 return false;
1902 for (unsigned i = 0; i != 16; ++i)
1903 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1904 return false;
1905 } else if (ShuffleKind == 1) {
1906 unsigned j = IsLE ? 0 : 1;
1907 for (unsigned i = 0; i != 8; ++i)
1908 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1909 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1910 return false;
1911 }
1912 return true;
1913}
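// Illustrative example (not from the original source): for the big-endian,
// two-input case (ShuffleKind == 0) the checks above accept exactly the mask
// that selects the odd byte of every halfword of the 32-byte concatenation:
//   {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}
// with undef permitted at any position.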
1914
1915/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1916/// VPKUWUM instruction.
1917/// The ShuffleKind distinguishes between big-endian operations with
1918/// two different inputs (0), either-endian operations with two identical
1919/// inputs (1), and little-endian operations with two different inputs (2).
1920/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1922 SelectionDAG &DAG) {
1923 bool IsLE = DAG.getDataLayout().isLittleEndian();
1924 if (ShuffleKind == 0) {
1925 if (IsLE)
1926 return false;
1927 for (unsigned i = 0; i != 16; i += 2)
1928 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1929 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1930 return false;
1931 } else if (ShuffleKind == 2) {
1932 if (!IsLE)
1933 return false;
1934 for (unsigned i = 0; i != 16; i += 2)
1935 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1936 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1937 return false;
1938 } else if (ShuffleKind == 1) {
1939 unsigned j = IsLE ? 0 : 2;
1940 for (unsigned i = 0; i != 8; i += 2)
1941 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1942 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1943 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1944 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1945 return false;
1946 }
1947 return true;
1948}
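// Illustrative example (not from the original source): the big-endian,
// two-input VPKUWUM case (ShuffleKind == 0) keeps the low halfword of each
// word of the concatenated inputs, i.e. the byte mask
//   {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}
// with undef permitted at any position.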
1949
1950/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1951/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1952/// current subtarget.
1953///
1954/// The ShuffleKind distinguishes between big-endian operations with
1955/// two different inputs (0), either-endian operations with two identical
1956/// inputs (1), and little-endian operations with two different inputs (2).
1957/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1959 SelectionDAG &DAG) {
1960 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1961 if (!Subtarget.hasP8Vector())
1962 return false;
1963
1964 bool IsLE = DAG.getDataLayout().isLittleEndian();
1965 if (ShuffleKind == 0) {
1966 if (IsLE)
1967 return false;
1968 for (unsigned i = 0; i != 16; i += 4)
1969 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1970 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1971 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1972 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1973 return false;
1974 } else if (ShuffleKind == 2) {
1975 if (!IsLE)
1976 return false;
1977 for (unsigned i = 0; i != 16; i += 4)
1978 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1979 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1980 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1981 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1982 return false;
1983 } else if (ShuffleKind == 1) {
1984 unsigned j = IsLE ? 0 : 4;
1985 for (unsigned i = 0; i != 8; i += 4)
1986 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1987 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1988 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1989 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1990 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1991 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1992 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1993 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1994 return false;
1995 }
1996 return true;
1997}
1998
1999/// isVMerge - Common function, used to match vmrg* shuffles.
2000///
2001static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2002 unsigned LHSStart, unsigned RHSStart) {
2003 if (N->getValueType(0) != MVT::v16i8)
2004 return false;
2005 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2006 "Unsupported merge size!");
2007
2008 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2009 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2010 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2011 LHSStart+j+i*UnitSize) ||
2012 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2013 RHSStart+j+i*UnitSize))
2014 return false;
2015 }
2016 return true;
2017}
2018
2019/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2020/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2021/// The ShuffleKind distinguishes between big-endian merges with two
2022/// different inputs (0), either-endian merges with two identical inputs (1),
2023/// and little-endian merges with two different inputs (2). For the latter,
2024/// the input operands are swapped (see PPCInstrAltivec.td).
2025bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2026 unsigned ShuffleKind, SelectionDAG &DAG) {
2027 if (DAG.getDataLayout().isLittleEndian()) {
2028 if (ShuffleKind == 1) // unary
2029 return isVMerge(N, UnitSize, 0, 0);
2030 else if (ShuffleKind == 2) // swapped
2031 return isVMerge(N, UnitSize, 0, 16);
2032 else
2033 return false;
2034 } else {
2035 if (ShuffleKind == 1) // unary
2036 return isVMerge(N, UnitSize, 8, 8);
2037 else if (ShuffleKind == 0) // normal
2038 return isVMerge(N, UnitSize, 8, 24);
2039 else
2040 return false;
2041 }
2042}
2043
2044/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2045/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2046/// The ShuffleKind distinguishes between big-endian merges with two
2047/// different inputs (0), either-endian merges with two identical inputs (1),
2048/// and little-endian merges with two different inputs (2). For the latter,
2049/// the input operands are swapped (see PPCInstrAltivec.td).
2050bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2051 unsigned ShuffleKind, SelectionDAG &DAG) {
2052 if (DAG.getDataLayout().isLittleEndian()) {
2053 if (ShuffleKind == 1) // unary
2054 return isVMerge(N, UnitSize, 8, 8);
2055 else if (ShuffleKind == 2) // swapped
2056 return isVMerge(N, UnitSize, 8, 24);
2057 else
2058 return false;
2059 } else {
2060 if (ShuffleKind == 1) // unary
2061 return isVMerge(N, UnitSize, 0, 0);
2062 else if (ShuffleKind == 0) // normal
2063 return isVMerge(N, UnitSize, 0, 16);
2064 else
2065 return false;
2066 }
2067}
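// Illustrative note (editorial, not part of the upstream file): with
// UnitSize == 1 on a big-endian target and two distinct inputs
// (ShuffleKind 0), isVMerge(N, 1, 0, 16) above accepts the classic vmrghb
// mask
//   {0,16, 1,17, 2,18, 3,19, 4,20, 5,21, 6,22, 7,23},
// which interleaves the high halves of the two source vectors.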
2068
2069/**
2070 * Common function used to match vmrgew and vmrgow shuffles
2071 *
2072 * The indexOffset determines whether to look for even or odd words in
2073 * the shuffle mask. This is based on the endianness of the target
2074 * machine.
2075 * - Little Endian:
2076 * - Use offset of 0 to check for odd elements
2077 * - Use offset of 4 to check for even elements
2078 * - Big Endian:
2079 * - Use offset of 0 to check for even elements
2080 * - Use offset of 4 to check for odd elements
2081 * A detailed description of the vector element ordering for little endian and
2082 * big endian can be found at
2083 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2084 * Targeting your applications - what little endian and big endian IBM XL C/C++
2085 * compiler differences mean to you
2086 *
2087 * The mask to the shuffle vector instruction specifies the indices of the
2088 * elements from the two input vectors to place in the result. The elements are
2089 * numbered in array-access order, starting with the first vector. These vectors
2090 * are always of type v16i8, thus each vector will contain 16 elements,
2091 * each 8 bits in size. More info on the shuffle vector can be found in the
2092 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2093 * Language Reference.
2094 *
2095 * The RHSStartValue indicates whether the same input vectors are used (unary)
2096 * or two different input vectors are used, based on the following:
2097 * - If the instruction uses the same vector for both inputs, the range of the
2098 * indices will be 0 to 15. In this case, the RHSStart value passed should
2099 * be 0.
2100 * - If the instruction has two different vectors then the range of the
2101 * indices will be 0 to 31. In this case, the RHSStart value passed should
2102 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2103 * to 31 specify elements in the second vector).
2104 *
2105 * \param[in] N The shuffle vector SD Node to analyze
2106 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2107 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2108 * vector to the shuffle_vector instruction
2109 * \return true iff this shuffle vector represents an even or odd word merge
2110 */
2111static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2112 unsigned RHSStartValue) {
2113 if (N->getValueType(0) != MVT::v16i8)
2114 return false;
2115
2116 for (unsigned i = 0; i < 2; ++i)
2117 for (unsigned j = 0; j < 4; ++j)
2118 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2119 i*RHSStartValue+j+IndexOffset) ||
2120 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2121 i*RHSStartValue+j+IndexOffset+8))
2122 return false;
2123 return true;
2124}
2125
2126/**
2127 * Determine if the specified shuffle mask is suitable for the vmrgew or
2128 * vmrgow instructions.
2129 *
2130 * \param[in] N The shuffle vector SD Node to analyze
2131 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2132 * \param[in] ShuffleKind Identify the type of merge:
2133 * - 0 = big-endian merge with two different inputs;
2134 * - 1 = either-endian merge with two identical inputs;
2135 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2136 * little-endian merges).
2137 * \param[in] DAG The current SelectionDAG
2138 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2139 */
2140bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2141 unsigned ShuffleKind, SelectionDAG &DAG) {
2142 if (DAG.getDataLayout().isLittleEndian()) {
2143 unsigned indexOffset = CheckEven ? 4 : 0;
2144 if (ShuffleKind == 1) // Unary
2145 return isVMerge(N, indexOffset, 0);
2146 else if (ShuffleKind == 2) // swapped
2147 return isVMerge(N, indexOffset, 16);
2148 else
2149 return false;
2150 }
2151 else {
2152 unsigned indexOffset = CheckEven ? 0 : 4;
2153 if (ShuffleKind == 1) // Unary
2154 return isVMerge(N, indexOffset, 0);
2155 else if (ShuffleKind == 0) // Normal
2156 return isVMerge(N, indexOffset, 16);
2157 else
2158 return false;
2159 }
2160 return false;
2161}
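// Illustrative note (editorial, not part of the upstream file): for a
// big-endian even-word merge (CheckEven == true, ShuffleKind 0) the helper is
// called as isVMerge(N, /*IndexOffset=*/0, /*RHSStartValue=*/16) and accepts
// the mask
//   {0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27},
// i.e. the even-numbered words taken alternately from the two inputs (vmrgew).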
2162
2163/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2164/// amount, otherwise return -1.
2165/// The ShuffleKind distinguishes between big-endian operations with two
2166/// different inputs (0), either-endian operations with two identical inputs
2167/// (1), and little-endian operations with two different inputs (2). For the
2168/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2169int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2170 SelectionDAG &DAG) {
2171 if (N->getValueType(0) != MVT::v16i8)
2172 return -1;
2173
2174 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2175
2176 // Find the first non-undef value in the shuffle mask.
2177 unsigned i;
2178 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2179 /*search*/;
2180
2181 if (i == 16) return -1; // all undef.
2182
2183 // Otherwise, check to see if the rest of the elements are consecutively
2184 // numbered from this value.
2185 unsigned ShiftAmt = SVOp->getMaskElt(i);
2186 if (ShiftAmt < i) return -1;
2187
2188 ShiftAmt -= i;
2189 bool isLE = DAG.getDataLayout().isLittleEndian();
2190
2191 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2192 // Check the rest of the elements to see if they are consecutive.
2193 for (++i; i != 16; ++i)
2194 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2195 return -1;
2196 } else if (ShuffleKind == 1) {
2197 // Check the rest of the elements to see if they are consecutive.
2198 for (++i; i != 16; ++i)
2199 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2200 return -1;
2201 } else
2202 return -1;
2203
2204 if (isLE)
2205 ShiftAmt = 16 - ShiftAmt;
2206
2207 return ShiftAmt;
2208}
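// Illustrative note (editorial, not part of the upstream file): on a
// big-endian target with two inputs (ShuffleKind 0), the mask {3,4,5,...,18}
// is recognized with ShiftAmt == 3, matching "vsldoi vD, vA, vB, 3", which
// selects bytes 3..18 of the concatenation vA:vB. On little-endian targets
// the returned amount becomes 16 - ShiftAmt because the operands are swapped.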
2209
2210/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2211/// specifies a splat of a single element that is suitable for input to
2212/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2213bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2214 EVT VT = N->getValueType(0);
2215 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2216 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2217
2218 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2219 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2220
2221 // The consecutive indices need to specify an element, not part of two
2222 // different elements. So abandon ship early if this isn't the case.
2223 if (N->getMaskElt(0) % EltSize != 0)
2224 return false;
2225
2226 // This is a splat operation if each element of the permute is the same, and
2227 // if the value doesn't reference the second vector.
2228 unsigned ElementBase = N->getMaskElt(0);
2229
2230 // FIXME: Handle UNDEF elements too!
2231 if (ElementBase >= 16)
2232 return false;
2233
2234 // Check that the indices are consecutive, in the case of a multi-byte element
2235 // splatted with a v16i8 mask.
2236 for (unsigned i = 1; i != EltSize; ++i)
2237 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2238 return false;
2239
2240 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2241 // An UNDEF element is a sequence of UNDEF bytes.
2242 if (N->getMaskElt(i) < 0) {
2243 for (unsigned j = 1; j != EltSize; ++j)
2244 if (N->getMaskElt(i + j) >= 0)
2245 return false;
2246 } else
2247 for (unsigned j = 0; j != EltSize; ++j)
2248 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2249 return false;
2250 }
2251 return true;
2252}
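// Illustrative note (editorial, not part of the upstream file): with
// EltSize == 4, the mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// is accepted as a splat of word element 1 of the first input; groups after
// the first may also be entirely undef (all four bytes negative) and still
// match.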
2253
2254/// Check that the mask is shuffling N byte elements. Within each N byte
2255/// element of the mask, the indices could be either in increasing or
2256/// decreasing order as long as they are consecutive.
2257/// \param[in] N the shuffle vector SD Node to analyze
2258/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2259/// Word/DoubleWord/QuadWord).
2260/// \param[in] StepLen the step between consecutive indices within each element:
2261/// 1 if the mask is in increasing order, -1 if it is in decreasing order.
2262/// \return true iff the mask is shuffling N byte elements.
2263static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2264 int StepLen) {
2265 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2266 "Unexpected element width.");
2267 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2268
2269 unsigned NumOfElem = 16 / Width;
2270 unsigned MaskVal[16]; // Width is never greater than 16
2271 for (unsigned i = 0; i < NumOfElem; ++i) {
2272 MaskVal[0] = N->getMaskElt(i * Width);
2273 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2274 return false;
2275 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2276 return false;
2277 }
2278
2279 for (unsigned int j = 1; j < Width; ++j) {
2280 MaskVal[j] = N->getMaskElt(i * Width + j);
2281 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2282 return false;
2283 }
2284 }
2285 }
2286
2287 return true;
2288}
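// Illustrative note (editorial, not part of the upstream file): with
// Width == 4 and StepLen == -1, the mask
//   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}
// passes this check (indices decrease within each word); this is the
// byte-reversal pattern matched by isXXBRWShuffleMask further below.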
2289
2290bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2291 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2292 if (!isNByteElemShuffleMask(N, 4, 1))
2293 return false;
2294
2295 // Now we look at mask elements 0,4,8,12
2296 unsigned M0 = N->getMaskElt(0) / 4;
2297 unsigned M1 = N->getMaskElt(4) / 4;
2298 unsigned M2 = N->getMaskElt(8) / 4;
2299 unsigned M3 = N->getMaskElt(12) / 4;
2300 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2301 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2302
2303 // Below, let H and L be arbitrary elements of the shuffle mask
2304 // where H is in the range [4,7] and L is in the range [0,3].
2305 // H, 1, 2, 3 or L, 5, 6, 7
2306 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2307 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2308 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2309 InsertAtByte = IsLE ? 12 : 0;
2310 Swap = M0 < 4;
2311 return true;
2312 }
2313 // 0, H, 2, 3 or 4, L, 6, 7
2314 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2315 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2316 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2317 InsertAtByte = IsLE ? 8 : 4;
2318 Swap = M1 < 4;
2319 return true;
2320 }
2321 // 0, 1, H, 3 or 4, 5, L, 7
2322 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2323 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2324 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2325 InsertAtByte = IsLE ? 4 : 8;
2326 Swap = M2 < 4;
2327 return true;
2328 }
2329 // 0, 1, 2, H or 4, 5, 6, L
2330 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2331 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2332 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2333 InsertAtByte = IsLE ? 0 : 12;
2334 Swap = M3 < 4;
2335 return true;
2336 }
2337
2338 // If both vector operands for the shuffle are the same vector, the mask will
2339 // contain only elements from the first one and the second one will be undef.
2340 if (N->getOperand(1).isUndef()) {
2341 ShiftElts = 0;
2342 Swap = true;
2343 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2344 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2345 InsertAtByte = IsLE ? 12 : 0;
2346 return true;
2347 }
2348 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2349 InsertAtByte = IsLE ? 8 : 4;
2350 return true;
2351 }
2352 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2353 InsertAtByte = IsLE ? 4 : 8;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2357 InsertAtByte = IsLE ? 0 : 12;
2358 return true;
2359 }
2360 }
2361
2362 return false;
2363}
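// Illustrative note (editorial, not part of the upstream file): on a
// little-endian target the byte mask
//   {16,17,18,19, 4,5,6,7, 8,9,10,11, 12,13,14,15}   (word mask {4, 1, 2, 3})
// matches the first "H, 1, 2, 3" case above and yields ShiftElts == 2,
// InsertAtByte == 12 and Swap == false.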
2364
2365bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2366 bool &Swap, bool IsLE) {
2367 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2368 // Ensure each byte index of the word is consecutive.
2369 if (!isNByteElemShuffleMask(N, 4, 1))
2370 return false;
2371
2372 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2373 unsigned M0 = N->getMaskElt(0) / 4;
2374 unsigned M1 = N->getMaskElt(4) / 4;
2375 unsigned M2 = N->getMaskElt(8) / 4;
2376 unsigned M3 = N->getMaskElt(12) / 4;
2377
2378 // If both vector operands for the shuffle are the same vector, the mask will
2379 // contain only elements from the first one and the second one will be undef.
2380 if (N->getOperand(1).isUndef()) {
2381 assert(M0 < 4 && "Indexing into an undef vector?");
2382 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2383 return false;
2384
2385 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2386 Swap = false;
2387 return true;
2388 }
2389
2390 // Ensure each word index of the ShuffleVector Mask is consecutive.
2391 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2392 return false;
2393
2394 if (IsLE) {
2395 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2396 // Input vectors don't need to be swapped if the leading element
2397 // of the result is one of the 3 left elements of the second vector
2398 // (or if there is no shift to be done at all).
2399 Swap = false;
2400 ShiftElts = (8 - M0) % 8;
2401 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2402 // Input vectors need to be swapped if the leading element
2403 // of the result is one of the 3 left elements of the first vector
2404 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2405 Swap = true;
2406 ShiftElts = (4 - M0) % 4;
2407 }
2408
2409 return true;
2410 } else { // BE
2411 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2412 // Input vectors don't need to be swapped if the leading element
2413 // of the result is one of the 4 elements of the first vector.
2414 Swap = false;
2415 ShiftElts = M0;
2416 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2417 // Input vectors need to be swapped if the leading element
2418 // of the result is one of the 4 elements of the right vector.
2419 Swap = true;
2420 ShiftElts = M0 - 4;
2421 }
2422
2423 return true;
2424 }
2425}
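// Illustrative note (editorial, not part of the upstream file): on a
// big-endian target the word-level mask {5, 6, 7, 0} (bytes {20..31, 0..3})
// is a valid xxsldwi pattern: the leading word comes from the second input,
// so the code above sets Swap == true and ShiftElts == M0 - 4 == 1.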
2426
2427static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2428 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2429
2430 if (!isNByteElemShuffleMask(N, Width, -1))
2431 return false;
2432
2433 for (int i = 0; i < 16; i += Width)
2434 if (N->getMaskElt(i) != i + Width - 1)
2435 return false;
2436
2437 return true;
2438}
2439
2440bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2441 return isXXBRShuffleMaskHelper(N, 2);
2442}
2443
2444bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2445 return isXXBRShuffleMaskHelper(N, 4);
2446}
2447
2448bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, 8);
2450}
2451
2452bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, 16);
2454}
2455
2456/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2457/// if the inputs to the instruction should be swapped and set \p DM to the
2458/// value for the immediate.
2459/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2460/// AND element 0 of the result comes from the first input (LE) or second input
2461/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2462/// \return true iff the given mask of shuffle node \p N is an XXPERMDI shuffle
2463/// mask.
2464bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2465 bool &Swap, bool IsLE) {
2466 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2467
2468 // Ensure each byte index of the double word is consecutive.
2469 if (!isNByteElemShuffleMask(N, 8, 1))
2470 return false;
2471
2472 unsigned M0 = N->getMaskElt(0) / 8;
2473 unsigned M1 = N->getMaskElt(8) / 8;
2474 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2475
2476 // If both vector operands for the shuffle are the same vector, the mask will
2477 // contain only elements from the first one and the second one will be undef.
2478 if (N->getOperand(1).isUndef()) {
2479 if ((M0 | M1) < 2) {
2480 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2481 Swap = false;
2482 return true;
2483 } else
2484 return false;
2485 }
2486
2487 if (IsLE) {
2488 if (M0 > 1 && M1 < 2) {
2489 Swap = false;
2490 } else if (M0 < 2 && M1 > 1) {
2491 M0 = (M0 + 2) % 4;
2492 M1 = (M1 + 2) % 4;
2493 Swap = true;
2494 } else
2495 return false;
2496
2497 // Note: if control flow comes here that means Swap is already set above
2498 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2499 return true;
2500 } else { // BE
2501 if (M0 < 2 && M1 > 1) {
2502 Swap = false;
2503 } else if (M0 > 1 && M1 < 2) {
2504 M0 = (M0 + 2) % 4;
2505 M1 = (M1 + 2) % 4;
2506 Swap = true;
2507 } else
2508 return false;
2509
2510 // Note: if control flow comes here that means Swap is already set above
2511 DM = (M0 << 1) + (M1 & 1);
2512 return true;
2513 }
2514}
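// Illustrative worked example (editorial, not part of the upstream file): on
// little-endian, the doubleword mask {M0, M1} = {3, 0} (bytes {24..31, 0..7})
// takes the "M0 > 1 && M1 < 2" path, so Swap stays false and
//   DM = (((~M1) & 1) << 1) + ((~M0) & 1) = (1 << 1) + 0 = 2.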
2515
2516
2517/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2518/// appropriate for PPC mnemonics (which have a big endian bias - namely
2519/// elements are counted from the left of the vector register).
2520unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2521 SelectionDAG &DAG) {
2522 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2523 assert(isSplatShuffleMask(SVOp, EltSize));
2524 EVT VT = SVOp->getValueType(0);
2525
2526 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2527 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2528 : SVOp->getMaskElt(0);
2529
2530 if (DAG.getDataLayout().isLittleEndian())
2531 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2532 else
2533 return SVOp->getMaskElt(0) / EltSize;
2534}
2535
2536/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2537/// by using a vspltis[bhw] instruction of the specified element size, return
2538/// the constant being splatted. The ByteSize field indicates the number of
2539/// bytes of each element [124] -> [bhw].
2540SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2541 SDValue OpVal;
2542
2543 // If ByteSize of the splat is bigger than the element size of the
2544 // build_vector, then we have a case where we are checking for a splat where
2545 // multiple elements of the buildvector are folded together into a single
2546 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2547 unsigned EltSize = 16/N->getNumOperands();
2548 if (EltSize < ByteSize) {
2549 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2550 SDValue UniquedVals[4];
2551 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2552
2553 // See if all of the elements in the buildvector agree across.
2554 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2555 if (N->getOperand(i).isUndef()) continue;
2556 // If the element isn't a constant, bail fully out.
2557 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2558
2559 if (!UniquedVals[i&(Multiple-1)].getNode())
2560 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2561 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2562 return SDValue(); // no match.
2563 }
2564
2565 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2566 // either constant or undef values that are identical for each chunk. See
2567 // if these chunks can form into a larger vspltis*.
2568
2569 // Check to see if all of the leading entries are either 0 or -1. If
2570 // neither, then this won't fit into the immediate field.
2571 bool LeadingZero = true;
2572 bool LeadingOnes = true;
2573 for (unsigned i = 0; i != Multiple-1; ++i) {
2574 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2575
2576 LeadingZero &= isNullConstant(UniquedVals[i]);
2577 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2578 }
2579 // Finally, check the least significant entry.
2580 if (LeadingZero) {
2581 if (!UniquedVals[Multiple-1].getNode())
2582 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2583 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2584 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2585 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2586 }
2587 if (LeadingOnes) {
2588 if (!UniquedVals[Multiple-1].getNode())
2589 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2590 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2591 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2592 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2593 }
2594
2595 return SDValue();
2596 }
2597
2598 // Check to see if this buildvec has a single non-undef value in its elements.
2599 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2600 if (N->getOperand(i).isUndef()) continue;
2601 if (!OpVal.getNode())
2602 OpVal = N->getOperand(i);
2603 else if (OpVal != N->getOperand(i))
2604 return SDValue();
2605 }
2606
2607 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2608
2609 unsigned ValSizeInBytes = EltSize;
2610 uint64_t Value = 0;
2611 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2612 Value = CN->getZExtValue();
2613 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2614 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2615 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2616 }
2617
2618 // If the splat value is larger than the element value, then we can never do
2619 // this splat. The only case that we could fit the replicated bits into our
2620 // immediate field for would be zero, and we prefer to use vxor for it.
2621 if (ValSizeInBytes < ByteSize) return SDValue();
2622
2623 // If the element value is larger than the splat value, check if it consists
2624 // of a repeated bit pattern of size ByteSize.
2625 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2626 return SDValue();
2627
2628 // Properly sign extend the value.
2629 int MaskVal = SignExtend32(Value, ByteSize * 8);
2630
2631 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2632 if (MaskVal == 0) return SDValue();
2633
2634 // Finally, if this value fits in a 5 bit sext field, return it
2635 if (SignExtend32<5>(MaskVal) == MaskVal)
2636 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2637 return SDValue();
2638}
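// Illustrative worked example (editorial, not part of the upstream file): for
// a v16i8 build_vector of the repeating byte pattern {0,1,0,1,...} queried
// with ByteSize == 2, EltSize is 1 < ByteSize, Multiple is 2, UniquedVals
// becomes {0, 1}, the leading entry is zero, and the routine returns the
// constant 1, i.e. the pattern can be materialized with a halfword
// splat-immediate of 1 (the "vspltish 1" case mentioned above).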
2639
2640//===----------------------------------------------------------------------===//
2641// Addressing Mode Selection
2642//===----------------------------------------------------------------------===//
2643
2644/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2645/// or 64-bit immediate, and if the value can be accurately represented as a
2646/// sign extension from a 16-bit value. If so, this returns true and the
2647/// immediate.
2648bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2649 if (!isa<ConstantSDNode>(N))
2650 return false;
2651
2652 Imm = (int16_t)N->getAsZExtVal();
2653 if (N->getValueType(0) == MVT::i32)
2654 return Imm == (int32_t)N->getAsZExtVal();
2655 else
2656 return Imm == (int64_t)N->getAsZExtVal();
2657}
2658bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2659 return isIntS16Immediate(Op.getNode(), Imm);
2660}
2661
2662/// Used when computing address flags for selecting loads and stores.
2663/// If we have an OR, check if the LHS and RHS are provably disjoint.
2664/// An OR of two provably disjoint values is equivalent to an ADD.
2665/// Most PPC load/store instructions compute the effective address as a sum,
2666/// so doing this conversion is useful.
2667static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2668 if (N.getOpcode() != ISD::OR)
2669 return false;
2670 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2671 if (!LHSKnown.Zero.getBoolValue())
2672 return false;
2673 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2674 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2675}
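// Illustrative worked example (editorial, not part of the upstream file): for
// N = (or (and %x, 0xFFF0), 12) on i32, the LHS has known-zero bits
// 0xFFFF000F and the constant RHS has known-zero bits 0xFFFFFFF3; their union
// covers all 32 bits, so the OR can safely be treated as an ADD when forming
// addresses.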
2676
2677/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2678/// be represented as an indexed [r+r] operation.
2679bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2680 SDValue &Index,
2681 SelectionDAG &DAG) const {
2682 for (SDNode *U : N->users()) {
2683 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2684 if (Memop->getMemoryVT() == MVT::f64) {
2685 Base = N.getOperand(0);
2686 Index = N.getOperand(1);
2687 return true;
2688 }
2689 }
2690 }
2691 return false;
2692}
2693
2694/// isIntS34Immediate - This method tests whether the value of the given node can be
2695/// accurately represented as a sign extension from a 34-bit value. If so,
2696/// this returns true and the immediate.
2697bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2698 if (!isa<ConstantSDNode>(N))
2699 return false;
2700
2701 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2702 return isInt<34>(Imm);
2703}
2705 return isIntS34Immediate(Op.getNode(), Imm);
2706}
2707
2708/// SelectAddressRegReg - Given the specified address, check to see if it
2709/// can be represented as an indexed [r+r] operation. Returns false if it
2710/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2711/// non-zero and N can be represented by a base register plus a signed 16-bit
2712/// displacement, make a more precise judgement by checking (displacement % \p
2713/// EncodingAlignment).
2714bool PPCTargetLowering::SelectAddressRegReg(
2715 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2716 MaybeAlign EncodingAlignment) const {
2717 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2718 // a [pc+imm].
2719 if (SelectAddressPCRel(N, Base))
2720 return false;
2721
2722 int16_t Imm = 0;
2723 if (N.getOpcode() == ISD::ADD) {
2724 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2725 // SPE load/store can only handle 8-bit offsets.
2726 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2727 return true;
2728 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2729 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2730 return false; // r+i
2731 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2732 return false; // r+i
2733
2734 Base = N.getOperand(0);
2735 Index = N.getOperand(1);
2736 return true;
2737 } else if (N.getOpcode() == ISD::OR) {
2738 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2739 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2740 return false; // r+i can fold it if we can.
2741
2742 // If this is an or of disjoint bitfields, we can codegen this as an add
2743 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2744 // disjoint.
2745 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2746
2747 if (LHSKnown.Zero.getBoolValue()) {
2748 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2749 // If all of the bits are known zero on the LHS or RHS, the add won't
2750 // carry.
2751 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2752 Base = N.getOperand(0);
2753 Index = N.getOperand(1);
2754 return true;
2755 }
2756 }
2757 }
2758
2759 return false;
2760}
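// Illustrative note (editorial, not part of the upstream file): for an
// address of the form (add %r, 20) with EncodingAlignment == 4 (the DS-form
// case used by ld/std), the routine above returns false so the displacement
// can be encoded directly as [r+imm]; for (add %r, 2) the displacement is not
// a multiple of 4, so the indexed [r+r] form is chosen instead and the
// constant ends up materialized into the index register.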
2761
2762// If we happen to be doing an i64 load or store into a stack slot that has
2763// less than a 4-byte alignment, then the frame-index elimination may need to
2764// use an indexed load or store instruction (because the offset may not be a
2765// multiple of 4). The extra register needed to hold the offset comes from the
2766// register scavenger, and it is possible that the scavenger will need to use
2767// an emergency spill slot. As a result, we need to make sure that a spill slot
2768// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2769// stack slot.
2770static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2771 // FIXME: This does not handle the LWA case.
2772 if (VT != MVT::i64)
2773 return;
2774
2775 // NOTE: We'll exclude negative FIs here, which come from argument
2776 // lowering, because there are no known test cases triggering this problem
2777 // using packed structures (or similar). We can remove this exclusion if
2778 // we find such a test case. The reason why this is so test-case driven is
2779 // because this entire 'fixup' is only to prevent crashes (from the
2780 // register scavenger) on not-really-valid inputs. For example, if we have:
2781 // %a = alloca i1
2782 // %b = bitcast i1* %a to i64*
2783 // store i64 0, i64* %b
2784 // then the store should really be marked as 'align 1', but is not. If it
2785 // were marked as 'align 1' then the indexed form would have been
2786 // instruction-selected initially, and the problem this 'fixup' is preventing
2787 // won't happen regardless.
2788 if (FrameIdx < 0)
2789 return;
2790
2791 MachineFunction &MF = DAG.getMachineFunction();
2792 MachineFrameInfo &MFI = MF.getFrameInfo();
2793
2794 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2795 return;
2796
2797 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2798 FuncInfo->setHasNonRISpills();
2799}
2800
2801/// Returns true if the address N can be represented by a base register plus
2802/// a signed 16-bit displacement [r+imm], and if it is not better
2803/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2804/// displacements that are multiples of that value.
2805bool PPCTargetLowering::SelectAddressRegImm(
2806 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2807 MaybeAlign EncodingAlignment) const {
2808 // FIXME dl should come from parent load or store, not from address
2809 SDLoc dl(N);
2810
2811 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2812 // a [pc+imm].
2813 if (SelectAddressPCRel(N, Base))
2814 return false;
2815
2816 // If this can be more profitably realized as r+r, fail.
2817 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2818 return false;
2819
2820 if (N.getOpcode() == ISD::ADD) {
2821 int16_t imm = 0;
2822 if (isIntS16Immediate(N.getOperand(1), imm) &&
2823 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2824 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2825 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2826 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2827 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2828 } else {
2829 Base = N.getOperand(0);
2830 }
2831 return true; // [r+i]
2832 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2833 // Match LOAD (ADD (X, Lo(G))).
2834 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2835 "Cannot handle constant offsets yet!");
2836 Disp = N.getOperand(1).getOperand(0); // The global address.
2837 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2838 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2839 Disp.getOpcode() == ISD::TargetConstantPool ||
2840 Disp.getOpcode() == ISD::TargetJumpTable);
2841 Base = N.getOperand(0);
2842 return true; // [&g+r]
2843 }
2844 } else if (N.getOpcode() == ISD::OR) {
2845 int16_t imm = 0;
2846 if (isIntS16Immediate(N.getOperand(1), imm) &&
2847 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2848 // If this is an or of disjoint bitfields, we can codegen this as an add
2849 // (for better address arithmetic) if the LHS and RHS of the OR are
2850 // provably disjoint.
2851 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2852
2853 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2854 // If all of the bits are known zero on the LHS or RHS, the add won't
2855 // carry.
2856 if (FrameIndexSDNode *FI =
2857 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2858 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2859 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2860 } else {
2861 Base = N.getOperand(0);
2862 }
2863 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2864 return true;
2865 }
2866 }
2867 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2868 // Loading from a constant address.
2869
2870 // If this address fits entirely in a 16-bit sext immediate field, codegen
2871 // this as "d, 0"
2872 int16_t Imm;
2873 if (isIntS16Immediate(CN, Imm) &&
2874 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2875 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2876 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2877 CN->getValueType(0));
2878 return true;
2879 }
2880
2881 // Handle 32-bit sext immediates with LIS + addr mode.
2882 if ((CN->getValueType(0) == MVT::i32 ||
2883 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2884 (!EncodingAlignment ||
2885 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2886 int Addr = (int)CN->getZExtValue();
2887
2888 // Otherwise, break this down into an LIS + disp.
2889 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2890
2891 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2892 MVT::i32);
2893 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2894 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2895 return true;
2896 }
2897 }
2898
2899 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2900 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2901 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2902 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2903 } else
2904 Base = N;
2905 return true; // [r+0]
2906}
2907
2908/// Similar to the 16-bit case but for instructions that take a 34-bit
2909/// displacement field (prefixed loads/stores).
2910bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2911 SDValue &Base,
2912 SelectionDAG &DAG) const {
2913 // Only on 64-bit targets.
2914 if (N.getValueType() != MVT::i64)
2915 return false;
2916
2917 SDLoc dl(N);
2918 int64_t Imm = 0;
2919
2920 if (N.getOpcode() == ISD::ADD) {
2921 if (!isIntS34Immediate(N.getOperand(1), Imm))
2922 return false;
2923 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2924 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2925 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2926 else
2927 Base = N.getOperand(0);
2928 return true;
2929 }
2930
2931 if (N.getOpcode() == ISD::OR) {
2932 if (!isIntS34Immediate(N.getOperand(1), Imm))
2933 return false;
2934 // If this is an or of disjoint bitfields, we can codegen this as an add
2935 // (for better address arithmetic) if the LHS and RHS of the OR are
2936 // provably disjoint.
2937 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2938 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2939 return false;
2940 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2941 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2942 else
2943 Base = N.getOperand(0);
2944 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2945 return true;
2946 }
2947
2948 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2949 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2950 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2951 return true;
2952 }
2953
2954 return false;
2955}
2956
2957/// SelectAddressRegRegOnly - Given the specified address, force it to be
2958/// represented as an indexed [r+r] operation.
2959bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2960 SDValue &Index,
2961 SelectionDAG &DAG) const {
2962 // Check to see if we can easily represent this as an [r+r] address. This
2963 // will fail if it thinks that the address is more profitably represented as
2964 // reg+imm, e.g. where imm = 0.
2965 if (SelectAddressRegReg(N, Base, Index, DAG))
2966 return true;
2967
2968 // If the address is the result of an add, we will utilize the fact that the
2969 // address calculation includes an implicit add. However, we can reduce
2970 // register pressure if we do not materialize a constant just for use as the
2971 // index register. We only get rid of the add if it is not an add of a
2972 // value and a 16-bit signed constant and both have a single use.
2973 int16_t imm = 0;
2974 if (N.getOpcode() == ISD::ADD &&
2975 (!isIntS16Immediate(N.getOperand(1), imm) ||
2976 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2977 Base = N.getOperand(0);
2978 Index = N.getOperand(1);
2979 return true;
2980 }
2981
2982 // Otherwise, do it the hard way, using R0 as the base register.
2983 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2984 N.getValueType());
2985 Index = N;
2986 return true;
2987}
2988
2989template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2990 Ty *PCRelCand = dyn_cast<Ty>(N);
2991 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2992}
2993
2994/// Returns true if this address is a PC Relative address.
2995/// A PC Relative address is either marked with the flag PPCII::MO_PCREL_FLAG
2996/// or has the node opcode PPCISD::MAT_PCREL_ADDR.
2997bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2998 // This is a materialize PC Relative node. Always select this as PC Relative.
2999 Base = N;
3000 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3001 return true;
3002 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3003 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3004 isValidPCRelNode<JumpTableSDNode>(N) ||
3005 isValidPCRelNode<BlockAddressSDNode>(N))
3006 return true;
3007 return false;
3008}
3009
3010/// Returns true if we should use a direct load into vector instruction
3011/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3012static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3013
3014 // If there are any other uses other than scalar to vector, then we should
3015 // keep it as a scalar load -> direct move pattern to prevent multiple
3016 // loads.
3017 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3018 if (!LD)
3019 return false;
3020
3021 EVT MemVT = LD->getMemoryVT();
3022 if (!MemVT.isSimple())
3023 return false;
3024 switch(MemVT.getSimpleVT().SimpleTy) {
3025 case MVT::i64:
3026 break;
3027 case MVT::i32:
3028 if (!ST.hasP8Vector())
3029 return false;
3030 break;
3031 case MVT::i16:
3032 case MVT::i8:
3033 if (!ST.hasP9Vector())
3034 return false;
3035 break;
3036 default:
3037 return false;
3038 }
3039
3040 SDValue LoadedVal(N, 0);
3041 if (!LoadedVal.hasOneUse())
3042 return false;
3043
3044 for (SDUse &Use : LD->uses())
3045 if (Use.getResNo() == 0 &&
3046 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3047 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3048 return false;
3049
3050 return true;
3051}
3052
3053/// getPreIndexedAddressParts - returns true by value, base pointer and
3054/// offset pointer and addressing mode by reference if the node's address
3055/// can be legally represented as a pre-indexed load / store address.
3056bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3057 SDValue &Offset,
3058 ISD::MemIndexedMode &AM,
3059 SelectionDAG &DAG) const {
3060 if (DisablePPCPreinc) return false;
3061
3062 bool isLoad = true;
3063 SDValue Ptr;
3064 EVT VT;
3065 Align Alignment;
3066 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3067 Ptr = LD->getBasePtr();
3068 VT = LD->getMemoryVT();
3069 Alignment = LD->getAlign();
3070 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3071 Ptr = ST->getBasePtr();
3072 VT = ST->getMemoryVT();
3073 Alignment = ST->getAlign();
3074 isLoad = false;
3075 } else
3076 return false;
3077
3078 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3079 // instructions because we can fold these into a more efficient instruction
3080 // instead, (such as LXSD).
3081 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3082 return false;
3083 }
3084
3085 // PowerPC doesn't have preinc load/store instructions for vectors
3086 if (VT.isVector())
3087 return false;
3088
3089 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3090 // Common code will reject creating a pre-inc form if the base pointer
3091 // is a frame index, or if N is a store and the base pointer is either
3092 // the same as or a predecessor of the value being stored. Check for
3093 // those situations here, and try with swapped Base/Offset instead.
3094 bool Swap = false;
3095
3096 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3097 Swap = true;
3098 else if (!isLoad) {
3099 SDValue Val = cast<StoreSDNode>(N)->getValue();
3100 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3101 Swap = true;
3102 }
3103
3104 if (Swap)
3105 std::swap(Base, Offset);
3106
3107 AM = ISD::PRE_INC;
3108 return true;
3109 }
3110
3111 // LDU/STU can only handle immediates that are a multiple of 4.
3112 if (VT != MVT::i64) {
3113 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3114 return false;
3115 } else {
3116 // LDU/STU need an address with at least 4-byte alignment.
3117 if (Alignment < Align(4))
3118 return false;
3119
3120 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3121 return false;
3122 }
3123
3124 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3125 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3126 // sext i32 to i64 when addr mode is r+i.
3127 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3128 LD->getExtensionType() == ISD::SEXTLOAD &&
3129 isa<ConstantSDNode>(Offset))
3130 return false;
3131 }
3132
3133 AM = ISD::PRE_INC;
3134 return true;
3135}
3136
3137//===----------------------------------------------------------------------===//
3138// LowerOperation implementation
3139//===----------------------------------------------------------------------===//
3140
3141/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3142/// and LoOpFlags to the target MO flags.
3143static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3144 unsigned &HiOpFlags, unsigned &LoOpFlags,
3145 const GlobalValue *GV = nullptr) {
3146 HiOpFlags = PPCII::MO_HA;
3147 LoOpFlags = PPCII::MO_LO;
3148
3149 // Don't use the pic base if not in PIC relocation model.
3150 if (IsPIC) {
3151 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3152 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3153 }
3154}
3155
3156static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3157 SelectionDAG &DAG) {
3158 SDLoc DL(HiPart);
3159 EVT PtrVT = HiPart.getValueType();
3160 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3161
3162 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3163 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3164
3165 // With PIC, the first instruction is actually "GR+hi(&G)".
3166 if (isPIC)
3167 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3168 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3169
3170 // Generate non-pic code that has direct accesses to the constant pool.
3171 // The address of the global is just (hi(&g)+lo(&g)).
3172 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3173}
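// Illustrative note (editorial, not part of the upstream file): for non-PIC
// code the Hi/Lo pair built above typically becomes the familiar
// two-instruction sequence
//   lis  rX, sym@ha
//   addi rX, rX, sym@l
// while in PIC mode the high part is added to the PIC base register instead.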
3174
3175static void setUsesTOCBasePtr(MachineFunction &MF) {
3176 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3177 FuncInfo->setUsesTOCBasePtr();
3178}
3179
3179
3180static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3181 setUsesTOCBasePtr(DAG.getMachineFunction());
3182}
3183
3184SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3185 SDValue GA) const {
3186 EVT VT = Subtarget.getScalarIntVT();
3187 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3188 : Subtarget.isAIXABI()
3189 ? DAG.getRegister(PPC::R2, VT)
3190 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3191 SDValue Ops[] = { GA, Reg };
3192 return DAG.getMemIntrinsicNode(
3193 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3196}
3197
3198SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3199 SelectionDAG &DAG) const {
3200 EVT PtrVT = Op.getValueType();
3201 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3202 const Constant *C = CP->getConstVal();
3203
3204 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3205 // The actual address of the GlobalValue is stored in the TOC.
3206 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3207 if (Subtarget.isUsingPCRelativeCalls()) {
3208 SDLoc DL(CP);
3209 EVT Ty = getPointerTy(DAG.getDataLayout());
3210 SDValue ConstPool = DAG.getTargetConstantPool(
3211 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3212 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3213 }
3214 setUsesTOCBasePtr(DAG);
3215 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3216 return getTOCEntry(DAG, SDLoc(CP), GA);
3217 }
3218
3219 unsigned MOHiFlag, MOLoFlag;
3220 bool IsPIC = isPositionIndependent();
3221 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3222
3223 if (IsPIC && Subtarget.isSVR4ABI()) {
3224 SDValue GA =
3225 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3226 return getTOCEntry(DAG, SDLoc(CP), GA);
3227 }
3228
3229 SDValue CPIHi =
3230 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3231 SDValue CPILo =
3232 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3233 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3234}
3235
3236// For 64-bit PowerPC, prefer the more compact relative encodings.
3237// This trades 32 bits per jump table entry for one or two instructions
3238// on the jump site.
3245
3246bool PPCTargetLowering::isJumpTableRelative() const {
3247 if (UseAbsoluteJumpTables)
3248 return false;
3249 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3250 return true;
3251 return TargetLowering::isJumpTableRelative();
3252}
3253
3254SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3255 SelectionDAG &DAG) const {
3256 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3257 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3258
3259 switch (getTargetMachine().getCodeModel()) {
3260 case CodeModel::Small:
3261 case CodeModel::Medium:
3262 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3263 default:
3264 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3265 getPointerTy(DAG.getDataLayout()));
3266 }
3267}
3268
3269const MCExpr *
3270PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3271 unsigned JTI,
3272 MCContext &Ctx) const {
3273 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3274 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3275
3276 switch (getTargetMachine().getCodeModel()) {
3277 case CodeModel::Small:
3278 case CodeModel::Medium:
3279 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3280 default:
3281 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3282 }
3283}
3284
3285SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3286 EVT PtrVT = Op.getValueType();
3287 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3288
3289 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3290 if (Subtarget.isUsingPCRelativeCalls()) {
3291 SDLoc DL(JT);
3292 EVT Ty = getPointerTy(DAG.getDataLayout());
3293 SDValue GA =
3294 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3295 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3296 return MatAddr;
3297 }
3298
3299 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3300 // The actual address of the GlobalValue is stored in the TOC.
3301 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3302 setUsesTOCBasePtr(DAG);
3303 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3304 return getTOCEntry(DAG, SDLoc(JT), GA);
3305 }
3306
3307 unsigned MOHiFlag, MOLoFlag;
3308 bool IsPIC = isPositionIndependent();
3309 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3310
3311 if (IsPIC && Subtarget.isSVR4ABI()) {
3312 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3313 PPCII::MO_PIC_FLAG);
3314 return getTOCEntry(DAG, SDLoc(GA), GA);
3315 }
3316
3317 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3318 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3319 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3320}
3321
3322SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3323 SelectionDAG &DAG) const {
3324 EVT PtrVT = Op.getValueType();
3325 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3326 const BlockAddress *BA = BASDN->getBlockAddress();
3327
3328 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3329 if (Subtarget.isUsingPCRelativeCalls()) {
3330 SDLoc DL(BASDN);
3331 EVT Ty = getPointerTy(DAG.getDataLayout());
3332 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3333 PPCII::MO_PCREL_FLAG);
3334 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3335 return MatAddr;
3336 }
3337
3338 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3339 // The actual BlockAddress is stored in the TOC.
3340 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3341 setUsesTOCBasePtr(DAG);
3342 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3343 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3344 }
3345
3346 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3347 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3348 return getTOCEntry(
3349 DAG, SDLoc(BASDN),
3350 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3351
3352 unsigned MOHiFlag, MOLoFlag;
3353 bool IsPIC = isPositionIndependent();
3354 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3355 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3356 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3357 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3358}
3359
3360SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3361 SelectionDAG &DAG) const {
3362 if (Subtarget.isAIXABI())
3363 return LowerGlobalTLSAddressAIX(Op, DAG);
3364
3365 return LowerGlobalTLSAddressLinux(Op, DAG);
3366}
3367
3368/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3369/// and then apply the update.
3370static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3371 SelectionDAG &DAG,
3372 const TargetMachine &TM) {
3373 // Initialize TLS model opt setting lazily:
3374 // (1) Use initial-exec for single TLS var references within current function.
3375 // (2) Use local-dynamic for multiple TLS var references within current
3376 // function.
3377 PPCFunctionInfo *FuncInfo =
3378 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3379 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3380 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3381 // Iterate over all instructions within current function, collect all TLS
3382 // global variables (global variables taken as the first parameter to
3383 // Intrinsic::threadlocal_address).
3384 const Function &Func = DAG.getMachineFunction().getFunction();
3385 for (const BasicBlock &BB : Func)
3386 for (const Instruction &I : BB)
3387 if (I.getOpcode() == Instruction::Call)
3388 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3389 if (Function *CF = CI->getCalledFunction())
3390 if (CF->isDeclaration() &&
3391 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3392 if (const GlobalValue *GV =
3393 dyn_cast<GlobalValue>(I.getOperand(0))) {
3394 TLSModel::Model GVModel = TM.getTLSModel(GV);
3395 if (GVModel == TLSModel::LocalDynamic)
3396 TLSGV.insert(GV);
3397 }
3398
3399 unsigned TLSGVCnt = TLSGV.size();
3400 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3401 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3402 FuncInfo->setAIXFuncUseTLSIEForLD();
3403 FuncInfo->setAIXFuncTLSModelOptInitDone();
3404 }
3405
3406 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3407 LLVM_DEBUG(
3408 dbgs() << DAG.getMachineFunction().getName()
3409 << " function is using the TLS-IE model for TLS-LD access.\n");
3410 Model = TLSModel::InitialExec;
3411 }
3412}
3413
3414SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3415 SelectionDAG &DAG) const {
3416 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3417
3418 if (DAG.getTarget().useEmulatedTLS())
3419 report_fatal_error("Emulated TLS is not yet supported on AIX");
3420
3421 SDLoc dl(GA);
3422 const GlobalValue *GV = GA->getGlobal();
3423 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3424 bool Is64Bit = Subtarget.isPPC64();
3425 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3426
3427 // Apply update to the TLS model.
3428 if (Subtarget.hasAIXShLibTLSModelOpt())
3429 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3430
3431 // TLS variables are accessed through TOC entries.
3432 // To support this, set the DAG to use the TOC base pointer.
3433 setUsesTOCBasePtr(DAG);
3434
3435 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3436
3437 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3438 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3439 bool HasAIXSmallTLSGlobalAttr = false;
3440 SDValue VariableOffsetTGA =
3441 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3442 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3443 SDValue TLSReg;
3444
3445 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3446 if (GVar->hasAttribute("aix-small-tls"))
3447 HasAIXSmallTLSGlobalAttr = true;
3448
3449 if (Is64Bit) {
3450 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3451 // involves a load of the variable offset (from the TOC), followed by an
3452 // add of the loaded variable offset to R13 (the thread pointer).
3453 // This code sequence looks like:
3454 // ld reg1,var[TC](2)
3455 // add reg2, reg1, r13 // r13 contains the thread pointer
3456 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3457
3458 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3459 // global variable attribute, produce a faster access sequence for
3460 // local-exec TLS variables where the offset from the TLS base is encoded
3461 // as an immediate operand.
3462 //
3463 // We only utilize the faster local-exec access sequence when the TLS
3464 // variable has a size within the policy limit. We treat types that are
3465 // not sized or are empty as being over the policy size limit.
3466 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3467 IsTLSLocalExecModel) {
3468 Type *GVType = GV->getValueType();
3469 if (GVType->isSized() && !GVType->isEmptyTy() &&
3470 GV->getDataLayout().getTypeAllocSize(GVType) <=
3471 AIXSmallTlsPolicySizeLimit)
3472 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3473 }
3474 } else {
3475 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3476 // involves loading the variable offset from the TOC, generating a call to
3477 // .__get_tpointer to get the thread pointer (which will be in R3), and
3478 // adding the two together:
3479 // lwz reg1,var[TC](2)
3480 // bla .__get_tpointer
3481 // add reg2, reg1, r3
3482 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3483
3484 // We do not implement the 32-bit version of the faster access sequence
3485 // for local-exec that is controlled by the -maix-small-local-exec-tls
3486 // option, or the "aix-small-tls" global variable attribute.
3487 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3488 report_fatal_error("The small-local-exec TLS access sequence is "
3489 "currently only supported on AIX (64-bit mode).");
3490 }
3491 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3492 }
3493
3494 if (Model == TLSModel::LocalDynamic) {
3495 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3496
3497 // We do not implement the 32-bit version of the faster access sequence
3498 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3499 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3500 report_fatal_error("The small-local-dynamic TLS access sequence is "
3501 "currently only supported on AIX (64-bit mode).");
3502
3503 // For local-dynamic on AIX, we need to generate one TOC entry for each
3504 // variable offset, and a single module-handle TOC entry for the entire
3505 // file.
3506
3507 SDValue VariableOffsetTGA =
3508 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3509 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3510
3512 GlobalVariable *TLSGV =
3513 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3514 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3515 TLSGV->setThreadLocal(true);
3516 assert(TLSGV && "Not able to create GV for _$TLSML.");
3517 SDValue ModuleHandleTGA =
3518 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3519 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3520 SDValue ModuleHandle =
3521 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3522
3523 // With the -maix-small-local-dynamic-tls option, produce a faster access
3524 // sequence for local-dynamic TLS variables where the offset from the
3525 // module-handle is encoded as an immediate operand.
3526 //
3527 // We only utilize the faster local-dynamic access sequence when the TLS
3528 // variable has a size within the policy limit. We treat types that are
3529 // not sized or are empty as being over the policy size limit.
3530 if (HasAIXSmallLocalDynamicTLS) {
3531 Type *GVType = GV->getValueType();
3532 if (GVType->isSized() && !GVType->isEmptyTy() &&
3533 GV->getDataLayout().getTypeAllocSize(GVType) <=
3534 AIXSmallTlsPolicySizeLimit)
3535 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3536 ModuleHandle);
3537 }
3538
3539 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3540 }
3541
3542 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3543 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3544 // need to generate two TOC entries, one for the variable offset, one for the
3545 // region handle. The global address for the TOC entry of the region handle is
3546 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3547 // entry of the variable offset is created with MO_TLSGD_FLAG.
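// Editor's note (illustrative): the two TOC loads below feed PPCISD::TLSGD_AIX,
// which the backend later expands into a call to the .__tls_get_addr helper
// taking the region handle and the variable offset; the exact expansion is
// handled elsewhere.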
3548 SDValue VariableOffsetTGA =
3549 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3550 SDValue RegionHandleTGA =
3551 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3552 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3553 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3554 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3555 RegionHandle);
3556}
3557
3558SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3559 SelectionDAG &DAG) const {
3560 // FIXME: TLS addresses currently use medium model code sequences,
3561 // which is the most useful form. Eventually support for small and
3562 // large models could be added if users need it, at the cost of
3563 // additional complexity.
3564 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3565 if (DAG.getTarget().useEmulatedTLS())
3566 return LowerToTLSEmulatedModel(GA, DAG);
3567
3568 SDLoc dl(GA);
3569 const GlobalValue *GV = GA->getGlobal();
3570 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3571 bool is64bit = Subtarget.isPPC64();
3572 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3573 PICLevel::Level picLevel = M->getPICLevel();
3574
3575 const TargetMachine &TM = getTargetMachine();
3576 TLSModel::Model Model = TM.getTLSModel(GV);
3577
3578 if (Model == TLSModel::LocalExec) {
3579 if (Subtarget.isUsingPCRelativeCalls()) {
3580 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3581 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3582 PPCII::MO_TPREL_PCREL_FLAG);
3583 SDValue MatAddr =
3584 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3585 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3586 }
3587
3588 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3589 PPCII::MO_TPREL_HA);
3590 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3591 PPCII::MO_TPREL_LO);
3592 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3593 : DAG.getRegister(PPC::R2, MVT::i32);
3594
3595 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3596 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3597 }
3598
3599 if (Model == TLSModel::InitialExec) {
3600 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3601 SDValue TGA = DAG.getTargetGlobalAddress(
3602 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3603 SDValue TGATLS = DAG.getTargetGlobalAddress(
3604 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3605 SDValue TPOffset;
3606 if (IsPCRel) {
3607 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3608 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3609 MachinePointerInfo());
3610 } else {
3611 SDValue GOTPtr;
3612 if (is64bit) {
3613 setUsesTOCBasePtr(DAG);
3614 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3615 GOTPtr =
3616 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3617 } else {
3618 if (!TM.isPositionIndependent())
3619 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3620 else if (picLevel == PICLevel::SmallPIC)
3621 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3622 else
3623 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3624 }
3625 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3626 }
3627 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3628 }
3629
3630 if (Model == TLSModel::GeneralDynamic) {
3631 if (Subtarget.isUsingPCRelativeCalls()) {
3632 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3633 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3634 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3635 }
3636
3637 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3638 SDValue GOTPtr;
3639 if (is64bit) {
3640 setUsesTOCBasePtr(DAG);
3641 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3642 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3643 GOTReg, TGA);
3644 } else {
3645 if (picLevel == PICLevel::SmallPIC)
3646 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3647 else
3648 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3649 }
3650 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3651 GOTPtr, TGA, TGA);
3652 }
3653
3654 if (Model == TLSModel::LocalDynamic) {
3655 if (Subtarget.isUsingPCRelativeCalls()) {
3656 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3657 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3658 SDValue MatPCRel =
3659 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3660 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3661 }
3662
3663 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3664 SDValue GOTPtr;
3665 if (is64bit) {
3666 setUsesTOCBasePtr(DAG);
3667 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3668 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3669 GOTReg, TGA);
3670 } else {
3671 if (picLevel == PICLevel::SmallPIC)
3672 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3673 else
3674 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3675 }
3676 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3677 PtrVT, GOTPtr, TGA, TGA);
3678 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3679 PtrVT, TLSAddr, TGA);
3680 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3681 }
3682
3683 llvm_unreachable("Unknown TLS model!");
3684}
3685
3686SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3687 SelectionDAG &DAG) const {
3688 EVT PtrVT = Op.getValueType();
3689 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3690 SDLoc DL(GSDN);
3691 const GlobalValue *GV = GSDN->getGlobal();
3692
3693 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3694 // The actual address of the GlobalValue is stored in the TOC.
3695 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3696 if (Subtarget.isUsingPCRelativeCalls()) {
3697 EVT Ty = getPointerTy(DAG.getDataLayout());
3698 if (isAccessedAsGotIndirect(Op)) {
3699 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3700 PPCII::MO_GOT_PCREL_FLAG);
3701 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3702 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3703 MachinePointerInfo());
3704 return Load;
3705 } else {
3706 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3707 PPCII::MO_PCREL_FLAG);
3708 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3709 }
3710 }
3711 setUsesTOCBasePtr(DAG);
3712 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3713 return getTOCEntry(DAG, DL, GA);
3714 }
3715
3716 unsigned MOHiFlag, MOLoFlag;
3717 bool IsPIC = isPositionIndependent();
3718 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3719
3720 if (IsPIC && Subtarget.isSVR4ABI()) {
3721 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3722 GSDN->getOffset(),
3723 PPCII::MO_PIC_FLAG);
3724 return getTOCEntry(DAG, DL, GA);
3725 }
3726
3727 SDValue GAHi =
3728 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3729 SDValue GALo =
3730 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3731
3732 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3733}
3734
3735SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3736 bool IsStrict = Op->isStrictFPOpcode();
3737 ISD::CondCode CC =
3738 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3739 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3740 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3741 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3742 EVT LHSVT = LHS.getValueType();
3743 SDLoc dl(Op);
3744
3745 // Soften the setcc with a libcall if it is fp128.
3746 if (LHSVT == MVT::f128) {
3747 assert(!Subtarget.hasP9Vector() &&
3748 "SETCC for f128 is already legal under Power9!");
3749 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3750 Op->getOpcode() == ISD::STRICT_FSETCCS);
3751 if (RHS.getNode())
3752 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3753 DAG.getCondCode(CC));
3754 if (IsStrict)
3755 return DAG.getMergeValues({LHS, Chain}, dl);
3756 return LHS;
3757 }
3758
3759 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3760
3761 if (Op.getValueType() == MVT::v2i64) {
3762 // When the operands themselves are v2i64 values, we need to do something
3763 // special because VSX has no underlying comparison operations for these.
3764 if (LHS.getValueType() == MVT::v2i64) {
3765 // Equality can be handled by casting to the legal type for Altivec
3766 // comparisons, everything else needs to be expanded.
3767 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3768 return SDValue();
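// Editor's note: the v4i32 compare below yields a per-word result; the
// {1, 0, 3, 2} shuffle swaps the two words of each doubleword, so the AND
// (for SETEQ) leaves a v2i64 lane all-ones only when both of its 32-bit
// halves compared equal, while the OR (for SETNE) sets the lane when either
// half differed.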
3769 SDValue SetCC32 = DAG.getSetCC(
3770 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3771 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3772 int ShuffV[] = {1, 0, 3, 2};
3773 SDValue Shuff =
3774 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3775 return DAG.getBitcast(MVT::v2i64,
3776 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3777 dl, MVT::v4i32, Shuff, SetCC32));
3778 }
3779
3780 // We handle most of these in the usual way.
3781 return Op;
3782 }
3783
3784 // If we're comparing for equality to zero, expose the fact that this is
3785 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3786 // fold the new nodes.
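// For example (editor's note), for an i32 value x this produces roughly
// (seteq x, 0) -> (srl (ctlz x), 5), since ctlz yields the full bit width
// (32) only when x is zero.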
3787 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3788 return V;
3789
3790 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3791 // Leave comparisons against 0 and -1 alone for now, since they're usually
3792 // optimized. FIXME: revisit this when we can custom lower all setcc
3793 // optimizations.
3794 if (C->isAllOnes() || C->isZero())
3795 return SDValue();
3796 }
3797
3798 // If we have an integer seteq/setne, turn it into a compare against zero
3799 // by xor'ing the rhs with the lhs, which is faster than setting a
3800 // condition register, reading it back out, and masking the correct bit. The
3801 // normal approach here uses sub to do this instead of xor. Using xor exposes
3802 // the result to other bit-twiddling opportunities.
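// For example (editor's note): (seteq a, b) becomes (seteq (xor a, b), 0),
// and likewise for setne.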
3803 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3804 EVT VT = Op.getValueType();
3805 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3806 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3807 }
3808 return SDValue();
3809}
3810
3811SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3812 SDNode *Node = Op.getNode();
3813 EVT VT = Node->getValueType(0);
3814 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3815 SDValue InChain = Node->getOperand(0);
3816 SDValue VAListPtr = Node->getOperand(1);
3817 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3818 SDLoc dl(Node);
3819
3820 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3821
3822 // gpr_index
3823 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3824 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3825 InChain = GprIndex.getValue(1);
3826
3827 if (VT == MVT::i64) {
3828 // Check if GprIndex is odd
3829 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3830 DAG.getConstant(1, dl, MVT::i32));
3831 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3832 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3833 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3834 DAG.getConstant(1, dl, MVT::i32));
3835 // Align GprIndex to be even if it isn't
3836 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3837 GprIndex);
3838 }
3839
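// Editor's note on the va_list layout assumed below (see LowerVASTART):
// byte 0 holds gpr_index, byte 1 holds fpr_index, bytes 4..7 hold
// overflow_arg_area and bytes 8..11 hold reg_save_area, which is where the
// constants 1, 4 and 8 used below come from.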
3840 // fpr index is 1 byte after gpr
3841 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3842 DAG.getConstant(1, dl, MVT::i32));
3843
3844 // fpr
3845 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3846 FprPtr, MachinePointerInfo(SV), MVT::i8);
3847 InChain = FprIndex.getValue(1);
3848
3849 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3850 DAG.getConstant(8, dl, MVT::i32));
3851
3852 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3853 DAG.getConstant(4, dl, MVT::i32));
3854
3855 // areas
3856 SDValue OverflowArea =
3857 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3858 InChain = OverflowArea.getValue(1);
3859
3860 SDValue RegSaveArea =
3861 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3862 InChain = RegSaveArea.getValue(1);
3863
3864 // select overflow_area if gpr/fpr index is >= 8
3865 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3866 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3867
3868 // adjustment constant gpr_index * 4/8
3869 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3870 VT.isInteger() ? GprIndex : FprIndex,
3871 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3872 MVT::i32));
3873
3874 // OurReg = RegSaveArea + RegConstant
3875 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3876 RegConstant);
3877
3878 // Floating types are 32 bytes into RegSaveArea
3879 if (VT.isFloatingPoint())
3880 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3881 DAG.getConstant(32, dl, MVT::i32));
3882
3883 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3884 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3885 VT.isInteger() ? GprIndex : FprIndex,
3886 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3887 MVT::i32));
3888
3889 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3890 VT.isInteger() ? VAListPtr : FprPtr,
3891 MachinePointerInfo(SV), MVT::i8);
3892
3893 // determine if we should load from reg_save_area or overflow_area
3894 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3895
3896 // increase overflow_area by 4/8 if gpr/fpr index is >= 8
3897 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3898 DAG.getConstant(VT.isInteger() ? 4 : 8,
3899 dl, MVT::i32));
3900
3901 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3902 OverflowAreaPlusN);
3903
3904 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3905 MachinePointerInfo(), MVT::i32);
3906
3907 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3908}
3909
3910SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3911 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3912
3913 // We have to copy the entire va_list struct:
3914 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
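// Editor's breakdown of the 12 bytes: gpr at offset 0, fpr at offset 1, two
// bytes of padding, overflow_arg_area at offset 4 and reg_save_area at
// offset 8 (see the va_list struct described in LowerVASTART).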
3915 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3916 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3917 false, true, /*CI=*/nullptr, std::nullopt,
3918 MachinePointerInfo(), MachinePointerInfo());
3919}
3920
3921SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3922 SelectionDAG &DAG) const {
3923 return Op.getOperand(0);
3924}
3925
3926SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3927 MachineFunction &MF = DAG.getMachineFunction();
3928 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3929
3930 assert((Op.getOpcode() == ISD::INLINEASM ||
3931 Op.getOpcode() == ISD::INLINEASM_BR) &&
3932 "Expecting Inline ASM node.");
3933
3934 // If an LR store is already known to be required then there is no point in
3935 // checking this ASM as well.
3936 if (MFI.isLRStoreRequired())
3937 return Op;
3938
3939 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3940 // type MVT::Glue. We want to ignore this last operand if that is the case.
3941 unsigned NumOps = Op.getNumOperands();
3942 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3943 --NumOps;
3944
3945 // Check all operands that may contain the LR.
3946 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3947 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3948 unsigned NumVals = Flags.getNumOperandRegisters();
3949 ++i; // Skip the ID value.
3950
3951 switch (Flags.getKind()) {
3952 default:
3953 llvm_unreachable("Bad flags!");
3954 case InlineAsm::Kind::RegUse:
3955 case InlineAsm::Kind::Imm:
3956 case InlineAsm::Kind::Mem:
3957 i += NumVals;
3958 break;
3959 case InlineAsm::Kind::Clobber:
3960 case InlineAsm::Kind::RegDef:
3961 case InlineAsm::Kind::RegDefEarlyClobber: {
3962 for (; NumVals; --NumVals, ++i) {
3963 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3964 if (Reg != PPC::LR && Reg != PPC::LR8)
3965 continue;
3966 MFI.setLRStoreRequired();
3967 return Op;
3968 }
3969 break;
3970 }
3971 }
3972 }
3973
3974 return Op;
3975}
3976
3977SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3978 SelectionDAG &DAG) const {
3979 SDValue Chain = Op.getOperand(0);
3980 SDValue Trmp = Op.getOperand(1); // trampoline
3981 SDValue FPtr = Op.getOperand(2); // nested function
3982 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3983 SDLoc dl(Op);
3984
3985 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3986
3987 if (Subtarget.isAIXABI()) {
3988 // On AIX we create a trampoline descriptor by combining the
3989 // entry point and TOC from the global descriptor (FPtr) with the
3990 // nest argument as the environment pointer.
3991 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3992 MaybeAlign PointerAlign(PointerSize);
3993 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3994 ? (MachineMemOperand::MODereferenceable |
3995 MachineMemOperand::MOInvariant)
3996 : MachineMemOperand::MONone;
3997
3998 uint64_t TOCPointerOffset = 1 * PointerSize;
3999 uint64_t EnvPointerOffset = 2 * PointerSize;
4000 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
4001 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
4002
4003 const Value *TrampolineAddr =
4004 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4005 const Function *Func =
4006 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
4007
4008 SDValue OutChains[3];
4009
4010 // Copy the entry point address from the global descriptor to the
4011 // trampoline buffer.
4012 SDValue LoadEntryPoint =
4013 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
4014 PointerAlign, MMOFlags);
4015 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
4016 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
4017 MachinePointerInfo(TrampolineAddr, 0));
4018
4019 // Copy the TOC pointer from the global descriptor to the trampoline
4020 // buffer.
4021 SDValue TOCFromDescriptorPtr =
4022 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
4023 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
4024 MachinePointerInfo(Func, TOCPointerOffset),
4025 PointerAlign, MMOFlags);
4026 SDValue TrampolineTOCPointer =
4027 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
4028 SDValue TOCLoadChain = TOCReg.getValue(1);
4029 OutChains[1] =
4030 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
4031 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
4032
4033 // Store the nest argument into the environment pointer in the trampoline
4034 // buffer.
4035 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
4036 OutChains[2] =
4037 DAG.getStore(Chain, dl, Nest, EnvPointer,
4038 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
4039
4041 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
4042 return TokenFactor;
4043 }
4044
4045 bool isPPC64 = (PtrVT == MVT::i64);
4046 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4047
4048 TargetLowering::ArgListTy Args;
4049 Args.emplace_back(Trmp, IntPtrTy);
4050 // TrampSize == (isPPC64 ? 48 : 40);
4051 Args.emplace_back(
4052 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
4053 IntPtrTy);
4054 Args.emplace_back(FPtr, IntPtrTy);
4055 Args.emplace_back(Nest, IntPtrTy);
4056
4057 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4058 TargetLowering::CallLoweringInfo CLI(DAG);
4059 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4060 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4061 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4062
4063 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4064 return CallResult.second;
4065}
4066
4067SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4068 MachineFunction &MF = DAG.getMachineFunction();
4069 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4070 EVT PtrVT = getPointerTy(MF.getDataLayout());
4071
4072 SDLoc dl(Op);
4073
4074 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4075 // vastart just stores the address of the VarArgsFrameIndex slot into the
4076 // memory location argument.
4077 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4078 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4079 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4080 MachinePointerInfo(SV));
4081 }
4082
4083 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4084 // We assume the given va_list has already been allocated.
4085 //
4086 // typedef struct {
4087 // char gpr; /* index into the array of 8 GPRs
4088 // * stored in the register save area
4089 // * gpr=0 corresponds to r3,
4090 // * gpr=1 to r4, etc.
4091 // */
4092 // char fpr; /* index into the array of 8 FPRs
4093 // * stored in the register save area
4094 // * fpr=0 corresponds to f1,
4095 // * fpr=1 to f2, etc.
4096 // */
4097 // char *overflow_arg_area;
4098 // /* location on stack that holds
4099 // * the next overflow argument
4100 // */
4101 // char *reg_save_area;
4102 // /* where r3:r10 and f1:f8 (if saved)
4103 // * are stored
4104 // */
4105 // } va_list[1];
4106
4107 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4108 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4109 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4110 PtrVT);
4111 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4112 PtrVT);
4113
4114 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4115 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4116
4117 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4118 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4119
4120 uint64_t FPROffset = 1;
4121 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4122
4123 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4124
4125 // Store first byte : number of int regs
4126 SDValue firstStore =
4127 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4128 MachinePointerInfo(SV), MVT::i8);
4129 uint64_t nextOffset = FPROffset;
4130 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4131 ConstFPROffset);
4132
4133 // Store second byte : number of float regs
4134 SDValue secondStore =
4135 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4136 MachinePointerInfo(SV, nextOffset), MVT::i8);
4137 nextOffset += StackOffset;
4138 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4139
4140 // Store second word : arguments given on stack
4141 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4142 MachinePointerInfo(SV, nextOffset));
4143 nextOffset += FrameOffset;
4144 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4145
4146 // Store third word : arguments given in registers
4147 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4148 MachinePointerInfo(SV, nextOffset));
4149}
4150
4151/// FPR - The set of FP registers that should be allocated for arguments
4152/// on Darwin and AIX.
4153static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4154 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4155 PPC::F11, PPC::F12, PPC::F13};
4156
4157/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4158/// the stack.
4159static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4160 unsigned PtrByteSize) {
4161 unsigned ArgSize = ArgVT.getStoreSize();
4162 if (Flags.isByVal())
4163 ArgSize = Flags.getByValSize();
4164
4165 // Round up to multiples of the pointer size, except for array members,
4166 // which are always packed.
4167 if (!Flags.isInConsecutiveRegs())
4168 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
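// For example (editor's note), with PtrByteSize == 8 a 12-byte byval
// argument is rounded up to a 16-byte slot, while an 8-byte argument stays
// at 8 bytes.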
4169
4170 return ArgSize;
4171}
4172
4173/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4174 /// on the stack.
4175 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4176 ISD::ArgFlagsTy Flags,
4177 unsigned PtrByteSize) {
4178 Align Alignment(PtrByteSize);
4179
4180 // Altivec parameters are padded to a 16 byte boundary.
4181 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4182 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4183 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4184 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4185 Alignment = Align(16);
4186
4187 // ByVal parameters are aligned as requested.
4188 if (Flags.isByVal()) {
4189 auto BVAlign = Flags.getNonZeroByValAlign();
4190 if (BVAlign > PtrByteSize) {
4191 if (BVAlign.value() % PtrByteSize != 0)
4192 report_fatal_error(
4193 "ByVal alignment is not a multiple of the pointer size");
4194
4195 Alignment = BVAlign;
4196 }
4197 }
4198
4199 // Array members are always packed to their original alignment.
4200 if (Flags.isInConsecutiveRegs()) {
4201 // If the array member was split into multiple registers, the first
4202 // needs to be aligned to the size of the full type. (Except for
4203 // ppcf128, which is only aligned as its f64 components.)
4204 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4205 Alignment = Align(OrigVT.getStoreSize());
4206 else
4207 Alignment = Align(ArgVT.getStoreSize());
4208 }
4209
4210 return Alignment;
4211}
4212
4213/// CalculateStackSlotUsed - Return whether this argument will use its
4214/// stack slot (instead of being passed in registers). ArgOffset,
4215/// AvailableFPRs, and AvailableVRs must hold the current argument
4216/// position, and will be updated to account for this argument.
4217static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4218 unsigned PtrByteSize, unsigned LinkageSize,
4219 unsigned ParamAreaSize, unsigned &ArgOffset,
4220 unsigned &AvailableFPRs,
4221 unsigned &AvailableVRs) {
4222 bool UseMemory = false;
4223
4224 // Respect alignment of argument on the stack.
4225 Align Alignment =
4226 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4227 ArgOffset = alignTo(ArgOffset, Alignment);
4228 // If there's no space left in the argument save area, we must
4229 // use memory (this check also catches zero-sized arguments).
4230 if (ArgOffset >= LinkageSize + ParamAreaSize)
4231 UseMemory = true;
4232
4233 // Allocate argument on the stack.
4234 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4235 if (Flags.isInConsecutiveRegsLast())
4236 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4237 // If we overran the argument save area, we must use memory
4238 // (this check catches arguments passed partially in memory)
4239 if (ArgOffset > LinkageSize + ParamAreaSize)
4240 UseMemory = true;
4241
4242 // However, if the argument is actually passed in an FPR or a VR,
4243 // we don't use memory after all.
4244 if (!Flags.isByVal()) {
4245 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4246 if (AvailableFPRs > 0) {
4247 --AvailableFPRs;
4248 return false;
4249 }
4250 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4251 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4252 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4253 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4254 if (AvailableVRs > 0) {
4255 --AvailableVRs;
4256 return false;
4257 }
4258 }
4259
4260 return UseMemory;
4261}
4262
4263/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4264 /// ensure minimum alignment required for target.
4265 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4266 unsigned NumBytes) {
4267 return alignTo(NumBytes, Lowering->getStackAlign());
4268}
4269
4270SDValue PPCTargetLowering::LowerFormalArguments(
4271 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4272 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4273 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4274 if (Subtarget.isAIXABI())
4275 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4276 InVals);
4277 if (Subtarget.is64BitELFABI())
4278 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4279 InVals);
4280 assert(Subtarget.is32BitELFABI());
4281 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4282 InVals);
4283}
4284
4285SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4286 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4287 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4288 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4289
4290 // 32-bit SVR4 ABI Stack Frame Layout:
4291 // +-----------------------------------+
4292 // +--> | Back chain |
4293 // | +-----------------------------------+
4294 // | | Floating-point register save area |
4295 // | +-----------------------------------+
4296 // | | General register save area |
4297 // | +-----------------------------------+
4298 // | | CR save word |
4299 // | +-----------------------------------+
4300 // | | VRSAVE save word |
4301 // | +-----------------------------------+
4302 // | | Alignment padding |
4303 // | +-----------------------------------+
4304 // | | Vector register save area |
4305 // | +-----------------------------------+
4306 // | | Local variable space |
4307 // | +-----------------------------------+
4308 // | | Parameter list area |
4309 // | +-----------------------------------+
4310 // | | LR save word |
4311 // | +-----------------------------------+
4312 // SP--> +--- | Back chain |
4313 // +-----------------------------------+
4314 //
4315 // Specifications:
4316 // System V Application Binary Interface PowerPC Processor Supplement
4317 // AltiVec Technology Programming Interface Manual
4318
4319 MachineFunction &MF = DAG.getMachineFunction();
4320 MachineFrameInfo &MFI = MF.getFrameInfo();
4321 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4322
4323 EVT PtrVT = getPointerTy(MF.getDataLayout());
4324 // Potential tail calls could cause overwriting of argument stack slots.
4325 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4326 (CallConv == CallingConv::Fast));
4327 const Align PtrAlign(4);
4328
4329 // Assign locations to all of the incoming arguments.
4330 SmallVector<CCValAssign, 16> ArgLocs;
4331 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4332 *DAG.getContext());
4333
4334 // Reserve space for the linkage area on the stack.
4335 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4336 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4337 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4338
4339 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4340 CCValAssign &VA = ArgLocs[i];
4341
4342 // Arguments stored in registers.
4343 if (VA.isRegLoc()) {
4344 const TargetRegisterClass *RC;
4345 EVT ValVT = VA.getValVT();
4346
4347 switch (ValVT.getSimpleVT().SimpleTy) {
4348 default:
4349 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4350 case MVT::i1:
4351 case MVT::i32:
4352 RC = &PPC::GPRCRegClass;
4353 break;
4354 case MVT::f32:
4355 if (Subtarget.hasP8Vector())
4356 RC = &PPC::VSSRCRegClass;
4357 else if (Subtarget.hasSPE())
4358 RC = &PPC::GPRCRegClass;
4359 else
4360 RC = &PPC::F4RCRegClass;
4361 break;
4362 case MVT::f64:
4363 if (Subtarget.hasVSX())
4364 RC = &PPC::VSFRCRegClass;
4365 else if (Subtarget.hasSPE())
4366 // SPE passes doubles in GPR pairs.
4367 RC = &PPC::GPRCRegClass;
4368 else
4369 RC = &PPC::F8RCRegClass;
4370 break;
4371 case MVT::v16i8:
4372 case MVT::v8i16:
4373 case MVT::v4i32:
4374 RC = &PPC::VRRCRegClass;
4375 break;
4376 case MVT::v4f32:
4377 RC = &PPC::VRRCRegClass;
4378 break;
4379 case MVT::v2f64:
4380 case MVT::v2i64:
4381 RC = &PPC::VRRCRegClass;
4382 break;
4383 }
4384
4385 SDValue ArgValue;
4386 // Transform the arguments stored in physical registers into
4387 // virtual ones.
4388 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4389 assert(i + 1 < e && "No second half of double precision argument");
4390 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4391 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4392 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4393 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4394 if (!Subtarget.isLittleEndian())
4395 std::swap (ArgValueLo, ArgValueHi);
4396 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4397 ArgValueHi);
4398 } else {
4399 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4400 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4401 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4402 if (ValVT == MVT::i1)
4403 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4404 }
4405
4406 InVals.push_back(ArgValue);
4407 } else {
4408 // Argument stored in memory.
4409 assert(VA.isMemLoc());
4410
4411 // Get the extended size of the argument type in stack
4412 unsigned ArgSize = VA.getLocVT().getStoreSize();
4413 // Get the actual size of the argument type
4414 unsigned ObjSize = VA.getValVT().getStoreSize();
4415 unsigned ArgOffset = VA.getLocMemOffset();
4416 // Stack objects in PPC32 are right justified.
4417 ArgOffset += ArgSize - ObjSize;
4418 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4419
4420 // Create load nodes to retrieve arguments from the stack.
4421 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4422 InVals.push_back(
4423 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4424 }
4425 }
4426
4427 // Assign locations to all of the incoming aggregate by value arguments.
4428 // Aggregates passed by value are stored in the local variable space of the
4429 // caller's stack frame, right above the parameter list area.
4430 SmallVector<CCValAssign, 16> ByValArgLocs;
4431 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4432 ByValArgLocs, *DAG.getContext());
4433
4434 // Reserve stack space for the allocations in CCInfo.
4435 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4436
4437 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4438
4439 // Area that is at least reserved in the caller of this function.
4440 unsigned MinReservedArea = CCByValInfo.getStackSize();
4441 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4442
4443 // Set the size that is at least reserved in the caller of this function. A
4444 // tail-call-optimized function's reserved stack space needs to be aligned so
4445 // that taking the difference between two stack areas will result in an
4446 // aligned stack.
4447 MinReservedArea =
4448 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4449 FuncInfo->setMinReservedArea(MinReservedArea);
4450
4452
4453 // If the function takes a variable number of arguments, make a frame index
4454 // for the start of the first vararg value... for expansion of llvm.va_start.
4455 if (isVarArg) {
4456 static const MCPhysReg GPArgRegs[] = {
4457 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4458 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4459 };
4460 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4461
4462 static const MCPhysReg FPArgRegs[] = {
4463 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4464 PPC::F8
4465 };
4466 unsigned NumFPArgRegs = std::size(FPArgRegs);
4467
4468 if (useSoftFloat() || hasSPE())
4469 NumFPArgRegs = 0;
4470
4471 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4472 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4473
4474 // Make room for NumGPArgRegs and NumFPArgRegs.
4475 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4476 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4477
4478 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4479 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4480
4481 FuncInfo->setVarArgsFrameIndex(
4482 MFI.CreateStackObject(Depth, Align(8), false));
4483 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4484
4485 // The fixed integer arguments of a variadic function are stored to the
4486 // VarArgsFrameIndex on the stack so that they may be loaded by
4487 // dereferencing the result of va_next.
4488 for (MCPhysReg GPArgReg : GPArgRegs) {
4489 // Get an existing live-in vreg, or add a new one.
4490 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4491 if (!VReg)
4492 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4493
4494 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4495 SDValue Store =
4496 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4497 MemOps.push_back(Store);
4498 // Increment the address by four for the next argument to store
4499 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4500 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4501 }
4502
4503 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4504 // is set.
4505 // The double arguments are stored to the VarArgsFrameIndex
4506 // on the stack.
4507 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4508 // Get an existing live-in vreg, or add a new one.
4509 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4510 if (!VReg)
4511 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4512
4513 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4514 SDValue Store =
4515 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4516 MemOps.push_back(Store);
4517 // Increment the address by eight for the next argument to store
4518 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4519 PtrVT);
4520 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4521 }
4522 }
4523
4524 if (!MemOps.empty())
4525 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4526
4527 return Chain;
4528}
4529
4530// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4531// value to MVT::i64 and then truncate to the correct register size.
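// For example (editor's note): a sign-extended i32 argument arrives in a
// 64-bit GPR, is wrapped in AssertSext with value type i32, and is then
// truncated back to i32 so later combines know the upper bits are sign bits.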
4532SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4533 EVT ObjectVT, SelectionDAG &DAG,
4534 SDValue ArgVal,
4535 const SDLoc &dl) const {
4536 if (Flags.isSExt())
4537 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4538 DAG.getValueType(ObjectVT));
4539 else if (Flags.isZExt())
4540 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4541 DAG.getValueType(ObjectVT));
4542
4543 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4544}
4545
4546SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4547 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4548 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4549 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4550 // TODO: add description of PPC stack frame format, or at least some docs.
4551 //
4552 bool isELFv2ABI = Subtarget.isELFv2ABI();
4553 bool isLittleEndian = Subtarget.isLittleEndian();
4554 MachineFunction &MF = DAG.getMachineFunction();
4555 MachineFrameInfo &MFI = MF.getFrameInfo();
4556 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4557
4558 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4559 "fastcc not supported on varargs functions");
4560
4561 EVT PtrVT = getPointerTy(MF.getDataLayout());
4562 // Potential tail calls could cause overwriting of argument stack slots.
4563 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4564 (CallConv == CallingConv::Fast));
4565 unsigned PtrByteSize = 8;
4566 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4567
4568 static const MCPhysReg GPR[] = {
4569 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4570 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4571 };
4572 static const MCPhysReg VR[] = {
4573 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4574 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4575 };
4576
4577 const unsigned Num_GPR_Regs = std::size(GPR);
4578 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4579 const unsigned Num_VR_Regs = std::size(VR);
4580
4581 // Do a first pass over the arguments to determine whether the ABI
4582 // guarantees that our caller has allocated the parameter save area
4583 // on its stack frame. In the ELFv1 ABI, this is always the case;
4584 // in the ELFv2 ABI, it is true if this is a vararg function or if
4585 // any parameter is located in a stack slot.
4586
4587 bool HasParameterArea = !isELFv2ABI || isVarArg;
4588 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4589 unsigned NumBytes = LinkageSize;
4590 unsigned AvailableFPRs = Num_FPR_Regs;
4591 unsigned AvailableVRs = Num_VR_Regs;
4592 for (const ISD::InputArg &In : Ins) {
4593 if (In.Flags.isNest())
4594 continue;
4595
4596 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4597 LinkageSize, ParamAreaSize, NumBytes,
4598 AvailableFPRs, AvailableVRs))
4599 HasParameterArea = true;
4600 }
4601
4602 // Add DAG nodes to load the arguments or copy them out of registers. On
4603 // entry to a function on PPC, the arguments start after the linkage area,
4604 // although the first ones are often in registers.
4605
4606 unsigned ArgOffset = LinkageSize;
4607 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4608 SmallVector<SDValue, 8> MemOps;
4609 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4610 unsigned CurArgIdx = 0;
4611 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4612 SDValue ArgVal;
4613 bool needsLoad = false;
4614 EVT ObjectVT = Ins[ArgNo].VT;
4615 EVT OrigVT = Ins[ArgNo].ArgVT;
4616 unsigned ObjSize = ObjectVT.getStoreSize();
4617 unsigned ArgSize = ObjSize;
4618 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4619 if (Ins[ArgNo].isOrigArg()) {
4620 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4621 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4622 }
4623 // We re-align the argument offset for each argument, except under the fast
4624 // calling convention, where we only do so once we know the argument will
4625 // actually use a stack slot.
4626 unsigned CurArgOffset;
4627 Align Alignment;
4628 auto ComputeArgOffset = [&]() {
4629 /* Respect alignment of argument on the stack. */
4630 Alignment =
4631 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4632 ArgOffset = alignTo(ArgOffset, Alignment);
4633 CurArgOffset = ArgOffset;
4634 };
4635
4636 if (CallConv != CallingConv::Fast) {
4637 ComputeArgOffset();
4638
4639 /* Compute GPR index associated with argument offset. */
4640 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4641 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4642 }
4643
4644 // FIXME the codegen can be much improved in some cases.
4645 // We do not have to keep everything in memory.
4646 if (Flags.isByVal()) {
4647 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4648
4649 if (CallConv == CallingConv::Fast)
4650 ComputeArgOffset();
4651
4652 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4653 ObjSize = Flags.getByValSize();
4654 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4655 // Empty aggregate parameters do not take up registers. Examples:
4656 // struct { } a;
4657 // union { } b;
4658 // int c[0];
4659 // etc. However, we have to provide a place-holder in InVals, so
4660 // pretend we have an 8-byte item at the current address for that
4661 // purpose.
4662 if (!ObjSize) {
4663 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4664 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4665 InVals.push_back(FIN);
4666 continue;
4667 }
4668
4669 // Create a stack object covering all stack doublewords occupied
4670 // by the argument. If the argument is (fully or partially) on
4671 // the stack, or if the argument is fully in registers but the
4672 // caller has allocated the parameter save anyway, we can refer
4673 // directly to the caller's stack frame. Otherwise, create a
4674 // local copy in our own frame.
4675 int FI;
4676 if (HasParameterArea ||
4677 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4678 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4679 else
4680 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4681 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4682
4683 // Handle aggregates smaller than 8 bytes.
4684 if (ObjSize < PtrByteSize) {
4685 // The value of the object is its address, which differs from the
4686 // address of the enclosing doubleword on big-endian systems.
4687 SDValue Arg = FIN;
4688 if (!isLittleEndian) {
4689 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4690 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4691 }
4692 InVals.push_back(Arg);
4693
4694 if (GPR_idx != Num_GPR_Regs) {
4695 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4696 FuncInfo->addLiveInAttr(VReg, Flags);
4697 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4698 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4699 SDValue Store =
4700 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4701 MachinePointerInfo(&*FuncArg), ObjType);
4702 MemOps.push_back(Store);
4703 }
4704 // Whether we copied from a register or not, advance the offset
4705 // into the parameter save area by a full doubleword.
4706 ArgOffset += PtrByteSize;
4707 continue;
4708 }
4709
4710 // The value of the object is its address, which is the address of
4711 // its first stack doubleword.
4712 InVals.push_back(FIN);
4713
4714 // Store whatever pieces of the object are in registers to memory.
4715 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4716 if (GPR_idx == Num_GPR_Regs)
4717 break;
4718
4719 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4720 FuncInfo->addLiveInAttr(VReg, Flags);
4721 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4722 SDValue Addr = FIN;
4723 if (j) {
4724 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4725 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4726 }
4727 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4728 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4729 SDValue Store =
4730 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4731 MachinePointerInfo(&*FuncArg, j), ObjType);
4732 MemOps.push_back(Store);
4733 ++GPR_idx;
4734 }
4735 ArgOffset += ArgSize;
4736 continue;
4737 }
4738
4739 switch (ObjectVT.getSimpleVT().SimpleTy) {
4740 default: llvm_unreachable("Unhandled argument type!");
4741 case MVT::i1:
4742 case MVT::i32:
4743 case MVT::i64:
4744 if (Flags.isNest()) {
4745 // The 'nest' parameter, if any, is passed in R11.
4746 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4747 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4748
4749 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4750 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4751
4752 break;
4753 }
4754
4755 // These can be scalar arguments or elements of an integer array type
4756 // passed directly. Clang may use those instead of "byval" aggregate
4757 // types to avoid forcing arguments to memory unnecessarily.
4758 if (GPR_idx != Num_GPR_Regs) {
4759 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4760 FuncInfo->addLiveInAttr(VReg, Flags);
4761 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4762
4763 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4764 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4765 // value to MVT::i64 and then truncate to the correct register size.
4766 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4767 } else {
4768 if (CallConv == CallingConv::Fast)
4769 ComputeArgOffset();
4770
4771 needsLoad = true;
4772 ArgSize = PtrByteSize;
4773 }
4774 if (CallConv != CallingConv::Fast || needsLoad)
4775 ArgOffset += 8;
4776 break;
4777
4778 case MVT::f32:
4779 case MVT::f64:
4780 // These can be scalar arguments or elements of a float array type
4781 // passed directly. The latter are used to implement ELFv2 homogeneous
4782 // float aggregates.
4783 if (FPR_idx != Num_FPR_Regs) {
4784 unsigned VReg;
4785
4786 if (ObjectVT == MVT::f32)
4787 VReg = MF.addLiveIn(FPR[FPR_idx],
4788 Subtarget.hasP8Vector()
4789 ? &PPC::VSSRCRegClass
4790 : &PPC::F4RCRegClass);
4791 else
4792 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4793 ? &PPC::VSFRCRegClass
4794 : &PPC::F8RCRegClass);
4795
4796 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4797 ++FPR_idx;
4798 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4799 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4800 // once we support fp <-> gpr moves.
4801
4802 // This can only ever happen in the presence of f32 array types,
4803 // since otherwise we never run out of FPRs before running out
4804 // of GPRs.
4805 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4806 FuncInfo->addLiveInAttr(VReg, Flags);
4807 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4808
4809 if (ObjectVT == MVT::f32) {
4810 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4811 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4812 DAG.getConstant(32, dl, MVT::i32));
4813 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4814 }
4815
4816 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4817 } else {
4818 if (CallConv == CallingConv::Fast)
4819 ComputeArgOffset();
4820
4821 needsLoad = true;
4822 }
4823
4824 // When passing an array of floats, the array occupies consecutive
4825 // space in the argument area; only round up to the next doubleword
4826 // at the end of the array. Otherwise, each float takes 8 bytes.
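// For example (editor's note): four f32 members of a homogeneous float
// aggregate occupy 16 consecutive bytes of the argument area, whereas four
// independent f32 arguments would occupy 8 bytes each (32 bytes total).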
4827 if (CallConv != CallingConv::Fast || needsLoad) {
4828 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4829 ArgOffset += ArgSize;
4830 if (Flags.isInConsecutiveRegsLast())
4831 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4832 }
4833 break;
4834 case MVT::v4f32:
4835 case MVT::v4i32:
4836 case MVT::v8i16:
4837 case MVT::v16i8:
4838 case MVT::v2f64:
4839 case MVT::v2i64:
4840 case MVT::v1i128:
4841 case MVT::f128:
4842 // These can be scalar arguments or elements of a vector array type
4843 // passed directly. The latter are used to implement ELFv2 homogeneous
4844 // vector aggregates.
4845 if (VR_idx != Num_VR_Regs) {
4846 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4847 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4848 ++VR_idx;
4849 } else {
4850 if (CallConv == CallingConv::Fast)
4851 ComputeArgOffset();
4852 needsLoad = true;
4853 }
4854 if (CallConv != CallingConv::Fast || needsLoad)
4855 ArgOffset += 16;
4856 break;
4857 }
4858
4859 // We need to load the argument to a virtual register if we determined
4860 // above that we ran out of physical registers of the appropriate type.
4861 if (needsLoad) {
4862 if (ObjSize < ArgSize && !isLittleEndian)
4863 CurArgOffset += ArgSize - ObjSize;
4864 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4865 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4866 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4867 }
4868
4869 InVals.push_back(ArgVal);
4870 }
4871
4872 // Area that is at least reserved in the caller of this function.
4873 unsigned MinReservedArea;
4874 if (HasParameterArea)
4875 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4876 else
4877 MinReservedArea = LinkageSize;
4878
4879 // Set the size that is at least reserved in the caller of this function. Tail
4880 // call optimized functions' reserved stack space needs to be aligned so that
4881 // taking the difference between two stack areas will result in an aligned
4882 // stack.
4883 MinReservedArea =
4884 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4885 FuncInfo->setMinReservedArea(MinReservedArea);
4886
4887 // If the function takes a variable number of arguments, make a frame index
4888 // for the start of the first vararg value... for expansion of llvm.va_start.
4889 // The ELFv2 ABI spec states:
4890 // C programs that are intended to be *portable* across different compilers
4891 // and architectures must use the header file <stdarg.h> to deal with variable
4892 // argument lists.
4893 if (isVarArg && MFI.hasVAStart()) {
4894 int Depth = ArgOffset;
4895
4896 FuncInfo->setVarArgsFrameIndex(
4897 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4898 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4899
4900 // If this function is vararg, store any remaining integer argument regs
4901 // to their spots on the stack so that they may be loaded by dereferencing
4902 // the result of va_next.
4903 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4904 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4905 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4906 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4907 SDValue Store =
4908 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4909 MemOps.push_back(Store);
4910 // Increment the address by PtrByteSize (8) for the next argument to store
4911 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4912 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4913 }
4914 }
4915
4916 if (!MemOps.empty())
4917 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4918
4919 return Chain;
4920}
4921
4922/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4923/// adjusted to accommodate the arguments for the tailcall.
4924static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4925 unsigned ParamSize) {
4926
4927 if (!isTailCall) return 0;
4928
4929 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4930 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4931 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4932 // Remember only if the new adjustment is bigger.
4933 if (SPDiff < FI->getTailCallSPDelta())
4934 FI->setTailCallSPDelta(SPDiff);
4935
4936 return SPDiff;
4937}
4938
4939static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4940
4941static bool callsShareTOCBase(const Function *Caller,
4942 const GlobalValue *CalleeGV,
4943 const TargetMachine &TM) {
4944 // It does not make sense to call callsShareTOCBase() with a caller that
4945 // is PC Relative since PC Relative callers do not have a TOC.
4946#ifndef NDEBUG
4947 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4948 assert(!STICaller->isUsingPCRelativeCalls() &&
4949 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4950#endif
4951
4952 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4953 // don't have enough information to determine if the caller and callee share
4954 // the same TOC base, so we have to pessimistically assume they don't for
4955 // correctness.
4956 if (!CalleeGV)
4957 return false;
4958
4959 // If the callee is preemptable, then the static linker will use a plt-stub
4960 // which saves the toc to the stack, and needs a nop after the call
4961 // instruction to convert to a toc-restore.
4962 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4963 return false;
4964
4965 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4966 // We may need a TOC restore in the situation where the caller requires a
4967 // valid TOC but the callee is PC Relative and does not.
4968 const Function *F = dyn_cast<Function>(CalleeGV);
4969 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4970
4971 // If we have an Alias we can try to get the function from there.
4972 if (Alias) {
4973 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4974 F = dyn_cast<Function>(GlobalObj);
4975 }
4976
4977 // If we still have no valid function pointer we do not have enough
4978 // information to determine if the callee uses PC Relative calls so we must
4979 // assume that it does.
4980 if (!F)
4981 return false;
4982
4983 // If the callee uses PC Relative we cannot guarantee that the callee won't
4984 // clobber the TOC of the caller and so we must assume that the two
4985 // functions do not share a TOC base.
4986 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4987 if (STICallee->isUsingPCRelativeCalls())
4988 return false;
4989
4990 // If the GV is not a strong definition then we need to assume it can be
4991 // replaced by another function at link time. The function that replaces
4992 // it may not share the same TOC as the caller since the callee may be
4993 // replaced by a PC Relative version of the same function.
4994 if (!CalleeGV->isStrongDefinitionForLinker())
4995 return false;
4996
4997 // The medium and large code models are expected to provide a sufficiently
4998 // large TOC to provide all data addressing needs of a module with a
4999 // single TOC.
5000 if (CodeModel::Medium == TM.getCodeModel() ||
5001 CodeModel::Large == TM.getCodeModel())
5002 return true;
5003
5004 // Any explicitly-specified sections and section prefixes must also match.
5005 // Also, if we're using -ffunction-sections, then each function is always in
5006 // a different section (the same is true for COMDAT functions).
5007 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
5008 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
5009 return false;
5010 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
5011 if (F->getSectionPrefix() != Caller->getSectionPrefix())
5012 return false;
5013 }
5014
5015 return true;
5016}
5017
5018static bool
5019 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
5020 const SmallVectorImpl<ISD::OutputArg> &Outs) {
5021 assert(Subtarget.is64BitELFABI());
5022
5023 const unsigned PtrByteSize = 8;
5024 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5025
5026 static const MCPhysReg GPR[] = {
5027 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5028 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5029 };
5030 static const MCPhysReg VR[] = {
5031 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5032 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5033 };
5034
5035 const unsigned NumGPRs = std::size(GPR);
5036 const unsigned NumFPRs = 13;
5037 const unsigned NumVRs = std::size(VR);
5038 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5039
5040 unsigned NumBytes = LinkageSize;
5041 unsigned AvailableFPRs = NumFPRs;
5042 unsigned AvailableVRs = NumVRs;
5043
5044 for (const ISD::OutputArg& Param : Outs) {
5045 if (Param.Flags.isNest()) continue;
5046
5047 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5048 LinkageSize, ParamAreaSize, NumBytes,
5049 AvailableFPRs, AvailableVRs))
5050 return true;
5051 }
5052 return false;
5053}
5054
5055static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5056 if (CB.arg_size() != CallerFn->arg_size())
5057 return false;
5058
5059 auto CalleeArgIter = CB.arg_begin();
5060 auto CalleeArgEnd = CB.arg_end();
5061 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5062
5063 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5064 const Value* CalleeArg = *CalleeArgIter;
5065 const Value* CallerArg = &(*CallerArgIter);
5066 if (CalleeArg == CallerArg)
5067 continue;
5068
5069 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5070 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5071 // }
5072 // 1st argument of callee is undef and has the same type as caller.
5073 if (CalleeArg->getType() == CallerArg->getType() &&
5074 isa<UndefValue>(CalleeArg))
5075 continue;
5076
5077 return false;
5078 }
5079
5080 return true;
5081}
5082
5083// Returns true if TCO is possible between the callers and callees
5084// calling conventions.
5085static bool
5086 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5087 CallingConv::ID CalleeCC) {
5088 // Tail calls are possible with fastcc and ccc.
5089 auto isTailCallableCC = [] (CallingConv::ID CC){
5090 return CC == CallingConv::C || CC == CallingConv::Fast;
5091 };
5092 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5093 return false;
5094
5095 // We can safely tail call both fastcc and ccc callees from a c calling
5096 // convention caller. If the caller is fastcc, we may have less stack space
5097 // than a non-fastcc caller with the same signature so disable tail-calls in
5098 // that case.
5099 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5100}
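// For reference, the combinations the check above accepts (ignoring the other
// TCO constraints): a ccc caller may tail call ccc or fastcc callees, and a
// fastcc caller may tail call fastcc callees, but a fastcc caller may not
// tail call a ccc callee since it may have reserved less stack space than a
// ccc caller with the same signature.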
5101
5102bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5103 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5104 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5105 const SmallVectorImpl<ISD::OutputArg> &Outs,
5106 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5107 bool isCalleeExternalSymbol) const {
5108 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5109
5110 if (DisableSCO && !TailCallOpt) return false;
5111
5112 // Variadic argument functions are not supported.
5113 if (isVarArg) return false;
5114
5115 // Check that the calling conventions are compatible for tco.
5116 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5117 return false;
5118
5119 // A caller that contains any byval parameter is not supported.
5120 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5121 return false;
5122
5123 // A callee that contains any byval parameter is not supported either.
5124 // Note: This is a quick workaround, because in some cases, e.g. when the
5125 // caller's stack size > the callee's stack size, we are still able to apply
5126 // sibling call optimization. For example, gcc is able to do SCO for caller1
5127 // in the following example, but not for caller2.
5128 // struct test {
5129 // long int a;
5130 // char ary[56];
5131 // } gTest;
5132 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5133 // b->a = v.a;
5134 // return 0;
5135 // }
5136 // void caller1(struct test a, struct test c, struct test *b) {
5137 // callee(gTest, b); }
5138 // void caller2(struct test *b) { callee(gTest, b); }
5139 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5140 return false;
5141
5142 // If callee and caller use different calling conventions, we cannot pass
5143 // parameters on stack since offsets for the parameter area may be different.
5144 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5145 return false;
5146
5147 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5148 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5149 // callee potentially have different TOC bases then we cannot tail call since
5150 // we need to restore the TOC pointer after the call.
5151 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5152 // We cannot guarantee this for indirect calls or calls to external functions.
5153 // When PC-Relative addressing is used, the concept of the TOC is no longer
5154 // applicable so this check is not required.
5155 // Check first for indirect calls.
5156 if (!Subtarget.isUsingPCRelativeCalls() &&
5157 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5158 return false;
5159
5160 // Check if we share the TOC base.
5161 if (!Subtarget.isUsingPCRelativeCalls() &&
5162 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5163 return false;
5164
5165 // TCO allows altering callee ABI, so we don't have to check further.
5166 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5167 return true;
5168
5169 if (DisableSCO) return false;
5170
5171 // If the callee uses the same argument list as the caller, then we can
5172 // apply SCO in this case. If not, then we need to check whether the callee
5173 // needs stack slots for passing arguments.
5174 // PC Relative tail calls may not have a CallBase.
5175 // If there is no CallBase we cannot verify if we have the same argument
5176 // list so assume that we don't have the same argument list.
5177 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5178 needStackSlotPassParameters(Subtarget, Outs))
5179 return false;
5180 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5181 return false;
5182
5183 return true;
5184}
5185
5186/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5187/// for tail call optimization. Targets which want to do tail call
5188/// optimization should implement this function.
5189bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5190 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5191 CallingConv::ID CallerCC, bool isVarArg,
5192 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5193 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5194 return false;
5195
5196 // Variable argument functions are not supported.
5197 if (isVarArg)
5198 return false;
5199
5200 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5201 // Functions containing by val parameters are not supported.
5202 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5203 return false;
5204
5205 // Non-PIC/GOT tail calls are supported.
5206 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5207 return true;
5208
5209 // At the moment we can only do local tail calls (in same module, hidden
5210 // or protected) if we are generating PIC.
5211 if (CalleeGV)
5212 return CalleeGV->hasHiddenVisibility() ||
5213 CalleeGV->hasProtectedVisibility();
5214 }
5215
5216 return false;
5217}
5218
5219 /// isBLACompatibleAddress - Return the immediate to use if the specified
5220/// 32-bit value is representable in the immediate field of a BxA instruction.
5221 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5222 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5223 if (!C) return nullptr;
5224
5225 int Addr = C->getZExtValue();
5226 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5227 SignExtend32<26>(Addr) != Addr)
5228 return nullptr; // Top 6 bits have to be sext of immediate.
5229
5230 return DAG
5231 .getConstant(
5232 (int)C->getZExtValue() >> 2, SDLoc(Op),
5233 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5234 .getNode();
5235}
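// Worked example with hypothetical addresses: a callee address of 0x2000 has
// its low two bits clear and survives SignExtend32<26>, so the helper returns
// the constant 0x800 (the address shifted right by two) for use as a BLA
// immediate. An address of 0x2002 fails the alignment check, and 0x4000000
// fails the 26-bit sign-extension check, so both return nullptr.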
5236
5237namespace {
5238
5239struct TailCallArgumentInfo {
5240 SDValue Arg;
5241 SDValue FrameIdxOp;
5242 int FrameIdx = 0;
5243
5244 TailCallArgumentInfo() = default;
5245};
5246
5247} // end anonymous namespace
5248
5249/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5250 static void StoreTailCallArgumentsToStackSlot(
5251 SelectionDAG &DAG, SDValue Chain,
5252 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5253 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5254 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5255 SDValue Arg = TailCallArgs[i].Arg;
5256 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5257 int FI = TailCallArgs[i].FrameIdx;
5258 // Store relative to the frame pointer.
5259 MemOpChains.push_back(DAG.getStore(
5260 Chain, dl, Arg, FIN,
5261 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5262 }
5263}
5264
5265/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5266/// the appropriate stack slot for the tail call optimized function call.
5267 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5268 SDValue OldRetAddr, SDValue OldFP,
5269 int SPDiff, const SDLoc &dl) {
5270 if (SPDiff) {
5271 // Calculate the new stack slot for the return address.
5272 MachineFunction &MF = DAG.getMachineFunction();
5273 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5274 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5275 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5276 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5277 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5278 NewRetAddrLoc, true);
5279 SDValue NewRetAddrFrIdx =
5280 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5281 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5282 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5283 }
5284 return Chain;
5285}
5286
5287 /// CalculateTailCallArgDest - Remember the argument for later processing. Calculate
5288/// the position of the argument.
5289 static void CalculateTailCallArgDest(
5290 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5291 int SPDiff, unsigned ArgOffset,
5292 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5293 int Offset = ArgOffset + SPDiff;
5294 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5295 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5296 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5297 SDValue FIN = DAG.getFrameIndex(FI, VT);
5298 TailCallArgumentInfo Info;
5299 Info.Arg = Arg;
5300 Info.FrameIdxOp = FIN;
5301 Info.FrameIdx = FI;
5302 TailCallArguments.push_back(Info);
5303}
5304
5305 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5306 /// address stack slots. Returns the chain as result and the loaded values in
5307 /// LROpOut/FPOpOut. Used when tail calling.
5308SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5309 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5310 SDValue &FPOpOut, const SDLoc &dl) const {
5311 if (SPDiff) {
5312 // Load the LR and FP stack slot for later adjusting.
5313 LROpOut = getReturnAddrFrameIndex(DAG);
5314 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5315 MachinePointerInfo());
5316 Chain = SDValue(LROpOut.getNode(), 1);
5317 }
5318 return Chain;
5319}
5320
5321/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5322/// by "Src" to address "Dst" of size "Size". Alignment information is
5323/// specified by the specific parameter attribute. The copy will be passed as
5324/// a byval function parameter.
5325/// Sometimes what we are copying is the end of a larger object, the part that
5326/// does not fit in registers.
5327 static SDValue CreateCopyOfByValArgument(SDValue Arg, SDValue Dst,
5328 SDValue Chain, ISD::ArgFlagsTy Flags,
5329 SelectionDAG &DAG, const SDLoc &dl) {
5330 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5331 return DAG.getMemcpy(
5332 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5333 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5334}
5335
5336/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5337/// tail calls.
5338 static void LowerMemOpCallTo(
5339 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5340 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5341 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5342 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5343 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5344 if (!isTailCall) {
5345 if (isVector) {
5346 SDValue StackPtr;
5347 if (isPPC64)
5348 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5349 else
5350 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5351 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5352 DAG.getConstant(ArgOffset, dl, PtrVT));
5353 }
5354 MemOpChains.push_back(
5355 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5356 // Calculate and remember argument location.
5357 } else
5358 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5359 TailCallArguments);
5360}
5361
5362static void
5363 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5364 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5365 SDValue FPOp,
5366 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5367 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5368 // might overwrite each other in case of tail call optimization.
5369 SmallVector<SDValue, 8> MemOpChains2;
5370 // Do not flag preceding copytoreg stuff together with the following stuff.
5371 InGlue = SDValue();
5372 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5373 MemOpChains2, dl);
5374 if (!MemOpChains2.empty())
5375 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5376
5377 // Store the return address to the appropriate stack slot.
5378 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5379
5380 // Emit callseq_end just before tailcall node.
5381 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5382 InGlue = Chain.getValue(1);
5383}
5384
5385// Is this global address that of a function that can be called by name? (as
5386// opposed to something that must hold a descriptor for an indirect call).
5387static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5388 if (GV) {
5389 if (GV->isThreadLocal())
5390 return false;
5391
5392 return GV->getValueType()->isFunctionTy();
5393 }
5394
5395 return false;
5396}
5397
5398SDValue PPCTargetLowering::LowerCallResult(
5399 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5400 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5401 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5402 SmallVector<CCValAssign, 16> RVLocs;
5403 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5404 *DAG.getContext());
5405
5406 CCRetInfo.AnalyzeCallResult(
5407 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5408 ? RetCC_PPC_Cold
5409 : RetCC_PPC);
5410
5411 // Copy all of the result registers out of their specified physreg.
5412 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5413 CCValAssign &VA = RVLocs[i];
5414 assert(VA.isRegLoc() && "Can only return in registers!");
5415
5416 SDValue Val;
5417
5418 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5419 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5420 InGlue);
5421 Chain = Lo.getValue(1);
5422 InGlue = Lo.getValue(2);
5423 VA = RVLocs[++i]; // skip ahead to next loc
5424 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5425 InGlue);
5426 Chain = Hi.getValue(1);
5427 InGlue = Hi.getValue(2);
5428 if (!Subtarget.isLittleEndian())
5429 std::swap (Lo, Hi);
5430 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5431 } else {
5432 Val = DAG.getCopyFromReg(Chain, dl,
5433 VA.getLocReg(), VA.getLocVT(), InGlue);
5434 Chain = Val.getValue(1);
5435 InGlue = Val.getValue(2);
5436 }
5437
5438 switch (VA.getLocInfo()) {
5439 default: llvm_unreachable("Unknown loc info!");
5440 case CCValAssign::Full: break;
5441 case CCValAssign::AExt:
5442 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5443 break;
5444 case CCValAssign::ZExt:
5445 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5446 DAG.getValueType(VA.getValVT()));
5447 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5448 break;
5449 case CCValAssign::SExt:
5450 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5451 DAG.getValueType(VA.getValVT()));
5452 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5453 break;
5454 }
5455
5456 InVals.push_back(Val);
5457 }
5458
5459 return Chain;
5460}
5461
5462static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5463 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5464 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5465 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5466
5467 // PatchPoint calls are not indirect.
5468 if (isPatchPoint)
5469 return false;
5470 
5471 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5472 return false;
5473
5474 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5475 // because the immediate function pointer points to a descriptor instead of
5476 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5477 // pointer immediate points to the global entry point, while the BLA would
5478 // need to jump to the local entry point (see rL211174).
5479 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5480 isBLACompatibleAddress(Callee, DAG))
5481 return false;
5482
5483 return true;
5484}
5485
5486// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5487static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5488 return Subtarget.isAIXABI() ||
5489 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5490}
5491
5492 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5493 const Function &Caller, const SDValue &Callee,
5494 const PPCSubtarget &Subtarget,
5495 const TargetMachine &TM,
5496 bool IsStrictFPCall = false) {
5497 if (CFlags.IsTailCall)
5498 return PPCISD::TC_RETURN;
5499
5500 unsigned RetOpc = 0;
5501 // This is a call through a function pointer.
5502 if (CFlags.IsIndirect) {
5503 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5504 // indirect calls. The save of the caller's TOC pointer to the stack will be
5505 // inserted into the DAG as part of call lowering. The restore of the TOC
5506 // pointer is modeled by using a pseudo instruction for the call opcode that
5507 // represents the 2 instruction sequence of an indirect branch and link,
5508 // immediately followed by a load of the TOC pointer from the stack save
5509 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5510 // as it is not saved or used.
5511 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5512 : PPCISD::BCTRL;
5513 } else if (Subtarget.isUsingPCRelativeCalls()) {
5514 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5515 RetOpc = PPCISD::CALL_NOTOC;
5516 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5517 // The ABIs that maintain a TOC pointer across calls need to have a nop
5518 // immediately following the call instruction if the caller and callee may
5519 // have different TOC bases. At link time if the linker determines the calls
5520 // may not share a TOC base, the call is redirected to a trampoline inserted
5521 // by the linker. The trampoline will (among other things) save the caller's
5522 // TOC pointer at an ABI designated offset in the linkage area and the
5523 // linker will rewrite the nop to be a load of the TOC pointer from the
5524 // linkage area into gpr2.
5525 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5526 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5527 RetOpc =
5528 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5529 } else
5530 RetOpc = PPCISD::CALL;
5531 if (IsStrictFPCall) {
5532 switch (RetOpc) {
5533 default:
5534 llvm_unreachable("Unknown call opcode");
5535 case PPCISD::BCTRL_LOAD_TOC:
5536 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5537 break;
5538 case PPCISD::BCTRL:
5539 RetOpc = PPCISD::BCTRL_RM;
5540 break;
5541 case PPCISD::CALL_NOTOC:
5542 RetOpc = PPCISD::CALL_NOTOC_RM;
5543 break;
5544 case PPCISD::CALL:
5545 RetOpc = PPCISD::CALL_RM;
5546 break;
5547 case PPCISD::CALL_NOP:
5548 RetOpc = PPCISD::CALL_NOP_RM;
5549 break;
5550 }
5551 }
5552 return RetOpc;
5553}
5554
5555static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5556 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5557 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5558 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5559 return SDValue(Dest, 0);
5560
5561 // Returns true if the callee is local, and false otherwise.
5562 auto isLocalCallee = [&]() {
5563 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5564 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5565 
5566 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5567 !isa_and_nonnull<GlobalIFunc>(GV);
5568 };
5569
5570 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5571 // a static relocation model causes some versions of GNU LD (2.17.50, at
5572 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5573 // built with secure-PLT.
5574 bool UsePlt =
5575 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5576 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5577 
5578 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5579 const TargetMachine &TM = Subtarget.getTargetMachine();
5580 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5581 auto *S =
5582 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5583
5584 MVT PtrVT = getPointerTy(DAG.getDataLayout());
5585 return DAG.getMCSymbol(S, PtrVT);
5586 };
5587
5588 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5589 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5590 if (isFunctionGlobalAddress(GV)) {
5591 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5592
5593 if (Subtarget.isAIXABI()) {
5594 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5595 return getAIXFuncEntryPointSymbolSDNode(GV);
5596 }
5597 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5598 UsePlt ? PPCII::MO_PLT : 0);
5599 }
5600
5601 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5602 const char *SymName = S->getSymbol();
5603 if (Subtarget.isAIXABI()) {
5604 // If there exists a user-declared function whose name is the same as the
5606 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5607 if (const Function *F =
5608 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5609 return getAIXFuncEntryPointSymbolSDNode(F);
5610
5611 // On AIX, direct function calls reference the symbol for the function's
5612 // entry point, which is named by prepending a "." before the function's
5613 // C-linkage name. A Qualname is returned here because an external
5614 // function entry point is a csect with XTY_ER property.
5615 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5616 auto &Context = DAG.getMachineFunction().getContext();
5617 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5618 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5619 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5620 return Sec->getQualNameSymbol();
5621 };
5622
5623 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5624 }
5625 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5626 UsePlt ? PPCII::MO_PLT : 0);
5627 }
5628
5629 // No transformation needed.
5630 assert(Callee.getNode() && "What no callee?");
5631 return Callee;
5632}
5633
5634 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5635 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5636 "Expected a CALLSEQ_STARTSDNode.");
5637
5638 // The last operand is the chain, except when the node has glue. If the node
5639 // has glue, then the last operand is the glue, and the chain is the second
5640 // last operand.
5641 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5642 if (LastValue.getValueType() != MVT::Glue)
5643 return LastValue;
5644
5645 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5646}
5647
5648 // Creates the node that moves a function's address into the count register
5649// to prepare for an indirect call instruction.
5650static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5651 SDValue &Glue, SDValue &Chain,
5652 const SDLoc &dl) {
5653 SDValue MTCTROps[] = {Chain, Callee, Glue};
5654 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5655 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5656 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5657 // The glue is the second value produced.
5658 Glue = Chain.getValue(1);
5659}
5660
5661 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5662 SDValue &Glue, SDValue &Chain,
5663 SDValue CallSeqStart,
5664 const CallBase *CB, const SDLoc &dl,
5665 bool hasNest,
5666 const PPCSubtarget &Subtarget) {
5667 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5668 // entry point, but to the function descriptor (the function entry point
5669 // address is part of the function descriptor though).
5670 // The function descriptor is a three doubleword structure with the
5671 // following fields: function entry point, TOC base address and
5672 // environment pointer.
5673 // Thus for a call through a function pointer, the following actions need
5674 // to be performed:
5675 // 1. Save the TOC of the caller in the TOC save area of its stack
5676 // frame (this is done in LowerCall_64SVR4() or LowerCall_AIX()).
5677 // 2. Load the address of the function entry point from the function
5678 // descriptor.
5679 // 3. Load the TOC of the callee from the function descriptor into r2.
5680 // 4. Load the environment pointer from the function descriptor into
5681 // r11.
5682 // 5. Branch to the function entry point address.
5683 // 6. On return of the callee, the TOC of the caller needs to be
5684 // restored (this is done in FinishCall()).
5685 //
5686 // The loads are scheduled at the beginning of the call sequence, and the
5687 // register copies are flagged together to ensure that no other
5688 // operations can be scheduled in between. E.g. without flagging the
5689 // copies together, a TOC access in the caller could be scheduled between
5690 // the assignment of the callee TOC and the branch to the callee, which leads
5691 // to incorrect code.
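// Illustrative sketch only (not used by the code below): on 64-bit ELFv1 the
// descriptor is three consecutive doublewords, which is what the three loads
// built below read; the exact offsets come from descriptorTOCAnchorOffset()
// and descriptorEnvironmentPointerOffset().
//
//   struct FunctionDescriptor { // hypothetical C view of the layout
//     uint64_t EntryPoint;      // branched to via the CTR
//     uint64_t TOCBase;         // copied into the TOC pointer register (r2)
//     uint64_t EnvironmentPtr;  // copied into r11 unless 'nest' is used
//   };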
5692
5693 // Start by loading the function address from the descriptor.
5694 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5695 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5696 ? (MachineMemOperand::MODereferenceable |
5697 MachineMemOperand::MOInvariant)
5698 : MachineMemOperand::MONone;
5699 
5700 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5701
5702 // Registers used in building the DAG.
5703 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5704 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5705
5706 // Offsets of descriptor members.
5707 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5708 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5709
5710 const MVT RegVT = Subtarget.getScalarIntVT();
5711 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5712
5713 // One load for the function's entry point address.
5714 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5715 Alignment, MMOFlags);
5716
5717 // One for loading the TOC anchor for the module that contains the called
5718 // function.
5719 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5720 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5721 SDValue TOCPtr =
5722 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5723 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5724
5725 // One for loading the environment pointer.
5726 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5727 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5728 SDValue LoadEnvPtr =
5729 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5730 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5731
5732
5733 // Then copy the newly loaded TOC anchor to the TOC pointer.
5734 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5735 Chain = TOCVal.getValue(0);
5736 Glue = TOCVal.getValue(1);
5737
5738 // If the function call has an explicit 'nest' parameter, it takes the
5739 // place of the environment pointer.
5740 assert((!hasNest || !Subtarget.isAIXABI()) &&
5741 "Nest parameter is not supported on AIX.");
5742 if (!hasNest) {
5743 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5744 Chain = EnvVal.getValue(0);
5745 Glue = EnvVal.getValue(1);
5746 }
5747
5748 // The rest of the indirect call sequence is the same as the non-descriptor
5749 // DAG.
5750 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5751}
5752
5753static void
5754 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5755 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5756 SelectionDAG &DAG,
5757 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5758 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5759 const PPCSubtarget &Subtarget) {
5760 const bool IsPPC64 = Subtarget.isPPC64();
5761 // MVT for a general purpose register.
5762 const MVT RegVT = Subtarget.getScalarIntVT();
5763
5764 // First operand is always the chain.
5765 Ops.push_back(Chain);
5766
5767 // If it's a direct call pass the callee as the second operand.
5768 if (!CFlags.IsIndirect)
5769 Ops.push_back(Callee);
5770 else {
5771 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5772
5773 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5774 // on the stack (this would have been done in `LowerCall_64SVR4` or
5775 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5776 // represents both the indirect branch and a load that restores the TOC
5777 // pointer from the linkage area. The operand for the TOC restore is an add
5778 // of the TOC save offset to the stack pointer. This must be the second
5779 // operand: after the chain input but before any other variadic arguments.
5780 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5781 // saved or used.
5782 if (isTOCSaveRestoreRequired(Subtarget)) {
5783 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5784
5785 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5786 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5787 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5788 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5789 Ops.push_back(AddTOC);
5790 }
5791
5792 // Add the register used for the environment pointer.
5793 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5794 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5795 RegVT));
5796
5797
5798 // Add CTR register as callee so a bctr can be emitted later.
5799 if (CFlags.IsTailCall)
5800 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5801 }
5802
5803 // If this is a tail call add stack pointer delta.
5804 if (CFlags.IsTailCall)
5805 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5806
5807 // Add argument registers to the end of the list so that they are known live
5808 // into the call.
5809 for (const auto &[Reg, N] : RegsToPass)
5810 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5811
5812 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5813 // no way to mark dependencies as implicit here.
5814 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5815 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5816 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5817 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5818
5819 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5820 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5821 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5822
5823 // Add a register mask operand representing the call-preserved registers.
5824 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5825 const uint32_t *Mask =
5826 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5827 assert(Mask && "Missing call preserved mask for calling convention");
5828 Ops.push_back(DAG.getRegisterMask(Mask));
5829
5830 // If the glue is valid, it is the last operand.
5831 if (Glue.getNode())
5832 Ops.push_back(Glue);
5833}
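// For reference, a sketch of the operand order this helper produces for an
// indirect call on a TOC-based ABI (parenthesized entries are conditional):
//   Chain, (TOC-restore address), (environment-pointer register),
//   (CTR register and SPDiff for tail calls), argument registers...,
//   (TOC pointer register), (CR1EQ for 32-bit SVR4 varargs),
//   call-preserved register mask, (glue)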
5834
5835SDValue PPCTargetLowering::FinishCall(
5836 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5837 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5838 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5839 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5840 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5841
5842 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5843 Subtarget.isAIXABI())
5844 setUsesTOCBasePtr(DAG);
5845
5846 unsigned CallOpc =
5847 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5848 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5849
5850 if (!CFlags.IsIndirect)
5851 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5852 else if (Subtarget.usesFunctionDescriptors())
5853 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5854 dl, CFlags.HasNest, Subtarget);
5855 else
5856 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5857
5858 // Build the operand list for the call instruction.
5859 SmallVector<SDValue, 8> Ops;
5860 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5861 SPDiff, Subtarget);
5862
5863 // Emit tail call.
5864 if (CFlags.IsTailCall) {
5866 // Indirect tail calls when using PC Relative calls do not have the same
5866 // constraints.
5867 assert(((Callee.getOpcode() == ISD::Register &&
5868 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5869 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5870 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5871 isa<ConstantSDNode>(Callee) ||
5872 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5873 "Expecting a global address, external symbol, absolute value, "
5874 "register or an indirect tail call when PC Relative calls are "
5875 "used.");
5876 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5877 assert(CallOpc == PPCISD::TC_RETURN &&
5878 "Unexpected call opcode for a tail call.");
5879 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5880 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5881 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5882 return Ret;
5883 }
5884
5885 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5886 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5887 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5888 Glue = Chain.getValue(1);
5889
5890 // When performing tail call optimization the callee pops its arguments off
5891 // the stack. Account for this here so these bytes can be pushed back on in
5892 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5893 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5894 getTargetMachine().Options.GuaranteedTailCallOpt)
5895 ? NumBytes
5896 : 0;
5897
5898 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5899 Glue = Chain.getValue(1);
5900
5901 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5902 DAG, InVals);
5903}
5904
5905 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5906 CallingConv::ID CalleeCC = CB->getCallingConv();
5907 const Function *CallerFunc = CB->getCaller();
5908 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5909 const Function *CalleeFunc = CB->getCalledFunction();
5910 if (!CalleeFunc)
5911 return false;
5912 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5913
5914 SmallVector<ISD::OutputArg, 2> Outs;
5915 SmallVector<ISD::InputArg, 2> Ins;
5916 
5917 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5918 CalleeFunc->getAttributes(), Outs, *this,
5919 CalleeFunc->getDataLayout());
5920
5921 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5922 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5923 false /*isCalleeExternalSymbol*/);
5924}
5925
5926bool PPCTargetLowering::isEligibleForTCO(
5927 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5928 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5929 const SmallVectorImpl<ISD::OutputArg> &Outs,
5930 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5931 bool isCalleeExternalSymbol) const {
5932 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5933 return false;
5934
5935 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5936 return IsEligibleForTailCallOptimization_64SVR4(
5937 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5938 isCalleeExternalSymbol);
5939 else
5940 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5941 isVarArg, Ins);
5942}
5943
5944SDValue
5945PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5946 SmallVectorImpl<SDValue> &InVals) const {
5947 SelectionDAG &DAG = CLI.DAG;
5948 SDLoc &dl = CLI.DL;
5949 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5950 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5951 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5952 SDValue Chain = CLI.Chain;
5953 SDValue Callee = CLI.Callee;
5954 bool &isTailCall = CLI.IsTailCall;
5955 CallingConv::ID CallConv = CLI.CallConv;
5956 bool isVarArg = CLI.IsVarArg;
5957 bool isPatchPoint = CLI.IsPatchPoint;
5958 const CallBase *CB = CLI.CB;
5959
5960 if (isTailCall) {
5961 MachineFunction &MF = DAG.getMachineFunction();
5962 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5963 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5964 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5965 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5966
5967 isTailCall =
5968 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5969 &(MF.getFunction()), IsCalleeExternalSymbol);
5970 if (isTailCall) {
5971 ++NumTailCalls;
5972 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5973 ++NumSiblingCalls;
5974
5975 // PC Relative calls no longer guarantee that the callee is a Global
5976 // Address Node. The callee could be an indirect tail call in which
5977 // case the SDValue for the callee could be a load (to load the address
5978 // of a function pointer) or it may be a register copy (to move the
5979 // address of the callee from a function parameter into a virtual
5980 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5981 assert((Subtarget.isUsingPCRelativeCalls() ||
5982 isa<GlobalAddressSDNode>(Callee)) &&
5983 "Callee should be an llvm::Function object.");
5984
5985 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5986 << "\nTCO callee: ");
5987 LLVM_DEBUG(Callee.dump());
5988 }
5989 }
5990
5991 if (!isTailCall && CB && CB->isMustTailCall())
5992 report_fatal_error("failed to perform tail call elimination on a call "
5993 "site marked musttail");
5994
5995 // When long calls (i.e. indirect calls) are always used, calls are always
5996 // made via function pointer. If we have a function name, first translate it
5997 // into a pointer.
5998 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5999 !isTailCall)
6000 Callee = LowerGlobalAddress(Callee, DAG);
6001
6002 CallFlags CFlags(
6003 CallConv, isTailCall, isVarArg, isPatchPoint,
6004 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
6005 // hasNest
6006 Subtarget.is64BitELFABI() &&
6007 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
6008 CLI.NoMerge);
6009
6010 if (Subtarget.isAIXABI())
6011 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6012 InVals, CB);
6013
6014 assert(Subtarget.isSVR4ABI());
6015 if (Subtarget.isPPC64())
6016 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6017 InVals, CB);
6018 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6019 InVals, CB);
6020}
6021
6022SDValue PPCTargetLowering::LowerCall_32SVR4(
6023 SDValue Chain, SDValue Callee, CallFlags CFlags,
6024 const SmallVectorImpl<ISD::OutputArg> &Outs,
6025 const SmallVectorImpl<SDValue> &OutVals,
6026 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6027 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6028 const CallBase *CB) const {
6029 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
6030 // of the 32-bit SVR4 ABI stack frame layout.
6031
6032 const CallingConv::ID CallConv = CFlags.CallConv;
6033 const bool IsVarArg = CFlags.IsVarArg;
6034 const bool IsTailCall = CFlags.IsTailCall;
6035
6036 assert((CallConv == CallingConv::C ||
6037 CallConv == CallingConv::Cold ||
6038 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6039
6040 const Align PtrAlign(4);
6041
6042 MachineFunction &MF = DAG.getMachineFunction();
6043
6044 // Mark this function as potentially containing a tail call. As a
6045 // consequence, the frame pointer will be used for dynamic stack allocation
6046 // and for restoring the caller's stack pointer in this function's epilog.
6047 // This is done because the tail-called function might overwrite the value
6048 // in this function's (MF) stack pointer stack slot 0(SP).
6049 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6050 CallConv == CallingConv::Fast)
6051 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6052
6053 // Count how many bytes are to be pushed on the stack, including the linkage
6054 // area, parameter list area and the part of the local variable space which
6055 // contains copies of aggregates which are passed by value.
6056
6057 // Assign locations to all of the outgoing arguments.
6058 SmallVector<CCValAssign, 16> ArgLocs;
6059 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6060
6061 // Reserve space for the linkage area on the stack.
6062 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6063 PtrAlign);
6064
6065 if (IsVarArg) {
6066 // Handle fixed and variable vector arguments differently.
6067 // Fixed vector arguments go into registers as long as registers are
6068 // available. Variable vector arguments always go into memory.
6069 unsigned NumArgs = Outs.size();
6070
6071 for (unsigned i = 0; i != NumArgs; ++i) {
6072 MVT ArgVT = Outs[i].VT;
6073 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6074 bool Result;
6075
6076 if (!ArgFlags.isVarArg()) {
6077 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6078 Outs[i].OrigTy, CCInfo);
6079 } else {
6080 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6081 ArgFlags, Outs[i].OrigTy, CCInfo);
6082 }
6083
6084 if (Result) {
6085#ifndef NDEBUG
6086 errs() << "Call operand #" << i << " has unhandled type "
6087 << ArgVT << "\n";
6088#endif
6089 llvm_unreachable(nullptr);
6090 }
6091 }
6092 } else {
6093 // All arguments are treated the same.
6094 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6095 }
6096
6097 // Assign locations to all of the outgoing aggregate by value arguments.
6098 SmallVector<CCValAssign, 16> ByValArgLocs;
6099 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6100
6101 // Reserve stack space for the allocations in CCInfo.
6102 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6103
6104 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6105
6106 // Size of the linkage area, the parameter list area, and the part of the
6107 // local variable space where copies of aggregates passed by value are
6108 // stored.
6109 unsigned NumBytes = CCByValInfo.getStackSize();
6110
6111 // Calculate by how many bytes the stack has to be adjusted in case of tail
6112 // call optimization.
6113 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6114
6115 // Adjust the stack pointer for the new arguments...
6116 // These operations are automatically eliminated by the prolog/epilog pass
6117 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6118 SDValue CallSeqStart = Chain;
6119
6120 // Load the return address and frame pointer so they can be moved somewhere
6121 // else later.
6122 SDValue LROp, FPOp;
6123 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6124
6125 // Set up a copy of the stack pointer for use loading and storing any
6126 // arguments that may not fit in the registers available for argument
6127 // passing.
6128 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6129
6131 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6132 SmallVector<SDValue, 8> MemOpChains;
6133
6134 bool seenFloatArg = false;
6135 // Walk the register/memloc assignments, inserting copies/loads.
6136 // i - Tracks the index into the list of registers allocated for the call
6137 // RealArgIdx - Tracks the index into the list of actual function arguments
6138 // j - Tracks the index into the list of byval arguments
6139 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6140 i != e;
6141 ++i, ++RealArgIdx) {
6142 CCValAssign &VA = ArgLocs[i];
6143 SDValue Arg = OutVals[RealArgIdx];
6144 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6145
6146 if (Flags.isByVal()) {
6147 // Argument is an aggregate which is passed by value, thus we need to
6148 // create a copy of it in the local variable space of the current stack
6149 // frame (which is the stack frame of the caller) and pass the address of
6150 // this copy to the callee.
6151 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6152 CCValAssign &ByValVA = ByValArgLocs[j++];
6153 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6154
6155 // Memory reserved in the local variable space of the caller's stack frame.
6156 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6157
6158 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6159 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6160 StackPtr, PtrOff);
6161
6162 // Create a copy of the argument in the local area of the current
6163 // stack frame.
6164 SDValue MemcpyCall =
6165 CreateCopyOfByValArgument(Arg, PtrOff,
6166 CallSeqStart.getNode()->getOperand(0),
6167 Flags, DAG, dl);
6168
6169 // This must go outside the CALLSEQ_START..END.
6170 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6171 SDLoc(MemcpyCall));
6172 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6173 NewCallSeqStart.getNode());
6174 Chain = CallSeqStart = NewCallSeqStart;
6175
6176 // Pass the address of the aggregate copy on the stack either in a
6177 // physical register or in the parameter list area of the current stack
6178 // frame to the callee.
6179 Arg = PtrOff;
6180 }
6181
6182 // When useCRBits() is true, there can be i1 arguments.
6183 // It is because getRegisterType(MVT::i1) => MVT::i1,
6184 // and for other integer types getRegisterType() => MVT::i32.
6185 // Extend i1 and ensure callee will get i32.
6186 if (Arg.getValueType() == MVT::i1)
6187 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6188 dl, MVT::i32, Arg);
6189
6190 if (VA.isRegLoc()) {
6191 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6192 // Put argument in a physical register.
6193 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6194 bool IsLE = Subtarget.isLittleEndian();
6195 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6196 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6197 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6198 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6199 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6200 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6201 SVal.getValue(0)));
6202 } else
6203 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6204 } else {
6205 // Put argument in the parameter list area of the current stack frame.
6206 assert(VA.isMemLoc());
6207 unsigned LocMemOffset = VA.getLocMemOffset();
6208
6209 if (!IsTailCall) {
6210 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6211 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6212 StackPtr, PtrOff);
6213
6214 MemOpChains.push_back(
6215 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6216 } else {
6217 // Calculate and remember argument location.
6218 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6219 TailCallArguments);
6220 }
6221 }
6222 }
6223
6224 if (!MemOpChains.empty())
6225 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6226
6227 // Build a sequence of copy-to-reg nodes chained together with token chain
6228 // and flag operands which copy the outgoing args into the appropriate regs.
6229 SDValue InGlue;
6230 for (const auto &[Reg, N] : RegsToPass) {
6231 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6232 InGlue = Chain.getValue(1);
6233 }
6234
6235 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6236 // registers.
6237 if (IsVarArg) {
6238 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6239 SDValue Ops[] = { Chain, InGlue };
6240
6241 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6242 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6243
6244 InGlue = Chain.getValue(1);
6245 }
6246
6247 if (IsTailCall)
6248 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6249 TailCallArguments);
6250
6251 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6252 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6253}
6254
6255// Copy an argument into memory, being careful to do this outside the
6256// call sequence for the call to which the argument belongs.
6257SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6258 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6259 SelectionDAG &DAG, const SDLoc &dl) const {
6260 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6261 CallSeqStart.getNode()->getOperand(0),
6262 Flags, DAG, dl);
6263 // The MEMCPY must go outside the CALLSEQ_START..END.
6264 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6265 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6266 SDLoc(MemcpyCall));
6267 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6268 NewCallSeqStart.getNode());
6269 return NewCallSeqStart;
6270}
6271
6272SDValue PPCTargetLowering::LowerCall_64SVR4(
6273 SDValue Chain, SDValue Callee, CallFlags CFlags,
6274 const SmallVectorImpl<ISD::OutputArg> &Outs,
6275 const SmallVectorImpl<SDValue> &OutVals,
6276 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6277 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6278 const CallBase *CB) const {
6279 bool isELFv2ABI = Subtarget.isELFv2ABI();
6280 bool isLittleEndian = Subtarget.isLittleEndian();
6281 unsigned NumOps = Outs.size();
6282 bool IsSibCall = false;
6283 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6284
6285 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6286 unsigned PtrByteSize = 8;
6287
6288 MachineFunction &MF = DAG.getMachineFunction();
6289
6290 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6291 IsSibCall = true;
6292
6293 // Mark this function as potentially containing a tail call. As a
6294 // consequence, the frame pointer will be used for dynamic stack allocation
6295 // and for restoring the caller's stack pointer in this function's epilog.
6296 // This is done because the tail-called function might overwrite the value
6297 // in this function's (MF) stack pointer stack slot 0(SP).
6298 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6299 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6300
6301 assert(!(IsFastCall && CFlags.IsVarArg) &&
6302 "fastcc not supported on varargs functions");
6303
6304 // Count how many bytes are to be pushed on the stack, including the linkage
6305 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6306 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6307 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
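// As a rough byte-offset sketch of those linkage areas (illustrative only):
//   ELFv1: 0 back chain, 8 CR save, 16 LR save, 24/32 reserved, 40 TOC save
//   ELFv2: 0 back chain, 8 CR save, 16 LR save, 24 TOC save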
6308 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6309 unsigned NumBytes = LinkageSize;
6310 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6311
6312 static const MCPhysReg GPR[] = {
6313 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6314 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6315 };
6316 static const MCPhysReg VR[] = {
6317 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6318 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6319 };
6320
6321 const unsigned NumGPRs = std::size(GPR);
6322 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6323 const unsigned NumVRs = std::size(VR);
6324
6325 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6326 // can be passed to the callee in registers.
6327 // For the fast calling convention, there is another check below.
6328 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6329 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6330 if (!HasParameterArea) {
6331 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6332 unsigned AvailableFPRs = NumFPRs;
6333 unsigned AvailableVRs = NumVRs;
6334 unsigned NumBytesTmp = NumBytes;
6335 for (unsigned i = 0; i != NumOps; ++i) {
6336 if (Outs[i].Flags.isNest()) continue;
6337 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6338 PtrByteSize, LinkageSize, ParamAreaSize,
6339 NumBytesTmp, AvailableFPRs, AvailableVRs))
6340 HasParameterArea = true;
6341 }
6342 }
6343
6344 // When using the fast calling convention, we don't provide backing for
6345 // arguments that will be in registers.
6346 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6347
6348 // Avoid allocating parameter area for fastcc functions if all the arguments
6349 // can be passed in the registers.
6350 if (IsFastCall)
6351 HasParameterArea = false;
6352
6353 // Add up all the space actually used.
6354 for (unsigned i = 0; i != NumOps; ++i) {
6355 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6356 EVT ArgVT = Outs[i].VT;
6357 EVT OrigVT = Outs[i].ArgVT;
6358
6359 if (Flags.isNest())
6360 continue;
6361
6362 if (IsFastCall) {
6363 if (Flags.isByVal()) {
6364 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6365 if (NumGPRsUsed > NumGPRs)
6366 HasParameterArea = true;
6367 } else {
6368 switch (ArgVT.getSimpleVT().SimpleTy) {
6369 default: llvm_unreachable("Unexpected ValueType for argument!");
6370 case MVT::i1:
6371 case MVT::i32:
6372 case MVT::i64:
6373 if (++NumGPRsUsed <= NumGPRs)
6374 continue;
6375 break;
6376 case MVT::v4i32:
6377 case MVT::v8i16:
6378 case MVT::v16i8:
6379 case MVT::v2f64:
6380 case MVT::v2i64:
6381 case MVT::v1i128:
6382 case MVT::f128:
6383 if (++NumVRsUsed <= NumVRs)
6384 continue;
6385 break;
6386 case MVT::v4f32:
6387 if (++NumVRsUsed <= NumVRs)
6388 continue;
6389 break;
6390 case MVT::f32:
6391 case MVT::f64:
6392 if (++NumFPRsUsed <= NumFPRs)
6393 continue;
6394 break;
6395 }
6396 HasParameterArea = true;
6397 }
6398 }
6399
6400 /* Respect alignment of argument on the stack. */
6401 auto Alignment =
6402 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6403 NumBytes = alignTo(NumBytes, Alignment);
6404
6405 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6406 if (Flags.isInConsecutiveRegsLast())
6407 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6408 }
6409
6410 unsigned NumBytesActuallyUsed = NumBytes;
6411
6412 // In the old ELFv1 ABI, the prolog code of the callee may store up to 8 GPR
6413 // argument registers to the stack, allowing va_start to index over them in
6414 // memory if the callee is varargs.
6415 // Because we cannot tell if this is needed on the caller side, we have to
6416 // conservatively assume that it is needed. As such, make sure we have at
6417 // least enough stack space for the caller to store the 8 GPRs.
6418 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6419 // really requires memory operands, e.g. a vararg function.
6420 if (HasParameterArea)
6421 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6422 else
6423 NumBytes = LinkageSize;
6424
6425 // Tail call needs the stack to be aligned.
6426 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6427 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6428
6429 int SPDiff = 0;
6430
6431 // Calculate by how many bytes the stack has to be adjusted in case of tail
6432 // call optimization.
6433 if (!IsSibCall)
6434 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6435
6436 // To protect arguments on the stack from being clobbered in a tail call,
6437 // force all the loads to happen before doing any other lowering.
6438 if (CFlags.IsTailCall)
6439 Chain = DAG.getStackArgumentTokenFactor(Chain);
6440
6441 // Adjust the stack pointer for the new arguments...
6442 // These operations are automatically eliminated by the prolog/epilog pass
6443 if (!IsSibCall)
6444 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6445 SDValue CallSeqStart = Chain;
6446
6447 // Load the return address and frame pointer so they can be moved somewhere else
6448 // later.
6449 SDValue LROp, FPOp;
6450 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6451
6452 // Set up a copy of the stack pointer for loading and storing any
6453 // arguments that may not fit in the registers available for argument
6454 // passing.
6455 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6456
6457 // Figure out which arguments are going to go in registers, and which in
6458 // memory. Also, if this is a vararg function, floating point operations
6459 // must be stored to our stack, and loaded into integer regs as well, if
6460 // any integer regs are available for argument passing.
6461 unsigned ArgOffset = LinkageSize;
6462
6464 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6465
6466 SmallVector<SDValue, 8> MemOpChains;
6467 for (unsigned i = 0; i != NumOps; ++i) {
6468 SDValue Arg = OutVals[i];
6469 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6470 EVT ArgVT = Outs[i].VT;
6471 EVT OrigVT = Outs[i].ArgVT;
6472
6473 // PtrOff will be used to store the current argument to the stack if a
6474 // register cannot be found for it.
6475 SDValue PtrOff;
6476
6477 // We re-align the argument offset for each argument. Under the fast
6478 // calling convention we defer that until we know the argument will
6479 // actually use a stack slot.
6480 auto ComputePtrOff = [&]() {
6481 /* Respect alignment of argument on the stack. */
6482 auto Alignment =
6483 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6484 ArgOffset = alignTo(ArgOffset, Alignment);
6485
6486 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6487
6488 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6489 };
6490
6491 if (!IsFastCall) {
6492 ComputePtrOff();
6493
6494 /* Compute GPR index associated with argument offset. */
6495 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6496 GPR_idx = std::min(GPR_idx, NumGPRs);
6497 }
6498
6499 // Promote integers to 64-bit values.
6500 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6501 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6502 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6503 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6504 }
6505
6506 // FIXME memcpy is used way more than necessary. Correctness first.
6507 // Note: "by value" is code for passing a structure by value, not
6508 // basic types.
6509 if (Flags.isByVal()) {
6510 // Note: Size includes alignment padding, so
6511 // struct x { short a; char b; }
6512 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6513 // These are the proper values we need for right-justifying the
6514 // aggregate in a parameter register.
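// Illustrative note (assumption, not in the original source): on a
// big-endian target a 3-byte aggregate is memcpy'd to PtrOff + (PtrByteSize - 3),
// so it lands in the rightmost bytes of its parameter doubleword and matches
// the memory image of a right-justified register value.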
6515 unsigned Size = Flags.getByValSize();
6516
6517 // An empty aggregate parameter takes up no storage and no
6518 // registers.
6519 if (Size == 0)
6520 continue;
6521
6522 if (IsFastCall)
6523 ComputePtrOff();
6524
6525 // All aggregates smaller than 8 bytes must be passed right-justified.
6526 if (Size==1 || Size==2 || Size==4) {
6527 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6528 if (GPR_idx != NumGPRs) {
6529 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6530 MachinePointerInfo(), VT);
6531 MemOpChains.push_back(Load.getValue(1));
6532 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6533
6534 ArgOffset += PtrByteSize;
6535 continue;
6536 }
6537 }
6538
6539 if (GPR_idx == NumGPRs && Size < 8) {
6540 SDValue AddPtr = PtrOff;
6541 if (!isLittleEndian) {
6542 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6543 PtrOff.getValueType());
6544 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6545 }
6546 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6547 CallSeqStart,
6548 Flags, DAG, dl);
6549 ArgOffset += PtrByteSize;
6550 continue;
6551 }
6552 // Copy the object to the parameter save area if it cannot be passed
6553 // entirely in registers.
6554 // FIXME: we only need to copy the parts which need to be passed in
6555 // parameter save area. For the parts passed by registers, we don't need
6556 // to copy them to the stack although we need to allocate space for them
6557 // in parameter save area.
6558 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6559 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6560 CallSeqStart,
6561 Flags, DAG, dl);
6562
6563 // When a register is available, pass a small aggregate right-justified.
6564 if (Size < 8 && GPR_idx != NumGPRs) {
6565 // The easiest way to get this right-justified in a register
6566 // is to copy the structure into the rightmost portion of a
6567 // local variable slot, then load the whole slot into the
6568 // register.
6569 // FIXME: The memcpy seems to produce pretty awful code for
6570 // small aggregates, particularly for packed ones.
6571 // FIXME: It would be preferable to use the slot in the
6572 // parameter save area instead of a new local variable.
6573 SDValue AddPtr = PtrOff;
6574 if (!isLittleEndian) {
6575 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6576 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6577 }
6578 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6579 CallSeqStart,
6580 Flags, DAG, dl);
6581
6582 // Load the slot into the register.
6583 SDValue Load =
6584 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6585 MemOpChains.push_back(Load.getValue(1));
6586 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6587
6588 // Done with this argument.
6589 ArgOffset += PtrByteSize;
6590 continue;
6591 }
6592
6593 // For aggregates larger than PtrByteSize, copy the pieces of the
6594 // object that fit into registers from the parameter save area.
6595 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6596 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6597 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6598 if (GPR_idx != NumGPRs) {
6599 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6600 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6601 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6602 MachinePointerInfo(), ObjType);
6603
6604 MemOpChains.push_back(Load.getValue(1));
6605 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6606 ArgOffset += PtrByteSize;
6607 } else {
6608 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6609 break;
6610 }
6611 }
6612 continue;
6613 }
6614
6615 switch (Arg.getSimpleValueType().SimpleTy) {
6616 default: llvm_unreachable("Unexpected ValueType for argument!");
6617 case MVT::i1:
6618 case MVT::i32:
6619 case MVT::i64:
6620 if (Flags.isNest()) {
6621 // The 'nest' parameter, if any, is passed in R11.
6622 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6623 break;
6624 }
6625
6626 // These can be scalar arguments or elements of an integer array type
6627 // passed directly. Clang may use those instead of "byval" aggregate
6628 // types to avoid forcing arguments to memory unnecessarily.
6629 if (GPR_idx != NumGPRs) {
6630 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6631 } else {
6632 if (IsFastCall)
6633 ComputePtrOff();
6634
6635 assert(HasParameterArea &&
6636 "Parameter area must exist to pass an argument in memory.");
6637 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6638 true, CFlags.IsTailCall, false, MemOpChains,
6639 TailCallArguments, dl);
6640 if (IsFastCall)
6641 ArgOffset += PtrByteSize;
6642 }
6643 if (!IsFastCall)
6644 ArgOffset += PtrByteSize;
6645 break;
6646 case MVT::f32:
6647 case MVT::f64: {
6648 // These can be scalar arguments or elements of a float array type
6649 // passed directly. The latter are used to implement ELFv2 homogeneous
6650 // float aggregates.
6651
6652 // Named arguments go into FPRs first, and once they overflow, the
6653 // remaining arguments go into GPRs and then the parameter save area.
6654 // Unnamed arguments for vararg functions always go to GPRs and
6655 // then the parameter save area. For now, always pass vararg arguments
6656 // in both locations (FPR *and* GPR or stack slot).
6657 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6658 bool NeededLoad = false;
6659
6660 // First load the argument into the next available FPR.
6661 if (FPR_idx != NumFPRs)
6662 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6663
6664 // Next, load the argument into GPR or stack slot if needed.
6665 if (!NeedGPROrStack)
6666 ;
6667 else if (GPR_idx != NumGPRs && !IsFastCall) {
6668 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6669 // once we support fp <-> gpr moves.
6670
6671 // In the non-vararg case, this can only ever happen in the
6672 // presence of f32 array types, since otherwise we never run
6673 // out of FPRs before running out of GPRs.
6674 SDValue ArgVal;
6675
6676 // Double values are always passed in a single GPR.
6677 if (Arg.getValueType() != MVT::f32) {
6678 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6679
6680 // Non-array float values are extended and passed in a GPR.
6681 } else if (!Flags.isInConsecutiveRegs()) {
6682 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6683 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6684
6685 // If we have an array of floats, we collect every odd element
6686 // together with its predecessor into one GPR.
6687 } else if (ArgOffset % PtrByteSize != 0) {
6688 SDValue Lo, Hi;
6689 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6690 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6691 if (!isLittleEndian)
6692 std::swap(Lo, Hi);
6693 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6694
6695 // The final element, if even, goes into the first half of a GPR.
6696 } else if (Flags.isInConsecutiveRegsLast()) {
6697 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6698 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6699 if (!isLittleEndian)
6700 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6701 DAG.getConstant(32, dl, MVT::i32));
6702
6703 // Non-final even elements are skipped; they will be handled
6704 // together with the subsequent argument on the next go-around.
6705 } else
6706 ArgVal = SDValue();
6707
6708 if (ArgVal.getNode())
6709 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6710 } else {
6711 if (IsFastCall)
6712 ComputePtrOff();
6713
6714 // Single-precision floating-point values are mapped to the
6715 // second (rightmost) word of the stack doubleword.
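// Illustrative note (not from the original source): on big-endian targets
// an f32 whose doubleword slot starts at parameter-area offset N is stored
// at N + 4 (see ConstFour below), so the callee can load it from the
// rightmost word of that doubleword.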
6716 if (Arg.getValueType() == MVT::f32 &&
6717 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6718 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6719 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6720 }
6721
6722 assert(HasParameterArea &&
6723 "Parameter area must exist to pass an argument in memory.");
6724 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6725 true, CFlags.IsTailCall, false, MemOpChains,
6726 TailCallArguments, dl);
6727
6728 NeededLoad = true;
6729 }
6730 // When passing an array of floats, the array occupies consecutive
6731 // space in the argument area; only round up to the next doubleword
6732 // at the end of the array. Otherwise, each float takes 8 bytes.
6733 if (!IsFastCall || NeededLoad) {
6734 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6735 Flags.isInConsecutiveRegs()) ? 4 : 8;
6736 if (Flags.isInConsecutiveRegsLast())
6737 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6738 }
6739 break;
6740 }
6741 case MVT::v4f32:
6742 case MVT::v4i32:
6743 case MVT::v8i16:
6744 case MVT::v16i8:
6745 case MVT::v2f64:
6746 case MVT::v2i64:
6747 case MVT::v1i128:
6748 case MVT::f128:
6749 // These can be scalar arguments or elements of a vector array type
6750 // passed directly. The latter are used to implement ELFv2 homogeneous
6751 // vector aggregates.
6752
6753 // For a varargs call, named arguments go into VRs or on the stack as
6754 // usual; unnamed arguments always go to the stack or the corresponding
6755 // GPRs when within range. For now, we always put the value in both
6756 // locations (or even all three).
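// Illustrative note (not from the original source): for a 16-byte vector
// vararg with a VR and GPRs still free, the value is stored to its
// parameter-save-area slot, re-loaded into a VR, and the doublewords
// covering that slot are also loaded into any remaining GPRs, so the callee
// may pick up the argument from whichever location it expects.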
6757 if (CFlags.IsVarArg) {
6758 assert(HasParameterArea &&
6759 "Parameter area must exist if we have a varargs call.");
6760 // We could elide this store in the case where the object fits
6761 // entirely in R registers. Maybe later.
6762 SDValue Store =
6763 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6764 MemOpChains.push_back(Store);
6765 if (VR_idx != NumVRs) {
6766 SDValue Load =
6767 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6768 MemOpChains.push_back(Load.getValue(1));
6769 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6770 }
6771 ArgOffset += 16;
6772 for (unsigned i=0; i<16; i+=PtrByteSize) {
6773 if (GPR_idx == NumGPRs)
6774 break;
6775 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6776 DAG.getConstant(i, dl, PtrVT));
6777 SDValue Load =
6778 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6779 MemOpChains.push_back(Load.getValue(1));
6780 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6781 }
6782 break;
6783 }
6784
6785 // Non-varargs Altivec params go into VRs or on the stack.
6786 if (VR_idx != NumVRs) {
6787 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6788 } else {
6789 if (IsFastCall)
6790 ComputePtrOff();
6791
6792 assert(HasParameterArea &&
6793 "Parameter area must exist to pass an argument in memory.");
6794 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6795 true, CFlags.IsTailCall, true, MemOpChains,
6796 TailCallArguments, dl);
6797 if (IsFastCall)
6798 ArgOffset += 16;
6799 }
6800
6801 if (!IsFastCall)
6802 ArgOffset += 16;
6803 break;
6804 }
6805 }
6806
6807 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6808 "mismatch in size of parameter area");
6809 (void)NumBytesActuallyUsed;
6810
6811 if (!MemOpChains.empty())
6812 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6813
6814 // Check if this is an indirect call (MTCTR/BCTRL).
6815 // See prepareDescriptorIndirectCall and buildCallOperands for more
6816 // information about calls through function pointers in the 64-bit SVR4 ABI.
6817 if (CFlags.IsIndirect) {
6818 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6819 // caller in the TOC save area.
6820 if (isTOCSaveRestoreRequired(Subtarget)) {
6821 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6822 // Load r2 into a virtual register and store it to the TOC save area.
6823 setUsesTOCBasePtr(DAG);
6824 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6825 // TOC save area offset.
6826 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6827 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6828 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6829 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6830 MachinePointerInfo::getStack(
6831 DAG.getMachineFunction(), TOCSaveOffset));
6832 }
6833 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6834 // This does not mean the MTCTR instruction must use R12; it's easier
6835 // to model this as an extra parameter, so do that.
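// Illustrative note (not from the original source): the ELFv2 global entry
// point of the callee typically rebuilds its TOC pointer from R12
// (e.g. addis r2, r12, ...), which is why R12 is modelled here as just
// another argument register carrying the callee address.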
6836 if (isELFv2ABI && !CFlags.IsPatchPoint)
6837 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6838 }
6839
6840 // Build a sequence of copy-to-reg nodes chained together with token chain
6841 // and flag operands which copy the outgoing args into the appropriate regs.
6842 SDValue InGlue;
6843 for (const auto &[Reg, N] : RegsToPass) {
6844 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6845 InGlue = Chain.getValue(1);
6846 }
6847
6848 if (CFlags.IsTailCall && !IsSibCall)
6849 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6850 TailCallArguments);
6851
6852 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6853 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6854}
6855
6856// Returns true when the shadow of a general purpose argument register
6857// in the parameter save area is aligned to at least 'RequiredAlign'.
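// Illustrative note (not from the original source): the alignment refers to
// the absolute stack address of the shadow slot, which includes the linkage
// area. On 64-bit AIX the PSA starts at SP + 48, so X3's shadow (SP + 48) is
// 16-byte aligned while X4's (SP + 56) is only 8-byte aligned; on 32-bit the
// PSA starts at SP + 24, so R3 is only 8-byte aligned and R5 (SP + 32) is
// 16-byte aligned.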
6858static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6859 assert(RequiredAlign.value() <= 16 &&
6860 "Required alignment greater than stack alignment.");
6861 switch (Reg) {
6862 default:
6863 report_fatal_error("called on invalid register.");
6864 case PPC::R5:
6865 case PPC::R9:
6866 case PPC::X3:
6867 case PPC::X5:
6868 case PPC::X7:
6869 case PPC::X9:
6870 // These registers are 16-byte aligned, which is the strictest alignment
6871 // we can support.
6872 return true;
6873 case PPC::R3:
6874 case PPC::R7:
6875 case PPC::X4:
6876 case PPC::X6:
6877 case PPC::X8:
6878 case PPC::X10:
6879 // The shadow of these registers in the PSA is 8 byte aligned.
6880 return RequiredAlign <= 8;
6881 case PPC::R4:
6882 case PPC::R6:
6883 case PPC::R8:
6884 case PPC::R10:
6885 return RequiredAlign <= 4;
6886 }
6887}
6888
6889static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6890 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6891 Type *OrigTy, CCState &State) {
6892 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6893 State.getMachineFunction().getSubtarget());
6894 const bool IsPPC64 = Subtarget.isPPC64();
6895 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6896 const Align PtrAlign(PtrSize);
6897 const Align StackAlign(16);
6898 const MVT RegVT = Subtarget.getScalarIntVT();
6899
6900 if (ValVT == MVT::f128)
6901 report_fatal_error("f128 is unimplemented on AIX.");
6902
6903 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6904 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6905 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6906 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6907 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6908 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6909
6910 static const MCPhysReg VR[] = {// Vector registers.
6911 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6912 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6913 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6914
6915 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6916
6917 if (ArgFlags.isNest()) {
6918 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6919 if (!EnvReg)
6920 report_fatal_error("More than one nest argument.");
6921 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6922 return false;
6923 }
6924
6925 if (ArgFlags.isByVal()) {
6926 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6927 if (ByValAlign > StackAlign)
6928 report_fatal_error("Pass-by-value arguments with alignment greater than "
6929 "16 are not supported.");
6930
6931 const unsigned ByValSize = ArgFlags.getByValSize();
6932 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6933
6934 // An empty aggregate parameter takes up no storage and no registers,
6935 // but needs a MemLoc for a stack slot for the formal arguments side.
6936 if (ByValSize == 0) {
6937 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6938 State.getStackSize(), RegVT, LocInfo));
6939 return false;
6940 }
6941
6942 // Shadow allocate any registers that are not properly aligned.
6943 unsigned NextReg = State.getFirstUnallocated(GPRs);
6944 while (NextReg != GPRs.size() &&
6945 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6946 // Shadow allocate the next register since its alignment is not strict enough.
6947 MCRegister Reg = State.AllocateReg(GPRs);
6948 // Allocate the stack space shadowed by said register.
6949 State.AllocateStack(PtrSize, PtrAlign);
6950 assert(Reg && "Allocating register unexpectedly failed.");
6951 (void)Reg;
6952 NextReg = State.getFirstUnallocated(GPRs);
6953 }
6954
6955 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6956 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6957 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6958 if (MCRegister Reg = State.AllocateReg(GPRs))
6959 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6960 else {
6961 State.addLoc(
6962 CCValAssign::getMem(ValNo, ValVT, Offset, RegVT,
6963 LocInfo));
6964 break;
6965 }
6966 }
6967 return false;
6968 }
6969
6970 // Arguments always reserve parameter save area.
6971 switch (ValVT.SimpleTy) {
6972 default:
6973 report_fatal_error("Unhandled value type for argument.");
6974 case MVT::i64:
6975 // i64 arguments should have been split to i32 for PPC32.
6976 assert(IsPPC64 && "PPC32 should have split i64 values.");
6977 [[fallthrough]];
6978 case MVT::i1:
6979 case MVT::i32: {
6980 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6981 // AIX integer arguments are always passed in register width.
6982 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6983 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6984 : CCValAssign::LocInfo::ZExt;
6985 if (MCRegister Reg = State.AllocateReg(GPRs))
6986 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6987 else
6988 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6989
6990 return false;
6991 }
6992 case MVT::f32:
6993 case MVT::f64: {
6994 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6995 const unsigned StoreSize = LocVT.getStoreSize();
6996 // Floats are always 4-byte aligned in the PSA on AIX.
6997 // This includes f64 in 64-bit mode for ABI compatibility.
6998 const unsigned Offset =
6999 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
7000 MCRegister FReg = State.AllocateReg(FPR);
7001 if (FReg)
7002 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7003
7004 // Reserve and initialize GPRs or initialize the PSA as required.
7005 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
7006 if (MCRegister Reg = State.AllocateReg(GPRs)) {
7007 assert(FReg && "An FPR should be available when a GPR is reserved.");
7008 if (State.isVarArg()) {
7009 // Successfully reserved GPRs are only initialized for vararg calls.
7010 // Custom handling is required for:
7011 // f64 in PPC32 needs to be split into 2 GPRs.
7012 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7013 State.addLoc(
7014 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7015 }
7016 } else {
7017 // If there are insufficient GPRs, the PSA needs to be initialized.
7018 // Initialization occurs even if an FPR was initialized for
7019 // compatibility with the AIX XL compiler. The full memory for the
7020 // argument will be initialized even if a prior word is saved in GPR.
7021 // A custom memLoc is used when the argument also passes in FPR so
7022 // that the callee handling can skip over it easily.
7023 State.addLoc(
7024 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7025 LocInfo)
7026 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7027 break;
7028 }
7029 }
7030
7031 return false;
7032 }
7033 case MVT::v4f32:
7034 case MVT::v4i32:
7035 case MVT::v8i16:
7036 case MVT::v16i8:
7037 case MVT::v2i64:
7038 case MVT::v2f64:
7039 case MVT::v1i128: {
7040 const unsigned VecSize = 16;
7041 const Align VecAlign(VecSize);
7042
7043 if (!State.isVarArg()) {
7044 // If there are vector registers remaining we don't consume any stack
7045 // space.
7046 if (MCRegister VReg = State.AllocateReg(VR)) {
7047 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7048 return false;
7049 }
7050 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7051 // might be allocated in the portion of the PSA that is shadowed by the
7052 // GPRs.
7053 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7054 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7055 return false;
7056 }
7057
7058 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7059 // Burn any underaligned registers and their shadowed stack space until
7060 // we reach the required alignment.
7061 while (NextRegIndex != GPRs.size() &&
7062 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7063 // Shadow allocate register and its stack shadow.
7064 MCRegister Reg = State.AllocateReg(GPRs);
7065 State.AllocateStack(PtrSize, PtrAlign);
7066 assert(Reg && "Allocating register unexpectedly failed.");
7067 (void)Reg;
7068 NextRegIndex = State.getFirstUnallocated(GPRs);
7069 }
7070
7071 // Vectors that are passed as fixed arguments are handled differently.
7072 // They are passed in VRs if any are available (unlike arguments passed
7073 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
7074 // functions).
7075 if (!ArgFlags.isVarArg()) {
7076 if (MCRegister VReg = State.AllocateReg(VR)) {
7077 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7078 // Shadow allocate GPRs and stack space even though we pass in a VR.
7079 for (unsigned I = 0; I != VecSize; I += PtrSize)
7080 State.AllocateReg(GPRs);
7081 State.AllocateStack(VecSize, VecAlign);
7082 return false;
7083 }
7084 // No vector registers remain so pass on the stack.
7085 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7086 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7087 return false;
7088 }
7089
7090 // If all GPRs are consumed then we pass the argument fully on the stack.
7091 if (NextRegIndex == GPRs.size()) {
7092 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7093 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7094 return false;
7095 }
7096
7097 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7098 // half of the argument, and then need to pass the remaining half on the
7099 // stack.
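// Illustrative note (not from the original source): if R9 is the next free
// GPR, a 16-byte vector vararg gets a custom MemLoc covering its whole
// 16-byte PSA slot plus custom RegLocs for R9 and R10; the callee takes the
// first 8 bytes from those registers and the rest from memory.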
7100 if (GPRs[NextRegIndex] == PPC::R9) {
7101 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7102 State.addLoc(
7103 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7104
7105 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7106 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7107 assert(FirstReg && SecondReg &&
7108 "Allocating R9 or R10 unexpectedly failed.");
7109 State.addLoc(
7110 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7111 State.addLoc(
7112 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7113 return false;
7114 }
7115
7116 // We have enough GPRs to fully pass the vector argument, and we have
7117 // already consumed any underaligned registers. Start with the custom
7118 // MemLoc and then the custom RegLocs.
7119 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7120 State.addLoc(
7121 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7122 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7123 const MCRegister Reg = State.AllocateReg(GPRs);
7124 assert(Reg && "Failed to allocate register for vararg vector argument");
7125 State.addLoc(
7126 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7127 }
7128 return false;
7129 }
7130 }
7131 return true;
7132}
7133
7134// So far, this function is only used by LowerFormalArguments_AIX()
7135 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7136 bool IsPPC64,
7137 bool HasP8Vector,
7138 bool HasVSX) {
7139 assert((IsPPC64 || SVT != MVT::i64) &&
7140 "i64 should have been split for 32-bit codegen.");
7141
7142 switch (SVT) {
7143 default:
7144 report_fatal_error("Unexpected value type for formal argument");
7145 case MVT::i1:
7146 case MVT::i32:
7147 case MVT::i64:
7148 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7149 case MVT::f32:
7150 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7151 case MVT::f64:
7152 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7153 case MVT::v4f32:
7154 case MVT::v4i32:
7155 case MVT::v8i16:
7156 case MVT::v16i8:
7157 case MVT::v2i64:
7158 case MVT::v2f64:
7159 case MVT::v1i128:
7160 return &PPC::VRRCRegClass;
7161 }
7162}
7163
7164 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7165 SelectionDAG &DAG, SDValue ArgValue,
7166 MVT LocVT, const SDLoc &dl) {
7167 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7168 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7169
7170 if (Flags.isSExt())
7171 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7172 DAG.getValueType(ValVT));
7173 else if (Flags.isZExt())
7174 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7175 DAG.getValueType(ValVT));
7176
7177 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7178}
7179
7180static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7181 const unsigned LASize = FL->getLinkageSize();
7182
7183 if (PPC::GPRCRegClass.contains(Reg)) {
7184 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7185 "Reg must be a valid argument register!");
7186 return LASize + 4 * (Reg - PPC::R3);
7187 }
7188
7189 if (PPC::G8RCRegClass.contains(Reg)) {
7190 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7191 "Reg must be a valid argument register!");
7192 return LASize + 8 * (Reg - PPC::X3);
7193 }
7194
7195 llvm_unreachable("Only general purpose registers expected.");
7196}
7197
7198// AIX ABI Stack Frame Layout:
7199//
7200// Low Memory +--------------------------------------------+
7201// SP +---> | Back chain | ---+
7202// | +--------------------------------------------+ |
7203// | | Saved Condition Register | |
7204// | +--------------------------------------------+ |
7205// | | Saved Linkage Register | |
7206// | +--------------------------------------------+ | Linkage Area
7207// | | Reserved for compilers | |
7208// | +--------------------------------------------+ |
7209// | | Reserved for binders | |
7210// | +--------------------------------------------+ |
7211// | | Saved TOC pointer | ---+
7212// | +--------------------------------------------+
7213// | | Parameter save area |
7214// | +--------------------------------------------+
7215// | | Alloca space |
7216// | +--------------------------------------------+
7217// | | Local variable space |
7218// | +--------------------------------------------+
7219// | | Float/int conversion temporary |
7220// | +--------------------------------------------+
7221// | | Save area for AltiVec registers |
7222// | +--------------------------------------------+
7223// | | AltiVec alignment padding |
7224// | +--------------------------------------------+
7225// | | Save area for VRSAVE register |
7226// | +--------------------------------------------+
7227// | | Save area for General Purpose registers |
7228// | +--------------------------------------------+
7229// | | Save area for Floating Point registers |
7230// | +--------------------------------------------+
7231// +---- | Back chain |
7232// High Memory +--------------------------------------------+
7233//
7234// Specifications:
7235// AIX 7.2 Assembler Language Reference
7236// Subroutine linkage convention
7237
7238SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7239 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7240 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7241 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7242
7243 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7244 CallConv == CallingConv::Fast) &&
7245 "Unexpected calling convention!");
7246
7247 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7248 report_fatal_error("Tail call support is unimplemented on AIX.");
7249
7250 if (useSoftFloat())
7251 report_fatal_error("Soft float support is unimplemented on AIX.");
7252
7253 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7254
7255 const bool IsPPC64 = Subtarget.isPPC64();
7256 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7257
7258 // Assign locations to all of the incoming arguments.
7259 SmallVector<CCValAssign, 16> ArgLocs;
7260 MachineFunction &MF = DAG.getMachineFunction();
7261 MachineFrameInfo &MFI = MF.getFrameInfo();
7262 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7263 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7264
7265 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7266 // Reserve space for the linkage area on the stack.
7267 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7268 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7269 uint64_t SaveStackPos = CCInfo.getStackSize();
7270 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7271 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7272
7273 SmallVector<SDValue, 8> MemOps;
7274
7275 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7276 CCValAssign &VA = ArgLocs[I++];
7277 MVT LocVT = VA.getLocVT();
7278 MVT ValVT = VA.getValVT();
7279 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7280
7281 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7282 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7283 // For compatibility with the AIX XL compiler, the float args in the
7284 // parameter save area are initialized even if the argument is available
7285 // in register. The caller is required to initialize both the register
7286 // and memory, however, the callee can choose to expect it in either.
7287 // The memloc is dismissed here because the argument is retrieved from
7288 // the register.
7289 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7290 continue;
7291
7292 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7293 const TargetRegisterClass *RegClass = getRegClassForSVT(
7294 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7295 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7296 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7297 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7298 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7299 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7300 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7301 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7302 MachinePointerInfo(), Align(PtrByteSize));
7303 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7304 MemOps.push_back(StoreReg);
7305 }
7306
7307 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7308 unsigned StoreSize =
7309 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7310 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7311 }
7312
7313 auto HandleMemLoc = [&]() {
7314 const unsigned LocSize = LocVT.getStoreSize();
7315 const unsigned ValSize = ValVT.getStoreSize();
7316 assert((ValSize <= LocSize) &&
7317 "Object size is larger than size of MemLoc");
7318 int CurArgOffset = VA.getLocMemOffset();
7319 // Objects are right-justified because AIX is big-endian.
7320 if (LocSize > ValSize)
7321 CurArgOffset += LocSize - ValSize;
7322 // Potential tail calls could cause overwriting of argument stack slots.
7323 const bool IsImmutable =
7324 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7325 (CallConv == CallingConv::Fast));
7326 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7327 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7328 SDValue ArgValue =
7329 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7330
7331 // While the ABI specifies the argument type is (sign or zero) extended
7332 // out to register width, not all code is compliant. We truncate and
7333 // re-extend to be more forgiving of these callers when the argument type
7334 // is smaller than register width.
7335 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7336 ValVT.isInteger() &&
7337 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7338 // It is possible to have either real integer values
7339 // or integers that were not originally integers.
7340 // In the latter case, these could have come from structs,
7341 // and these integers would not have an extend on the parameter.
7342 // Since these types of integers do not have an extend specified
7343 // in the first place, the type of extend that we do should not matter.
7344 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7345 ? MVT::i8
7346 : ArgVT;
7347 SDValue ArgValueTrunc =
7348 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7349 SDValue ArgValueExt =
7350 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7351 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7352 InVals.push_back(ArgValueExt);
7353 } else {
7354 InVals.push_back(ArgValue);
7355 }
7356 };
7357
7358 // Vector arguments to VaArg functions are passed both on the stack, and
7359 // in any available GPRs. Load the value from the stack and add the GPRs
7360 // as live ins.
7361 if (VA.isMemLoc() && VA.needsCustom()) {
7362 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7363 assert(isVarArg && "Only use custom memloc for vararg.");
7364 // Remember the ValNo of the custom MemLoc so we can compare it to the
7365 // ValNo of the matching custom RegLocs.
7366 const unsigned OriginalValNo = VA.getValNo();
7367 (void)OriginalValNo;
7368
7369 auto HandleCustomVecRegLoc = [&]() {
7370 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7371 "Missing custom RegLoc.");
7372 VA = ArgLocs[I++];
7373 assert(VA.getValVT().isVector() &&
7374 "Unexpected Val type for custom RegLoc.");
7375 assert(VA.getValNo() == OriginalValNo &&
7376 "ValNo mismatch between custom MemLoc and RegLoc.");
7377 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7378 MF.addLiveIn(VA.getLocReg(),
7379 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7380 Subtarget.hasVSX()));
7381 };
7382
7383 HandleMemLoc();
7384 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7385 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7386 // R10.
7387 HandleCustomVecRegLoc();
7388 HandleCustomVecRegLoc();
7389
7390 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7391 // we passed the vector in R5, R6, R7 and R8.
7392 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7393 assert(!IsPPC64 &&
7394 "Only 2 custom RegLocs expected for 64-bit codegen.");
7395 HandleCustomVecRegLoc();
7396 HandleCustomVecRegLoc();
7397 }
7398
7399 continue;
7400 }
7401
7402 if (VA.isRegLoc()) {
7403 if (VA.getValVT().isScalarInteger())
7404 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7405 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7406 switch (VA.getValVT().SimpleTy) {
7407 default:
7408 report_fatal_error("Unhandled value type for argument.");
7409 case MVT::f32:
7410 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7411 break;
7412 case MVT::f64:
7413 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7414 break;
7415 }
7416 } else if (VA.getValVT().isVector()) {
7417 switch (VA.getValVT().SimpleTy) {
7418 default:
7419 report_fatal_error("Unhandled value type for argument.");
7420 case MVT::v16i8:
7421 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7422 break;
7423 case MVT::v8i16:
7424 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7425 break;
7426 case MVT::v4i32:
7427 case MVT::v2i64:
7428 case MVT::v1i128:
7429 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7430 break;
7431 case MVT::v4f32:
7432 case MVT::v2f64:
7433 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7434 break;
7435 }
7436 }
7437 }
7438
7439 if (Flags.isByVal() && VA.isMemLoc()) {
7440 const unsigned Size =
7441 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7442 PtrByteSize);
7443 const int FI = MF.getFrameInfo().CreateFixedObject(
7444 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7445 /* IsAliased */ true);
7446 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7447 InVals.push_back(FIN);
7448
7449 continue;
7450 }
7451
7452 if (Flags.isByVal()) {
7453 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7454
7455 const MCPhysReg ArgReg = VA.getLocReg();
7456 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7457
7458 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7459 const int FI = MF.getFrameInfo().CreateFixedObject(
7460 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7461 /* IsAliased */ true);
7462 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7463 InVals.push_back(FIN);
7464
7465 // Add live ins for all the RegLocs for the same ByVal.
7466 const TargetRegisterClass *RegClass =
7467 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7468
7469 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7470 unsigned Offset) {
7471 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7472 // Since the caller's side has left-justified the aggregate in the
7473 // register, we can simply store the entire register into the stack
7474 // slot.
7475 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7476 // The store to the fixedstack object is needed because accessing a
7477 // field of the ByVal will use a gep and load. Ideally we will optimize
7478 // to extracting the value from the register directly, and elide the
7479 // stores when the argument's address is not taken, but that will need to
7480 // be future work.
7481 SDValue Store = DAG.getStore(
7482 CopyFrom.getValue(1), dl, CopyFrom,
7483 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7484 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7485
7486 MemOps.push_back(Store);
7487 };
7488
7489 unsigned Offset = 0;
7490 HandleRegLoc(VA.getLocReg(), Offset);
7491 Offset += PtrByteSize;
7492 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7493 Offset += PtrByteSize) {
7494 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7495 "RegLocs should be for ByVal argument.");
7496
7497 const CCValAssign RL = ArgLocs[I++];
7498 HandleRegLoc(RL.getLocReg(), Offset);
7499 }
7500
7501
7502 if (Offset != StackSize) {
7503 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7504 "Expected MemLoc for remaining bytes.");
7505 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7506 // Consume the MemLoc. The InVal has already been emitted, so nothing
7507 // more needs to be done.
7508 ++I;
7509 }
7510
7511 continue;
7512 }
7513
7514 if (VA.isRegLoc() && !VA.needsCustom()) {
7515 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7516 Register VReg =
7517 MF.addLiveIn(VA.getLocReg(),
7518 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7519 Subtarget.hasVSX()));
7520 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7521 if (ValVT.isScalarInteger() &&
7522 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7523 ArgValue =
7524 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7525 }
7526 InVals.push_back(ArgValue);
7527 continue;
7528 }
7529 if (VA.isMemLoc()) {
7530 HandleMemLoc();
7531 continue;
7532 }
7533 }
7534
7535 // On AIX a minimum of 8 words is reserved in the parameter save area.
7536 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7537 // Area that is at least reserved in the caller of this function.
7538 unsigned CallerReservedArea = std::max<unsigned>(
7539 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7540
7541 // Set the size that is at least reserved in caller of this function. Tail
7542 // call optimized function's reserved stack space needs to be aligned so
7543 // that taking the difference between two stack areas will result in an
7544 // aligned stack.
7545 CallerReservedArea =
7546 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7547 FuncInfo->setMinReservedArea(CallerReservedArea);
7548
7549 if (isVarArg) {
7550 FuncInfo->setVarArgsFrameIndex(
7551 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7552 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7553
7554 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7555 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7556
7557 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7558 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7559 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7560
7561 // The fixed integer arguments of a variadic function are stored to the
7562 // VarArgsFrameIndex on the stack so that they may be loaded by
7563 // dereferencing the result of va_next.
7564 for (unsigned GPRIndex =
7565 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7566 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7567
7568 const Register VReg =
7569 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7570 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7571
7572 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7573 SDValue Store =
7574 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7575 MemOps.push_back(Store);
7576 // Increment the address for the next argument to store.
7577 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7578 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7579 }
7580 }
7581
7582 if (!MemOps.empty())
7583 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7584
7585 return Chain;
7586}
7587
7588SDValue PPCTargetLowering::LowerCall_AIX(
7589 SDValue Chain, SDValue Callee, CallFlags CFlags,
7590 const SmallVectorImpl<ISD::OutputArg> &Outs,
7591 const SmallVectorImpl<SDValue> &OutVals,
7592 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7593 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7594 const CallBase *CB) const {
7595 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7596 // AIX ABI stack frame layout.
7597
7598 assert((CFlags.CallConv == CallingConv::C ||
7599 CFlags.CallConv == CallingConv::Cold ||
7600 CFlags.CallConv == CallingConv::Fast) &&
7601 "Unexpected calling convention!");
7602
7603 if (CFlags.IsPatchPoint)
7604 report_fatal_error("This call type is unimplemented on AIX.");
7605
7606 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7607
7608 MachineFunction &MF = DAG.getMachineFunction();
7609 SmallVector<CCValAssign, 16> ArgLocs;
7610 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7611 *DAG.getContext());
7612
7613 // Reserve space for the linkage save area (LSA) on the stack.
7614 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7615 // [SP][CR][LR][2 x reserved][TOC].
7616 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7617 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7618 const bool IsPPC64 = Subtarget.isPPC64();
7619 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7620 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7621 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7622 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7623
7624 // The prolog code of the callee may store up to 8 GPR argument registers to
7625 // the stack, allowing va_start to index over them in memory if the callee
7626 // is variadic.
7627 // Because we cannot tell if this is needed on the caller side, we have to
7628 // conservatively assume that it is needed. As such, make sure we have at
7629 // least enough stack space for the caller to store the 8 GPRs.
7630 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7631 const unsigned NumBytes = std::max<unsigned>(
7632 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
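// Illustrative note (not from the original source): on 64-bit AIX the
// linkage area is 48 bytes and the minimum parameter save area is
// 8 * 8 == 64 bytes, so NumBytes here is at least 112 even for a call whose
// arguments all fit in registers.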
7633
7634 // Adjust the stack pointer for the new arguments...
7635 // These operations are automatically eliminated by the prolog/epilog pass.
7636 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7637 SDValue CallSeqStart = Chain;
7638
7639 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7640 SmallVector<SDValue, 8> MemOpChains;
7641
7642 // Set up a copy of the stack pointer for loading and storing any
7643 // arguments that may not fit in the registers available for argument
7644 // passing.
7645 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7646 : DAG.getRegister(PPC::R1, MVT::i32);
7647
7648 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7649 const unsigned ValNo = ArgLocs[I].getValNo();
7650 SDValue Arg = OutVals[ValNo];
7651 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7652
7653 if (Flags.isByVal()) {
7654 const unsigned ByValSize = Flags.getByValSize();
7655
7656 // Nothing to do for zero-sized ByVals on the caller side.
7657 if (!ByValSize) {
7658 ++I;
7659 continue;
7660 }
7661
7662 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7663 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7664 (LoadOffset != 0)
7665 ? DAG.getObjectPtrOffset(
7666 dl, Arg, TypeSize::getFixed(LoadOffset))
7667 : Arg,
7668 MachinePointerInfo(), VT);
7669 };
7670
7671 unsigned LoadOffset = 0;
7672
7673 // Initialize registers, which are fully occupied by the by-val argument.
7674 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7675 SDValue Load = GetLoad(PtrVT, LoadOffset);
7676 MemOpChains.push_back(Load.getValue(1));
7677 LoadOffset += PtrByteSize;
7678 const CCValAssign &ByValVA = ArgLocs[I++];
7679 assert(ByValVA.getValNo() == ValNo &&
7680 "Unexpected location for pass-by-value argument.");
7681 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7682 }
7683
7684 if (LoadOffset == ByValSize)
7685 continue;
7686
7687 // There must be one more loc to handle the remainder.
7688 assert(ArgLocs[I].getValNo() == ValNo &&
7689 "Expected additional location for by-value argument.");
7690
7691 if (ArgLocs[I].isMemLoc()) {
7692 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7693 const CCValAssign &ByValVA = ArgLocs[I++];
7694 ISD::ArgFlagsTy MemcpyFlags = Flags;
7695 // Only memcpy the bytes that don't pass in register.
7696 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7697 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7698 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7699 dl, Arg, TypeSize::getFixed(LoadOffset))
7700 : Arg,
7701 DAG.getObjectPtrOffset(
7702 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7703 CallSeqStart, MemcpyFlags, DAG, dl);
7704 continue;
7705 }
7706
7707 // Initialize the final register residue.
7708 // Any residue that occupies the final by-val arg register must be
7709 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7710 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7711 // 2 and 1 byte loads.
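// Illustrative note (not from the original source): for a 7-byte residue on
// 64-bit AIX the loop below emits an i32, an i16 and an i8 zero-extending
// load, shifts them left by 64 - 8*Bytes (32, 16 and 8 bits respectively),
// and ORs them together so the 7 bytes end up left-justified in the
// register.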
7712 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7713 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7714 "Unexpected register residue for by-value argument.");
7715 SDValue ResidueVal;
7716 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7717 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7718 const MVT VT =
7719 N == 1 ? MVT::i8
7720 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7721 SDValue Load = GetLoad(VT, LoadOffset);
7722 MemOpChains.push_back(Load.getValue(1));
7723 LoadOffset += N;
7724 Bytes += N;
7725
7726 // By-val arguments are passed left-justified in register.
7727 // Every load here needs to be shifted, otherwise a full register load
7728 // should have been used.
7729 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7730 "Unexpected load emitted during handling of pass-by-value "
7731 "argument.");
7732 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7733 EVT ShiftAmountTy =
7734 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7735 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7736 SDValue ShiftedLoad =
7737 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7738 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7739 ShiftedLoad)
7740 : ShiftedLoad;
7741 }
7742
7743 const CCValAssign &ByValVA = ArgLocs[I++];
7744 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7745 continue;
7746 }
7747
7748 CCValAssign &VA = ArgLocs[I++];
7749 const MVT LocVT = VA.getLocVT();
7750 const MVT ValVT = VA.getValVT();
7751
7752 switch (VA.getLocInfo()) {
7753 default:
7754 report_fatal_error("Unexpected argument extension type.");
7755 case CCValAssign::Full:
7756 break;
7757 case CCValAssign::ZExt:
7758 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7759 break;
7760 case CCValAssign::SExt:
7761 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7762 break;
7763 }
7764
7765 if (VA.isRegLoc() && !VA.needsCustom()) {
7766 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7767 continue;
7768 }
7769
7770 // Vector arguments passed to VarArg functions need custom handling when
7771 // they are passed (at least partially) in GPRs.
7772 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7773 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7774 // Store value to its stack slot.
7775 SDValue PtrOff =
7776 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7777 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7778 SDValue Store =
7779 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7780 MemOpChains.push_back(Store);
7781 const unsigned OriginalValNo = VA.getValNo();
7782 // Then load the GPRs from the stack
7783 unsigned LoadOffset = 0;
7784 auto HandleCustomVecRegLoc = [&]() {
7785 assert(I != E && "Unexpected end of CCvalAssigns.");
7786 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7787 "Expected custom RegLoc.");
7788 CCValAssign RegVA = ArgLocs[I++];
7789 assert(RegVA.getValNo() == OriginalValNo &&
7790 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7791 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7792 DAG.getConstant(LoadOffset, dl, PtrVT));
7793 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7794 MemOpChains.push_back(Load.getValue(1));
7795 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7796 LoadOffset += PtrByteSize;
7797 };
7798
7799 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7800 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7801 // R10.
7802 HandleCustomVecRegLoc();
7803 HandleCustomVecRegLoc();
7804
7805 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7806 ArgLocs[I].getValNo() == OriginalValNo) {
7807 assert(!IsPPC64 &&
7808 "Only 2 custom RegLocs expected for 64-bit codegen.");
7809 HandleCustomVecRegLoc();
7810 HandleCustomVecRegLoc();
7811 }
7812
7813 continue;
7814 }
7815
7816 if (VA.isMemLoc()) {
7817 SDValue PtrOff =
7818 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7819 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7820 MemOpChains.push_back(
7821 DAG.getStore(Chain, dl, Arg, PtrOff,
7822 MachinePointerInfo::getStack(DAG.getMachineFunction(), VA.getLocMemOffset()),
7823 Subtarget.getFrameLowering()->getStackAlign()));
7824
7825 continue;
7826 }
7827
7828 if (!ValVT.isFloatingPoint())
7829 report_fatal_error(
7830 "Unexpected register handling for calling convention.");
7831
7832 // Custom handling is used for GPR initializations for vararg float
7833 // arguments.
7834 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7835 LocVT.isInteger() &&
7836 "Custom register handling only expected for VarArg.");
7837
7838 SDValue ArgAsInt =
7839 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7840
7841 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7842 // f32 in 32-bit GPR
7843 // f64 in 64-bit GPR
7844 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7845 else if (Arg.getValueType().getFixedSizeInBits() <
7846 LocVT.getFixedSizeInBits())
7847 // f32 in 64-bit GPR.
7848 RegsToPass.push_back(std::make_pair(
7849 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7850 else {
7851 // f64 in two 32-bit GPRs
7852 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7853 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7854 "Unexpected custom register for argument!");
7855 CCValAssign &GPR1 = VA;
7856 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7857 DAG.getConstant(32, dl, MVT::i8));
7858 RegsToPass.push_back(std::make_pair(
7859 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7860
7861 if (I != E) {
7862 // If only 1 GPR was available, there will only be one custom GPR and
7863 // the argument will also pass in memory.
7864 CCValAssign &PeekArg = ArgLocs[I];
7865 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7866 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7867 CCValAssign &GPR2 = ArgLocs[I++];
7868 RegsToPass.push_back(std::make_pair(
7869 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7870 }
7871 }
7872 }
7873 }
7874
7875 if (!MemOpChains.empty())
7876 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7877
7878 // For indirect calls, we need to save the TOC base to the stack for
7879 // restoration after the call.
7880 if (CFlags.IsIndirect) {
7881 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7882 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7883 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7884 const MVT PtrVT = Subtarget.getScalarIntVT();
7885 const unsigned TOCSaveOffset =
7886 Subtarget.getFrameLowering()->getTOCSaveOffset();
7887
7888 setUsesTOCBasePtr(DAG);
7889 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7890 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7891 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7892 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7893 Chain = DAG.getStore(
7894 Val.getValue(1), dl, Val, AddPtr,
7895 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7896 }
7897
7898 // Build a sequence of copy-to-reg nodes chained together with token chain
7899 // and flag operands which copy the outgoing args into the appropriate regs.
7900 SDValue InGlue;
7901 for (auto Reg : RegsToPass) {
7902 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7903 InGlue = Chain.getValue(1);
7904 }
7905
7906 const int SPDiff = 0;
7907 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7908 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7909}
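// A minimal standalone sketch of the bit-level split performed above when a
// vararg f64 is passed in a pair of 32-bit GPRs (high word in the first
// custom GPR, low word in the second). It is illustrative only and not part
// of the lowering itself; llvm::DoubleToBits is assumed to come from
// llvm/Support/MathExtras.h and the helper name is made up.
[[maybe_unused]] static std::pair<uint32_t, uint32_t>
sketchSplitF64IntoGPRPair(double D) {
  uint64_t Bits = llvm::DoubleToBits(D); // IEEE-754 image of the double
  uint32_t Hi = uint32_t(Bits >> 32);    // word copied into the first GPR
  uint32_t Lo = uint32_t(Bits);          // word copied into the second GPR
  return {Hi, Lo};
}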
7910
7911bool
7912PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7913 MachineFunction &MF, bool isVarArg,
7914 const SmallVectorImpl<ISD::OutputArg> &Outs,
7915 LLVMContext &Context,
7916 const Type *RetTy) const {
7917 SmallVector<CCValAssign, 16> RVLocs;
7918 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7919 return CCInfo.CheckReturn(
7920 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7921 ? RetCC_PPC_Cold
7922 : RetCC_PPC);
7923}
7924
7925SDValue
7926PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7927 bool isVarArg,
7928 const SmallVectorImpl<ISD::OutputArg> &Outs,
7929 const SmallVectorImpl<SDValue> &OutVals,
7930 const SDLoc &dl, SelectionDAG &DAG) const {
7931 SmallVector<CCValAssign, 16> RVLocs;
7932 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7933 *DAG.getContext());
7934 CCInfo.AnalyzeReturn(Outs,
7935 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7936 ? RetCC_PPC_Cold
7937 : RetCC_PPC);
7938
7939 SDValue Glue;
7940 SmallVector<SDValue, 4> RetOps(1, Chain);
7941
7942 // Copy the result values into the output registers.
7943 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7944 CCValAssign &VA = RVLocs[i];
7945 assert(VA.isRegLoc() && "Can only return in registers!");
7946
7947 SDValue Arg = OutVals[RealResIdx];
7948
7949 switch (VA.getLocInfo()) {
7950 default: llvm_unreachable("Unknown loc info!");
7951 case CCValAssign::Full: break;
7952 case CCValAssign::AExt:
7953 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7954 break;
7955 case CCValAssign::ZExt:
7956 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7957 break;
7958 case CCValAssign::SExt:
7959 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7960 break;
7961 }
7962 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7963 bool isLittleEndian = Subtarget.isLittleEndian();
7964 // Legalize ret f64 -> ret 2 x i32.
7965 SDValue SVal =
7966 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7967 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7968 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7969 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7970 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7971 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7972 Glue = Chain.getValue(1);
7973 VA = RVLocs[++i]; // skip ahead to next loc
7974 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7975 } else
7976 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7977 Glue = Chain.getValue(1);
7978 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7979 }
7980
7981 RetOps[0] = Chain; // Update chain.
7982
7983 // Add the glue if we have it.
7984 if (Glue.getNode())
7985 RetOps.push_back(Glue);
7986
7987 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7988}
7989
7990SDValue
7991PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7992 SelectionDAG &DAG) const {
7993 SDLoc dl(Op);
7994
7995 // Get the correct type for integers.
7996 EVT IntVT = Op.getValueType();
7997
7998 // Get the inputs.
7999 SDValue Chain = Op.getOperand(0);
8000 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8001 // Build a DYNAREAOFFSET node.
8002 SDValue Ops[2] = {Chain, FPSIdx};
8003 SDVTList VTs = DAG.getVTList(IntVT);
8004 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
8005}
8006
8007SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
8008 SelectionDAG &DAG) const {
8009 // When we pop the dynamic allocation we need to restore the SP link.
8010 SDLoc dl(Op);
8011
8012 // Get the correct type for pointers.
8013 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8014
8015 // Construct the stack pointer operand.
8016 bool isPPC64 = Subtarget.isPPC64();
8017 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
8018 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
8019
8020 // Get the operands for the STACKRESTORE.
8021 SDValue Chain = Op.getOperand(0);
8022 SDValue SaveSP = Op.getOperand(1);
8023
8024 // Load the old link SP.
8025 SDValue LoadLinkSP =
8026 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
8027
8028 // Restore the stack pointer.
8029 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
8030
8031 // Store the old link SP.
8032 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
8033}
8034
8035SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
8036 MachineFunction &MF = DAG.getMachineFunction();
8037 bool isPPC64 = Subtarget.isPPC64();
8038 EVT PtrVT = getPointerTy(MF.getDataLayout());
8039
8040 // Get the current return address save index. The users of this index
8041 // will be primarily the RETURNADDR lowering.
8042 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8043 int RASI = FI->getReturnAddrSaveIndex();
8044
8045 // If the return address save index hasn't been defined yet.
8046 if (!RASI) {
8047 // Find out the fixed offset of the return address (LR) save area.
8048 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8049 // Allocate the frame index for the return address save area.
8050 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
8051 // Save the result.
8052 FI->setReturnAddrSaveIndex(RASI);
8053 }
8054 return DAG.getFrameIndex(RASI, PtrVT);
8055}
8056
8057SDValue
8058PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8059 MachineFunction &MF = DAG.getMachineFunction();
8060 bool isPPC64 = Subtarget.isPPC64();
8061 EVT PtrVT = getPointerTy(MF.getDataLayout());
8062
8063 // Get current frame pointer save index. The users of this index will be
8064 // primarily DYNALLOC instructions.
8065 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8066 int FPSI = FI->getFramePointerSaveIndex();
8067
8068 // If the frame pointer save index hasn't been defined yet.
8069 if (!FPSI) {
8071 // Find out the fixed offset of the frame pointer save area.
8071 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8072 // Allocate the frame index for frame pointer save area.
8073 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8074 // Save the result.
8075 FI->setFramePointerSaveIndex(FPSI);
8076 }
8077 return DAG.getFrameIndex(FPSI, PtrVT);
8078}
8079
8080SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8081 SelectionDAG &DAG) const {
8082 MachineFunction &MF = DAG.getMachineFunction();
8083 // Get the inputs.
8084 SDValue Chain = Op.getOperand(0);
8085 SDValue Size = Op.getOperand(1);
8086 SDLoc dl(Op);
8087
8088 // Get the correct type for pointers.
8089 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8090 // Negate the size.
8091 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8092 DAG.getConstant(0, dl, PtrVT), Size);
8093 // Construct a node for the frame pointer save index.
8094 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8095 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8096 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8097 if (hasInlineStackProbe(MF))
8098 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8099 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8100}
8101
8102SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8103 SelectionDAG &DAG) const {
8104 MachineFunction &MF = DAG.getMachineFunction();
8105
8106 bool isPPC64 = Subtarget.isPPC64();
8107 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8108
8109 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8110 return DAG.getFrameIndex(FI, PtrVT);
8111}
8112
8113SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8114 SelectionDAG &DAG) const {
8115 SDLoc DL(Op);
8116 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8117 DAG.getVTList(MVT::i32, MVT::Other),
8118 Op.getOperand(0), Op.getOperand(1));
8119}
8120
8121SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8122 SelectionDAG &DAG) const {
8123 SDLoc DL(Op);
8124 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8125 Op.getOperand(0), Op.getOperand(1));
8126}
8127
8128SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8129 if (Op.getValueType().isVector())
8130 return LowerVectorLoad(Op, DAG);
8131
8132 assert(Op.getValueType() == MVT::i1 &&
8133 "Custom lowering only for i1 loads");
8134
8135 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8136
8137 SDLoc dl(Op);
8138 LoadSDNode *LD = cast<LoadSDNode>(Op);
8139
8140 SDValue Chain = LD->getChain();
8141 SDValue BasePtr = LD->getBasePtr();
8142 MachineMemOperand *MMO = LD->getMemOperand();
8143
8144 SDValue NewLD =
8145 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8146 BasePtr, MVT::i8, MMO);
8147 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8148
8149 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8150 return DAG.getMergeValues(Ops, dl);
8151}
8152
8153SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8154 if (Op.getOperand(1).getValueType().isVector())
8155 return LowerVectorStore(Op, DAG);
8156
8157 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8158 "Custom lowering only for i1 stores");
8159
8160 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8161
8162 SDLoc dl(Op);
8163 StoreSDNode *ST = cast<StoreSDNode>(Op);
8164
8165 SDValue Chain = ST->getChain();
8166 SDValue BasePtr = ST->getBasePtr();
8167 SDValue Value = ST->getValue();
8168 MachineMemOperand *MMO = ST->getMemOperand();
8169
8170 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8171 Value);
8172 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8173}
8174
8175// FIXME: Remove this once the ANDI glue bug is fixed:
8176SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8177 assert(Op.getValueType() == MVT::i1 &&
8178 "Custom lowering only for i1 results");
8179
8180 SDLoc DL(Op);
8181 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8182}
8183
8184SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8185 SelectionDAG &DAG) const {
8186
8187 // Implements a vector truncate that fits in a vector register as a shuffle.
8188 // We want to legalize vector truncates down to where the source fits in
8189 // a vector register (and target is therefore smaller than vector register
8190 // size). At that point legalization will try to custom lower the sub-legal
8191 // result and get here - where we can contain the truncate as a single target
8192 // operation.
8193
8194 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8195 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8196 //
8197 // We will implement it for big-endian ordering as this (where x denotes
8198 // undefined):
8199 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8200 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8201 //
8202 // The same operation in little-endian ordering will be:
8203 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8204 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8205
8206 EVT TrgVT = Op.getValueType();
8207 assert(TrgVT.isVector() && "Vector type expected.");
8208 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8209 EVT EltVT = TrgVT.getVectorElementType();
8210 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8211 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8212 !isPowerOf2_32(EltVT.getSizeInBits()))
8213 return SDValue();
8214
8215 SDValue N1 = Op.getOperand(0);
8216 EVT SrcVT = N1.getValueType();
8217 unsigned SrcSize = SrcVT.getSizeInBits();
8218 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8221 return SDValue();
8222 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8223 return SDValue();
8224
8225 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8226 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8227
8228 SDLoc DL(Op);
8229 SDValue Op1, Op2;
8230 if (SrcSize == 256) {
8231 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8232 EVT SplitVT =
8234 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8235 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8236 DAG.getConstant(0, DL, VecIdxTy));
8237 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8238 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8239 }
8240 else {
8241 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8242 Op2 = DAG.getUNDEF(WideVT);
8243 }
8244
8245 // First list the elements we want to keep.
8246 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8247 SmallVector<int, 16> ShuffV;
8248 if (Subtarget.isLittleEndian())
8249 for (unsigned i = 0; i < TrgNumElts; ++i)
8250 ShuffV.push_back(i * SizeMult);
8251 else
8252 for (unsigned i = 1; i <= TrgNumElts; ++i)
8253 ShuffV.push_back(i * SizeMult - 1);
8254
8255 // Populate the remaining elements with undefs.
8256 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8257 // ShuffV.push_back(i + WideNumElts);
8258 ShuffV.push_back(WideNumElts + 1);
8259
8260 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8261 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8262 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8263}
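// A minimal standalone check of the shuffle-index formula used above: when
// each element shrinks by a factor of SizeMult, little-endian keeps byte lane
// i * SizeMult and big-endian keeps byte lane (i + 1) * SizeMult - 1, i.e.
// whichever lane holds the low-order bits of element i. The helper names are
// illustrative only and not part of the lowering.
static constexpr int truncKeepLaneLE(int I, int SizeMult) {
  return I * SizeMult;
}
static constexpr int truncKeepLaneBE(int I, int SizeMult) {
  return (I + 1) * SizeMult - 1;
}
// Example: v8i16 -> v8i8 expressed as a v16i8 shuffle (SizeMult == 2).
static_assert(truncKeepLaneLE(0, 2) == 0 && truncKeepLaneLE(3, 2) == 6,
              "LE keeps bytes 0, 2, 4, ...");
static_assert(truncKeepLaneBE(0, 2) == 1 && truncKeepLaneBE(3, 2) == 7,
              "BE keeps bytes 1, 3, 5, ...");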
8264
8265 /// LowerSELECT_CC - Lower floating-point select_cc nodes into the fsel
8266 /// instruction when possible.
8267SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8268 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8269 EVT ResVT = Op.getValueType();
8270 EVT CmpVT = Op.getOperand(0).getValueType();
8271 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8272 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8273 SDLoc dl(Op);
8274
8275 // Without power9-vector, we don't have a native instruction for f128 comparison.
8276 // The following transformation to a setcc (and ultimately a libcall) is needed:
8277 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, lhs, rhs), 0, tv, fv, NE
8278 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8279 SDValue Z = DAG.getSetCC(
8280 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8281 LHS, RHS, CC);
8282 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8283 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8284 }
8285
8286 // Not FP, or using SPE? Not a fsel.
8287 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8288 Subtarget.hasSPE())
8289 return Op;
8290
8291 SDNodeFlags Flags = Op.getNode()->getFlags();
8292
8293 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8294 // presence of infinities.
8295 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8296 switch (CC) {
8297 default:
8298 break;
8299 case ISD::SETOGT:
8300 case ISD::SETGT:
8301 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8302 case ISD::SETOLT:
8303 case ISD::SETLT:
8304 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8305 }
8306 }
8307
8308 // We might be able to do better than this under some circumstances, but in
8309 // general, fsel-based lowering of select is a finite-math-only optimization.
8310 // For more information, see section F.3 of the 2.06 ISA specification.
8311 // (With ISA 3.0, the xsmaxc/xsminc path above already covers the cases it can.)
8312 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8313 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8314 ResVT == MVT::f128)
8315 return Op;
8316
8317 // If the RHS of the comparison is a 0.0, we don't need to do the
8318 // subtraction at all.
8319 SDValue Sel1;
8320 if (isFloatingPointZero(RHS))
8321 switch (CC) {
8322 default: break; // SETUO etc aren't handled by fsel.
8323 case ISD::SETNE:
8324 std::swap(TV, FV);
8325 [[fallthrough]];
8326 case ISD::SETEQ:
8327 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8328 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8329 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8330 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8331 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8332 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8333 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8334 case ISD::SETULT:
8335 case ISD::SETLT:
8336 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8337 [[fallthrough]];
8338 case ISD::SETOGE:
8339 case ISD::SETGE:
8340 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8341 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8342 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8343 case ISD::SETUGT:
8344 case ISD::SETGT:
8345 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
8346 [[fallthrough]];
8347 case ISD::SETOLE:
8348 case ISD::SETLE:
8349 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8350 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8351 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8352 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8353 }
8354
8355 SDValue Cmp;
8356 switch (CC) {
8357 default: break; // SETUO etc aren't handled by fsel.
8358 case ISD::SETNE:
8359 std::swap(TV, FV);
8360 [[fallthrough]];
8361 case ISD::SETEQ:
8362 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8363 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8364 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8365 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8366 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8367 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8368 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8369 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8370 case ISD::SETULT:
8371 case ISD::SETLT:
8372 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8373 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8374 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8375 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8376 case ISD::SETOGE:
8377 case ISD::SETGE:
8378 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8379 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8380 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8381 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8382 case ISD::SETUGT:
8383 case ISD::SETGT:
8384 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8385 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8386 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8387 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8388 case ISD::SETOLE:
8389 case ISD::SETLE:
8390 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8391 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8392 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8393 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8394 }
8395 return Op;
8396}
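// A minimal standalone model of the fsel-based mapping used above, valid for
// the finite, ordered inputs this lowering restricts itself to. fselModel
// mirrors "fsel A,B,C: B if A >= 0.0, else C"; the helper name is
// illustrative only and not part of the lowering.
static constexpr double fselModel(double A, double B, double C) {
  return A >= 0.0 ? B : C;
}
// select_cc lhs, rhs, tv, fv, SETGE  ->  fsel (lhs - rhs), tv, fv
static_assert(fselModel(3.0 - 2.0, 10.0, 20.0) == 10.0, "3 >= 2 selects TV");
// select_cc lhs, rhs, tv, fv, SETLT  ->  fsel (lhs - rhs), fv, tv
static_assert(fselModel(1.0 - 2.0, 20.0, 10.0) == 10.0, "1 < 2 selects TV");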
8397
8398static unsigned getPPCStrictOpcode(unsigned Opc) {
8399 switch (Opc) {
8400 default:
8401 llvm_unreachable("No strict version of this opcode!");
8402 case PPCISD::FCTIDZ:
8403 return PPCISD::STRICT_FCTIDZ;
8404 case PPCISD::FCTIWZ:
8405 return PPCISD::STRICT_FCTIWZ;
8406 case PPCISD::FCTIDUZ:
8407 return PPCISD::STRICT_FCTIDUZ;
8408 case PPCISD::FCTIWUZ:
8409 return PPCISD::STRICT_FCTIWUZ;
8410 case PPCISD::FCFID:
8411 return PPCISD::STRICT_FCFID;
8412 case PPCISD::FCFIDU:
8413 return PPCISD::STRICT_FCFIDU;
8414 case PPCISD::FCFIDS:
8415 return PPCISD::STRICT_FCFIDS;
8416 case PPCISD::FCFIDUS:
8417 return PPCISD::STRICT_FCFIDUS;
8418 }
8419}
8420
8421 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8422 const PPCSubtarget &Subtarget) {
8423 SDLoc dl(Op);
8424 bool IsStrict = Op->isStrictFPOpcode();
8425 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8426 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8427
8428 // TODO: Any other flags to propagate?
8429 SDNodeFlags Flags;
8430 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8431
8432 // For strict nodes, source is the second operand.
8433 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8434 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8435 MVT DestTy = Op.getSimpleValueType();
8436 assert(Src.getValueType().isFloatingPoint() &&
8437 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8438 DestTy == MVT::i64) &&
8439 "Invalid FP_TO_INT types");
8440 if (Src.getValueType() == MVT::f32) {
8441 if (IsStrict) {
8442 Src =
8443 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8444 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8445 Chain = Src.getValue(1);
8446 } else
8447 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8448 }
8449 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8450 DestTy = Subtarget.getScalarIntVT();
8451 unsigned Opc = ISD::DELETED_NODE;
8452 switch (DestTy.SimpleTy) {
8453 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8454 case MVT::i32:
8455 Opc = IsSigned ? PPCISD::FCTIWZ
8456 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8457 break;
8458 case MVT::i64:
8459 assert((IsSigned || Subtarget.hasFPCVT()) &&
8460 "i64 FP_TO_UINT is supported only with FPCVT");
8461 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8462 }
8463 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8464 SDValue Conv;
8465 if (IsStrict) {
8466 Opc = getPPCStrictOpcode(Opc);
8467 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8468 Flags);
8469 } else {
8470 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8471 }
8472 return Conv;
8473}
8474
8475void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8476 SelectionDAG &DAG,
8477 const SDLoc &dl) const {
8478 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8479 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8480 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8481 bool IsStrict = Op->isStrictFPOpcode();
8482
8483 // Convert the FP value to an int value through memory.
8484 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8485 (IsSigned || Subtarget.hasFPCVT());
8486 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8487 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8488 MachinePointerInfo MPI =
8489 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8490
8491 // Emit a store to the stack slot.
8492 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8493 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8494 if (i32Stack) {
8495 MachineFunction &MF = DAG.getMachineFunction();
8496 Alignment = Align(4);
8497 MachineMemOperand *MMO =
8498 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8499 SDValue Ops[] = { Chain, Tmp, FIPtr };
8500 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8501 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8502 } else
8503 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8504
8505 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8506 // add in a bias on big endian.
8507 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8508 !Subtarget.isLittleEndian()) {
8509 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8510 DAG.getConstant(4, dl, FIPtr.getValueType()));
8511 MPI = MPI.getWithOffset(4);
8512 }
8513
8514 RLI.Chain = Chain;
8515 RLI.Ptr = FIPtr;
8516 RLI.MPI = MPI;
8517 RLI.Alignment = Alignment;
8518}
8519
8520/// Custom lowers floating point to integer conversions to use
8521/// the direct move instructions available in ISA 2.07 to avoid the
8522/// need for load/store combinations.
8523SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8524 SelectionDAG &DAG,
8525 const SDLoc &dl) const {
8526 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8527 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8528 if (Op->isStrictFPOpcode())
8529 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8530 else
8531 return Mov;
8532}
8533
8534SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8535 const SDLoc &dl) const {
8536 bool IsStrict = Op->isStrictFPOpcode();
8537 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8538 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8539 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8540 EVT SrcVT = Src.getValueType();
8541 EVT DstVT = Op.getValueType();
8542
8543 // FP to INT conversions are legal for f128.
8544 if (SrcVT == MVT::f128)
8545 return Subtarget.hasP9Vector() ? Op : SDValue();
8546
8547 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8548 // PPC (the libcall is not available).
8549 if (SrcVT == MVT::ppcf128) {
8550 if (DstVT == MVT::i32) {
8551 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8552 // set other fast-math flags to FP operations in both strict and
8553 // non-strict cases. (FP_TO_SINT, FSUB)
8554 SDNodeFlags Flags;
8555 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8556
8557 if (IsSigned) {
8558 SDValue Lo, Hi;
8559 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8560
8561 // Add the two halves of the long double in round-to-zero mode, and use
8562 // a smaller FP_TO_SINT.
8563 if (IsStrict) {
8564 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8565 DAG.getVTList(MVT::f64, MVT::Other),
8566 {Op.getOperand(0), Lo, Hi}, Flags);
8567 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8568 DAG.getVTList(MVT::i32, MVT::Other),
8569 {Res.getValue(1), Res}, Flags);
8570 } else {
8571 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8572 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8573 }
8574 } else {
8575 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8576 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8577 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8578 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8579 if (IsStrict) {
8580 // Sel = Src < 0x80000000
8581 // FltOfs = select Sel, 0.0, 0x80000000
8582 // IntOfs = select Sel, 0, 0x80000000
8583 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8584 SDValue Chain = Op.getOperand(0);
8585 EVT SetCCVT =
8586 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8587 EVT DstSetCCVT =
8588 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8589 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8590 Chain, true);
8591 Chain = Sel.getValue(1);
8592
8593 SDValue FltOfs = DAG.getSelect(
8594 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8595 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8596
8597 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8598 DAG.getVTList(SrcVT, MVT::Other),
8599 {Chain, Src, FltOfs}, Flags);
8600 Chain = Val.getValue(1);
8601 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8602 DAG.getVTList(DstVT, MVT::Other),
8603 {Chain, Val}, Flags);
8604 Chain = SInt.getValue(1);
8605 SDValue IntOfs = DAG.getSelect(
8606 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8607 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8608 return DAG.getMergeValues({Result, Chain}, dl);
8609 } else {
8610 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8611 // FIXME: generated code sucks.
8612 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8613 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8614 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8615 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8616 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8617 }
8618 }
8619 }
8620
8621 return SDValue();
8622 }
8623
8624 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8625 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8626
8627 ReuseLoadInfo RLI;
8628 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8629
8630 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8631 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8632}
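// A minimal standalone check of the unsigned-conversion identity used above
// for the ppcf128 -> i32 case (Result = fp_to_sint(Src - FltOfs) ^ IntOfs),
// with double standing in for ppc_fp128. The helper name is illustrative only
// and not part of the lowering.
static constexpr uint32_t sketchFPToUI32ViaSI32(double Src) {
  const double TwoE31 = 2147483648.0;             // 2^31
  const bool Sel = Src < TwoE31;                  // Sel = Src < 0x80000000
  const double FltOfs = Sel ? 0.0 : TwoE31;       // FltOfs = select Sel, 0.0, 2^31
  const uint32_t IntOfs = Sel ? 0u : 0x80000000u; // IntOfs = select Sel, 0, 2^31
  return uint32_t(int32_t(Src - FltOfs)) ^ IntOfs;
}
static_assert(sketchFPToUI32ViaSI32(7.0) == 7u, "values below 2^31 pass through");
static_assert(sketchFPToUI32ViaSI32(3000000000.0) == 3000000000u,
              "values >= 2^31 are rebased and the top bit is restored via XOR");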
8633
8634// We're trying to insert a regular store, S, and then a load, L. If the
8635// incoming value, O, is a load, we might just be able to have our load use the
8636// address used by O. However, we don't know if anything else will store to
8637// that address before we can load from it. To prevent this situation, we need
8638// to insert our load, L, into the chain as a peer of O. To do this, we give L
8639// the same chain operand as O, we create a token factor from the chain results
8640// of O and L, and we replace all uses of O's chain result with that token
8641// factor (this last part is handled by makeEquivalentMemoryOrdering).
8642bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8643 ReuseLoadInfo &RLI,
8644 SelectionDAG &DAG,
8645 ISD::LoadExtType ET) const {
8646 // Conservatively skip reusing for constrained FP nodes.
8647 if (Op->isStrictFPOpcode())
8648 return false;
8649
8650 SDLoc dl(Op);
8651 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8652 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8653 if (ET == ISD::NON_EXTLOAD &&
8654 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8655 isOperationLegalOrCustom(Op.getOpcode(),
8656 Op.getOperand(0).getValueType())) {
8657
8658 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8659 return true;
8660 }
8661
8662 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8663 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8664 LD->isNonTemporal())
8665 return false;
8666 if (LD->getMemoryVT() != MemVT)
8667 return false;
8668
8669 // If the result of the load is an illegal type, then we can't build a
8670 // valid chain for reuse since the legalised loads and token factor node that
8671 // ties the legalised loads together uses a different output chain than the
8672 // illegal load.
8673 if (!isTypeLegal(LD->getValueType(0)))
8674 return false;
8675
8676 RLI.Ptr = LD->getBasePtr();
8677 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8678 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8679 "Non-pre-inc AM on PPC?");
8680 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8681 LD->getOffset());
8682 }
8683
8684 RLI.Chain = LD->getChain();
8685 RLI.MPI = LD->getPointerInfo();
8686 RLI.IsDereferenceable = LD->isDereferenceable();
8687 RLI.IsInvariant = LD->isInvariant();
8688 RLI.Alignment = LD->getAlign();
8689 RLI.AAInfo = LD->getAAInfo();
8690 RLI.Ranges = LD->getRanges();
8691
8692 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8693 return true;
8694}
8695
8696 /// Analyze the profitability of a direct move:
8697 /// prefer a float load to an int load plus a direct move
8698 /// when there is no integer use of the int load.
8699bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8700 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8701 if (Origin->getOpcode() != ISD::LOAD)
8702 return true;
8703
8704 // If there is no LXSIBZX/LXSIHZX, like Power8,
8705 // prefer direct move if the memory size is 1 or 2 bytes.
8706 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8707 if (!Subtarget.hasP9Vector() &&
8708 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8709 return true;
8710
8711 for (SDUse &Use : Origin->uses()) {
8712
8713 // Only look at the users of the loaded value.
8714 if (Use.getResNo() != 0)
8715 continue;
8716
8717 SDNode *User = Use.getUser();
8718 if (User->getOpcode() != ISD::SINT_TO_FP &&
8719 User->getOpcode() != ISD::UINT_TO_FP &&
8720 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8721 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8722 return true;
8723 }
8724
8725 return false;
8726}
8727
8728 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8729 const PPCSubtarget &Subtarget,
8730 SDValue Chain = SDValue()) {
8731 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8732 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8733 SDLoc dl(Op);
8734
8735 // TODO: Any other flags to propagate?
8736 SDNodeFlags Flags;
8737 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8738
8739 // If we have FCFIDS, then use it when converting to single-precision.
8740 // Otherwise, convert to double-precision and then round.
8741 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8742 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8743 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8744 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8745 if (Op->isStrictFPOpcode()) {
8746 if (!Chain)
8747 Chain = Op.getOperand(0);
8748 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8749 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8750 } else
8751 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8752}
8753
8754/// Custom lowers integer to floating point conversions to use
8755/// the direct move instructions available in ISA 2.07 to avoid the
8756/// need for load/store combinations.
8757SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8758 SelectionDAG &DAG,
8759 const SDLoc &dl) const {
8760 assert((Op.getValueType() == MVT::f32 ||
8761 Op.getValueType() == MVT::f64) &&
8762 "Invalid floating point type as target of conversion");
8763 assert(Subtarget.hasFPCVT() &&
8764 "Int to FP conversions with direct moves require FPCVT");
8765 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8766 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8767 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8768 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8769 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8770 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8771 return convertIntToFP(Op, Mov, DAG, Subtarget);
8772}
8773
8774static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8775
8776 EVT VecVT = Vec.getValueType();
8777 assert(VecVT.isVector() && "Expected a vector type.");
8778 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8779
8780 EVT EltVT = VecVT.getVectorElementType();
8781 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8782 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8783
8784 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8785 SmallVector<SDValue, 16> Ops(NumConcat);
8786 Ops[0] = Vec;
8787 SDValue UndefVec = DAG.getUNDEF(VecVT);
8788 for (unsigned i = 1; i < NumConcat; ++i)
8789 Ops[i] = UndefVec;
8790
8791 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8792}
8793
8794SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8795 const SDLoc &dl) const {
8796 bool IsStrict = Op->isStrictFPOpcode();
8797 unsigned Opc = Op.getOpcode();
8798 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8799 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8800 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8801 "Unexpected conversion type");
8802 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8803 "Supports conversions to v2f64/v4f32 only.");
8804
8805 // TODO: Any other flags to propagate?
8806 SDNodeFlags Flags;
8807 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8808
8809 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8810 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8811
8812 SDValue Wide = widenVec(DAG, Src, dl);
8813 EVT WideVT = Wide.getValueType();
8814 unsigned WideNumElts = WideVT.getVectorNumElements();
8815 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8816
8817 SmallVector<int, 16> ShuffV;
8818 for (unsigned i = 0; i < WideNumElts; ++i)
8819 ShuffV.push_back(i + WideNumElts);
8820
8821 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8822 int SaveElts = FourEltRes ? 4 : 2;
8823 if (Subtarget.isLittleEndian())
8824 for (int i = 0; i < SaveElts; i++)
8825 ShuffV[i * Stride] = i;
8826 else
8827 for (int i = 1; i <= SaveElts; i++)
8828 ShuffV[i * Stride - 1] = i - 1;
8829
8830 SDValue ShuffleSrc2 =
8831 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8832 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8833
8834 SDValue Extend;
8835 if (SignedConv) {
8836 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8837 EVT ExtVT = Src.getValueType();
8838 if (Subtarget.hasP9Altivec())
8839 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8840 IntermediateVT.getVectorNumElements());
8841
8842 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8843 DAG.getValueType(ExtVT));
8844 } else
8845 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8846
8847 if (IsStrict)
8848 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8849 {Op.getOperand(0), Extend}, Flags);
8850
8851 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8852}
8853
8854SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8855 SelectionDAG &DAG) const {
8856 SDLoc dl(Op);
8857 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8858 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8859 bool IsStrict = Op->isStrictFPOpcode();
8860 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8861 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8862
8863 // TODO: Any other flags to propagate?
8864 SDNodeFlags Flags;
8865 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8866
8867 EVT InVT = Src.getValueType();
8868 EVT OutVT = Op.getValueType();
8869 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8870 isOperationCustom(Op.getOpcode(), InVT))
8871 return LowerINT_TO_FPVector(Op, DAG, dl);
8872
8873 // Conversions to f128 are legal.
8874 if (Op.getValueType() == MVT::f128)
8875 return Subtarget.hasP9Vector() ? Op : SDValue();
8876
8877 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8878 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8879 return SDValue();
8880
8881 if (Src.getValueType() == MVT::i1) {
8882 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8883 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8884 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8885 if (IsStrict)
8886 return DAG.getMergeValues({Sel, Chain}, dl);
8887 else
8888 return Sel;
8889 }
8890
8891 // If we have direct moves, we can do all the conversion, skip the store/load
8892 // however, without FPCVT we can't do most conversions.
8893 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8894 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8895 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8896
8897 assert((IsSigned || Subtarget.hasFPCVT()) &&
8898 "UINT_TO_FP is supported only with FPCVT");
8899
8900 if (Src.getValueType() == MVT::i64) {
8901 SDValue SINT = Src;
8902 // When converting to single-precision, we actually need to convert
8903 // to double-precision first and then round to single-precision.
8904 // To avoid double-rounding effects during that operation, we have
8905 // to prepare the input operand. Bits that might be truncated when
8906 // converting to double-precision are replaced by a bit that won't
8907 // be lost at this stage, but is below the single-precision rounding
8908 // position.
8909 //
8910 // However, if -enable-unsafe-fp-math is in effect, accept double
8911 // rounding to avoid the extra overhead.
8912 if (Op.getValueType() == MVT::f32 &&
8913 !Subtarget.hasFPCVT() &&
8915
8916 // Twiddle input to make sure the low 11 bits are zero. (If this
8917 // is the case, we are guaranteed the value will fit into the 53 bit
8918 // mantissa of an IEEE double-precision value without rounding.)
8919 // If any of those low 11 bits were not zero originally, make sure
8920 // bit 12 (value 2048) is set instead, so that the final rounding
8921 // to single-precision gets the correct result.
8922 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8923 SINT, DAG.getConstant(2047, dl, MVT::i64));
8924 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8925 Round, DAG.getConstant(2047, dl, MVT::i64));
8926 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8927 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8928 DAG.getSignedConstant(-2048, dl, MVT::i64));
8929
8930 // However, we cannot use that value unconditionally: if the magnitude
8931 // of the input value is small, the bit-twiddling we did above might
8932 // end up visibly changing the output. Fortunately, in that case, we
8933 // don't need to twiddle bits since the original input will convert
8934 // exactly to double-precision floating-point already. Therefore,
8935 // construct a conditional to use the original value if the top 11
8936 // bits are all sign-bit copies, and use the rounded value computed
8937 // above otherwise.
8938 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8939 SINT, DAG.getConstant(53, dl, MVT::i32));
8940 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8941 Cond, DAG.getConstant(1, dl, MVT::i64));
8942 Cond = DAG.getSetCC(
8943 dl,
8944 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8945 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8946
8947 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8948 }
8949
8950 ReuseLoadInfo RLI;
8951 SDValue Bits;
8952
8953 MachineFunction &MF = DAG.getMachineFunction();
8954 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8955 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8956 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8957 if (RLI.ResChain)
8958 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8959 } else if (Subtarget.hasLFIWAX() &&
8960 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8961 MachineMemOperand *MMO =
8962 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8963 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8964 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8965 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8966 DAG.getVTList(MVT::f64, MVT::Other),
8967 Ops, MVT::i32, MMO);
8968 if (RLI.ResChain)
8969 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8970 } else if (Subtarget.hasFPCVT() &&
8971 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8972 MachineMemOperand *MMO =
8973 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8974 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8975 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8976 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8977 DAG.getVTList(MVT::f64, MVT::Other),
8978 Ops, MVT::i32, MMO);
8979 if (RLI.ResChain)
8980 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8981 } else if (((Subtarget.hasLFIWAX() &&
8982 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8983 (Subtarget.hasFPCVT() &&
8984 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8985 SINT.getOperand(0).getValueType() == MVT::i32) {
8986 MachineFrameInfo &MFI = MF.getFrameInfo();
8987 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8988
8989 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8990 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8991
8992 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8993 MachinePointerInfo::getFixedStack(
8994 DAG.getMachineFunction(), FrameIdx));
8995 Chain = Store;
8996
8997 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8998 "Expected an i32 store");
8999
9000 RLI.Ptr = FIdx;
9001 RLI.Chain = Chain;
9002 RLI.MPI =
9003 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9004 RLI.Alignment = Align(4);
9005
9006 MachineMemOperand *MMO =
9007 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9008 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9009 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9010 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
9011 PPCISD::LFIWZX : PPCISD::LFIWAX,
9012 dl, DAG.getVTList(MVT::f64, MVT::Other),
9013 Ops, MVT::i32, MMO);
9014 Chain = Bits.getValue(1);
9015 } else
9016 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
9017
9018 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
9019 if (IsStrict)
9020 Chain = FP.getValue(1);
9021
9022 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9023 if (IsStrict)
9024 FP = DAG.getNode(
9025 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9026 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
9027 Flags);
9028 else
9029 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9030 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9031 }
9032 return FP;
9033 }
9034
9035 assert(Src.getValueType() == MVT::i32 &&
9036 "Unhandled INT_TO_FP type in custom expander!");
9037 // Since we only generate this in 64-bit mode, we can take advantage of
9038 // 64-bit registers. In particular, sign extend the input value into the
9039 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
9040 // then lfd it and fcfid it.
9041 MachineFunction &MF = DAG.getMachineFunction();
9042 MachineFrameInfo &MFI = MF.getFrameInfo();
9043 EVT PtrVT = getPointerTy(MF.getDataLayout());
9044
9045 SDValue Ld;
9046 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9047 ReuseLoadInfo RLI;
9048 bool ReusingLoad;
9049 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9050 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9051 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9052
9053 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9054 MachinePointerInfo::getFixedStack(
9055 DAG.getMachineFunction(), FrameIdx));
9056 Chain = Store;
9057
9058 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9059 "Expected an i32 store");
9060
9061 RLI.Ptr = FIdx;
9062 RLI.Chain = Chain;
9063 RLI.MPI =
9064 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9065 RLI.Alignment = Align(4);
9066 }
9067
9068 MachineMemOperand *MMO =
9069 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9070 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9071 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9072 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9073 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9074 MVT::i32, MMO);
9075 Chain = Ld.getValue(1);
9076 if (ReusingLoad && RLI.ResChain) {
9077 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9078 }
9079 } else {
9080 assert(Subtarget.isPPC64() &&
9081 "i32->FP without LFIWAX supported only on PPC64");
9082
9083 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9084 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9085
9086 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9087
9088 // STD the extended value into the stack slot.
9089 SDValue Store = DAG.getStore(
9090 Chain, dl, Ext64, FIdx,
9091 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9092 Chain = Store;
9093
9094 // Load the value as a double.
9095 Ld = DAG.getLoad(
9096 MVT::f64, dl, Chain, FIdx,
9097 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9098 Chain = Ld.getValue(1);
9099 }
9100
9101 // FCFID it and return it.
9102 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9103 if (IsStrict)
9104 Chain = FP.getValue(1);
9105 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9106 if (IsStrict)
9107 FP = DAG.getNode(
9108 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9109 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9110 else
9111 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9112 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9113 }
9114 return FP;
9115}
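// A minimal standalone check of the bit-twiddling used above when converting
// i64 -> f32 without FPCVT: the low 11 bits are cleared and, if any of them
// were set, folded into a single sticky bit of value 2048 so that the later
// f64 -> f32 rounding still observes an inexact value. The helper name is
// illustrative only and not part of the lowering.
static constexpr int64_t sketchStickyBits(int64_t X) {
  int64_t Round = X & 2047;
  Round = Round + 2047;
  Round = Round | X;
  return Round & ~2047;
}
static_assert(sketchStickyBits(0x1000) == 0x1000,
              "low 11 bits already zero: the value is unchanged");
static_assert(sketchStickyBits(0x1001) == 0x1800,
              "nonzero low bits are replaced by the sticky bit (value 2048)");
static_assert((sketchStickyBits(0x123456789ABCDEFLL) & 2047) == 0,
              "the result always has its low 11 bits clear");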
9116
9117SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9118 SelectionDAG &DAG) const {
9119 SDLoc Dl(Op);
9120 MachineFunction &MF = DAG.getMachineFunction();
9121 EVT PtrVT = getPointerTy(MF.getDataLayout());
9122 SDValue Chain = Op.getOperand(0);
9123
9124 // If the requested mode is constant, just use the simpler mtfsb/mffscrni.
9125 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9126 uint64_t Mode = CVal->getZExtValue();
9127 assert(Mode < 4 && "Unsupported rounding mode!");
9128 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9129 if (Subtarget.isISA3_0())
9130 return SDValue(
9131 DAG.getMachineNode(
9132 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9133 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9134 1);
9135 SDNode *SetHi = DAG.getMachineNode(
9136 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9137 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9138 SDNode *SetLo = DAG.getMachineNode(
9139 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9140 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9141 return SDValue(SetLo, 0);
9142 }
9143
9144 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9145 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9146 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9147 DAG.getConstant(3, Dl, MVT::i32));
9148 SDValue DstFlag = DAG.getNode(
9149 ISD::XOR, Dl, MVT::i32, SrcFlag,
9150 DAG.getNode(ISD::AND, Dl, MVT::i32,
9151 DAG.getNOT(Dl,
9152 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9153 MVT::i32),
9154 One));
9155 // For Power9, there's a faster mffscrn, and we don't need to read the FPSCR.
9156 SDValue MFFS;
9157 if (!Subtarget.isISA3_0()) {
9158 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9159 Chain = MFFS.getValue(1);
9160 }
9161 SDValue NewFPSCR;
9162 if (Subtarget.isPPC64()) {
9163 if (Subtarget.isISA3_0()) {
9164 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9165 } else {
9166 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9167 SDNode *InsertRN = DAG.getMachineNode(
9168 PPC::RLDIMI, Dl, MVT::i64,
9169 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9170 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9171 DAG.getTargetConstant(0, Dl, MVT::i32),
9172 DAG.getTargetConstant(62, Dl, MVT::i32)});
9173 NewFPSCR = SDValue(InsertRN, 0);
9174 }
9175 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9176 } else {
9177 // In 32-bit mode, store f64, load and update the lower half.
9178 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9179 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9180 SDValue Addr = Subtarget.isLittleEndian()
9181 ? StackSlot
9182 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9183 DAG.getConstant(4, Dl, PtrVT));
9184 if (Subtarget.isISA3_0()) {
9185 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9186 } else {
9187 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9188 SDValue Tmp =
9189 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9190 Chain = Tmp.getValue(1);
9191 Tmp = SDValue(DAG.getMachineNode(
9192 PPC::RLWIMI, Dl, MVT::i32,
9193 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9194 DAG.getTargetConstant(30, Dl, MVT::i32),
9195 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9196 0);
9197 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9198 }
9199 NewFPSCR =
9200 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9201 Chain = NewFPSCR.getValue(1);
9202 }
9203 if (Subtarget.isISA3_0())
9204 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9205 {NewFPSCR, Chain}),
9206 1);
9207 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9208 SDNode *MTFSF = DAG.getMachineNode(
9209 PPC::MTFSF, Dl, MVT::Other,
9210 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9211 return SDValue(MTFSF, 0);
9212}
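// A minimal standalone check of the mapping used above from the LLVM rounding
// mode operand (0: toward zero, 1: nearest, 2: +inf, 3: -inf) to the PPC FPSCR
// RN field (0: nearest, 1: toward zero, 2: +inf, 3: -inf). The helper name is
// illustrative only and not part of the lowering.
static constexpr unsigned llvmRoundingToPPCRN(unsigned X) {
  return (X & 3) ^ (~((X & 3) >> 1) & 1);
}
static_assert(llvmRoundingToPPCRN(0) == 1, "toward zero -> RN = 1");
static_assert(llvmRoundingToPPCRN(1) == 0, "to nearest  -> RN = 0");
static_assert(llvmRoundingToPPCRN(2) == 2, "toward +inf -> RN = 2");
static_assert(llvmRoundingToPPCRN(3) == 3, "toward -inf -> RN = 3");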
9213
9214SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9215 SelectionDAG &DAG) const {
9216 SDLoc dl(Op);
9217 /*
9218 The rounding mode is in bits 30:31 of the FPSCR, and has the following
9219 settings:
9220 00 Round to nearest
9221 01 Round to 0
9222 10 Round to +inf
9223 11 Round to -inf
9224
9225 GET_ROUNDING, on the other hand, expects the following:
9226 -1 Undefined
9227 0 Round to 0
9228 1 Round to nearest
9229 2 Round to +inf
9230 3 Round to -inf
9231
9232 To perform the conversion, we do:
9233 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9234 */
9235
9236 MachineFunction &MF = DAG.getMachineFunction();
9237 EVT VT = Op.getValueType();
9238 EVT PtrVT = getPointerTy(MF.getDataLayout());
9239
9240 // Save FP Control Word to register
9241 SDValue Chain = Op.getOperand(0);
9242 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9243 Chain = MFFS.getValue(1);
9244
9245 SDValue CWD;
9246 if (isTypeLegal(MVT::i64)) {
9247 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9248 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9249 } else {
9250 // Save FP register to stack slot
9251 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9252 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9253 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9254
9255 // Load FP Control Word from low 32 bits of stack slot.
9257 "Stack slot adjustment is valid only on big endian subtargets!");
9258 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9259 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9260 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9261 Chain = CWD.getValue(1);
9262 }
9263
9264 // Transform as necessary
9265 SDValue CWD1 =
9266 DAG.getNode(ISD::AND, dl, MVT::i32,
9267 CWD, DAG.getConstant(3, dl, MVT::i32));
9268 SDValue CWD2 =
9269 DAG.getNode(ISD::SRL, dl, MVT::i32,
9270 DAG.getNode(ISD::AND, dl, MVT::i32,
9271 DAG.getNode(ISD::XOR, dl, MVT::i32,
9272 CWD, DAG.getConstant(3, dl, MVT::i32)),
9273 DAG.getConstant(3, dl, MVT::i32)),
9274 DAG.getConstant(1, dl, MVT::i32));
9275
9276 SDValue RetVal =
9277 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9278
9279 RetVal =
9280 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9281 dl, VT, RetVal);
9282
9283 return DAG.getMergeValues({RetVal, Chain}, dl);
9284}
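// A minimal standalone check of the FPSCR RN -> GET_ROUNDING conversion
// derived in the comment above, ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)).
// The helper name is illustrative only and not part of the lowering.
static constexpr unsigned ppcRNToGetRounding(unsigned RN) {
  return (RN & 0x3) ^ ((~RN & 0x3) >> 1);
}
static_assert(ppcRNToGetRounding(0) == 1, "RN = 0 (nearest)     -> 1");
static_assert(ppcRNToGetRounding(1) == 0, "RN = 1 (toward zero) -> 0");
static_assert(ppcRNToGetRounding(2) == 2, "RN = 2 (+inf)        -> 2");
static_assert(ppcRNToGetRounding(3) == 3, "RN = 3 (-inf)        -> 3");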
9285
9286SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9287 EVT VT = Op.getValueType();
9288 uint64_t BitWidth = VT.getSizeInBits();
9289 SDLoc dl(Op);
9290 assert(Op.getNumOperands() == 3 &&
9291 VT == Op.getOperand(1).getValueType() &&
9292 "Unexpected SHL!");
9293
9294 // Expand into a bunch of logical ops. Note that these ops
9295 // depend on the PPC behavior for oversized shift amounts.
9296 SDValue Lo = Op.getOperand(0);
9297 SDValue Hi = Op.getOperand(1);
9298 SDValue Amt = Op.getOperand(2);
9299 EVT AmtVT = Amt.getValueType();
9300
9301 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9302 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9303 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9304 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9305 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9306 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9307 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9308 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9309 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9310 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9311 SDValue OutOps[] = { OutLo, OutHi };
9312 return DAG.getMergeValues(OutOps, dl);
9313}
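// A minimal standalone model of the SHL_PARTS expansion above for a 64-bit
// shift built out of 32-bit halves. It relies on the PPC convention the
// comment mentions: slw/srw take a 6-bit amount and produce 0 for amounts
// 32..63. Helper names are illustrative only and not part of the lowering.
static constexpr uint32_t modelSLW(uint32_t V, unsigned Amt) {
  return (Amt & 63) >= 32 ? 0 : V << (Amt & 63);
}
static constexpr uint32_t modelSRW(uint32_t V, unsigned Amt) {
  return (Amt & 63) >= 32 ? 0 : V >> (Amt & 63);
}
static constexpr uint64_t modelSHLParts(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  uint32_t OutLo = modelSLW(Lo, Amt);
  uint32_t OutHi =
      modelSLW(Hi, Amt) | modelSRW(Lo, 32 - Amt) | modelSLW(Lo, Amt - 32);
  return (uint64_t(OutHi) << 32) | OutLo;
}
static_assert(modelSHLParts(0x89ABCDEFu, 0x01234567u, 4) ==
                  ((uint64_t(0x01234567u) << 32 | 0x89ABCDEFu) << 4),
              "in-range amounts move bits from Lo into Hi via the 32-Amt term");
static_assert(modelSHLParts(0x89ABCDEFu, 0x01234567u, 36) ==
                  ((uint64_t(0x01234567u) << 32 | 0x89ABCDEFu) << 36),
              "oversized amounts are covered by the Amt-32 term");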
9314
9315SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9316 EVT VT = Op.getValueType();
9317 SDLoc dl(Op);
9318 uint64_t BitWidth = VT.getSizeInBits();
9319 assert(Op.getNumOperands() == 3 &&
9320 VT == Op.getOperand(1).getValueType() &&
9321 "Unexpected SRL!");
9322
9323 // Expand into a bunch of logical ops. Note that these ops
9324 // depend on the PPC behavior for oversized shift amounts.
9325 SDValue Lo = Op.getOperand(0);
9326 SDValue Hi = Op.getOperand(1);
9327 SDValue Amt = Op.getOperand(2);
9328 EVT AmtVT = Amt.getValueType();
9329
9330 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9331 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9332 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9333 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9334 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9335 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9336 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9337 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9338 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9339 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9340 SDValue OutOps[] = { OutLo, OutHi };
9341 return DAG.getMergeValues(OutOps, dl);
9342}
9343
9344SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9345 SDLoc dl(Op);
9346 EVT VT = Op.getValueType();
9347 uint64_t BitWidth = VT.getSizeInBits();
9348 assert(Op.getNumOperands() == 3 &&
9349 VT == Op.getOperand(1).getValueType() &&
9350 "Unexpected SRA!");
9351
9352 // Expand into a bunch of logical ops, followed by a select_cc.
9353 SDValue Lo = Op.getOperand(0);
9354 SDValue Hi = Op.getOperand(1);
9355 SDValue Amt = Op.getOperand(2);
9356 EVT AmtVT = Amt.getValueType();
9357
9358 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9359 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9360 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9361 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9362 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9363 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9364 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9365 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9366 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9367 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9368 Tmp4, Tmp6, ISD::SETLE);
9369 SDValue OutOps[] = { OutLo, OutHi };
9370 return DAG.getMergeValues(OutOps, dl);
9371}
9372
9373SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9374 SelectionDAG &DAG) const {
9375 SDLoc dl(Op);
9376 EVT VT = Op.getValueType();
9377 unsigned BitWidth = VT.getSizeInBits();
9378
9379 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9380 SDValue X = Op.getOperand(0);
9381 SDValue Y = Op.getOperand(1);
9382 SDValue Z = Op.getOperand(2);
9383 EVT AmtVT = Z.getValueType();
9384
9385 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9386 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9387 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9388 // on PowerPC shift by BW being well defined.
9389 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9390 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9391 SDValue SubZ =
9392 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9393 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9394 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9395 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9396}
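// The same identity on 64-bit scalars as a standalone sketch (helper names
// are illustrative). After masking, Z is in [0, 63], so the only oversized
// shift is the "BW - Z" term when Z == 0; that term must produce 0 for the
// OR to reduce to plain X (fshl) or plain Y (fshr), which is exactly the PPC
// shift behavior relied on above.
#include <cstdint>

static uint64_t shlOrZero(uint64_t V, unsigned Amt) { return Amt >= 64 ? 0 : V << Amt; }
static uint64_t srlOrZero(uint64_t V, unsigned Amt) { return Amt >= 64 ? 0 : V >> Amt; }

static uint64_t fshl64(uint64_t X, uint64_t Y, unsigned Z) {
  Z &= 63;                                       // Z % BW
  return shlOrZero(X, Z) | srlOrZero(Y, 64 - Z);
}
static uint64_t fshr64(uint64_t X, uint64_t Y, unsigned Z) {
  Z &= 63;                                       // Z % BW
  return shlOrZero(X, 64 - Z) | srlOrZero(Y, Z);
}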
9397
9398//===----------------------------------------------------------------------===//
9399// Vector related lowering.
9400//
9401
9402/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9403/// element size of SplatSize. Cast the result to VT.
9404static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9405 SelectionDAG &DAG, const SDLoc &dl) {
9406 static const MVT VTys[] = { // canonical VT to use for each size.
9407 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9408 };
9409
9410 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9411
9412 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9413 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9414 SplatSize = 1;
9415 Val = 0xFF;
9416 }
9417
9418 EVT CanonicalVT = VTys[SplatSize-1];
9419
9420 // Build a canonical splat for this value.
9421 // Explicitly truncate APInt here, as this API is used with a mix of
9422 // signed and unsigned values.
9423 return DAG.getBitcast(
9424 ReqVT,
9425 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9426}
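// For example, a 2-byte splat of 0xFFFF and a 4-byte splat of 0xFFFFFFFF both
// hit the all-ones check above and are rewritten as a 1-byte splat of 0xFF,
// so every all-ones splat canonicalizes to v16i8 vspltisb(-1). A standalone
// form of that predicate (sketch; SplatSize is assumed to be 1, 2 or 4):
#include <cstdint>

static bool isAllOnesSplatValue(uint64_t Val, unsigned SplatSize) {
  return Val == ((1ULL << (SplatSize * 8)) - 1);
}
// isAllOnesSplatValue(0xFFFF, 2) is true; isAllOnesSplatValue(0xAB, 1) is false.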
9427
9428/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9429/// specified intrinsic ID.
9430static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9431 const SDLoc &dl, EVT DestVT = MVT::Other) {
9432 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9433 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9434 DAG.getConstant(IID, dl, MVT::i32), Op);
9435}
9436
9437/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9438/// specified intrinsic ID.
9439static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9440 SelectionDAG &DAG, const SDLoc &dl,
9441 EVT DestVT = MVT::Other) {
9442 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9443 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9444 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9445}
9446
9447/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9448/// specified intrinsic ID.
9449static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9450 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9451 EVT DestVT = MVT::Other) {
9452 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9453 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9454 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9455}
9456
9457/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9458/// amount. The result has the specified value type.
9459static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9460 SelectionDAG &DAG, const SDLoc &dl) {
9461 // Force LHS/RHS to be the right type.
9462 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9463 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9464
9465 int Ops[16];
9466 for (unsigned i = 0; i != 16; ++i)
9467 Ops[i] = i + Amt;
9468 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9469 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9470}
9471
9472/// Do we have an efficient pattern in a .td file for this node?
9473///
9474/// \param V - pointer to the BuildVectorSDNode being matched
9475/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9476///
9477/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9478/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9479/// the opposite is true (expansion is beneficial) are:
9480/// - The node builds a vector out of integers that are not 32 or 64-bits
9481/// - The node builds a vector out of constants
9482/// - The node is a "load-and-splat"
9483/// In all other cases, we will choose to keep the BUILD_VECTOR.
9484static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9485 bool HasDirectMove,
9486 bool HasP8Vector) {
9487 EVT VecVT = V->getValueType(0);
9488 bool RightType = VecVT == MVT::v2f64 ||
9489 (HasP8Vector && VecVT == MVT::v4f32) ||
9490 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9491 if (!RightType)
9492 return false;
9493
9494 bool IsSplat = true;
9495 bool IsLoad = false;
9496 SDValue Op0 = V->getOperand(0);
9497
9498 // This function is called in a block that confirms the node is not a constant
9499 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9500 // different constants.
9501 if (V->isConstant())
9502 return false;
9503 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9504 if (V->getOperand(i).isUndef())
9505 return false;
9506 // We want to expand nodes that represent load-and-splat even if the
9507 // loaded value is a floating point truncation or conversion to int.
9508 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9509 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9510 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9511 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9512 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9513 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9514 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9515 IsLoad = true;
9516 // If the operands are different or the input is not a load and has more
9517 // uses than just this BV node, then it isn't a splat.
9518 if (V->getOperand(i) != Op0 ||
9519 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9520 IsSplat = false;
9521 }
9522 return !(IsSplat && IsLoad);
9523}
9524
9525// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9526SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9527
9528 SDLoc dl(Op);
9529 SDValue Op0 = Op->getOperand(0);
9530
9531 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9532 (Op.getValueType() != MVT::f128))
9533 return SDValue();
9534
9535 SDValue Lo = Op0.getOperand(0);
9536 SDValue Hi = Op0.getOperand(1);
9537 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9538 return SDValue();
9539
9540 if (!Subtarget.isLittleEndian())
9541 std::swap(Lo, Hi);
9542
9543 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9544}
9545
9546static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9547 const SDValue *InputLoad = &Op;
9548 while (InputLoad->getOpcode() == ISD::BITCAST)
9549 InputLoad = &InputLoad->getOperand(0);
9550 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9551 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9552 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9553 InputLoad = &InputLoad->getOperand(0);
9554 }
9555 if (InputLoad->getOpcode() != ISD::LOAD)
9556 return nullptr;
9557 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9558 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9559}
9560
9561// Convert the argument APFloat to a single precision APFloat if there is no
9562// loss in information during the conversion to single precision APFloat and the
9563// resulting number is not a denormal number. Return true if successful.
9564bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9565 APFloat APFloatToConvert = ArgAPFloat;
9566 bool LosesInfo = true;
9567 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9568 &LosesInfo);
9569 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9570 if (Success)
9571 ArgAPFloat = APFloatToConvert;
9572 return Success;
9573}
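// A standalone approximation of the same test with native types (sketch; the
// APFloat-based check above is the authoritative one): a double can be
// narrowed when it round-trips through float exactly and the single-precision
// result is not a denormal.
#include <cmath>

static bool fitsInNonDenormSingle(double D) {
  float F = static_cast<float>(D);
  return static_cast<double>(F) == D && std::fpclassify(F) != FP_SUBNORMAL;
}
// fitsInNonDenormSingle(1.0) is true; fitsInNonDenormSingle(0.1) is false,
// since 0.1 is not exactly representable in single precision.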
9574
9575// Bitcast the argument APInt to a double and convert it to a single precision
9576// APFloat, bitcast the APFloat to an APInt and assign it to the original
9577// argument if there is no loss in information during the conversion from
9578// double to single precision APFloat and the resulting number is not a denormal
9579// number. Return true if successful.
9580bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9581 double DpValue = ArgAPInt.bitsToDouble();
9582 APFloat APFloatDp(DpValue);
9583 bool Success = convertToNonDenormSingle(APFloatDp);
9584 if (Success)
9585 ArgAPInt = APFloatDp.bitcastToAPInt();
9586 return Success;
9587}
9588
9589// Nondestructive check for convertToNonDenormSingle.
9590bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9591 // Only convert if it loses info, since XXSPLTIDP should
9592 // handle the other case.
9593 APFloat APFloatToConvert = ArgAPFloat;
9594 bool LosesInfo = true;
9595 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9596 &LosesInfo);
9597
9598 return (!LosesInfo && !APFloatToConvert.isDenormal());
9599}
9600
9601static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9602 unsigned &Opcode) {
9603 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9604 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9605 return false;
9606
9607 EVT Ty = Op->getValueType(0);
9608 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9609 // as we cannot handle extending loads for these types.
9610 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9611 ISD::isNON_EXTLoad(InputNode))
9612 return true;
9613
9614 EVT MemVT = InputNode->getMemoryVT();
9615 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9616 // memory VT is the same vector element VT type.
9617 // The loads feeding into the v8i16 and v16i8 types will be extending because
9618 // scalar i8/i16 are not legal types.
9619 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9620 (MemVT == Ty.getVectorElementType()))
9621 return true;
9622
9623 if (Ty == MVT::v2i64) {
9624 // Check the extend type, when the input type is i32, and the output vector
9625 // type is v2i64.
9626 if (MemVT == MVT::i32) {
9627 if (ISD::isZEXTLoad(InputNode))
9628 Opcode = PPCISD::ZEXT_LD_SPLAT;
9629 if (ISD::isSEXTLoad(InputNode))
9630 Opcode = PPCISD::SEXT_LD_SPLAT;
9631 }
9632 return true;
9633 }
9634 return false;
9635}
9636
9637static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9638 bool IsLittleEndian) {
9639 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9640
9641 BitMask.clearAllBits();
9642 EVT VT = BVN.getValueType(0);
9643 unsigned VTSize = VT.getSizeInBits();
9644 APInt ConstValue(VTSize, 0);
9645
9646 unsigned EltWidth = VT.getScalarSizeInBits();
9647
9648 unsigned BitPos = 0;
9649 for (auto OpVal : BVN.op_values()) {
9650 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9651
9652 if (!CN)
9653 return false;
9654 // The elements in a vector register are ordered in reverse byte order
9655 // between little-endian and big-endian modes.
9656 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9657 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9658 BitPos += EltWidth;
9659 }
9660
9661 for (unsigned J = 0; J < 16; ++J) {
9662 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9663 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9664 return false;
9665 if (ExtractValue == 0xFF)
9666 BitMask.setBit(J);
9667 }
9668 return true;
9669}
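// A standalone model of the extraction above for the common case (sketch; the
// helper name is illustrative): once the build_vector constant has been
// flattened to 16 bytes in little-endian order, every byte must be 0x00 or
// 0xFF, and byte J selects bit J of the MTVSRBMI immediate.
#include <cstdint>
#include <optional>

static std::optional<uint16_t> bytesToMtVsrBmiMask(const uint8_t (&Bytes)[16]) {
  uint16_t Mask = 0;
  for (unsigned J = 0; J < 16; ++J) {
    if (Bytes[J] != 0x00 && Bytes[J] != 0xFF)
      return std::nullopt; // not representable as a byte mask
    if (Bytes[J] == 0xFF)
      Mask |= (uint16_t)1 << J;
  }
  return Mask;
}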
9670
9671// If this is a case we can't handle, return null and let the default
9672// expansion code take care of it. If we CAN select this case, and if it
9673// selects to a single instruction, return Op. Otherwise, if we can codegen
9674// this case more efficiently than a constant pool load, lower it to the
9675// sequence of ops that should be used.
9676SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9677 SelectionDAG &DAG) const {
9678 SDLoc dl(Op);
9679 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9680 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9681
9682 if (Subtarget.hasP10Vector()) {
9683 APInt BitMask(32, 0);
9684 // If the value of the vector is all zeros or all ones,
9685 // we do not convert it to MTVSRBMI.
9686 // The xxleqv instruction sets a vector with all ones.
9687 // The xxlxor instruction sets a vector with all zeros.
9688 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9689 BitMask != 0 && BitMask != 0xffff) {
9690 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9691 MachineSDNode *MSDNode =
9692 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9693 SDValue SDV = SDValue(MSDNode, 0);
9694 EVT DVT = BVN->getValueType(0);
9695 EVT SVT = SDV.getValueType();
9696 if (SVT != DVT) {
9697 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9698 }
9699 return SDV;
9700 }
9701 }
9702 // Check if this is a splat of a constant value.
9703 APInt APSplatBits, APSplatUndef;
9704 unsigned SplatBitSize;
9705 bool HasAnyUndefs;
9706 bool BVNIsConstantSplat =
9707 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9708 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9709
9710 // If it is a splat of a double, check if we can shrink it to a 32 bit
9711 // non-denormal float which when converted back to double gives us the same
9712 // double. This is to exploit the XXSPLTIDP instruction.
9713 // If we lose precision, we use XXSPLTI32DX.
9714 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9715 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9716 // Check the type first to short-circuit so we don't modify APSplatBits if
9717 // this block isn't executed.
9718 if ((Op->getValueType(0) == MVT::v2f64) &&
9719 convertToNonDenormSingle(APSplatBits)) {
9720 SDValue SplatNode = DAG.getNode(
9721 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9722 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9723 return DAG.getBitcast(Op.getValueType(), SplatNode);
9724 } else {
9725 // We may lose precision, so we have to use XXSPLTI32DX.
9726
9727 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9728 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9729 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9730
9731 if (!Hi || !Lo)
9732 // If either half is 0, then we should generate XXLXOR to set it to 0.
9733 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9734
9735 if (Hi)
9736 SplatNode = DAG.getNode(
9737 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9738 DAG.getTargetConstant(0, dl, MVT::i32),
9739 DAG.getTargetConstant(Hi, dl, MVT::i32));
9740
9741 if (Lo)
9742 SplatNode =
9743 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9744 DAG.getTargetConstant(1, dl, MVT::i32),
9745 DAG.getTargetConstant(Lo, dl, MVT::i32));
9746
9747 return DAG.getBitcast(Op.getValueType(), SplatNode);
9748 }
9749 }
9750
9751 bool IsSplat64 = false;
9752 uint64_t SplatBits = 0;
9753 int32_t SextVal = 0;
9754 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9755 SplatBits = APSplatBits.getZExtValue();
9756 if (SplatBitSize <= 32) {
9757 SextVal = SignExtend32(SplatBits, SplatBitSize);
9758 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9759 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9760 bool P9Vector = Subtarget.hasP9Vector();
9761 int32_t Hi = P9Vector ? 127 : 15;
9762 int32_t Lo = P9Vector ? -128 : -16;
9763 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9764 SextVal = static_cast<int32_t>(SplatBits);
9765 }
9766 }
9767
9768 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9769 unsigned NewOpcode = PPCISD::LD_SPLAT;
9770
9771 // Handle load-and-splat patterns as we have instructions that will do this
9772 // in one go.
9773 if (DAG.isSplatValue(Op, true) &&
9774 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9775 const SDValue *InputLoad = &Op.getOperand(0);
9776 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9777
9778 // If the input load is an extending load, it will be an i32 -> i64
9779 // extending load and isValidSplatLoad() will update NewOpcode.
9780 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9781 unsigned ElementSize =
9782 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9783
9784 assert(((ElementSize == 2 * MemorySize)
9785 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9786 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9787 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9788 "Unmatched element size and opcode!\n");
9789
9790 // Checking for a single use of this load, we have to check for vector
9791 // width (128 bits) / ElementSize uses (since each operand of the
9792 // BUILD_VECTOR is a separate use of the value).
9793 unsigned NumUsesOfInputLD = 128 / ElementSize;
9794 for (SDValue BVInOp : Op->ops())
9795 if (BVInOp.isUndef())
9796 NumUsesOfInputLD--;
9797
9798 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9799 // The cases below would also apply for "lfiwzx/lfiwax + LE target + index
9800 // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9801 // 15", but isValidSplatLoad() only returns true when the operand at index
9802 // 0 is the load (not a nullptr), so we will not get into trouble for
9803 // these cases.
9804 //
9805 // case 1 - lfiwzx/lfiwax
9806 // 1.1: load result is i32 and is sign/zero extend to i64;
9807 // 1.2: build a v2i64 vector type with above loaded value;
9808 // 1.3: the vector has only one value at index 0, others are all undef;
9809 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9810 if (NumUsesOfInputLD == 1 &&
9811 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9812 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9813 Subtarget.hasLFIWAX()))
9814 return SDValue();
9815
9816 // case 2 - lxvr[hb]x
9817 // 2.1: load result is at most i16;
9818 // 2.2: build a vector with above loaded value;
9819 // 2.3: the vector has only one value at index 0, others are all undef;
9820 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9821 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9822 Subtarget.isISA3_1() && ElementSize <= 16)
9823 return SDValue();
9824
9825 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9826 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9827 Subtarget.hasVSX()) {
9828 SDValue Ops[] = {
9829 LD->getChain(), // Chain
9830 LD->getBasePtr(), // Ptr
9831 DAG.getValueType(Op.getValueType()) // VT
9832 };
9833 SDValue LdSplt = DAG.getMemIntrinsicNode(
9834 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9835 LD->getMemoryVT(), LD->getMemOperand());
9836 // Replace all uses of the output chain of the original load with the
9837 // output chain of the new load.
9838 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9839 LdSplt.getValue(1));
9840 return LdSplt;
9841 }
9842 }
9843
9844 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9845 // 32-bits can be lowered to VSX instructions under certain conditions.
9846 // Without VSX, there is no pattern more efficient than expanding the node.
9847 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9848 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9849 Subtarget.hasP8Vector()))
9850 return Op;
9851 return SDValue();
9852 }
9853
9854 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9855 unsigned SplatSize = SplatBitSize / 8;
9856
9857 // First, handle single instruction cases.
9858
9859 // All zeros?
9860 if (SplatBits == 0) {
9861 // Canonicalize all zero vectors to be v4i32.
9862 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9863 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9864 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9865 }
9866 return Op;
9867 }
9868
9869 // We have XXSPLTIW for constant splats four bytes wide.
9870 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9871 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9872 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9873 // turned into a 4-byte splat of 0xABABABAB.
9874 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9875 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9876 Op.getValueType(), DAG, dl);
9877
9878 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9879 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9880 dl);
9881
9882 // We have XXSPLTIB for constant splats one byte wide.
9883 if (Subtarget.hasP9Vector() && SplatSize == 1)
9884 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9885 dl);
9886
9887 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9888 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9889 if (SextVal >= -16 && SextVal <= 15) {
9890 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9891 // generate a splat word with extend for size 8.
9892 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9893 SDValue Res =
9894 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9895 if (SplatSize != 8)
9896 return Res;
9897 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9898 }
9899
9900 // Two instruction sequences.
9901
9902 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9903 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9904 SmallVector<SDValue, 16> Ops(16, C);
9905 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9906 unsigned IID;
9907 EVT VT;
9908 switch (SplatSize) {
9909 default:
9910 llvm_unreachable("Unexpected type for vector constant.");
9911 case 2:
9912 IID = Intrinsic::ppc_altivec_vupklsb;
9913 VT = MVT::v8i16;
9914 break;
9915 case 4:
9916 IID = Intrinsic::ppc_altivec_vextsb2w;
9917 VT = MVT::v4i32;
9918 break;
9919 case 8:
9920 IID = Intrinsic::ppc_altivec_vextsb2d;
9921 VT = MVT::v2i64;
9922 break;
9923 }
9924 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9925 return DAG.getBitcast(Op->getValueType(0), Extend);
9926 }
9927 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9928
9929 // If this value is in the range [-32,30] and is even, use:
9930 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9931 // If this value is in the range [17,31] and is odd, use:
9932 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9933 // If this value is in the range [-31,-17] and is odd, use:
9934 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9935 // Note the last two are three-instruction sequences.
9936 if (SextVal >= -32 && SextVal <= 31) {
9937 // To avoid having these optimizations undone by constant folding,
9938 // we convert to a pseudo that will be expanded later into one of
9939 // the above forms.
9940 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9941 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9942 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9943 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9944 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9945 if (VT == Op.getValueType())
9946 return RetVal;
9947 else
9948 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9949 }
9950
9951 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9952 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9953 // for fneg/fabs.
9954 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9955 // Make -1 and vspltisw -1:
9956 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9957
9958 // Make the VSLW intrinsic, computing 0x8000_0000.
9959 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9960 OnesV, DAG, dl);
9961
9962 // xor by OnesV to invert it.
9963 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9964 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9965 }
9966
9967 // Check to see if this is a wide variety of vsplti*, binop self cases.
9968 static const signed char SplatCsts[] = {
9969 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9970 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9971 };
9972
9973 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9974 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9975 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9976 int i = SplatCsts[idx];
9977
9978 // Figure out what shift amount will be used by altivec if shifted by i in
9979 // this splat size.
9980 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9981
9982 // vsplti + shl self.
9983 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9984 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9985 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9986 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9987 Intrinsic::ppc_altivec_vslw
9988 };
9989 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9990 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9991 }
9992
9993 // vsplti + srl self.
9994 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9995 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9996 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9997 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9998 Intrinsic::ppc_altivec_vsrw
9999 };
10000 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10001 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10002 }
10003
10004 // vsplti + rol self.
10005 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
10006 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
10007 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
10008 static const unsigned IIDs[] = { // Intrinsic to use for each size.
10009 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
10010 Intrinsic::ppc_altivec_vrlw
10011 };
10012 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10013 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10014 }
10015
10016 // t = vsplti c, result = vsldoi t, t, 1
10017 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
10018 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10019 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
10020 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10021 }
10022 // t = vsplti c, result = vsldoi t, t, 2
10023 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
10024 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10025 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
10026 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10027 }
10028 // t = vsplti c, result = vsldoi t, t, 3
10029 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
10030 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10031 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
10032 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10033 }
10034 }
10035
10036 return SDValue();
10037}
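// Standalone arithmetic checks for two of the multi-instruction splat tricks
// above (sketch; values are per 32-bit element and the vsplti immediate is a
// 5-bit signed value in [-16, 15]):
#include <cassert>
#include <cstdint>

int main() {
  // "vsplti + shl self" with i = -1: TypeShiftAmt = (-1) & 31 = 31, and
  // 0xFFFFFFFF << 31 = 0x80000000, so a splat of 0x80000000 is vspltisw(-1)
  // followed by a vslw of that register by itself.
  uint32_t AllOnes = 0xFFFFFFFFu;
  assert((uint32_t)(AllOnes << (AllOnes & 31)) == 0x80000000u);

  // The fneg/fabs mask case: invert 0x80000000 with the same all-ones value
  // to get 0x7FFFFFFF (vspltisw -1; vslw; xor).
  assert((0x80000000u ^ AllOnes) == 0x7FFFFFFFu);
  return 0;
}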
10038
10039/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
10040/// the specified operations to build the shuffle.
10041static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
10042 SDValue RHS, SelectionDAG &DAG,
10043 const SDLoc &dl) {
10044 unsigned OpNum = (PFEntry >> 26) & 0x0F;
10045 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
10046 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
10047
10048 enum {
10049 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
10050 OP_VMRGHW,
10051 OP_VMRGLW,
10052 OP_VSPLTISW0,
10053 OP_VSPLTISW1,
10054 OP_VSPLTISW2,
10055 OP_VSPLTISW3,
10056 OP_VSLDOI4,
10057 OP_VSLDOI8,
10058 OP_VSLDOI12
10059 };
10060
10061 if (OpNum == OP_COPY) {
10062 if (LHSID == (1*9+2)*9+3) return LHS;
10063 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
10064 return RHS;
10065 }
10066
10067 SDValue OpLHS, OpRHS;
10068 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
10069 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
10070
10071 int ShufIdxs[16];
10072 switch (OpNum) {
10073 default: llvm_unreachable("Unknown i32 permute!");
10074 case OP_VMRGHW:
10075 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
10076 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
10077 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
10078 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
10079 break;
10080 case OP_VMRGLW:
10081 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
10082 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
10083 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
10084 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10085 break;
10086 case OP_VSPLTISW0:
10087 for (unsigned i = 0; i != 16; ++i)
10088 ShufIdxs[i] = (i&3)+0;
10089 break;
10090 case OP_VSPLTISW1:
10091 for (unsigned i = 0; i != 16; ++i)
10092 ShufIdxs[i] = (i&3)+4;
10093 break;
10094 case OP_VSPLTISW2:
10095 for (unsigned i = 0; i != 16; ++i)
10096 ShufIdxs[i] = (i&3)+8;
10097 break;
10098 case OP_VSPLTISW3:
10099 for (unsigned i = 0; i != 16; ++i)
10100 ShufIdxs[i] = (i&3)+12;
10101 break;
10102 case OP_VSLDOI4:
10103 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10104 case OP_VSLDOI8:
10105 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10106 case OP_VSLDOI12:
10107 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10108 }
10109 EVT VT = OpLHS.getValueType();
10110 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10111 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10112 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10113 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10114}
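// The perfect-shuffle table entries decoded above are packed 32-bit words; a
// standalone decoder for the fields used here and in LowerVECTOR_SHUFFLE
// (sketch; the struct and function names are illustrative):
#include <cstdint>

struct PerfectShuffleFields {
  unsigned Cost;   // bits 31:30, compared against 3 before emitting ops
  unsigned OpNum;  // bits 29:26, one of the OP_* values above
  unsigned LHSID;  // bits 25:13, table index of the left sub-shuffle
  unsigned RHSID;  // bits 12:0,  table index of the right sub-shuffle
};

static PerfectShuffleFields decodePerfectShuffleEntry(uint32_t PFEntry) {
  return {PFEntry >> 30, (PFEntry >> 26) & 0x0Fu,
          (PFEntry >> 13) & ((1u << 13) - 1), PFEntry & ((1u << 13) - 1)};
}
// The IDs are base-9 encodings of four element indices (8 meaning undef), so
// the OP_COPY identity <0,1,2,3> corresponds to LHSID == (1*9+2)*9+3 == 102.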
10115
10116/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10117/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10118/// SDValue.
10119SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10120 SelectionDAG &DAG) const {
10121 const unsigned BytesInVector = 16;
10122 bool IsLE = Subtarget.isLittleEndian();
10123 SDLoc dl(N);
10124 SDValue V1 = N->getOperand(0);
10125 SDValue V2 = N->getOperand(1);
10126 unsigned ShiftElts = 0, InsertAtByte = 0;
10127 bool Swap = false;
10128
10129 // Shifts required to get the byte we want at element 7.
10130 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10131 0, 15, 14, 13, 12, 11, 10, 9};
10132 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10133 1, 2, 3, 4, 5, 6, 7, 8};
10134
10135 ArrayRef<int> Mask = N->getMask();
10136 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10137
10138 // For each mask element, find out if we're just inserting something
10139 // from V2 into V1 or vice versa.
10140 // Possible permutations inserting an element from V2 into V1:
10141 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10142 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10143 // ...
10144 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10145 // Inserting from V1 into V2 will be similar, except mask range will be
10146 // [16,31].
10147
10148 bool FoundCandidate = false;
10149 // If both vector operands for the shuffle are the same vector, the mask
10150 // will contain only elements from the first one and the second one will be
10151 // undef.
10152 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10153 // Go through the mask of half-words to find an element that's being moved
10154 // from one vector to the other.
10155 for (unsigned i = 0; i < BytesInVector; ++i) {
10156 unsigned CurrentElement = Mask[i];
10157 // If 2nd operand is undefined, we should only look for element 7 in the
10158 // Mask.
10159 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10160 continue;
10161
10162 bool OtherElementsInOrder = true;
10163 // Examine the other elements in the Mask to see if they're in original
10164 // order.
10165 for (unsigned j = 0; j < BytesInVector; ++j) {
10166 if (j == i)
10167 continue;
10168 // If CurrentElement is from V1 [0,15], we expect the rest of the Mask to
10169 // be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10170 // in which case we assume we are always picking from the 1st operand.
10171 int MaskOffset =
10172 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10173 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10174 OtherElementsInOrder = false;
10175 break;
10176 }
10177 }
10178 // If other elements are in original order, we record the number of shifts
10179 // we need to get the element we want into element 7. Also record which byte
10180 // in the vector we should insert into.
10181 if (OtherElementsInOrder) {
10182 // If 2nd operand is undefined, we assume no shifts and no swapping.
10183 if (V2.isUndef()) {
10184 ShiftElts = 0;
10185 Swap = false;
10186 } else {
10187 // Only need the last 4 bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10188 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10189 : BigEndianShifts[CurrentElement & 0xF];
10190 Swap = CurrentElement < BytesInVector;
10191 }
10192 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10193 FoundCandidate = true;
10194 break;
10195 }
10196 }
10197
10198 if (!FoundCandidate)
10199 return SDValue();
10200
10201 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10202 // optionally with VECSHL if shift is required.
10203 if (Swap)
10204 std::swap(V1, V2);
10205 if (V2.isUndef())
10206 V2 = V1;
10207 if (ShiftElts) {
10208 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10209 DAG.getConstant(ShiftElts, dl, MVT::i32));
10210 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10211 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10212 }
10213 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10214 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10215}
10216
10217/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10218/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10219/// SDValue.
10220SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10221 SelectionDAG &DAG) const {
10222 const unsigned NumHalfWords = 8;
10223 const unsigned BytesInVector = NumHalfWords * 2;
10224 // Check that the shuffle is on half-words.
10225 if (!isNByteElemShuffleMask(N, 2, 1))
10226 return SDValue();
10227
10228 bool IsLE = Subtarget.isLittleEndian();
10229 SDLoc dl(N);
10230 SDValue V1 = N->getOperand(0);
10231 SDValue V2 = N->getOperand(1);
10232 unsigned ShiftElts = 0, InsertAtByte = 0;
10233 bool Swap = false;
10234
10235 // Shifts required to get the half-word we want at element 3.
10236 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10237 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10238
10239 uint32_t Mask = 0;
10240 uint32_t OriginalOrderLow = 0x1234567;
10241 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10242 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10243 // 32-bit space, only need 4-bit nibbles per element.
10244 for (unsigned i = 0; i < NumHalfWords; ++i) {
10245 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10246 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10247 }
10248
10249 // For each mask element, find out if we're just inserting something
10250 // from V2 into V1 or vice versa. Possible permutations inserting an element
10251 // from V2 into V1:
10252 // X, 1, 2, 3, 4, 5, 6, 7
10253 // 0, X, 2, 3, 4, 5, 6, 7
10254 // 0, 1, X, 3, 4, 5, 6, 7
10255 // 0, 1, 2, X, 4, 5, 6, 7
10256 // 0, 1, 2, 3, X, 5, 6, 7
10257 // 0, 1, 2, 3, 4, X, 6, 7
10258 // 0, 1, 2, 3, 4, 5, X, 7
10259 // 0, 1, 2, 3, 4, 5, 6, X
10260 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10261
10262 bool FoundCandidate = false;
10263 // Go through the mask of half-words to find an element that's being moved
10264 // from one vector to the other.
10265 for (unsigned i = 0; i < NumHalfWords; ++i) {
10266 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10267 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10268 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10269 uint32_t TargetOrder = 0x0;
10270
10271 // If both vector operands for the shuffle are the same vector, the mask
10272 // will contain only elements from the first one and the second one will be
10273 // undef.
10274 if (V2.isUndef()) {
10275 ShiftElts = 0;
10276 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10277 TargetOrder = OriginalOrderLow;
10278 Swap = false;
10279 // Skip if not the correct element or mask of other elements don't equal
10280 // to our expected order.
10281 if (MaskOneElt == VINSERTHSrcElem &&
10282 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10283 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10284 FoundCandidate = true;
10285 break;
10286 }
10287 } else { // If both operands are defined.
10288 // Target order is [8,15] if the current mask is between [0,7].
10289 TargetOrder =
10290 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10291 // Skip if mask of other elements don't equal our expected order.
10292 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10293 // We only need the last 3 bits for the number of shifts.
10294 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10295 : BigEndianShifts[MaskOneElt & 0x7];
10296 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10297 Swap = MaskOneElt < NumHalfWords;
10298 FoundCandidate = true;
10299 break;
10300 }
10301 }
10302 }
10303
10304 if (!FoundCandidate)
10305 return SDValue();
10306
10307 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10308 // optionally with VECSHL if shift is required.
10309 if (Swap)
10310 std::swap(V1, V2);
10311 if (V2.isUndef())
10312 V2 = V1;
10313 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10314 if (ShiftElts) {
10315 // Double ShiftElts because we're left shifting on v16i8 type.
10316 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10317 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10318 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10319 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10320 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10321 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10322 }
10323 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10324 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10325 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10326 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10327}
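// A standalone model of the nibble packing above (sketch; the helper name is
// illustrative): half-word mask element i, already divided by 2, is stored in
// nibble 7 - i of a 32-bit word, so the identity order packs to 0x01234567
// and an "all from V2" order packs to 0x89ABCDEF, the two TargetOrder
// patterns compared against.
#include <cassert>
#include <cstdint>

static uint32_t packHalfWordMask(const int (&Elts)[8]) {
  uint32_t Packed = 0;
  for (unsigned i = 0; i < 8; ++i)
    Packed |= (uint32_t)(Elts[i] & 0xF) << ((7 - i) * 4);
  return Packed;
}

int main() {
  const int Identity[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  const int AllFromV2[8] = {8, 9, 10, 11, 12, 13, 14, 15};
  assert(packHalfWordMask(Identity) == 0x01234567u);
  assert(packHalfWordMask(AllFromV2) == 0x89ABCDEFu);
  return 0;
}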
10328
10329/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10330/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10331/// return the default SDValue.
10332SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10333 SelectionDAG &DAG) const {
10334 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10335 // to v16i8. Peek through the bitcasts to get the actual operands.
10336 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10337 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10338
10339 auto ShuffleMask = SVN->getMask();
10340 SDValue VecShuffle(SVN, 0);
10341 SDLoc DL(SVN);
10342
10343 // Check that we have a four byte shuffle.
10344 if (!isNByteElemShuffleMask(SVN, 4, 1))
10345 return SDValue();
10346
10347 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10348 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10349 std::swap(LHS, RHS);
10350 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10351 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10352 if (!CommutedSV)
10353 return SDValue();
10354 ShuffleMask = CommutedSV->getMask();
10355 }
10356
10357 // Ensure that the RHS is a vector of constants.
10358 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10359 if (!BVN)
10360 return SDValue();
10361
10362 // Check if RHS is a splat of 4-bytes (or smaller).
10363 APInt APSplatValue, APSplatUndef;
10364 unsigned SplatBitSize;
10365 bool HasAnyUndefs;
10366 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10367 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10368 SplatBitSize > 32)
10369 return SDValue();
10370
10371 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10372 // The instruction splats a constant C into two words of the source vector
10373 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10374 // Thus we check that the shuffle mask is the equivalent of
10375 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10376 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10377 // within each word are consecutive, so we only need to check the first byte.
10378 SDValue Index;
10379 bool IsLE = Subtarget.isLittleEndian();
10380 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10381 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10382 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10383 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10384 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10385 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10386 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10387 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10388 else
10389 return SDValue();
10390
10391 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10392 // for XXSPLTI32DX.
10393 unsigned SplatVal = APSplatValue.getZExtValue();
10394 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10395 SplatVal |= (SplatVal << SplatBitSize);
10396
10397 SDValue SplatNode = DAG.getNode(
10398 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10399 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10400 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10401}
10402
10403/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10404/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10405/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10406/// i.e (or (shl x, C1), (srl x, 128-C1)).
10407SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10408 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10409 assert(Op.getValueType() == MVT::v1i128 &&
10410 "Only set v1i128 as custom, other type shouldn't reach here!");
10411 SDLoc dl(Op);
10412 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10413 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10414 unsigned SHLAmt = N1.getConstantOperandVal(0);
10415 if (SHLAmt % 8 == 0) {
10416 std::array<int, 16> Mask;
10417 std::iota(Mask.begin(), Mask.end(), 0);
10418 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10419 if (SDValue Shuffle =
10420 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10421 DAG.getUNDEF(MVT::v16i8), Mask))
10422 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10423 }
10424 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10425 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10426 DAG.getConstant(SHLAmt, dl, MVT::i32));
10427 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10428 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10429 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10430 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10431}
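// The byte-shuffle path above in standalone form (sketch; assumes the rotate
// amount is a multiple of 8 and less than 128): the v1i128 rotate-left is
// just a rotation of the sixteen v16i8 lanes, e.g. SHLAmt == 16 yields the
// mask {2, 3, ..., 15, 0, 1}.
#include <algorithm>
#include <array>
#include <numeric>

static std::array<int, 16> rotlByteShuffleMask(unsigned SHLAmt) {
  std::array<int, 16> Mask;
  std::iota(Mask.begin(), Mask.end(), 0);
  std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
  return Mask;
}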
10432
10433/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10434/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10435/// return the code it can be lowered into. Worst case, it can always be
10436/// lowered into a vperm.
10437SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10438 SelectionDAG &DAG) const {
10439 SDLoc dl(Op);
10440 SDValue V1 = Op.getOperand(0);
10441 SDValue V2 = Op.getOperand(1);
10442 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10443
10444 // Any nodes that were combined in the target-independent combiner prior
10445 // to vector legalization will not be sent to the target combine. Try to
10446 // combine it here.
10447 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10448 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10449 return NewShuffle;
10450 Op = NewShuffle;
10451 SVOp = cast<ShuffleVectorSDNode>(Op);
10452 V1 = Op.getOperand(0);
10453 V2 = Op.getOperand(1);
10454 }
10455 EVT VT = Op.getValueType();
10456 bool isLittleEndian = Subtarget.isLittleEndian();
10457
10458 unsigned ShiftElts, InsertAtByte;
10459 bool Swap = false;
10460
10461 // If this is a load-and-splat, we can do that with a single instruction
10462 // in some cases. However if the load has multiple uses, we don't want to
10463 // combine it because that will just produce multiple loads.
10464 bool IsPermutedLoad = false;
10465 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10466 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10467 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10468 InputLoad->hasOneUse()) {
10469 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10470 int SplatIdx =
10471 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10472
10473 // The splat index for permuted loads will be in the left half of the vector
10474 // which is strictly wider than the loaded value by 8 bytes. So we need to
10475 // adjust the splat index to point to the correct address in memory.
10476 if (IsPermutedLoad) {
10477 assert((isLittleEndian || IsFourByte) &&
10478 "Unexpected size for permuted load on big endian target");
10479 SplatIdx += IsFourByte ? 2 : 1;
10480 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10481 "Splat of a value outside of the loaded memory");
10482 }
10483
10484 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10485 // For 4-byte load-and-splat, we need Power9.
10486 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10487 uint64_t Offset = 0;
10488 if (IsFourByte)
10489 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10490 else
10491 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10492
10493 // If the width of the load is the same as the width of the splat,
10494 // loading with an offset would load the wrong memory.
10495 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10496 Offset = 0;
10497
10498 SDValue BasePtr = LD->getBasePtr();
10499 if (Offset != 0)
10500 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10501 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10502 SDValue Ops[] = {
10503 LD->getChain(), // Chain
10504 BasePtr, // BasePtr
10505 DAG.getValueType(Op.getValueType()) // VT
10506 };
10507 SDVTList VTL =
10508 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10509 SDValue LdSplt =
10510 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10511 Ops, LD->getMemoryVT(), LD->getMemOperand());
10512 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10513 if (LdSplt.getValueType() != SVOp->getValueType(0))
10514 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10515 return LdSplt;
10516 }
10517 }
10518
10519 // All v2i64 and v2f64 shuffles are legal
10520 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10521 return Op;
10522
10523 if (Subtarget.hasP9Vector() &&
10524 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10525 isLittleEndian)) {
10526 if (V2.isUndef())
10527 V2 = V1;
10528 else if (Swap)
10529 std::swap(V1, V2);
10530 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10531 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10532 if (ShiftElts) {
10533 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10534 DAG.getConstant(ShiftElts, dl, MVT::i32));
10535 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10536 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10537 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10538 }
10539 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10540 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10541 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10542 }
10543
10544 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10545 SDValue SplatInsertNode;
10546 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10547 return SplatInsertNode;
10548 }
10549
10550 if (Subtarget.hasP9Altivec()) {
10551 SDValue NewISDNode;
10552 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10553 return NewISDNode;
10554
10555 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10556 return NewISDNode;
10557 }
10558
10559 if (Subtarget.hasVSX() &&
10560 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10561 if (Swap)
10562 std::swap(V1, V2);
10563 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10564 SDValue Conv2 =
10565 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10566
10567 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10568 DAG.getConstant(ShiftElts, dl, MVT::i32));
10569 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10570 }
10571
10572 if (Subtarget.hasVSX() &&
10573 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10574 if (Swap)
10575 std::swap(V1, V2);
10576 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10577 SDValue Conv2 =
10578 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10579
10580 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10581 DAG.getConstant(ShiftElts, dl, MVT::i32));
10582 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10583 }
10584
10585 if (Subtarget.hasP9Vector()) {
10586 if (PPC::isXXBRHShuffleMask(SVOp)) {
10587 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10588 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10589 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10590 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10591 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10592 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10593 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10594 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10595 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10596 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10597 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10598 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10599 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10600 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10601 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10602 }
10603 }
10604
10605 if (Subtarget.hasVSX()) {
10606 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10607 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10608
10609 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10610 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10611 DAG.getConstant(SplatIdx, dl, MVT::i32));
10612 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10613 }
10614
10615 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10616 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10617 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10618 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10619 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10620 }
10621 }
10622
10623 // Cases that are handled by instructions that take permute immediates
10624 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10625 // selected by the instruction selector.
10626 if (V2.isUndef()) {
10627 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10628 PPC::isSplatShuffleMask(SVOp, 2) ||
10629 PPC::isSplatShuffleMask(SVOp, 4) ||
10630 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10631 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10632 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10633 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10634 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10635 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10636 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10637 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10638 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10639 (Subtarget.hasP8Altivec() && (
10640 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10641 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10642 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10643 return Op;
10644 }
10645 }
10646
10647 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10648 // and produce a fixed permutation. If any of these match, do not lower to
10649 // VPERM.
10650 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10651 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10652 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10653 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10654 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10655 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10656 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10657 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10658 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10659 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10660 (Subtarget.hasP8Altivec() && (
10661 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10662 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10663 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10664 return Op;
10665
10666 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10667 // perfect shuffle table to emit an optimal matching sequence.
10668 ArrayRef<int> PermMask = SVOp->getMask();
10669
10670 if (!DisablePerfectShuffle && !isLittleEndian) {
10671 unsigned PFIndexes[4];
10672 bool isFourElementShuffle = true;
10673 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10674 ++i) { // Element number
10675 unsigned EltNo = 8; // Start out undef.
10676 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10677 if (PermMask[i * 4 + j] < 0)
10678 continue; // Undef, ignore it.
10679
10680 unsigned ByteSource = PermMask[i * 4 + j];
10681 if ((ByteSource & 3) != j) {
10682 isFourElementShuffle = false;
10683 break;
10684 }
10685
10686 if (EltNo == 8) {
10687 EltNo = ByteSource / 4;
10688 } else if (EltNo != ByteSource / 4) {
10689 isFourElementShuffle = false;
10690 break;
10691 }
10692 }
10693 PFIndexes[i] = EltNo;
10694 }
10695
10696 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10697 // perfect shuffle vector to determine if it is cost effective to do this as
10698 // discrete instructions, or whether we should use a vperm.
10699 // For now, we skip this for little endian until such time as we have a
10700 // little-endian perfect shuffle table.
10701 if (isFourElementShuffle) {
10702 // Compute the index in the perfect shuffle table.
10703 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10704 PFIndexes[2] * 9 + PFIndexes[3];
10705
10706 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10707 unsigned Cost = (PFEntry >> 30);
10708
10709 // Determining when to avoid vperm is tricky. Many things affect the cost
10710 // of vperm, particularly how many times the perm mask needs to be
10711 // computed. For example, if the perm mask can be hoisted out of a loop or
10712 // is already used (perhaps because there are multiple permutes with the
10713 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10714 // permute mask out of the loop requires an extra register.
10715 //
10716 // As a compromise, we only emit discrete instructions if the shuffle can
10717 // be generated in 3 or fewer operations. When we have loop information
10718 // available, if this block is within a loop, we should avoid using vperm
10719 // for 3-operation perms and use a constant pool load instead.
10720 if (Cost < 3)
10721 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10722 }
10723 }
10724
10725 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10726 // vector that will get spilled to the constant pool.
10727 if (V2.isUndef()) V2 = V1;
10728
10729 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10730}
10731
10732SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10733 ArrayRef<int> PermMask, EVT VT,
10734 SDValue V1, SDValue V2) const {
10735 unsigned Opcode = PPCISD::VPERM;
10736 EVT ValType = V1.getValueType();
10737 SDLoc dl(Op);
10738 bool NeedSwap = false;
10739 bool isLittleEndian = Subtarget.isLittleEndian();
10740 bool isPPC64 = Subtarget.isPPC64();
10741
10742 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10743 (V1->hasOneUse() || V2->hasOneUse())) {
10744 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10745 "XXPERM instead\n");
10746 Opcode = PPCISD::XXPERM;
10747
10750     // The second input to XXPERM is also an output, so if the second input
10751     // has multiple uses a copy is required. We therefore prefer the
10752     // single-use operand as the second input to avoid that copy.
10751 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10752 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10753 std::swap(V1, V2);
10754 NeedSwap = !NeedSwap;
10755 }
10756 }
10757
10758 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10759 // that it is in input element units, not in bytes. Convert now.
10760
10761 // For little endian, the order of the input vectors is reversed, and
10762 // the permutation mask is complemented with respect to 31. This is
10763 // necessary to produce proper semantics with the big-endian-based vperm
10764 // instruction.
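  // For example (illustrative): with 4-byte elements, source element 5 of the
  // concatenated pair covers bytes 20..23 in big-endian terms; on little
  // endian each byte index b is emitted as 31 - b, i.e. 11, 10, 9, 8.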
10765 EVT EltVT = V1.getValueType().getVectorElementType();
10766 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10767
10768 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10769 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10770
10771 /*
10772   Vectors will be appended like so: [ V1 | V2 ]
10773 XXSWAPD on V1:
10774 [ A | B | C | D ] -> [ C | D | A | B ]
10775 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10776 i.e. index of A, B += 8, and index of C, D -= 8.
10777 XXSWAPD on V2:
10778 [ E | F | G | H ] -> [ G | H | E | F ]
10779 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10780 i.e. index of E, F += 8, index of G, H -= 8
10781 Swap V1 and V2:
10782 [ V1 | V2 ] -> [ V2 | V1 ]
10783 0-15 16-31 0-15 16-31
10784 i.e. index of V1 += 16, index of V2 -= 16
10785 */
10786
10787 SmallVector<SDValue, 16> ResultMask;
10788 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10789 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10790
10791 if (V1HasXXSWAPD) {
10792 if (SrcElt < 8)
10793 SrcElt += 8;
10794 else if (SrcElt < 16)
10795 SrcElt -= 8;
10796 }
10797 if (V2HasXXSWAPD) {
10798 if (SrcElt > 23)
10799 SrcElt -= 8;
10800 else if (SrcElt > 15)
10801 SrcElt += 8;
10802 }
10803 if (NeedSwap) {
10804 if (SrcElt < 16)
10805 SrcElt += 16;
10806 else
10807 SrcElt -= 16;
10808 }
10809 for (unsigned j = 0; j != BytesPerElement; ++j)
10810 if (isLittleEndian)
10811 ResultMask.push_back(
10812 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10813 else
10814 ResultMask.push_back(
10815 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10816 }
10817
10818 if (V1HasXXSWAPD) {
10819 dl = SDLoc(V1->getOperand(0));
10820 V1 = V1->getOperand(0)->getOperand(1);
10821 }
10822 if (V2HasXXSWAPD) {
10823 dl = SDLoc(V2->getOperand(0));
10824 V2 = V2->getOperand(0)->getOperand(1);
10825 }
10826
10827 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10828 if (ValType != MVT::v2f64)
10829 V1 = DAG.getBitcast(MVT::v2f64, V1);
10830 if (V2.getValueType() != MVT::v2f64)
10831 V2 = DAG.getBitcast(MVT::v2f64, V2);
10832 }
10833
10834 ShufflesHandledWithVPERM++;
10835 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10836 LLVM_DEBUG({
10837 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10838 if (Opcode == PPCISD::XXPERM) {
10839 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10840 } else {
10841 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10842 }
10843 SVOp->dump();
10844 dbgs() << "With the following permute control vector:\n";
10845 VPermMask.dump();
10846 });
10847
10848 if (Opcode == PPCISD::XXPERM)
10849 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10850
10851   // For little endian we only need to swap the operand order here;
10852   // the permute mask was already computed accordingly.
10853 if (isLittleEndian)
10854 std::swap(V1, V2);
10855
10856 SDValue VPERMNode =
10857 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10858
10859 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10860 return VPERMNode;
10861}
10862
10863 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10864 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10865 /// information about the intrinsic.
10866static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10867 bool &isDot, const PPCSubtarget &Subtarget) {
10868 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10869 CompareOpc = -1;
10870 isDot = false;
10871 switch (IntrinsicID) {
10872 default:
10873 return false;
10874 // Comparison predicates.
10875 case Intrinsic::ppc_altivec_vcmpbfp_p:
10876 CompareOpc = 966;
10877 isDot = true;
10878 break;
10879 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10880 CompareOpc = 198;
10881 isDot = true;
10882 break;
10883 case Intrinsic::ppc_altivec_vcmpequb_p:
10884 CompareOpc = 6;
10885 isDot = true;
10886 break;
10887 case Intrinsic::ppc_altivec_vcmpequh_p:
10888 CompareOpc = 70;
10889 isDot = true;
10890 break;
10891 case Intrinsic::ppc_altivec_vcmpequw_p:
10892 CompareOpc = 134;
10893 isDot = true;
10894 break;
10895 case Intrinsic::ppc_altivec_vcmpequd_p:
10896 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10897 CompareOpc = 199;
10898 isDot = true;
10899 } else
10900 return false;
10901 break;
10902 case Intrinsic::ppc_altivec_vcmpneb_p:
10903 case Intrinsic::ppc_altivec_vcmpneh_p:
10904 case Intrinsic::ppc_altivec_vcmpnew_p:
10905 case Intrinsic::ppc_altivec_vcmpnezb_p:
10906 case Intrinsic::ppc_altivec_vcmpnezh_p:
10907 case Intrinsic::ppc_altivec_vcmpnezw_p:
10908 if (Subtarget.hasP9Altivec()) {
10909 switch (IntrinsicID) {
10910 default:
10911 llvm_unreachable("Unknown comparison intrinsic.");
10912 case Intrinsic::ppc_altivec_vcmpneb_p:
10913 CompareOpc = 7;
10914 break;
10915 case Intrinsic::ppc_altivec_vcmpneh_p:
10916 CompareOpc = 71;
10917 break;
10918 case Intrinsic::ppc_altivec_vcmpnew_p:
10919 CompareOpc = 135;
10920 break;
10921 case Intrinsic::ppc_altivec_vcmpnezb_p:
10922 CompareOpc = 263;
10923 break;
10924 case Intrinsic::ppc_altivec_vcmpnezh_p:
10925 CompareOpc = 327;
10926 break;
10927 case Intrinsic::ppc_altivec_vcmpnezw_p:
10928 CompareOpc = 391;
10929 break;
10930 }
10931 isDot = true;
10932 } else
10933 return false;
10934 break;
10935 case Intrinsic::ppc_altivec_vcmpgefp_p:
10936 CompareOpc = 454;
10937 isDot = true;
10938 break;
10939 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10940 CompareOpc = 710;
10941 isDot = true;
10942 break;
10943 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10944 CompareOpc = 774;
10945 isDot = true;
10946 break;
10947 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10948 CompareOpc = 838;
10949 isDot = true;
10950 break;
10951 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10952 CompareOpc = 902;
10953 isDot = true;
10954 break;
10955 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10956 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10957 CompareOpc = 967;
10958 isDot = true;
10959 } else
10960 return false;
10961 break;
10962 case Intrinsic::ppc_altivec_vcmpgtub_p:
10963 CompareOpc = 518;
10964 isDot = true;
10965 break;
10966 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10967 CompareOpc = 582;
10968 isDot = true;
10969 break;
10970 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10971 CompareOpc = 646;
10972 isDot = true;
10973 break;
10974 case Intrinsic::ppc_altivec_vcmpgtud_p:
10975 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10976 CompareOpc = 711;
10977 isDot = true;
10978 } else
10979 return false;
10980 break;
10981
10982 case Intrinsic::ppc_altivec_vcmpequq:
10983 case Intrinsic::ppc_altivec_vcmpgtsq:
10984 case Intrinsic::ppc_altivec_vcmpgtuq:
10985 if (!Subtarget.isISA3_1())
10986 return false;
10987 switch (IntrinsicID) {
10988 default:
10989 llvm_unreachable("Unknown comparison intrinsic.");
10990 case Intrinsic::ppc_altivec_vcmpequq:
10991 CompareOpc = 455;
10992 break;
10993 case Intrinsic::ppc_altivec_vcmpgtsq:
10994 CompareOpc = 903;
10995 break;
10996 case Intrinsic::ppc_altivec_vcmpgtuq:
10997 CompareOpc = 647;
10998 break;
10999 }
11000 break;
11001
11002 // VSX predicate comparisons use the same infrastructure
11003 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11004 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11005 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11006 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11007 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11008 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11009 if (Subtarget.hasVSX()) {
11010 switch (IntrinsicID) {
11011 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11012 CompareOpc = 99;
11013 break;
11014 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11015 CompareOpc = 115;
11016 break;
11017 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11018 CompareOpc = 107;
11019 break;
11020 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11021 CompareOpc = 67;
11022 break;
11023 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11024 CompareOpc = 83;
11025 break;
11026 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11027 CompareOpc = 75;
11028 break;
11029 }
11030 isDot = true;
11031 } else
11032 return false;
11033 break;
11034
11035 // Normal Comparisons.
11036 case Intrinsic::ppc_altivec_vcmpbfp:
11037 CompareOpc = 966;
11038 break;
11039 case Intrinsic::ppc_altivec_vcmpeqfp:
11040 CompareOpc = 198;
11041 break;
11042 case Intrinsic::ppc_altivec_vcmpequb:
11043 CompareOpc = 6;
11044 break;
11045 case Intrinsic::ppc_altivec_vcmpequh:
11046 CompareOpc = 70;
11047 break;
11048 case Intrinsic::ppc_altivec_vcmpequw:
11049 CompareOpc = 134;
11050 break;
11051 case Intrinsic::ppc_altivec_vcmpequd:
11052 if (Subtarget.hasP8Altivec())
11053 CompareOpc = 199;
11054 else
11055 return false;
11056 break;
11057 case Intrinsic::ppc_altivec_vcmpneb:
11058 case Intrinsic::ppc_altivec_vcmpneh:
11059 case Intrinsic::ppc_altivec_vcmpnew:
11060 case Intrinsic::ppc_altivec_vcmpnezb:
11061 case Intrinsic::ppc_altivec_vcmpnezh:
11062 case Intrinsic::ppc_altivec_vcmpnezw:
11063 if (Subtarget.hasP9Altivec())
11064 switch (IntrinsicID) {
11065 default:
11066 llvm_unreachable("Unknown comparison intrinsic.");
11067 case Intrinsic::ppc_altivec_vcmpneb:
11068 CompareOpc = 7;
11069 break;
11070 case Intrinsic::ppc_altivec_vcmpneh:
11071 CompareOpc = 71;
11072 break;
11073 case Intrinsic::ppc_altivec_vcmpnew:
11074 CompareOpc = 135;
11075 break;
11076 case Intrinsic::ppc_altivec_vcmpnezb:
11077 CompareOpc = 263;
11078 break;
11079 case Intrinsic::ppc_altivec_vcmpnezh:
11080 CompareOpc = 327;
11081 break;
11082 case Intrinsic::ppc_altivec_vcmpnezw:
11083 CompareOpc = 391;
11084 break;
11085 }
11086 else
11087 return false;
11088 break;
11089 case Intrinsic::ppc_altivec_vcmpgefp:
11090 CompareOpc = 454;
11091 break;
11092 case Intrinsic::ppc_altivec_vcmpgtfp:
11093 CompareOpc = 710;
11094 break;
11095 case Intrinsic::ppc_altivec_vcmpgtsb:
11096 CompareOpc = 774;
11097 break;
11098 case Intrinsic::ppc_altivec_vcmpgtsh:
11099 CompareOpc = 838;
11100 break;
11101 case Intrinsic::ppc_altivec_vcmpgtsw:
11102 CompareOpc = 902;
11103 break;
11104 case Intrinsic::ppc_altivec_vcmpgtsd:
11105 if (Subtarget.hasP8Altivec())
11106 CompareOpc = 967;
11107 else
11108 return false;
11109 break;
11110 case Intrinsic::ppc_altivec_vcmpgtub:
11111 CompareOpc = 518;
11112 break;
11113 case Intrinsic::ppc_altivec_vcmpgtuh:
11114 CompareOpc = 582;
11115 break;
11116 case Intrinsic::ppc_altivec_vcmpgtuw:
11117 CompareOpc = 646;
11118 break;
11119 case Intrinsic::ppc_altivec_vcmpgtud:
11120 if (Subtarget.hasP8Altivec())
11121 CompareOpc = 711;
11122 else
11123 return false;
11124 break;
11125 case Intrinsic::ppc_altivec_vcmpequq_p:
11126 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11127 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11128 if (!Subtarget.isISA3_1())
11129 return false;
11130 switch (IntrinsicID) {
11131 default:
11132 llvm_unreachable("Unknown comparison intrinsic.");
11133 case Intrinsic::ppc_altivec_vcmpequq_p:
11134 CompareOpc = 455;
11135 break;
11136 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11137 CompareOpc = 903;
11138 break;
11139 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11140 CompareOpc = 647;
11141 break;
11142 }
11143 isDot = true;
11144 break;
11145 }
11146 return true;
11147}
11148
11149/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11150/// lower, do it, otherwise return null.
11151SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11152 SelectionDAG &DAG) const {
11153 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11154
11155 SDLoc dl(Op);
11156
11157 switch (IntrinsicID) {
11158 case Intrinsic::thread_pointer:
11159 // Reads the thread pointer register, used for __builtin_thread_pointer.
11160 if (Subtarget.isPPC64())
11161 return DAG.getRegister(PPC::X13, MVT::i64);
11162 return DAG.getRegister(PPC::R2, MVT::i32);
11163
11164 case Intrinsic::ppc_rldimi: {
11165 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11166 SDValue Src = Op.getOperand(1);
11167 APInt Mask = Op.getConstantOperandAPInt(4);
11168 if (Mask.isZero())
11169 return Op.getOperand(2);
11170 if (Mask.isAllOnes())
11171 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11172 uint64_t SH = Op.getConstantOperandVal(3);
11173 unsigned MB = 0, ME = 0;
11174 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11175 report_fatal_error("invalid rldimi mask!");
11176 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11177 if (ME < 63 - SH) {
11178 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11179 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11180 } else if (ME > 63 - SH) {
11181 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11182 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11183 }
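    // Illustrative example (editorial, not from the source): SH = 48 with
    // Mask = 0xFFFF000000000000 gives MB = 0, ME = 15; since ME == 63 - SH no
    // pre-rotation is needed and the RLDIMI below is emitted with
    // SH = 63 - ME = 48 and MB = 0.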
11184 return SDValue(
11185 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11186 {Op.getOperand(2), Src,
11187 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11188 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11189 0);
11190 }
11191
11192 case Intrinsic::ppc_rlwimi: {
11193 APInt Mask = Op.getConstantOperandAPInt(4);
11194 if (Mask.isZero())
11195 return Op.getOperand(2);
11196 if (Mask.isAllOnes())
11197 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11198 Op.getOperand(3));
11199 unsigned MB = 0, ME = 0;
11200 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11201 report_fatal_error("invalid rlwimi mask!");
11202 return SDValue(DAG.getMachineNode(
11203 PPC::RLWIMI, dl, MVT::i32,
11204 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11205 DAG.getTargetConstant(MB, dl, MVT::i32),
11206 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11207 0);
11208 }
11209
11210 case Intrinsic::ppc_rlwnm: {
11211 if (Op.getConstantOperandVal(3) == 0)
11212 return DAG.getConstant(0, dl, MVT::i32);
11213 unsigned MB = 0, ME = 0;
11214 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11215 report_fatal_error("invalid rlwnm mask!");
11216 return SDValue(
11217 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11218 {Op.getOperand(1), Op.getOperand(2),
11219 DAG.getTargetConstant(MB, dl, MVT::i32),
11220 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11221 0);
11222 }
11223
11224 case Intrinsic::ppc_mma_disassemble_acc: {
11225 if (Subtarget.isISAFuture()) {
11226 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11227 SDValue WideVec =
11228 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11229 Op.getOperand(1)),
11230                   0);
11231       SmallVector<SDValue, 4> RetOps;
11232 SDValue Value = SDValue(WideVec.getNode(), 0);
11233 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11234
11235 SDValue Extract;
11236 Extract = DAG.getNode(
11237 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11238 Subtarget.isLittleEndian() ? Value2 : Value,
11239 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11240 dl, getPointerTy(DAG.getDataLayout())));
11241 RetOps.push_back(Extract);
11242 Extract = DAG.getNode(
11243 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11244 Subtarget.isLittleEndian() ? Value2 : Value,
11245 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11246 dl, getPointerTy(DAG.getDataLayout())));
11247 RetOps.push_back(Extract);
11248 Extract = DAG.getNode(
11249 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11250 Subtarget.isLittleEndian() ? Value : Value2,
11251 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11252 dl, getPointerTy(DAG.getDataLayout())));
11253 RetOps.push_back(Extract);
11254 Extract = DAG.getNode(
11255 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11256 Subtarget.isLittleEndian() ? Value : Value2,
11257 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11258 dl, getPointerTy(DAG.getDataLayout())));
11259 RetOps.push_back(Extract);
11260 return DAG.getMergeValues(RetOps, dl);
11261 }
11262 [[fallthrough]];
11263 }
11264 case Intrinsic::ppc_vsx_disassemble_pair: {
11265 int NumVecs = 2;
11266 SDValue WideVec = Op.getOperand(1);
11267 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11268 NumVecs = 4;
11269 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11270     }
11271     SmallVector<SDValue, 4> RetOps;
11272 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11273 SDValue Extract = DAG.getNode(
11274 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11275 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11276 : VecNo,
11277 dl, getPointerTy(DAG.getDataLayout())));
11278 RetOps.push_back(Extract);
11279 }
11280 return DAG.getMergeValues(RetOps, dl);
11281 }
11282
11283   case Intrinsic::ppc_mma_build_dmr: {
11284     SmallVector<SDValue, 4> Pairs;
11285     SmallVector<SDValue, 8> Chains;
11286 for (int i = 1; i < 9; i += 2) {
11287 SDValue Hi = Op.getOperand(i);
11288 SDValue Lo = Op.getOperand(i + 1);
11289 if (Hi->getOpcode() == ISD::LOAD)
11290 Chains.push_back(Hi.getValue(1));
11291 if (Lo->getOpcode() == ISD::LOAD)
11292 Chains.push_back(Lo.getValue(1));
11293 Pairs.push_back(
11294 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11295 }
11296 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11297 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11298 return DAG.getMergeValues({Value, TF}, dl);
11299 }
11300
11301 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11302 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11303 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11304 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11305 "Specify P of 0 or 1 for lower or upper 512 bytes");
11306 unsigned HiLo = Idx->getSExtValue();
11307 unsigned Opcode;
11308 unsigned Subx;
11309 if (HiLo == 0) {
11310 Opcode = PPC::DMXXEXTFDMR512;
11311 Subx = PPC::sub_wacc_lo;
11312 } else {
11313 Opcode = PPC::DMXXEXTFDMR512_HI;
11314 Subx = PPC::sub_wacc_hi;
11315 }
11316 SDValue Subreg(
11317 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11318 Op.getOperand(1),
11319 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11320 0);
11321 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11322 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11323 }
11324
11325 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11326 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11327 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11328 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11329 "Specify a dmr row pair 0-3");
11330 unsigned IdxVal = Idx->getSExtValue();
11331 unsigned Subx;
11332 switch (IdxVal) {
11333 case 0:
11334 Subx = PPC::sub_dmrrowp0;
11335 break;
11336 case 1:
11337 Subx = PPC::sub_dmrrowp1;
11338 break;
11339 case 2:
11340 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11341 break;
11342 case 3:
11343 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11344 break;
11345 }
11346 SDValue Subreg(
11347 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11348 Op.getOperand(1),
11349 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11350 0);
11351 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11352 return SDValue(
11353 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11354 0);
11355 }
11356
11357 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11358 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11359 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11360 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11361 "Specify P of 0 or 1 for lower or upper 512 bytes");
11362 unsigned HiLo = Idx->getSExtValue();
11363 unsigned Opcode;
11364 unsigned Subx;
11365 if (HiLo == 0) {
11366 Opcode = PPC::DMXXINSTDMR512;
11367 Subx = PPC::sub_wacc_lo;
11368 } else {
11369 Opcode = PPC::DMXXINSTDMR512_HI;
11370 Subx = PPC::sub_wacc_hi;
11371 }
11372 SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
11373 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
11374 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11375 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11376 Op.getOperand(1), Wacc, SubReg),
11377 0);
11378 }
11379
11380 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11381 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11382 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11383 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11384 "Specify a dmr row pair 0-3");
11385 unsigned IdxVal = Idx->getSExtValue();
11386 unsigned Subx;
11387 switch (IdxVal) {
11388 case 0:
11389 Subx = PPC::sub_dmrrowp0;
11390 break;
11391 case 1:
11392 Subx = PPC::sub_dmrrowp1;
11393 break;
11394 case 2:
11395 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11396 break;
11397 case 3:
11398 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11399 break;
11400 }
11401 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11402 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11403 SDValue Ops[] = {Op.getOperand(2), P};
11404 SDValue DMRRowp = SDValue(
11405 DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
11406 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11407 Op.getOperand(1), DMRRowp, SubReg),
11408 0);
11409 }
11410
11411 case Intrinsic::ppc_mma_xxmfacc:
11412 case Intrinsic::ppc_mma_xxmtacc: {
11413 // Allow pre-isa-future subtargets to lower as normal.
11414 if (!Subtarget.isISAFuture())
11415 return SDValue();
11416     // The intrinsics for xxmtacc and xxmfacc take one argument of
11417     // type v512i1. For future CPUs the corresponding wacc instruction
11418     // dmxx[inst|extf]dmr512 is always generated for type v512i1, which
11419     // removes the need to produce the xxm[t|f]acc.
11420 SDValue WideVec = Op.getOperand(1);
11421 DAG.ReplaceAllUsesWith(Op, WideVec);
11422 return SDValue();
11423 }
11424
11425 case Intrinsic::ppc_unpack_longdouble: {
11426 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11427 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11428 "Argument of long double unpack must be 0 or 1!");
11429 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11430 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11431 Idx->getValueType(0)));
11432 }
11433
11434 case Intrinsic::ppc_compare_exp_lt:
11435 case Intrinsic::ppc_compare_exp_gt:
11436 case Intrinsic::ppc_compare_exp_eq:
11437 case Intrinsic::ppc_compare_exp_uo: {
11438 unsigned Pred;
11439 switch (IntrinsicID) {
11440 case Intrinsic::ppc_compare_exp_lt:
11441 Pred = PPC::PRED_LT;
11442 break;
11443 case Intrinsic::ppc_compare_exp_gt:
11444 Pred = PPC::PRED_GT;
11445 break;
11446 case Intrinsic::ppc_compare_exp_eq:
11447 Pred = PPC::PRED_EQ;
11448 break;
11449 case Intrinsic::ppc_compare_exp_uo:
11450 Pred = PPC::PRED_UN;
11451 break;
11452 }
11453 return SDValue(
11454 DAG.getMachineNode(
11455 PPC::SELECT_CC_I4, dl, MVT::i32,
11456 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11457 Op.getOperand(1), Op.getOperand(2)),
11458 0),
11459 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11460 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11461 0);
11462 }
11463 case Intrinsic::ppc_test_data_class: {
11464 EVT OpVT = Op.getOperand(1).getValueType();
11465 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11466 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11467 : PPC::XSTSTDCSP);
11468 return SDValue(
11469 DAG.getMachineNode(
11470 PPC::SELECT_CC_I4, dl, MVT::i32,
11471 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11472 Op.getOperand(1)),
11473 0),
11474 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11475 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11476 0);
11477 }
11478 case Intrinsic::ppc_fnmsub: {
11479 EVT VT = Op.getOperand(1).getValueType();
11480 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11481 return DAG.getNode(
11482 ISD::FNEG, dl, VT,
11483 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11484 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11485 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11486 Op.getOperand(2), Op.getOperand(3));
11487 }
11488 case Intrinsic::ppc_convert_f128_to_ppcf128:
11489 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11490 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11491 ? RTLIB::CONVERT_PPCF128_F128
11492 : RTLIB::CONVERT_F128_PPCF128;
11493 MakeLibCallOptions CallOptions;
11494 std::pair<SDValue, SDValue> Result =
11495 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11496 dl, SDValue());
11497 return Result.first;
11498 }
11499 case Intrinsic::ppc_maxfe:
11500 case Intrinsic::ppc_maxfl:
11501 case Intrinsic::ppc_maxfs:
11502 case Intrinsic::ppc_minfe:
11503 case Intrinsic::ppc_minfl:
11504 case Intrinsic::ppc_minfs: {
11505 EVT VT = Op.getValueType();
11506 assert(
11507 all_of(Op->ops().drop_front(4),
11508 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11509 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11510     (void)VT;
11511     ISD::CondCode CC = ISD::SETGT;
11512 if (IntrinsicID == Intrinsic::ppc_minfe ||
11513 IntrinsicID == Intrinsic::ppc_minfl ||
11514 IntrinsicID == Intrinsic::ppc_minfs)
11515 CC = ISD::SETLT;
11516 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11517 SDValue Res = Op.getOperand(I);
11518 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11519 Res =
11520 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11521 }
11522 return Res;
11523 }
11524 }
11525
11526 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11527 // opcode number of the comparison.
11528 int CompareOpc;
11529 bool isDot;
11530 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11531 return SDValue(); // Don't custom lower most intrinsics.
11532
11533 // If this is a non-dot comparison, make the VCMP node and we are done.
11534 if (!isDot) {
11535 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11536 Op.getOperand(1), Op.getOperand(2),
11537 DAG.getConstant(CompareOpc, dl, MVT::i32));
11538 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11539 }
11540
11541 // Create the PPCISD altivec 'dot' comparison node.
11542 SDValue Ops[] = {
11543 Op.getOperand(2), // LHS
11544 Op.getOperand(3), // RHS
11545 DAG.getConstant(CompareOpc, dl, MVT::i32)
11546 };
11547 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11548 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11549
11550 // Unpack the result based on how the target uses it.
11551 unsigned BitNo; // Bit # of CR6.
11552 bool InvertBit; // Invert result?
11553 unsigned Bitx;
11554 unsigned SetOp;
11555 switch (Op.getConstantOperandVal(1)) {
11556 default: // Can't happen, don't crash on invalid number though.
11557 case 0: // Return the value of the EQ bit of CR6.
11558 BitNo = 0;
11559 InvertBit = false;
11560 Bitx = PPC::sub_eq;
11561 SetOp = PPCISD::SETBC;
11562 break;
11563 case 1: // Return the inverted value of the EQ bit of CR6.
11564 BitNo = 0;
11565 InvertBit = true;
11566 Bitx = PPC::sub_eq;
11567 SetOp = PPCISD::SETBCR;
11568 break;
11569 case 2: // Return the value of the LT bit of CR6.
11570 BitNo = 2;
11571 InvertBit = false;
11572 Bitx = PPC::sub_lt;
11573 SetOp = PPCISD::SETBC;
11574 break;
11575 case 3: // Return the inverted value of the LT bit of CR6.
11576 BitNo = 2;
11577 InvertBit = true;
11578 Bitx = PPC::sub_lt;
11579 SetOp = PPCISD::SETBCR;
11580 break;
11581 }
11582
11583 SDValue GlueOp = CompNode.getValue(1);
11584 if (Subtarget.isISA3_1()) {
11585 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11586 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11587 SDValue CRBit =
11588 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11589 CR6Reg, SubRegIdx, GlueOp),
11590 0);
11591 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11592 }
11593
11594 // Now that we have the comparison, emit a copy from the CR to a GPR.
11595 // This is flagged to the above dot comparison.
11596 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11597 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11598
11599 // Shift the bit into the low position.
11600 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11601 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
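  // Illustrative note: MFOCRF leaves CR6's LT/GT/EQ/SO bits in bits 7..4 of
  // the result, so BitNo == 0 (EQ) shifts right by 5 and BitNo == 2 (LT) by 7.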
11602 // Isolate the bit.
11603 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11604 DAG.getConstant(1, dl, MVT::i32));
11605
11606 // If we are supposed to, toggle the bit.
11607 if (InvertBit)
11608 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11609 DAG.getConstant(1, dl, MVT::i32));
11610 return Flags;
11611}
11612
11613SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11614 SelectionDAG &DAG) const {
11615 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11616 // the beginning of the argument list.
11617 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11618 SDLoc DL(Op);
11619 switch (Op.getConstantOperandVal(ArgStart)) {
11620 case Intrinsic::ppc_cfence: {
11621 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11622 SDValue Val = Op.getOperand(ArgStart + 1);
11623 EVT Ty = Val.getValueType();
11624 if (Ty == MVT::i128) {
11625 // FIXME: Testing one of two paired registers is sufficient to guarantee
11626 // ordering?
11627 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11628 }
11629 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11630 return SDValue(
11631 DAG.getMachineNode(
11632 Opcode, DL, MVT::Other,
11633 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11634 Op.getOperand(0)),
11635 0);
11636 }
11637 case Intrinsic::ppc_mma_disassemble_dmr: {
11638 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11639 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11640 }
11641 default:
11642 break;
11643 }
11644 return SDValue();
11645}
11646
11647// Lower scalar BSWAP64 to xxbrd.
11648SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11649 SDLoc dl(Op);
11650 if (!Subtarget.isPPC64())
11651 return Op;
11652 // MTVSRDD
11653 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11654 Op.getOperand(0));
11655 // XXBRD
11656 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11657 // MFVSRD
11658 int VectorIndex = 0;
11659 if (Subtarget.isLittleEndian())
11660 VectorIndex = 1;
11661 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11662 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11663 return Op;
11664}
11665
11666// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11667// compared to a value that is atomically loaded (atomic loads zero-extend).
11668SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11669 SelectionDAG &DAG) const {
11670 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11671 "Expecting an atomic compare-and-swap here.");
11672 SDLoc dl(Op);
11673 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11674 EVT MemVT = AtomicNode->getMemoryVT();
11675 if (MemVT.getSizeInBits() >= 32)
11676 return Op;
11677
11678 SDValue CmpOp = Op.getOperand(2);
11679 // If this is already correctly zero-extended, leave it alone.
11680 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11681 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11682 return Op;
11683
11684 // Clear the high bits of the compare operand.
11685 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
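  // For example (illustrative): an i8 memory type yields MaskVal 0xFF and an
  // i16 memory type yields 0xFFFF.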
11686 SDValue NewCmpOp =
11687 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11688 DAG.getConstant(MaskVal, dl, MVT::i32));
11689
11690   // Replace the existing compare operand with the properly zero-extended one.
11691   SmallVector<SDValue, 4> Ops;
11692 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11693 Ops.push_back(AtomicNode->getOperand(i));
11694 Ops[2] = NewCmpOp;
11695 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11696 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11697 auto NodeTy =
11698 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11699 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11700}
11701
11702SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11703 SelectionDAG &DAG) const {
11704 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11705 EVT MemVT = N->getMemoryVT();
11706 assert(MemVT.getSimpleVT() == MVT::i128 &&
11707 "Expect quadword atomic operations");
11708 SDLoc dl(N);
11709 unsigned Opc = N->getOpcode();
11710 switch (Opc) {
11711 case ISD::ATOMIC_LOAD: {
11712     // Lower quadword atomic load to int_ppc_atomic_load_i128, which will be
11713     // lowered to PPC instructions by the pattern-matching instruction selector.
11714     SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11715     SmallVector<SDValue, 4> Ops{
11716 N->getOperand(0),
11717 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11718 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11719 Ops.push_back(N->getOperand(I));
11720 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11721 Ops, MemVT, N->getMemOperand());
11722 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11723 SDValue ValHi =
11724 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11725 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11726 DAG.getConstant(64, dl, MVT::i32));
11727 SDValue Val =
11728 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11729 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11730 {Val, LoadedVal.getValue(2)});
11731 }
11732 case ISD::ATOMIC_STORE: {
11733     // Lower quadword atomic store to int_ppc_atomic_store_i128, which will be
11734     // lowered to PPC instructions by the pattern-matching instruction selector.
11735     SDVTList Tys = DAG.getVTList(MVT::Other);
11736     SmallVector<SDValue, 4> Ops{
11737 N->getOperand(0),
11738 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11739 SDValue Val = N->getOperand(1);
11740 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11741 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11742 DAG.getConstant(64, dl, MVT::i32));
11743 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11744 Ops.push_back(ValLo);
11745 Ops.push_back(ValHi);
11746 Ops.push_back(N->getOperand(2));
11747 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11748 N->getMemOperand());
11749 }
11750 default:
11751 llvm_unreachable("Unexpected atomic opcode");
11752 }
11753}
11754 
11755 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11756 SelectionDAG &DAG,
11757 const PPCSubtarget &Subtarget) {
11758 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11759
11760 enum DataClassMask {
11761 DC_NAN = 1 << 6,
11762 DC_NEG_INF = 1 << 4,
11763 DC_POS_INF = 1 << 5,
11764 DC_NEG_ZERO = 1 << 2,
11765 DC_POS_ZERO = 1 << 3,
11766 DC_NEG_SUBNORM = 1,
11767 DC_POS_SUBNORM = 1 << 1,
11768 };
11769
11770 EVT VT = Op.getValueType();
11771
11772 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11773 : VT == MVT::f64 ? PPC::XSTSTDCDP
11774 : PPC::XSTSTDCSP;
11775
11776 if (Mask == fcAllFlags)
11777 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11778 if (Mask == 0)
11779 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11780
11781   // Handle the cases where it is cheaper, or necessary, to test the reversed flags.
11782 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11783 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11784 return DAG.getNOT(Dl, Rev, MVT::i1);
11785 }
11786
11787 // Power doesn't support testing whether a value is 'normal'. Test the rest
11788 // first, and test if it's 'not not-normal' with expected sign.
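  // Illustrative: for Mask == fcPosNormal the XSTSTDC* test below covers every
  // class except normal, so the value is normal iff the EQ (match) bit is
  // clear and positive iff the LT (sign) bit is clear; the result is the AND
  // of those two conditions.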
11789 if (Mask & fcNormal) {
11790 SDValue Rev(DAG.getMachineNode(
11791 TestOp, Dl, MVT::i32,
11792 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11793 DC_NEG_ZERO | DC_POS_ZERO |
11794 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11795 Dl, MVT::i32),
11796 Op),
11797 0);
11798     // The sign is stored in CR bit 0 and the result in CR bit 2.
11799 SDValue Sign(
11800 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11801 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11802 0);
11803 SDValue Normal(DAG.getNOT(
11804         Dl,
11805         SDValue(DAG.getMachineNode(
11806 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11807 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11808 0),
11809 MVT::i1));
11810 if (Mask & fcPosNormal)
11811 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11812 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11813 if (Mask == fcPosNormal || Mask == fcNegNormal)
11814 return Result;
11815
11816 return DAG.getNode(
11817 ISD::OR, Dl, MVT::i1,
11818 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11819 }
11820
11821   // The instruction doesn't differentiate between signaling and quiet NaNs.
11822   // Test the rest first, then check whether it 'is NaN and is signaling/quiet'.
11823 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11824 bool IsQuiet = Mask & fcQNan;
11825 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11826
11827     // Quietness is determined by the first bit of the fraction field.
11828 uint64_t QuietMask = 0;
11829 SDValue HighWord;
11830 if (VT == MVT::f128) {
11831 HighWord = DAG.getNode(
11832 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11833 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11834 QuietMask = 0x8000;
11835 } else if (VT == MVT::f64) {
11836 if (Subtarget.isPPC64()) {
11837 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11838 DAG.getBitcast(MVT::i64, Op),
11839 DAG.getConstant(1, Dl, MVT::i32));
11840 } else {
11841 SDValue Vec = DAG.getBitcast(
11842 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11843 HighWord = DAG.getNode(
11844 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11845 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11846 }
11847 QuietMask = 0x80000;
11848 } else if (VT == MVT::f32) {
11849 HighWord = DAG.getBitcast(MVT::i32, Op);
11850 QuietMask = 0x400000;
11851 }
11852 SDValue NanRes = DAG.getSetCC(
11853 Dl, MVT::i1,
11854 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11855 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11856 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11857 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11858 if (Mask == fcQNan || Mask == fcSNan)
11859 return NanRes;
11860
11861 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11862 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11863 NanRes);
11864 }
11865
11866 unsigned NativeMask = 0;
11867 if ((Mask & fcNan) == fcNan)
11868 NativeMask |= DC_NAN;
11869 if (Mask & fcNegInf)
11870 NativeMask |= DC_NEG_INF;
11871 if (Mask & fcPosInf)
11872 NativeMask |= DC_POS_INF;
11873 if (Mask & fcNegZero)
11874 NativeMask |= DC_NEG_ZERO;
11875 if (Mask & fcPosZero)
11876 NativeMask |= DC_POS_ZERO;
11877 if (Mask & fcNegSubnormal)
11878 NativeMask |= DC_NEG_SUBNORM;
11879 if (Mask & fcPosSubnormal)
11880 NativeMask |= DC_POS_SUBNORM;
11881 return SDValue(
11882 DAG.getMachineNode(
11883           TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11884           SDValue(DAG.getMachineNode(
11885 TestOp, Dl, MVT::i32,
11886 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11887 0),
11888 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11889 0);
11890}
11891
11892SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11893 SelectionDAG &DAG) const {
11894 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11895 SDValue LHS = Op.getOperand(0);
11896 uint64_t RHSC = Op.getConstantOperandVal(1);
11897 SDLoc Dl(Op);
11898 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11899 if (LHS.getValueType() == MVT::ppcf128) {
11900 // The higher part determines the value class.
11901 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11902 DAG.getConstant(1, Dl, MVT::i32));
11903 }
11904
11905 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11906}
11907
11908SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11909 SelectionDAG &DAG) const {
11910 SDLoc dl(Op);
11911
11912 MachineFunction &MF = DAG.getMachineFunction();
11913 SDValue Op0 = Op.getOperand(0);
11914 EVT ValVT = Op0.getValueType();
11915 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11916 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11917 int64_t IntVal = Op.getConstantOperandVal(0);
11918 if (IntVal >= -16 && IntVal <= 15)
11919 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11920 dl);
11921 }
11922
11923 ReuseLoadInfo RLI;
11924 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11925 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11926 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11927 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11928
11929     MachineMemOperand *MMO =
11930         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11931 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11932     SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11933     SDValue Bits = DAG.getMemIntrinsicNode(
11934 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11935 MVT::i32, MMO);
11936 if (RLI.ResChain)
11937 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11938 return Bits.getValue(0);
11939 }
11940
11941 // Create a stack slot that is 16-byte aligned.
11942 MachineFrameInfo &MFI = MF.getFrameInfo();
11943 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11944 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11945 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11946
11947 SDValue Val = Op0;
11948 // P10 hardware store forwarding requires that a single store contains all
11949 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11950 // to avoid load hit store on P10 when running binaries compiled for older
11951 // processors by generating two mergeable scalar stores to forward with the
11952 // vector load.
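  // Sketch of the sequence below (illustrative): the scalar is any-extended to
  // i64, shifted into the most-significant bytes, and stored to both 8-byte
  // halves of the 16-byte slot, so the vector reload is fully covered by two
  // forwardable stores.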
11953 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11954 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11955 ValVT.getSizeInBits() <= 64) {
11956 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11957 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11958 SDValue ShiftBy = DAG.getConstant(
11959 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11960 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11961 SDValue Plus8 =
11962 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11963 SDValue Store2 =
11964 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11965 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11966 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11967 MachinePointerInfo());
11968 }
11969
11970 // Store the input value into Value#0 of the stack slot.
11971 SDValue Store =
11972 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11973 // Load it out.
11974 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11975}
11976
11977SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11978 SelectionDAG &DAG) const {
11979 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11980 "Should only be called for ISD::INSERT_VECTOR_ELT");
11981
11982 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11983
11984 EVT VT = Op.getValueType();
11985 SDLoc dl(Op);
11986 SDValue V1 = Op.getOperand(0);
11987 SDValue V2 = Op.getOperand(1);
11988
11989 if (VT == MVT::v2f64 && C)
11990 return Op;
11991
11992 if (Subtarget.hasP9Vector()) {
11993     // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11994 // because on P10, it allows this specific insert_vector_elt load pattern to
11995 // utilize the refactored load and store infrastructure in order to exploit
11996 // prefixed loads.
11997 // On targets with inexpensive direct moves (Power9 and up), a
11998 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11999 // load since a single precision load will involve conversion to double
12000 // precision on the load followed by another conversion to single precision.
12001 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12002 (isa<LoadSDNode>(V2))) {
12003 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12004 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12005 SDValue InsVecElt =
12006 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12007 BitcastLoad, Op.getOperand(2));
12008 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12009 }
12010 }
12011
12012 if (Subtarget.isISA3_1()) {
12013 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12014 return SDValue();
12015 // On P10, we have legal lowering for constant and variable indices for
12016 // all vectors.
12017 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12018 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12019 return Op;
12020 }
12021
12022 // Before P10, we have legal lowering for constant indices but not for
12023 // variable ones.
12024 if (!C)
12025 return SDValue();
12026
12027 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12028 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12029 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12030 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12031 unsigned InsertAtElement = C->getZExtValue();
12032 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12033 if (Subtarget.isLittleEndian()) {
12034 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12035 }
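    // For example (illustrative): inserting element 3 of a v8i16 gives
    // InsertAtByte = 6 on big endian and (16 - 2) - 6 = 8 on little endian.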
12036 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12037 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12038 }
12039 return Op;
12040}
12041
12042SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12043 SelectionDAG &DAG) const {
12044 SDLoc dl(Op);
12045 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12046 SDValue LoadChain = LN->getChain();
12047 SDValue BasePtr = LN->getBasePtr();
12048 EVT VT = Op.getValueType();
12049 bool IsV1024i1 = VT == MVT::v1024i1;
12050 bool IsV2048i1 = VT == MVT::v2048i1;
12051
12052 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12053 // Dense Math dmr pair registers, respectively.
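  // Sketch (illustrative): a v1024i1 (dmr) load below becomes four 32-byte
  // lxvp loads whose results are assembled with DMXXINSTDMR512/_HI and
  // combined into a dmr register with REG_SEQUENCE; v2048i1 repeats this for
  // the second dmr of the pair.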
12054 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12055 (void)IsV2048i1;
12056 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12057 "Dense Math support required.");
12058 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12059 
12060   SmallVector<SDValue, 8> Loads;
12061 SmallVector<SDValue, 8> LoadChains;
12062
12063 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12064 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12065 MachineMemOperand *MMO = LN->getMemOperand();
12066 unsigned NumVecs = VT.getSizeInBits() / 256;
12067 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12068 MachineMemOperand *NewMMO =
12069 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12070 if (Idx > 0) {
12071 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12072 DAG.getConstant(32, dl, BasePtr.getValueType()));
12073 LoadOps[2] = BasePtr;
12074     }
12075     SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12076 DAG.getVTList(MVT::v256i1, MVT::Other),
12077 LoadOps, MVT::v256i1, NewMMO);
12078 LoadChains.push_back(Ld.getValue(1));
12079 Loads.push_back(Ld);
12080 }
12081
12082 if (Subtarget.isLittleEndian()) {
12083 std::reverse(Loads.begin(), Loads.end());
12084 std::reverse(LoadChains.begin(), LoadChains.end());
12085 }
12086
12087 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12088 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[0],
12089 Loads[1]),
12090 0);
12091 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12092 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12093 Loads[2], Loads[3]),
12094 0);
12095 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12096 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12097 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
12098
12099 SDValue Value =
12100 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12101
12102 if (IsV1024i1) {
12103 return DAG.getMergeValues({Value, TF}, dl);
12104 }
12105
12106 // Handle Loads for V2048i1 which represents a dmr pair.
12107 SDValue DmrPValue;
12108 SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
12109 Loads[4], Loads[5]),
12110 0);
12111 SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12112 Loads[6], Loads[7]),
12113 0);
12114 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12115 SDValue Dmr1Value = SDValue(
12116 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12117
12118 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12119 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12120
12121 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12122 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12123
12124 DmrPValue = SDValue(
12125 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12126
12127 return DAG.getMergeValues({DmrPValue, TF}, dl);
12128}
12129
12130SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12131 const SDLoc &dl,
12132 SelectionDAG &DAG) const {
12133 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12134 Pairs[1]),
12135 0);
12136 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12137 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12138 Pairs[2], Pairs[3]),
12139 0);
12140 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12141 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12142
12143 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12144 {RC, Lo, LoSub, Hi, HiSub}),
12145 0);
12146}
12147
12148SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12149 SelectionDAG &DAG) const {
12150 SDLoc dl(Op);
12151 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12152 SDValue LoadChain = LN->getChain();
12153 SDValue BasePtr = LN->getBasePtr();
12154 EVT VT = Op.getValueType();
12155
12156 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12157 return LowerDMFVectorLoad(Op, DAG);
12158
12159 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12160 return Op;
12161
12162 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12163 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12164 // 2 or 4 vsx registers.
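  // Sketch (illustrative): a v256i1 pair becomes two 16-byte loads combined
  // with PAIR_BUILD, and a v512i1 accumulator becomes four loads combined with
  // ACC_BUILD; on little endian the loaded registers are reversed first.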
12165 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12166 "Type unsupported without MMA");
12167 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12168 "Type unsupported without paired vector support");
12169   Align Alignment = LN->getAlign();
12170   SmallVector<SDValue, 4> Loads;
12171 SmallVector<SDValue, 4> LoadChains;
12172 unsigned NumVecs = VT.getSizeInBits() / 128;
12173 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12174 SDValue Load =
12175 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12176 LN->getPointerInfo().getWithOffset(Idx * 16),
12177 commonAlignment(Alignment, Idx * 16),
12178 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12179 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12180 DAG.getConstant(16, dl, BasePtr.getValueType()));
12181 Loads.push_back(Load);
12182 LoadChains.push_back(Load.getValue(1));
12183 }
12184 if (Subtarget.isLittleEndian()) {
12185 std::reverse(Loads.begin(), Loads.end());
12186 std::reverse(LoadChains.begin(), LoadChains.end());
12187 }
12188 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12189 SDValue Value =
12190 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12191 dl, VT, Loads);
12192 SDValue RetOps[] = {Value, TF};
12193 return DAG.getMergeValues(RetOps, dl);
12194}
12195
12196SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12197 SelectionDAG &DAG) const {
12198
12199 SDLoc dl(Op);
12200 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12201 SDValue StoreChain = SN->getChain();
12202   SDValue BasePtr = SN->getBasePtr();
12203   SmallVector<SDValue, 4> Values;
12204   SmallVector<SDValue, 4> Stores;
12205 EVT VT = SN->getValue().getValueType();
12206 bool IsV1024i1 = VT == MVT::v1024i1;
12207 bool IsV2048i1 = VT == MVT::v2048i1;
12208
12209 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12210 // Dense Math dmr pair registers, respectively.
12211 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12212 (void)IsV2048i1;
12213 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12214 "Dense Math support required.");
12215 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12216
12217 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12218   if (IsV1024i1) {
12219     SDValue Lo(DAG.getMachineNode(
12220 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12221 Op.getOperand(1),
12222 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12223                0);
12224     SDValue Hi(DAG.getMachineNode(
12225 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12226 Op.getOperand(1),
12227 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12228 0);
12229 MachineSDNode *ExtNode =
12230 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12231 Values.push_back(SDValue(ExtNode, 0));
12232 Values.push_back(SDValue(ExtNode, 1));
12233 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12234 Values.push_back(SDValue(ExtNode, 0));
12235 Values.push_back(SDValue(ExtNode, 1));
12236 } else {
12237 // This corresponds to v2048i1 which represents a dmr pair.
12238 SDValue Dmr0(
12239 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12240 Op.getOperand(1),
12241 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12242 0);
12243
12244 SDValue Dmr1(
12245 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12246 Op.getOperand(1),
12247 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12248 0);
12249
12250 SDValue Dmr0Lo(DAG.getMachineNode(
12251 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12252 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12253 0);
12254
12255 SDValue Dmr0Hi(DAG.getMachineNode(
12256 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12257 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12258 0);
12259
12260 SDValue Dmr1Lo(DAG.getMachineNode(
12261 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12262 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12263 0);
12264
12265 SDValue Dmr1Hi(DAG.getMachineNode(
12266 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12267 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12268 0);
12269
12270 MachineSDNode *ExtNode =
12271 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12272 Values.push_back(SDValue(ExtNode, 0));
12273 Values.push_back(SDValue(ExtNode, 1));
12274 ExtNode =
12275 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12276 Values.push_back(SDValue(ExtNode, 0));
12277 Values.push_back(SDValue(ExtNode, 1));
12278 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12279 Values.push_back(SDValue(ExtNode, 0));
12280 Values.push_back(SDValue(ExtNode, 1));
12281 ExtNode =
12282 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12283 Values.push_back(SDValue(ExtNode, 0));
12284 Values.push_back(SDValue(ExtNode, 1));
12285 }
12286
12287 if (Subtarget.isLittleEndian())
12288 std::reverse(Values.begin(), Values.end());
12289
12290   SDVTList Tys = DAG.getVTList(MVT::Other);
12291   SmallVector<SDValue, 4> Ops{
12292 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12293 Values[0], BasePtr};
12294 MachineMemOperand *MMO = SN->getMemOperand();
12295 unsigned NumVecs = VT.getSizeInBits() / 256;
12296 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12297 MachineMemOperand *NewMMO =
12298 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12299 if (Idx > 0) {
12300 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12301 DAG.getConstant(32, dl, BasePtr.getValueType()));
12302 Ops[3] = BasePtr;
12303 }
12304     Ops[2] = Values[Idx];
12305     SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12306 MVT::v256i1, NewMMO);
12307 Stores.push_back(St);
12308 }
12309
12310 SDValue TF = DAG.getTokenFactor(dl, Stores);
12311 return TF;
12312}
12313
12314SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12315 SelectionDAG &DAG) const {
12316 SDLoc dl(Op);
12317 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12318 SDValue StoreChain = SN->getChain();
12319 SDValue BasePtr = SN->getBasePtr();
12320 SDValue Value = SN->getValue();
12321 SDValue Value2 = SN->getValue();
12322 EVT StoreVT = Value.getValueType();
12323
12324 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12325 return LowerDMFVectorStore(Op, DAG);
12326
12327 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12328 return Op;
12329
12330 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12331 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
12332 // underlying registers individually.
12333 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12334 "Type unsupported without MMA");
12335 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12336 "Type unsupported without paired vector support");
12337 Align Alignment = SN->getAlign();
12338 SmallVector<SDValue, 4> Stores;
12339 unsigned NumVecs = 2;
12340 if (StoreVT == MVT::v512i1) {
12341 if (Subtarget.isISAFuture()) {
12342 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12343 MachineSDNode *ExtNode = DAG.getMachineNode(
12344 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12345
12346 Value = SDValue(ExtNode, 0);
12347 Value2 = SDValue(ExtNode, 1);
12348 } else
12349 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12350 NumVecs = 4;
12351 }
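// Store each underlying 128-bit VSX register as a v16i8 at consecutive
// 16-byte offsets; on little-endian targets the registers are visited in
// reverse order.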
12352 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12353 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12354 SDValue Elt;
12355 if (Subtarget.isISAFuture()) {
12356 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12357 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12358 Idx > 1 ? Value2 : Value,
12359 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12360 } else
12361 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12362 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12363
12364 SDValue Store =
12365 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12366 SN->getPointerInfo().getWithOffset(Idx * 16),
12367 commonAlignment(Alignment, Idx * 16),
12368 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12369 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12370 DAG.getConstant(16, dl, BasePtr.getValueType()));
12371 Stores.push_back(Store);
12372 }
12373 SDValue TF = DAG.getTokenFactor(dl, Stores);
12374 return TF;
12375}
12376
12377SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12378 SDLoc dl(Op);
12379 if (Op.getValueType() == MVT::v4i32) {
12380 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12381
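// Lower the 32x32-bit multiply with halfword multiplies: lo16(a)*lo16(b)
// comes from vmulouh, the cross terms lo16(a)*hi16(b) + hi16(a)*lo16(b) are
// accumulated by vmsumuhm on the rotated RHS, then shifted left 16 and added.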
12382 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12383 // +16 as shift amt.
12384 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12385 SDValue RHSSwap = // = vrlw RHS, 16
12386 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12387
12388 // Shrinkify inputs to v8i16.
12389 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12390 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12391 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12392
12393 // Low parts multiplied together, generating 32-bit results (we ignore the
12394 // top parts).
12395 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12396 LHS, RHS, DAG, dl, MVT::v4i32);
12397
12398 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12399 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12400 // Shift the high parts up 16 bits.
12401 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12402 Neg16, DAG, dl);
12403 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12404 } else if (Op.getValueType() == MVT::v16i8) {
12405 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12406 bool isLittleEndian = Subtarget.isLittleEndian();
12407
12408 // Multiply the even 8-bit parts, producing 16-bit sums.
12409 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12410 LHS, RHS, DAG, dl, MVT::v8i16);
12411 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12412
12413 // Multiply the odd 8-bit parts, producing 16-bit sums.
12414 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12415 LHS, RHS, DAG, dl, MVT::v8i16);
12416 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12417
12418 // Merge the results together. Because vmuleub and vmuloub are
12419 // instructions with a big-endian bias, we must reverse the
12420 // element numbering and reverse the meaning of "odd" and "even"
12421 // when generating little endian code.
12422 int Ops[16];
12423 for (unsigned i = 0; i != 8; ++i) {
12424 if (isLittleEndian) {
12425 Ops[i*2 ] = 2*i;
12426 Ops[i*2+1] = 2*i+16;
12427 } else {
12428 Ops[i*2 ] = 2*i+1;
12429 Ops[i*2+1] = 2*i+1+16;
12430 }
12431 }
12432 if (isLittleEndian)
12433 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12434 else
12435 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12436 } else {
12437 llvm_unreachable("Unknown mul to lower!");
12438 }
12439}
12440
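// f128 rounding needs Power9 vector support; without it, returning an empty
// SDValue() lets the legalizer fall back to its default expansion (typically
// a libcall).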
12441SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12442 bool IsStrict = Op->isStrictFPOpcode();
12443 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12444 !Subtarget.hasP9Vector())
12445 return SDValue();
12446
12447 return Op;
12448}
12449
12450// Custom lowering for fpext v2f32 to v2f64
12451SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12452
12453 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12454 "Should only be called for ISD::FP_EXTEND");
12455
12456 // FIXME: handle extends from half precision float vectors on P9.
12457 // We only want to custom lower an extend from v2f32 to v2f64.
12458 if (Op.getValueType() != MVT::v2f64 ||
12459 Op.getOperand(0).getValueType() != MVT::v2f32)
12460 return SDValue();
12461
12462 SDLoc dl(Op);
12463 SDValue Op0 = Op.getOperand(0);
12464
12465 switch (Op0.getOpcode()) {
12466 default:
12467 return SDValue();
12468 case ISD::EXTRACT_SUBVECTOR: {
12469 assert(Op0.getNumOperands() == 2 &&
12470 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12471 "Node should have 2 operands with second one being a constant!");
12472
12473 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12474 return SDValue();
12475
12476 // Custom lower is only done for high or low doubleword.
12477 int Idx = Op0.getConstantOperandVal(1);
12478 if (Idx % 2 != 0)
12479 return SDValue();
12480
12481 // Since input is v4f32, at this point Idx is either 0 or 2.
12482 // Shift to get the doubleword position we want.
12483 int DWord = Idx >> 1;
12484
12485 // High and low word positions are different on little endian.
12486 if (Subtarget.isLittleEndian())
12487 DWord ^= 0x1;
12488
12489 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12490 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12491 }
12492 case ISD::FADD:
12493 case ISD::FMUL:
12494 case ISD::FSUB: {
12495 SDValue NewLoad[2];
12496 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12497 // Ensure both inputs are loads.
12498 SDValue LdOp = Op0.getOperand(i);
12499 if (LdOp.getOpcode() != ISD::LOAD)
12500 return SDValue();
12501 // Generate new load node.
12502 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12503 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12504 NewLoad[i] = DAG.getMemIntrinsicNode(
12505 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12506 LD->getMemoryVT(), LD->getMemOperand());
12507 }
12508 SDValue NewOp =
12509 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12510 NewLoad[1], Op0.getNode()->getFlags());
12511 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12512 DAG.getConstant(0, dl, MVT::i32));
12513 }
12514 case ISD::LOAD: {
12515 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12516 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12517 SDValue NewLd = DAG.getMemIntrinsicNode(
12518 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12519 LD->getMemoryVT(), LD->getMemOperand());
12520 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12521 DAG.getConstant(0, dl, MVT::i32));
12522 }
12523 }
12524 llvm_unreachable("ERROR: Should return for all cases within switch.");
12525}
12526
12527static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12528 SelectionDAG &DAG,
12529 const PPCSubtarget &STI) {
12530 SDLoc DL(Value);
12531 if (STI.useCRBits())
12532 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12533 DAG.getConstant(1, DL, SumType),
12534 DAG.getConstant(0, DL, SumType));
12535 else
12536 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
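// Adding all-ones (-1) to a value that is 0 or 1 produces a carry-out exactly
// when the value is 1, turning the boolean into the glued CA flag.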
12537 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12538 Value, DAG.getAllOnesConstant(DL, SumType));
12539 return Sum.getValue(1);
12540}
12541
12542static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12543 EVT CarryType, SelectionDAG &DAG,
12544 const PPCSubtarget &STI) {
12545 SDLoc DL(Flag);
12546 SDValue Zero = DAG.getConstant(0, DL, SumType);
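// ADDE of 0 + 0 plus the incoming flag leaves only the carry bit in the sum,
// materializing CA as a 0/1 value.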
12547 SDValue Carry = DAG.getNode(
12548 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12549 if (STI.useCRBits())
12550 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12551 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12552}
12553
12554SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12555
12556 SDLoc DL(Op);
12557 SDNode *N = Op.getNode();
12558 EVT VT = N->getValueType(0);
12559 EVT CarryType = N->getValueType(1);
12560 unsigned Opc = N->getOpcode();
12561 bool IsAdd = Opc == ISD::UADDO;
12562 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12563 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12564 N->getOperand(0), N->getOperand(1));
12565 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12566 DAG, Subtarget);
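// On PowerPC the carry produced by a subtraction is the complement of the
// borrow, so flip it to get the expected USUBO overflow bit.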
12567 if (!IsAdd)
12568 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12569 DAG.getConstant(1UL, DL, CarryType));
12570 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12571}
12572
12573SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12574 SelectionDAG &DAG) const {
12575 SDLoc DL(Op);
12576 SDNode *N = Op.getNode();
12577 unsigned Opc = N->getOpcode();
12578 EVT VT = N->getValueType(0);
12579 EVT CarryType = N->getValueType(1);
12580 SDValue CarryOp = N->getOperand(2);
12581 bool IsAdd = Opc == ISD::UADDO_CARRY;
12582 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
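// For the subtract-with-borrow form both the incoming borrow and the
// resulting carry must be complemented, since CA encodes "no borrow" on PPC.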
12583 if (!IsAdd)
12584 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12585 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12586 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12587 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12588 Op.getOperand(0), Op.getOperand(1), CarryOp);
12589 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12590 Subtarget);
12591 if (!IsAdd)
12592 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12593 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12594 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12595}
12596
12597SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12598
12599 SDLoc dl(Op);
12600 SDValue LHS = Op.getOperand(0);
12601 SDValue RHS = Op.getOperand(1);
12602 EVT VT = Op.getNode()->getValueType(0);
12603
12604 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12605
12606 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12607 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12608
12609 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
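// Signed overflow occurs iff the operands have different signs and the
// result's sign differs from LHS; that is exactly the sign bit of
// (RHS ^ LHS) & (Sub ^ LHS).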
12610
12611 SDValue Overflow =
12612 DAG.getNode(ISD::SRL, dl, VT, And,
12613 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12614
12615 SDValue OverflowTrunc =
12616 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12617
12618 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12619}
12620
12621/// LowerOperation - Provide custom lowering hooks for some operations.
12622///
12623SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12624 switch (Op.getOpcode()) {
12625 default:
12626 llvm_unreachable("Wasn't expecting to be able to lower this!");
12627 case ISD::FPOW: return lowerPow(Op, DAG);
12628 case ISD::FSIN: return lowerSin(Op, DAG);
12629 case ISD::FCOS: return lowerCos(Op, DAG);
12630 case ISD::FLOG: return lowerLog(Op, DAG);
12631 case ISD::FLOG10: return lowerLog10(Op, DAG);
12632 case ISD::FEXP: return lowerExp(Op, DAG);
12633 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12634 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12635 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12636 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12637 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12638 case ISD::STRICT_FSETCC:
12639 case ISD::STRICT_FSETCCS:
12640 case ISD::SETCC: return LowerSETCC(Op, DAG);
12641 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12642 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12643 case ISD::SSUBO:
12644 return LowerSSUBO(Op, DAG);
12645
12646 case ISD::INLINEASM:
12647 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12648 // Variable argument lowering.
12649 case ISD::VASTART: return LowerVASTART(Op, DAG);
12650 case ISD::VAARG: return LowerVAARG(Op, DAG);
12651 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12652
12653 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12654 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12655 case ISD::GET_DYNAMIC_AREA_OFFSET:
12656 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12657
12658 // Exception handling lowering.
12659 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12660 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12661 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12662
12663 case ISD::LOAD: return LowerLOAD(Op, DAG);
12664 case ISD::STORE: return LowerSTORE(Op, DAG);
12665 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12666 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12667 case ISD::STRICT_FP_TO_UINT:
12668 case ISD::STRICT_FP_TO_SINT:
12669 case ISD::FP_TO_UINT:
12670 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12671 case ISD::STRICT_UINT_TO_FP:
12672 case ISD::STRICT_SINT_TO_FP:
12673 case ISD::UINT_TO_FP:
12674 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12675 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12676 case ISD::SET_ROUNDING:
12677 return LowerSET_ROUNDING(Op, DAG);
12678
12679 // Lower 64-bit shifts.
12680 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12681 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12682 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12683
12684 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12685 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12686
12687 // Vector-related lowering.
12688 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12689 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12690 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12691 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12692 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12693 case ISD::MUL: return LowerMUL(Op, DAG);
12694 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12695 case ISD::STRICT_FP_ROUND:
12696 case ISD::FP_ROUND:
12697 return LowerFP_ROUND(Op, DAG);
12698 case ISD::ROTL: return LowerROTL(Op, DAG);
12699
12700 // For counter-based loop handling.
12701 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12702
12703 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12704
12705 // Frame & Return address.
12706 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12707 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12708
12709 case ISD::INTRINSIC_VOID:
12710 return LowerINTRINSIC_VOID(Op, DAG);
12711 case ISD::BSWAP:
12712 return LowerBSWAP(Op, DAG);
12713 case ISD::ATOMIC_CMP_SWAP:
12714 return LowerATOMIC_CMP_SWAP(Op, DAG);
12715 case ISD::ATOMIC_STORE:
12716 return LowerATOMIC_LOAD_STORE(Op, DAG);
12717 case ISD::IS_FPCLASS:
12718 return LowerIS_FPCLASS(Op, DAG);
12719 case ISD::UADDO:
12720 case ISD::USUBO:
12721 return LowerADDSUBO(Op, DAG);
12722 case ISD::UADDO_CARRY:
12723 case ISD::USUBO_CARRY:
12724 return LowerADDSUBO_CARRY(Op, DAG);
12725 }
12726}
12727
12728void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12729 SmallVectorImpl<SDValue> &Results,
12730 SelectionDAG &DAG) const {
12731 SDLoc dl(N);
12732 switch (N->getOpcode()) {
12733 default:
12734 llvm_unreachable("Do not know how to custom type legalize this operation!");
12735 case ISD::ATOMIC_LOAD: {
12736 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12737 Results.push_back(Res);
12738 Results.push_back(Res.getValue(1));
12739 break;
12740 }
12741 case ISD::READCYCLECOUNTER: {
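// On 32-bit targets the 64-bit time base is read as two i32 halves;
// BUILD_PAIR glues them back into the i64 result expected by the caller.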
12742 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12743 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12744
12745 Results.push_back(
12746 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12747 Results.push_back(RTB.getValue(2));
12748 break;
12749 }
12750 case ISD::INTRINSIC_W_CHAIN: {
12751 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12752 break;
12753
12754 assert(N->getValueType(0) == MVT::i1 &&
12755 "Unexpected result type for CTR decrement intrinsic");
12756 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12757 N->getValueType(0));
12758 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12759 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12760 N->getOperand(1));
12761
12762 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12763 Results.push_back(NewInt.getValue(1));
12764 break;
12765 }
12766 case ISD::INTRINSIC_WO_CHAIN: {
12767 switch (N->getConstantOperandVal(0)) {
12768 case Intrinsic::ppc_pack_longdouble:
12769 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12770 N->getOperand(2), N->getOperand(1)));
12771 break;
12772 case Intrinsic::ppc_maxfe:
12773 case Intrinsic::ppc_minfe:
12774 case Intrinsic::ppc_fnmsub:
12775 case Intrinsic::ppc_convert_f128_to_ppcf128:
12776 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12777 break;
12778 }
12779 break;
12780 }
12781 case ISD::VAARG: {
12782 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12783 return;
12784
12785 EVT VT = N->getValueType(0);
12786
12787 if (VT == MVT::i64) {
12788 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12789
12790 Results.push_back(NewNode);
12791 Results.push_back(NewNode.getValue(1));
12792 }
12793 return;
12794 }
12795 case ISD::STRICT_FP_TO_SINT:
12796 case ISD::STRICT_FP_TO_UINT:
12797 case ISD::FP_TO_SINT:
12798 case ISD::FP_TO_UINT: {
12799 // LowerFP_TO_INT() can only handle f32 and f64.
12800 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12801 MVT::ppcf128)
12802 return;
12803 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12804 Results.push_back(LoweredValue);
12805 if (N->isStrictFPOpcode())
12806 Results.push_back(LoweredValue.getValue(1));
12807 return;
12808 }
12809 case ISD::TRUNCATE: {
12810 if (!N->getValueType(0).isVector())
12811 return;
12812 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12813 if (Lowered)
12814 Results.push_back(Lowered);
12815 return;
12816 }
12817 case ISD::SCALAR_TO_VECTOR: {
12818 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12819 if (Lowered)
12820 Results.push_back(Lowered);
12821 return;
12822 }
12823 case ISD::FSHL:
12824 case ISD::FSHR:
12825 // Don't handle funnel shifts here.
12826 return;
12827 case ISD::BITCAST:
12828 // Don't handle bitcast here.
12829 return;
12830 case ISD::FP_EXTEND:
12831 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12832 if (Lowered)
12833 Results.push_back(Lowered);
12834 return;
12835 }
12836}
12837
12838//===----------------------------------------------------------------------===//
12839// Other Lowering Code
12840//===----------------------------------------------------------------------===//
12841
12842static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12843 return Builder.CreateIntrinsic(Id, {});
12844}
12845
12846Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12847 Value *Addr,
12848 AtomicOrdering Ord) const {
12849 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12850
12851 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12852 "Only 8/16/32/64-bit atomic loads supported");
12853 Intrinsic::ID IntID;
12854 switch (SZ) {
12855 default:
12856 llvm_unreachable("Unexpected PrimitiveSize");
12857 case 8:
12858 IntID = Intrinsic::ppc_lbarx;
12859 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12860 break;
12861 case 16:
12862 IntID = Intrinsic::ppc_lharx;
12863 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12864 break;
12865 case 32:
12866 IntID = Intrinsic::ppc_lwarx;
12867 break;
12868 case 64:
12869 IntID = Intrinsic::ppc_ldarx;
12870 break;
12871 }
12872 Value *Call =
12873 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12874
12875 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12876}
12877
12878// Perform a store-conditional operation to Addr. Return the status of the
12879// store. This should be 0 if the store succeeded, non-zero otherwise.
12880Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12881 Value *Val, Value *Addr,
12882 AtomicOrdering Ord) const {
12883 Type *Ty = Val->getType();
12884 unsigned SZ = Ty->getPrimitiveSizeInBits();
12885
12886 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12887 "Only 8/16/32/64-bit atomic stores supported");
12888 Intrinsic::ID IntID;
12889 switch (SZ) {
12890 default:
12891 llvm_unreachable("Unexpected PrimitiveSize");
12892 case 8:
12893 IntID = Intrinsic::ppc_stbcx;
12894 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12895 break;
12896 case 16:
12897 IntID = Intrinsic::ppc_sthcx;
12898 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12899 break;
12900 case 32:
12901 IntID = Intrinsic::ppc_stwcx;
12902 break;
12903 case 64:
12904 IntID = Intrinsic::ppc_stdcx;
12905 break;
12906 }
12907
12908 if (SZ == 8 || SZ == 16)
12909 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12910
12911 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12912 /*FMFSource=*/nullptr, "stcx");
12913 return Builder.CreateXor(Call, Builder.getInt32(1));
12914}
12915
12916// The mappings for emitLeading/TrailingFence are taken from
12917// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12918Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12919 Instruction *Inst,
12920 AtomicOrdering Ord) const {
12921 if (Ord == AtomicOrdering::SequentiallyConsistent)
12922 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12923 if (isReleaseOrStronger(Ord))
12924 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12925 return nullptr;
12926}
12927
12928Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12929 Instruction *Inst,
12930 AtomicOrdering Ord) const {
12931 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12932 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12933 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12934 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12935 if (isa<LoadInst>(Inst))
12936 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12937 {Inst});
12938 // FIXME: Can use isync for rmw operation.
12939 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12940 }
12941 return nullptr;
12942}
12943
12944MachineBasicBlock *
12945PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12946 unsigned AtomicSize,
12947 unsigned BinOpcode,
12948 unsigned CmpOpcode,
12949 unsigned CmpPred) const {
12950 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12951 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12952
12953 auto LoadMnemonic = PPC::LDARX;
12954 auto StoreMnemonic = PPC::STDCX;
12955 switch (AtomicSize) {
12956 default:
12957 llvm_unreachable("Unexpected size of atomic entity");
12958 case 1:
12959 LoadMnemonic = PPC::LBARX;
12960 StoreMnemonic = PPC::STBCX;
12961 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12962 break;
12963 case 2:
12964 LoadMnemonic = PPC::LHARX;
12965 StoreMnemonic = PPC::STHCX;
12966 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12967 break;
12968 case 4:
12969 LoadMnemonic = PPC::LWARX;
12970 StoreMnemonic = PPC::STWCX;
12971 break;
12972 case 8:
12973 LoadMnemonic = PPC::LDARX;
12974 StoreMnemonic = PPC::STDCX;
12975 break;
12976 }
12977
12978 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12979 MachineFunction *F = BB->getParent();
12980 MachineFunction::iterator It = ++BB->getIterator();
12981
12982 Register dest = MI.getOperand(0).getReg();
12983 Register ptrA = MI.getOperand(1).getReg();
12984 Register ptrB = MI.getOperand(2).getReg();
12985 Register incr = MI.getOperand(3).getReg();
12986 DebugLoc dl = MI.getDebugLoc();
12987
12988 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12989 MachineBasicBlock *loop2MBB =
12990 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12991 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12992 F->insert(It, loopMBB);
12993 if (CmpOpcode)
12994 F->insert(It, loop2MBB);
12995 F->insert(It, exitMBB);
12996 exitMBB->splice(exitMBB->begin(), BB,
12997 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12998 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12999
13000 MachineRegisterInfo &RegInfo = F->getRegInfo();
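// For ATOMIC_SWAP (BinOpcode == 0) the incoming value is stored unmodified,
// so incr itself serves as the store source; otherwise a scratch register
// holds the result of the binary operation.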
13001 Register TmpReg = (!BinOpcode) ? incr :
13002 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13003 : &PPC::GPRCRegClass);
13004
13005 // thisMBB:
13006 // ...
13007 // fallthrough --> loopMBB
13008 BB->addSuccessor(loopMBB);
13009
13010 // loopMBB:
13011 // l[wd]arx dest, ptr
13012 // add r0, dest, incr
13013 // st[wd]cx. r0, ptr
13014 // bne- loopMBB
13015 // fallthrough --> exitMBB
13016
13017 // For max/min...
13018 // loopMBB:
13019 // l[wd]arx dest, ptr
13020 // cmpl?[wd] dest, incr
13021 // bgt exitMBB
13022 // loop2MBB:
13023 // st[wd]cx. dest, ptr
13024 // bne- loopMBB
13025 // fallthrough --> exitMBB
13026
13027 BB = loopMBB;
13028 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13029 .addReg(ptrA).addReg(ptrB);
13030 if (BinOpcode)
13031 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13032 if (CmpOpcode) {
13033 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13034 // Signed comparisons of byte or halfword values must be sign-extended.
13035 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13036 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13037 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13038 ExtReg).addReg(dest);
13039 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13040 } else
13041 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13042
13043 BuildMI(BB, dl, TII->get(PPC::BCC))
13044 .addImm(CmpPred)
13045 .addReg(CrReg)
13046 .addMBB(exitMBB);
13047 BB->addSuccessor(loop2MBB);
13048 BB->addSuccessor(exitMBB);
13049 BB = loop2MBB;
13050 }
13051 BuildMI(BB, dl, TII->get(StoreMnemonic))
13052 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13053 BuildMI(BB, dl, TII->get(PPC::BCC))
13054 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
13055 BB->addSuccessor(loopMBB);
13056 BB->addSuccessor(exitMBB);
13057
13058 // exitMBB:
13059 // ...
13060 BB = exitMBB;
13061 return BB;
13062}
13063
13064static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13065 switch(MI.getOpcode()) {
13066 default:
13067 return false;
13068 case PPC::COPY:
13069 return TII->isSignExtended(MI.getOperand(1).getReg(),
13070 &MI.getMF()->getRegInfo());
13071 case PPC::LHA:
13072 case PPC::LHA8:
13073 case PPC::LHAU:
13074 case PPC::LHAU8:
13075 case PPC::LHAUX:
13076 case PPC::LHAUX8:
13077 case PPC::LHAX:
13078 case PPC::LHAX8:
13079 case PPC::LWA:
13080 case PPC::LWAUX:
13081 case PPC::LWAX:
13082 case PPC::LWAX_32:
13083 case PPC::LWA_32:
13084 case PPC::PLHA:
13085 case PPC::PLHA8:
13086 case PPC::PLHA8pc:
13087 case PPC::PLHApc:
13088 case PPC::PLWA:
13089 case PPC::PLWA8:
13090 case PPC::PLWA8pc:
13091 case PPC::PLWApc:
13092 case PPC::EXTSB:
13093 case PPC::EXTSB8:
13094 case PPC::EXTSB8_32_64:
13095 case PPC::EXTSB8_rec:
13096 case PPC::EXTSB_rec:
13097 case PPC::EXTSH:
13098 case PPC::EXTSH8:
13099 case PPC::EXTSH8_32_64:
13100 case PPC::EXTSH8_rec:
13101 case PPC::EXTSH_rec:
13102 case PPC::EXTSW:
13103 case PPC::EXTSWSLI:
13104 case PPC::EXTSWSLI_32_64:
13105 case PPC::EXTSWSLI_32_64_rec:
13106 case PPC::EXTSWSLI_rec:
13107 case PPC::EXTSW_32:
13108 case PPC::EXTSW_32_64:
13109 case PPC::EXTSW_32_64_rec:
13110 case PPC::EXTSW_rec:
13111 case PPC::SRAW:
13112 case PPC::SRAWI:
13113 case PPC::SRAWI_rec:
13114 case PPC::SRAW_rec:
13115 return true;
13116 }
13117 return false;
13118}
13119
13122 bool is8bit, // operation
13123 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13124 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13125 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13126
13127 // If this is a signed comparison and the value being compared is not known
13128 // to be sign extended, sign extend it here.
13129 DebugLoc dl = MI.getDebugLoc();
13130 MachineFunction *F = BB->getParent();
13131 MachineRegisterInfo &RegInfo = F->getRegInfo();
13132 Register incr = MI.getOperand(3).getReg();
13133 bool IsSignExtended =
13134 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13135
13136 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13137 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13138 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13139 .addReg(MI.getOperand(3).getReg());
13140 MI.getOperand(3).setReg(ValueReg);
13141 incr = ValueReg;
13142 }
13143 // If we support part-word atomic mnemonics, just use them
13144 if (Subtarget.hasPartwordAtomics())
13145 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13146 CmpPred);
13147
13148 // In 64 bit mode we have to use 64 bits for addresses, even though the
13149 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13150 // registers without caring whether they're 32 or 64, but here we're
13151 // doing actual arithmetic on the addresses.
13152 bool is64bit = Subtarget.isPPC64();
13153 bool isLittleEndian = Subtarget.isLittleEndian();
13154 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13155
13156 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13157 MachineFunction::iterator It = ++BB->getIterator();
13158
13159 Register dest = MI.getOperand(0).getReg();
13160 Register ptrA = MI.getOperand(1).getReg();
13161 Register ptrB = MI.getOperand(2).getReg();
13162
13163 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13164 MachineBasicBlock *loop2MBB =
13165 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13166 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13167 F->insert(It, loopMBB);
13168 if (CmpOpcode)
13169 F->insert(It, loop2MBB);
13170 F->insert(It, exitMBB);
13171 exitMBB->splice(exitMBB->begin(), BB,
13172 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13173 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13174
13175 const TargetRegisterClass *RC =
13176 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13177 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13178
13179 Register PtrReg = RegInfo.createVirtualRegister(RC);
13180 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13181 Register ShiftReg =
13182 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13183 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13184 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13185 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13186 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13187 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13188 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13189 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13190 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13191 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13192 Register Ptr1Reg;
13193 Register TmpReg =
13194 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13195
13196 // thisMBB:
13197 // ...
13198 // fallthrough --> loopMBB
13199 BB->addSuccessor(loopMBB);
13200
13201 // The 4-byte load must be aligned, while a char or short may be
13202 // anywhere in the word. Hence all this nasty bookkeeping code.
13203 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13204 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13205 // xori shift, shift1, 24 [16]
13206 // rlwinm ptr, ptr1, 0, 0, 29
13207 // slw incr2, incr, shift
13208 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13209 // slw mask, mask2, shift
13210 // loopMBB:
13211 // lwarx tmpDest, ptr
13212 // add tmp, tmpDest, incr2
13213 // andc tmp2, tmpDest, mask
13214 // and tmp3, tmp, mask
13215 // or tmp4, tmp3, tmp2
13216 // stwcx. tmp4, ptr
13217 // bne- loopMBB
13218 // fallthrough --> exitMBB
13219 // srw SrwDest, tmpDest, shift
13220 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13221 if (ptrA != ZeroReg) {
13222 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13223 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13224 .addReg(ptrA)
13225 .addReg(ptrB);
13226 } else {
13227 Ptr1Reg = ptrB;
13228 }
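// Shift1Reg becomes the bit offset of the byte/halfword within its aligned
// word (byte offset * 8); big-endian targets complement it with xori so the
// element is addressed from the most-significant end, and PtrReg is the
// word-aligned address with the low two bits cleared.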
13229 // We need to use a 32-bit subregister here to avoid a register class
13230 // mismatch in 64-bit mode.
13231 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13232 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13233 .addImm(3)
13234 .addImm(27)
13235 .addImm(is8bit ? 28 : 27);
13236 if (!isLittleEndian)
13237 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13238 .addReg(Shift1Reg)
13239 .addImm(is8bit ? 24 : 16);
13240 if (is64bit)
13241 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13242 .addReg(Ptr1Reg)
13243 .addImm(0)
13244 .addImm(61);
13245 else
13246 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13247 .addReg(Ptr1Reg)
13248 .addImm(0)
13249 .addImm(0)
13250 .addImm(29);
13251 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13252 if (is8bit)
13253 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13254 else {
13255 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13256 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13257 .addReg(Mask3Reg)
13258 .addImm(65535);
13259 }
13260 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13261 .addReg(Mask2Reg)
13262 .addReg(ShiftReg);
13263
13264 BB = loopMBB;
13265 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13266 .addReg(ZeroReg)
13267 .addReg(PtrReg);
13268 if (BinOpcode)
13269 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13270 .addReg(Incr2Reg)
13271 .addReg(TmpDestReg);
13272 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13273 .addReg(TmpDestReg)
13274 .addReg(MaskReg);
13275 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13276 if (CmpOpcode) {
13277 // For unsigned comparisons, we can directly compare the shifted values.
13278 // For signed comparisons we shift and sign extend.
13279 Register SReg = RegInfo.createVirtualRegister(GPRC);
13280 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13281 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13282 .addReg(TmpDestReg)
13283 .addReg(MaskReg);
13284 unsigned ValueReg = SReg;
13285 unsigned CmpReg = Incr2Reg;
13286 if (CmpOpcode == PPC::CMPW) {
13287 ValueReg = RegInfo.createVirtualRegister(GPRC);
13288 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13289 .addReg(SReg)
13290 .addReg(ShiftReg);
13291 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13292 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13293 .addReg(ValueReg);
13294 ValueReg = ValueSReg;
13295 CmpReg = incr;
13296 }
13297 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13298 BuildMI(BB, dl, TII->get(PPC::BCC))
13299 .addImm(CmpPred)
13300 .addReg(CrReg)
13301 .addMBB(exitMBB);
13302 BB->addSuccessor(loop2MBB);
13303 BB->addSuccessor(exitMBB);
13304 BB = loop2MBB;
13305 }
13306 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13307 BuildMI(BB, dl, TII->get(PPC::STWCX))
13308 .addReg(Tmp4Reg)
13309 .addReg(ZeroReg)
13310 .addReg(PtrReg);
13311 BuildMI(BB, dl, TII->get(PPC::BCC))
13312 .addImm(PPC::PRED_NE)
13313 .addReg(PPC::CR0)
13314 .addMBB(loopMBB);
13315 BB->addSuccessor(loopMBB);
13316 BB->addSuccessor(exitMBB);
13317
13318 // exitMBB:
13319 // ...
13320 BB = exitMBB;
13321 // Since the shift amount is not a constant, we need to clear
13322 // the upper bits with a separate RLWINM.
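// Both instructions below are inserted at the start of exitMBB, so the SRW
// built second ends up ahead of the RLWINM: shift the loaded word down first,
// then mask off the upper bits.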
13323 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13324 .addReg(SrwDestReg)
13325 .addImm(0)
13326 .addImm(is8bit ? 24 : 16)
13327 .addImm(31);
13328 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13329 .addReg(TmpDestReg)
13330 .addReg(ShiftReg);
13331 return BB;
13332}
13333
13334MachineBasicBlock *
13335PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13336 MachineBasicBlock *MBB) const {
13337 DebugLoc DL = MI.getDebugLoc();
13338 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13339 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13340
13341 MachineFunction *MF = MBB->getParent();
13342 MachineRegisterInfo &MRI = MF->getRegInfo();
13343
13344 const BasicBlock *BB = MBB->getBasicBlock();
13345 MachineFunction::iterator I = ++MBB->getIterator();
13346
13347 Register DstReg = MI.getOperand(0).getReg();
13348 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13349 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13350 Register mainDstReg = MRI.createVirtualRegister(RC);
13351 Register restoreDstReg = MRI.createVirtualRegister(RC);
13352
13353 MVT PVT = getPointerTy(MF->getDataLayout());
13354 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13355 "Invalid Pointer Size!");
13356 // For v = setjmp(buf), we generate
13357 //
13358 // thisMBB:
13359 // SjLjSetup mainMBB
13360 // bl mainMBB
13361 // v_restore = 1
13362 // b sinkMBB
13363 //
13364 // mainMBB:
13365 // buf[LabelOffset] = LR
13366 // v_main = 0
13367 //
13368 // sinkMBB:
13369 // v = phi(main, restore)
13370 //
13371
13372 MachineBasicBlock *thisMBB = MBB;
13373 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13374 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13375 MF->insert(I, mainMBB);
13376 MF->insert(I, sinkMBB);
13377
13378 MachineInstrBuilder MIB;
13379
13380 // Transfer the remainder of BB and its successor edges to sinkMBB.
13381 sinkMBB->splice(sinkMBB->begin(), MBB,
13382 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13383 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
13384
13385 // Note that the structure of the jmp_buf used here is not compatible
13386 // with that used by libc, and is not designed to be. Specifically, it
13387 // stores only those 'reserved' registers that LLVM does not otherwise
13388 // understand how to spill. Also, by convention, by the time this
13389 // intrinsic is called, Clang has already stored the frame address in the
13390 // first slot of the buffer and stack address in the third. Following the
13391 // X86 target code, we'll store the jump address in the second slot. We also
13392 // need to save the TOC pointer (R2) to handle jumps between shared
13393 // libraries, and that will be stored in the fourth slot. The thread
13394 // identifier (R13) is not affected.
13395
13396 // thisMBB:
13397 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13398 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13399 const int64_t BPOffset = 4 * PVT.getStoreSize();
13400
13401 // Prepare the IP in a register.
13402 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13403 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13404 Register BufReg = MI.getOperand(1).getReg();
13405
13406 if (Subtarget.is64BitELFABI()) {
13407 setUsesTOCBasePtr(*MBB->getParent());
13408 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13409 .addReg(PPC::X2)
13410 .addImm(TOCOffset)
13411 .addReg(BufReg)
13412 .cloneMemRefs(MI);
13413 }
13414
13415 // Naked functions never have a base pointer, and so we use r1. For all
13416 // other functions, this decision must be delayed until during PEI.
13417 unsigned BaseReg;
13418 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13419 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13420 else
13421 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13422
13423 MIB = BuildMI(*thisMBB, MI, DL,
13424 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13425 .addReg(BaseReg)
13426 .addImm(BPOffset)
13427 .addReg(BufReg)
13428 .cloneMemRefs(MI);
13429
13430 // Setup
13431 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13432 MIB.addRegMask(TRI->getNoPreservedMask());
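// The bcl above branches to mainMBB while setting LR to the address of the
// instruction that follows it here; mainMBB's mflr saves that address into
// the buffer, so a later longjmp resumes at the v_restore = 1 path below.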
13433
13434 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13435
13436 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13437 .addMBB(mainMBB);
13438 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13439
13440 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13441 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13442
13443 // mainMBB:
13444 // mainDstReg = 0
13445 MIB =
13446 BuildMI(mainMBB, DL,
13447 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13448
13449 // Store IP
13450 if (Subtarget.isPPC64()) {
13451 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13452 .addReg(LabelReg)
13453 .addImm(LabelOffset)
13454 .addReg(BufReg);
13455 } else {
13456 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13457 .addReg(LabelReg)
13458 .addImm(LabelOffset)
13459 .addReg(BufReg);
13460 }
13461 MIB.cloneMemRefs(MI);
13462
13463 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13464 mainMBB->addSuccessor(sinkMBB);
13465
13466 // sinkMBB:
13467 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13468 TII->get(PPC::PHI), DstReg)
13469 .addReg(mainDstReg).addMBB(mainMBB)
13470 .addReg(restoreDstReg).addMBB(thisMBB);
13471
13472 MI.eraseFromParent();
13473 return sinkMBB;
13474}
13475
13476MachineBasicBlock *
13477PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13478 MachineBasicBlock *MBB) const {
13479 DebugLoc DL = MI.getDebugLoc();
13480 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13481
13482 MachineFunction *MF = MBB->getParent();
13483 MachineRegisterInfo &MRI = MF->getRegInfo();
13484
13485 MVT PVT = getPointerTy(MF->getDataLayout());
13486 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13487 "Invalid Pointer Size!");
13488
13489 const TargetRegisterClass *RC =
13490 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13491 Register Tmp = MRI.createVirtualRegister(RC);
13492 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13493 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13494 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13495 unsigned BP =
13496 (PVT == MVT::i64)
13497 ? PPC::X30
13498 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13499 : PPC::R30);
13500
13501 MachineInstrBuilder MIB;
13502
13503 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13504 const int64_t SPOffset = 2 * PVT.getStoreSize();
13505 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13506 const int64_t BPOffset = 4 * PVT.getStoreSize();
13507
13508 Register BufReg = MI.getOperand(0).getReg();
13509
13510 // Reload FP (the jumped-to function may not have had a
13511 // frame pointer, and if so, then its r31 will be restored
13512 // as necessary).
13513 if (PVT == MVT::i64) {
13514 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13515 .addImm(0)
13516 .addReg(BufReg);
13517 } else {
13518 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13519 .addImm(0)
13520 .addReg(BufReg);
13521 }
13522 MIB.cloneMemRefs(MI);
13523
13524 // Reload IP
13525 if (PVT == MVT::i64) {
13526 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13527 .addImm(LabelOffset)
13528 .addReg(BufReg);
13529 } else {
13530 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13531 .addImm(LabelOffset)
13532 .addReg(BufReg);
13533 }
13534 MIB.cloneMemRefs(MI);
13535
13536 // Reload SP
13537 if (PVT == MVT::i64) {
13538 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13539 .addImm(SPOffset)
13540 .addReg(BufReg);
13541 } else {
13542 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13543 .addImm(SPOffset)
13544 .addReg(BufReg);
13545 }
13546 MIB.cloneMemRefs(MI);
13547
13548 // Reload BP
13549 if (PVT == MVT::i64) {
13550 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13551 .addImm(BPOffset)
13552 .addReg(BufReg);
13553 } else {
13554 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13555 .addImm(BPOffset)
13556 .addReg(BufReg);
13557 }
13558 MIB.cloneMemRefs(MI);
13559
13560 // Reload TOC
13561 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13562 setUsesTOCBasePtr(*MBB->getParent());
13563 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13564 .addImm(TOCOffset)
13565 .addReg(BufReg)
13566 .cloneMemRefs(MI);
13567 }
13568
13569 // Jump
13570 BuildMI(*MBB, MI, DL,
13571 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13572 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13573
13574 MI.eraseFromParent();
13575 return MBB;
13576}
13577
13578bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13579 // If the function specifically requests inline stack probes, emit them.
13580 if (MF.getFunction().hasFnAttribute("probe-stack"))
13581 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13582 "inline-asm";
13583 return false;
13584}
13585
13586unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13587 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13588 unsigned StackAlign = TFI->getStackAlignment();
13589 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13590 "Unexpected stack alignment");
13591 // The default stack probe size is 4096 if the function has no
13592 // stack-probe-size attribute.
13593 const Function &Fn = MF.getFunction();
13594 unsigned StackProbeSize =
13595 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13596 // Round down to the stack alignment.
13597 StackProbeSize &= ~(StackAlign - 1);
13598 return StackProbeSize ? StackProbeSize : StackAlign;
13599}
13600
13601// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13602// into three phases. In the first phase, it uses the pseudo instruction
13603// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
13604// FinalStackPtr. In the second phase, it generates a loop that probes blocks.
13605// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
13606// of MaxCallFrameSize so that it can calculate the correct data area pointer.
13607MachineBasicBlock *
13608PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13609 MachineBasicBlock *MBB) const {
13610 const bool isPPC64 = Subtarget.isPPC64();
13611 MachineFunction *MF = MBB->getParent();
13612 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13613 DebugLoc DL = MI.getDebugLoc();
13614 const unsigned ProbeSize = getStackProbeSize(*MF);
13615 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13616 MachineRegisterInfo &MRI = MF->getRegInfo();
13617 // The CFG of the stack-probing code looks as follows:
13618 // +-----+
13619 // | MBB |
13620 // +--+--+
13621 // |
13622 // +----v----+
13623 // +--->+ TestMBB +---+
13624 // | +----+----+ |
13625 // | | |
13626 // | +-----v----+ |
13627 // +---+ BlockMBB | |
13628 // +----------+ |
13629 // |
13630 // +---------+ |
13631 // | TailMBB +<--+
13632 // +---------+
13633 // In MBB, calculate previous frame pointer and final stack pointer.
13634 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13635 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13636 // TailMBB is spliced via \p MI.
13637 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13638 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13639 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13640
13641 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13642 MF->insert(MBBIter, TestMBB);
13643 MF->insert(MBBIter, BlockMBB);
13644 MF->insert(MBBIter, TailMBB);
13645
13646 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13647 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13648
13649 Register DstReg = MI.getOperand(0).getReg();
13650 Register NegSizeReg = MI.getOperand(1).getReg();
13651 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13652 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13653 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13654 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13655
13656 // Since the value of NegSizeReg might be realigned during prologue/epilogue
13657 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13658 // actual FramePointer and NegSize.
13659 unsigned ProbeOpc;
13660 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13661 ProbeOpc =
13662 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13663 else
13664 // With the PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG variant, ActualNegSizeReg
13665 // and NegSizeReg are allocated to the same physical register, avoiding a
13666 // redundant copy when NegSizeReg has only one use, namely the current MI,
13667 // which is about to be replaced by PREPARE_PROBED_ALLOCA.
13668 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13669 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13670 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13671 .addDef(ActualNegSizeReg)
13672 .addReg(NegSizeReg)
13673 .add(MI.getOperand(2))
13674 .add(MI.getOperand(3));
13675
13676 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13677 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13678 FinalStackPtr)
13679 .addReg(SPReg)
13680 .addReg(ActualNegSizeReg);
13681
13682 // Materialize a scratch register for update.
13683 int64_t NegProbeSize = -(int64_t)ProbeSize;
13684 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13685 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13686 if (!isInt<16>(NegProbeSize)) {
13687 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13688 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13689 .addImm(NegProbeSize >> 16);
13690 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13691 ScratchReg)
13692 .addReg(TempReg)
13693 .addImm(NegProbeSize & 0xFFFF);
13694 } else
13695 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13696 .addImm(NegProbeSize);
13697
13698 {
13699 // Probing leading residual part.
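// Compute the residual, ActualNegSize modulo ProbeSize (kept negative), with
// the div/mul/subf sequence below, and probe it with a single st[dw]ux that
// also advances SP.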
13700 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13701 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13702 .addReg(ActualNegSizeReg)
13703 .addReg(ScratchReg);
13704 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13705 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13706 .addReg(Div)
13707 .addReg(ScratchReg);
13708 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13709 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13710 .addReg(Mul)
13711 .addReg(ActualNegSizeReg);
13712 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13713 .addReg(FramePointer)
13714 .addReg(SPReg)
13715 .addReg(NegMod);
13716 }
13717
13718 {
13719 // Remaining part should be multiple of ProbeSize.
13720 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13721 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13722 .addReg(SPReg)
13723 .addReg(FinalStackPtr);
13724 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13726 .addReg(CmpResult)
13727 .addMBB(TailMBB);
13728 TestMBB->addSuccessor(BlockMBB);
13729 TestMBB->addSuccessor(TailMBB);
13730 }
13731
13732 {
13733 // Touch the block.
13734 // |P...|P...|P...
13735 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13736 .addReg(FramePointer)
13737 .addReg(SPReg)
13738 .addReg(ScratchReg);
13739 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13740 BlockMBB->addSuccessor(TestMBB);
13741 }
13742
13743 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion,
13744 // so use the DYNAREAOFFSET pseudo instruction to get the future result.
13745 Register MaxCallFrameSizeReg =
13746 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13747 BuildMI(TailMBB, DL,
13748 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13749 MaxCallFrameSizeReg)
13750 .add(MI.getOperand(2))
13751 .add(MI.getOperand(3));
13752 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13753 .addReg(SPReg)
13754 .addReg(MaxCallFrameSizeReg);
13755
13756 // Splice instructions after MI to TailMBB.
13757 TailMBB->splice(TailMBB->end(), MBB,
13758 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13759 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13760 MBB->addSuccessor(TestMBB);
13761
13762 // Delete the pseudo instruction.
13763 MI.eraseFromParent();
13764
13765 ++NumDynamicAllocaProbed;
13766 return TailMBB;
13767}
13768
13769static bool IsSelectCC(MachineInstr &MI) {
13770 switch (MI.getOpcode()) {
13771 case PPC::SELECT_CC_I4:
13772 case PPC::SELECT_CC_I8:
13773 case PPC::SELECT_CC_F4:
13774 case PPC::SELECT_CC_F8:
13775 case PPC::SELECT_CC_F16:
13776 case PPC::SELECT_CC_VRRC:
13777 case PPC::SELECT_CC_VSFRC:
13778 case PPC::SELECT_CC_VSSRC:
13779 case PPC::SELECT_CC_VSRC:
13780 case PPC::SELECT_CC_SPE4:
13781 case PPC::SELECT_CC_SPE:
13782 return true;
13783 default:
13784 return false;
13785 }
13786}
13787
13788static bool IsSelect(MachineInstr &MI) {
13789 switch (MI.getOpcode()) {
13790 case PPC::SELECT_I4:
13791 case PPC::SELECT_I8:
13792 case PPC::SELECT_F4:
13793 case PPC::SELECT_F8:
13794 case PPC::SELECT_F16:
13795 case PPC::SELECT_SPE:
13796 case PPC::SELECT_SPE4:
13797 case PPC::SELECT_VRRC:
13798 case PPC::SELECT_VSFRC:
13799 case PPC::SELECT_VSSRC:
13800 case PPC::SELECT_VSRC:
13801 return true;
13802 default:
13803 return false;
13804 }
13805}
13806
13807MachineBasicBlock *
13808PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13809 MachineBasicBlock *BB) const {
13810 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13811 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13812 if (Subtarget.is64BitELFABI() &&
13813 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13814 !Subtarget.isUsingPCRelativeCalls()) {
13815 // Call lowering should have added an r2 operand to indicate a dependence
13816 // on the TOC base pointer value. It can't however, because there is no
13817 // way to mark the dependence as implicit there, and so the stackmap code
13818 // will confuse it with a regular operand. Instead, add the dependence
13819 // here.
13820 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13821 }
13822
13823 return emitPatchPoint(MI, BB);
13824 }
13825
13826 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13827 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13828 return emitEHSjLjSetJmp(MI, BB);
13829 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13830 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13831 return emitEHSjLjLongJmp(MI, BB);
13832 }
13833
13834 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13835
13836 // To "insert" these instructions we actually have to insert their
13837 // control-flow patterns.
13838 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13839 MachineFunction::iterator It = ++BB->getIterator();
13840
13841 MachineFunction *F = BB->getParent();
13842 MachineRegisterInfo &MRI = F->getRegInfo();
13843
13844 if (Subtarget.hasISEL() &&
13845 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13846 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13847 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13848 SmallVector<MachineOperand, 2> Cond;
13849 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13850 MI.getOpcode() == PPC::SELECT_CC_I8)
13851 Cond.push_back(MI.getOperand(4));
13852 else
13853 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13854 Cond.push_back(MI.getOperand(1));
13855
13856 DebugLoc dl = MI.getDebugLoc();
13857 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13858 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13859 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13860 // The incoming instruction knows the destination vreg to set, the
13861 // condition code register to branch on, the true/false values to
13862 // select between, and a branch opcode to use.
13863
13864 // thisMBB:
13865 // ...
13866 // TrueVal = ...
13867 // cmpTY ccX, r1, r2
13868 // bCC sinkMBB
13869 // fallthrough --> copy0MBB
13870 MachineBasicBlock *thisMBB = BB;
13871 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13872 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13873 DebugLoc dl = MI.getDebugLoc();
13874 F->insert(It, copy0MBB);
13875 F->insert(It, sinkMBB);
13876
13877 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13878 copy0MBB->addLiveIn(PPC::CARRY);
13879 sinkMBB->addLiveIn(PPC::CARRY);
13880 }
13881
13882 // Set the call frame size on entry to the new basic blocks.
13883 // See https://reviews.llvm.org/D156113.
13884 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13885 copy0MBB->setCallFrameSize(CallFrameSize);
13886 sinkMBB->setCallFrameSize(CallFrameSize);
13887
13888 // Transfer the remainder of BB and its successor edges to sinkMBB.
13889 sinkMBB->splice(sinkMBB->begin(), BB,
13890 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13891 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13892
13893 // Next, add the true and fallthrough blocks as its successors.
13894 BB->addSuccessor(copy0MBB);
13895 BB->addSuccessor(sinkMBB);
13896
13897 if (IsSelect(MI)) {
13898 BuildMI(BB, dl, TII->get(PPC::BC))
13899 .addReg(MI.getOperand(1).getReg())
13900 .addMBB(sinkMBB);
13901 } else {
13902 unsigned SelectPred = MI.getOperand(4).getImm();
13903 BuildMI(BB, dl, TII->get(PPC::BCC))
13904 .addImm(SelectPred)
13905 .addReg(MI.getOperand(1).getReg())
13906 .addMBB(sinkMBB);
13907 }
13908
13909 // copy0MBB:
13910 // %FalseValue = ...
13911 // # fallthrough to sinkMBB
13912 BB = copy0MBB;
13913
13914 // Update machine-CFG edges
13915 BB->addSuccessor(sinkMBB);
13916
13917 // sinkMBB:
13918 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13919 // ...
13920 BB = sinkMBB;
13921 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13922 .addReg(MI.getOperand(3).getReg())
13923 .addMBB(copy0MBB)
13924 .addReg(MI.getOperand(2).getReg())
13925 .addMBB(thisMBB);
13926 } else if (MI.getOpcode() == PPC::ReadTB) {
13927 // To read the 64-bit time-base register on a 32-bit target, we read the
13928 // two halves. Should the counter have wrapped while it was being read, we
13929 // need to try again.
13930 // ...
13931 // readLoop:
13932 // mfspr Rx,TBU # load from TBU
13933 // mfspr Ry,TB # load from TB
13934 // mfspr Rz,TBU # load from TBU
13935 // cmpw crX,Rx,Rz # check if 'old'='new'
13936 // bne readLoop # branch if they're not equal
13937 // ...
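// A hypothetical timeline may help: if TB holds 0x00000001_FFFFFFFF at the
// first mfspr from TBU (Rx = 1) and the low half wraps before TB is read,
// the second TBU read returns 2, the cmpw sets NE and the loop retries, so
// {HiReg, LoReg} always form a consistent 64-bit snapshot.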
13938
13939 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13940 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13941 DebugLoc dl = MI.getDebugLoc();
13942 F->insert(It, readMBB);
13943 F->insert(It, sinkMBB);
13944
13945 // Transfer the remainder of BB and its successor edges to sinkMBB.
13946 sinkMBB->splice(sinkMBB->begin(), BB,
13947 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13948 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13949
13950 BB->addSuccessor(readMBB);
13951 BB = readMBB;
13952
13953 MachineRegisterInfo &RegInfo = F->getRegInfo();
13954 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13955 Register LoReg = MI.getOperand(0).getReg();
13956 Register HiReg = MI.getOperand(1).getReg();
13957
13958 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13959 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13960 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13961
13962 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13963
13964 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13965 .addReg(HiReg)
13966 .addReg(ReadAgainReg);
13967 BuildMI(BB, dl, TII->get(PPC::BCC))
13968 .addImm(PPC::PRED_NE)
13969 .addReg(CmpReg)
13970 .addMBB(readMBB);
13971
13972 BB->addSuccessor(readMBB);
13973 BB->addSuccessor(sinkMBB);
13974 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13975 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13976 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13977 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13978 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13979 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13980 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13981 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13982
13983 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13984 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13985 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13986 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13987 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13988 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13989 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13990 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13991
13992 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13993 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13994 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13995 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13996 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13997 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13998 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13999 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
14000
14001 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14002 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
14003 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14004 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14005 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14006 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14007 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14008 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14009
14010 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14011 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14012 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14013 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14014 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14015 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14016 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14017 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14018
14019 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14020 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14021 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14022 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14023 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14024 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14025 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14026 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14027
14028 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14029 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14030 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14031 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14032 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14033 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14034 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14035 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14036
14037 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14038 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14039 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14040 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14041 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14042 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14043 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14044 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14045
14046 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14047 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14048 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14049 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14050 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14051 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14052 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14053 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14054
14055 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14056 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14057 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14058 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14059 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14060 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14061 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14062 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14063
14064 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14065 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14066 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14067 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14068 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14069 BB = EmitAtomicBinary(MI, BB, 4, 0);
14070 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14071 BB = EmitAtomicBinary(MI, BB, 8, 0);
14072 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14073 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14074 (Subtarget.hasPartwordAtomics() &&
14075 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14076 (Subtarget.hasPartwordAtomics() &&
14077 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14078 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14079
14080 auto LoadMnemonic = PPC::LDARX;
14081 auto StoreMnemonic = PPC::STDCX;
14082 switch (MI.getOpcode()) {
14083 default:
14084 llvm_unreachable("Compare and swap of unknown size");
14085 case PPC::ATOMIC_CMP_SWAP_I8:
14086 LoadMnemonic = PPC::LBARX;
14087 StoreMnemonic = PPC::STBCX;
14088 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
14089 break;
14090 case PPC::ATOMIC_CMP_SWAP_I16:
14091 LoadMnemonic = PPC::LHARX;
14092 StoreMnemonic = PPC::STHCX;
14093 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
14094 break;
14095 case PPC::ATOMIC_CMP_SWAP_I32:
14096 LoadMnemonic = PPC::LWARX;
14097 StoreMnemonic = PPC::STWCX;
14098 break;
14099 case PPC::ATOMIC_CMP_SWAP_I64:
14100 LoadMnemonic = PPC::LDARX;
14101 StoreMnemonic = PPC::STDCX;
14102 break;
14103 }
14104 MachineRegisterInfo &RegInfo = F->getRegInfo();
14105 Register dest = MI.getOperand(0).getReg();
14106 Register ptrA = MI.getOperand(1).getReg();
14107 Register ptrB = MI.getOperand(2).getReg();
14108 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14109 Register oldval = MI.getOperand(3).getReg();
14110 Register newval = MI.getOperand(4).getReg();
14111 DebugLoc dl = MI.getDebugLoc();
14112
14113 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14114 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14115 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14116 F->insert(It, loop1MBB);
14117 F->insert(It, loop2MBB);
14118 F->insert(It, exitMBB);
14119 exitMBB->splice(exitMBB->begin(), BB,
14120 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14121 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14122
14123 // thisMBB:
14124 // ...
14125 // fallthrough --> loopMBB
14126 BB->addSuccessor(loop1MBB);
14127
14128 // loop1MBB:
14129 // l[bhwd]arx dest, ptr
14130 // cmp[wd] dest, oldval
14131 // bne- exitBB
14132 // loop2MBB:
14133 // st[bhwd]cx. newval, ptr
14134 // bne- loopMBB
14135 // b exitBB
14136 // exitBB:
14137 BB = loop1MBB;
14138 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14139 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14140 .addReg(dest)
14141 .addReg(oldval);
14142 BuildMI(BB, dl, TII->get(PPC::BCC))
14143 .addImm(PPC::PRED_NE)
14144 .addReg(CrReg)
14145 .addMBB(exitMBB);
14146 BB->addSuccessor(loop2MBB);
14147 BB->addSuccessor(exitMBB);
14148
14149 BB = loop2MBB;
14150 BuildMI(BB, dl, TII->get(StoreMnemonic))
14151 .addReg(newval)
14152 .addReg(ptrA)
14153 .addReg(ptrB);
14154 BuildMI(BB, dl, TII->get(PPC::BCC))
14155 .addImm(PPC::PRED_NE)
14156 .addReg(PPC::CR0)
14157 .addMBB(loop1MBB);
14158 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14159 BB->addSuccessor(loop1MBB);
14160 BB->addSuccessor(exitMBB);
14161
14162 // exitMBB:
14163 // ...
14164 BB = exitMBB;
14165 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14166 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14167 // We must use 64-bit registers for addresses when targeting 64-bit,
14168 // since we're actually doing arithmetic on them. Other registers
14169 // can be 32-bit.
14170 bool is64bit = Subtarget.isPPC64();
14171 bool isLittleEndian = Subtarget.isLittleEndian();
14172 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14173
14174 Register dest = MI.getOperand(0).getReg();
14175 Register ptrA = MI.getOperand(1).getReg();
14176 Register ptrB = MI.getOperand(2).getReg();
14177 Register oldval = MI.getOperand(3).getReg();
14178 Register newval = MI.getOperand(4).getReg();
14179 DebugLoc dl = MI.getDebugLoc();
14180
14181 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14182 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14183 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14184 F->insert(It, loop1MBB);
14185 F->insert(It, loop2MBB);
14186 F->insert(It, exitMBB);
14187 exitMBB->splice(exitMBB->begin(), BB,
14188 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14189 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14190
14191 MachineRegisterInfo &RegInfo = F->getRegInfo();
14192 const TargetRegisterClass *RC =
14193 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14194 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14195
14196 Register PtrReg = RegInfo.createVirtualRegister(RC);
14197 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14198 Register ShiftReg =
14199 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14200 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14201 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14202 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14203 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14204 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14205 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14206 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14207 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14208 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14209 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14210 Register Ptr1Reg;
14211 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14212 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14213 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14214 // thisMBB:
14215 // ...
14216 // fallthrough --> loopMBB
14217 BB->addSuccessor(loop1MBB);
14218
14219 // The 4-byte load must be aligned, while a char or short may be
14220 // anywhere in the word. Hence all this nasty bookkeeping code.
14221 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14222 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14223 // xori shift, shift1, 24 [16]
14224 // rlwinm ptr, ptr1, 0, 0, 29
14225 // slw newval2, newval, shift
14226 // slw oldval2, oldval, shift
14227 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14228 // slw mask, mask2, shift
14229 // and newval3, newval2, mask
14230 // and oldval3, oldval2, mask
14231 // loop1MBB:
14232 // lwarx tmpDest, ptr
14233 // and tmp, tmpDest, mask
14234 // cmpw tmp, oldval3
14235 // bne- exitBB
14236 // loop2MBB:
14237 // andc tmp2, tmpDest, mask
14238 // or tmp4, tmp2, newval3
14239 // stwcx. tmp4, ptr
14240 // bne- loop1MBB
14241 // b exitBB
14242 // exitBB:
14243 // srw dest, tmpDest, shift
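// A worked instance of the bookkeeping above (hypothetical big-endian address,
// 8-bit case): for ptr1 = 0x1002, rlwinm shift1, ptr1, 3, 27, 28 yields
// shift1 = 16, xori shift, shift1, 24 yields shift = 8, and
// rlwinm ptr, ptr1, 0, 0, 29 yields ptr = 0x1000; the byte is then handled in
// bits 15:8 of the aligned word under the mask 0xFF << 8.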
14244 if (ptrA != ZeroReg) {
14245 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14246 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14247 .addReg(ptrA)
14248 .addReg(ptrB);
14249 } else {
14250 Ptr1Reg = ptrB;
14251 }
14252
14253 // We need to use a 32-bit subregister to avoid a register class mismatch in
14254 // 64-bit mode.
14255 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14256 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
14257 .addImm(3)
14258 .addImm(27)
14259 .addImm(is8bit ? 28 : 27);
14260 if (!isLittleEndian)
14261 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14262 .addReg(Shift1Reg)
14263 .addImm(is8bit ? 24 : 16);
14264 if (is64bit)
14265 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14266 .addReg(Ptr1Reg)
14267 .addImm(0)
14268 .addImm(61);
14269 else
14270 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14271 .addReg(Ptr1Reg)
14272 .addImm(0)
14273 .addImm(0)
14274 .addImm(29);
14275 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14276 .addReg(newval)
14277 .addReg(ShiftReg);
14278 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14279 .addReg(oldval)
14280 .addReg(ShiftReg);
14281 if (is8bit)
14282 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14283 else {
14284 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14285 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14286 .addReg(Mask3Reg)
14287 .addImm(65535);
14288 }
14289 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14290 .addReg(Mask2Reg)
14291 .addReg(ShiftReg);
14292 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14293 .addReg(NewVal2Reg)
14294 .addReg(MaskReg);
14295 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14296 .addReg(OldVal2Reg)
14297 .addReg(MaskReg);
14298
14299 BB = loop1MBB;
14300 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14301 .addReg(ZeroReg)
14302 .addReg(PtrReg);
14303 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14304 .addReg(TmpDestReg)
14305 .addReg(MaskReg);
14306 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14307 .addReg(TmpReg)
14308 .addReg(OldVal3Reg);
14309 BuildMI(BB, dl, TII->get(PPC::BCC))
14310 .addImm(PPC::PRED_NE)
14311 .addReg(CrReg)
14312 .addMBB(exitMBB);
14313 BB->addSuccessor(loop2MBB);
14314 BB->addSuccessor(exitMBB);
14315
14316 BB = loop2MBB;
14317 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14318 .addReg(TmpDestReg)
14319 .addReg(MaskReg);
14320 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14321 .addReg(Tmp2Reg)
14322 .addReg(NewVal3Reg);
14323 BuildMI(BB, dl, TII->get(PPC::STWCX))
14324 .addReg(Tmp4Reg)
14325 .addReg(ZeroReg)
14326 .addReg(PtrReg);
14327 BuildMI(BB, dl, TII->get(PPC::BCC))
14328 .addImm(PPC::PRED_NE)
14329 .addReg(PPC::CR0)
14330 .addMBB(loop1MBB);
14331 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14332 BB->addSuccessor(loop1MBB);
14333 BB->addSuccessor(exitMBB);
14334
14335 // exitMBB:
14336 // ...
14337 BB = exitMBB;
14338 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14339 .addReg(TmpReg)
14340 .addReg(ShiftReg);
14341 } else if (MI.getOpcode() == PPC::FADDrtz) {
14342 // This pseudo performs an FADD with rounding mode temporarily forced
14343 // to round-to-zero. We emit this via custom inserter since the FPSCR
14344 // is not modeled at the SelectionDAG level.
14345 Register Dest = MI.getOperand(0).getReg();
14346 Register Src1 = MI.getOperand(1).getReg();
14347 Register Src2 = MI.getOperand(2).getReg();
14348 DebugLoc dl = MI.getDebugLoc();
14349
14350 MachineRegisterInfo &RegInfo = F->getRegInfo();
14351 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14352
14353 // Save FPSCR value.
14354 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14355
14356 // Set rounding mode to round-to-zero.
14357 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14358 .addImm(31)
14359 .addReg(PPC::RM, RegState::ImplicitDefine);
14360
14361 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14362 .addImm(30)
14363 .addReg(PPC::RM, RegState::ImplicitDefine);
14364
14365 // Perform addition.
14366 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14367 .addReg(Src1)
14368 .addReg(Src2);
14369 if (MI.getFlag(MachineInstr::NoFPExcept))
14370 MIB.setFlag(MachineInstr::NoFPExcept);
14371
14372 // Restore FPSCR value.
14373 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
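// Roughly, the emitted sequence is (virtual registers shown symbolically):
//   mffs   MFFSReg           ; save FPSCR
//   mtfsb1 31                ; RN low bit <- 1
//   mtfsb0 30                ; RN high bit <- 0, giving RN = 01 (round toward zero)
//   fadd   Dest, Src1, Src2
//   mtfsf  1, MFFSReg        ; restore the original rounding-mode field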
14374 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14375 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14376 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14377 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14378 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14379 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14380 ? PPC::ANDI8_rec
14381 : PPC::ANDI_rec;
14382 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14383 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14384
14385 MachineRegisterInfo &RegInfo = F->getRegInfo();
14386 Register Dest = RegInfo.createVirtualRegister(
14387 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14388
14389 DebugLoc Dl = MI.getDebugLoc();
14390 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14391 .addReg(MI.getOperand(1).getReg())
14392 .addImm(1);
14393 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14394 MI.getOperand(0).getReg())
14395 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14396 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14397 DebugLoc Dl = MI.getDebugLoc();
14398 MachineRegisterInfo &RegInfo = F->getRegInfo();
14399 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14400 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14401 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14402 MI.getOperand(0).getReg())
14403 .addReg(CRReg);
14404 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14405 DebugLoc Dl = MI.getDebugLoc();
14406 unsigned Imm = MI.getOperand(1).getImm();
14407 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14408 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14409 MI.getOperand(0).getReg())
14410 .addReg(PPC::CR0EQ);
14411 } else if (MI.getOpcode() == PPC::SETRNDi) {
14412 DebugLoc dl = MI.getDebugLoc();
14413 Register OldFPSCRReg = MI.getOperand(0).getReg();
14414
14415 // Save FPSCR value.
14416 if (MRI.use_empty(OldFPSCRReg))
14417 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14418 else
14419 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14420
14421 // The floating-point rounding mode is in bits 62:63 of FPSCR, and has
14422 // the following settings:
14423 // 00 Round to nearest
14424 // 01 Round to 0
14425 // 10 Round to +inf
14426 // 11 Round to -inf
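// For example, SETRNDi with an immediate of 2 (round to +inf) is expanded
// below into mtfsb0 31 (bit 63 <- 0) followed by mtfsb1 30 (bit 62 <- 1).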
14427
14428 // When the operand is an immediate, use its two least significant bits to
14429 // set bits 62:63 of FPSCR.
14430 unsigned Mode = MI.getOperand(1).getImm();
14431 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14432 .addImm(31)
14433 .addReg(PPC::RM, RegState::ImplicitDefine);
14434
14435 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14436 .addImm(30)
14437 .addReg(PPC::RM, RegState::ImplicitDefine);
14438 } else if (MI.getOpcode() == PPC::SETRND) {
14439 DebugLoc dl = MI.getDebugLoc();
14440
14441 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14442 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14443 // If the target doesn't have DirectMove, we use the stack to do the
14444 // conversion, because the target doesn't have instructions like mtvsrd
14445 // or mfvsrd that could do the conversion directly.
14446 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14447 if (Subtarget.hasDirectMove()) {
14448 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14449 .addReg(SrcReg);
14450 } else {
14451 // Use stack to do the register copy.
14452 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14453 MachineRegisterInfo &RegInfo = F->getRegInfo();
14454 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14455 if (RC == &PPC::F8RCRegClass) {
14456 // Copy register from F8RCRegClass to G8RCRegclass.
14457 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14458 "Unsupported RegClass.");
14459
14460 StoreOp = PPC::STFD;
14461 LoadOp = PPC::LD;
14462 } else {
14463 // Copy register from G8RCRegClass to F8RCRegclass.
14464 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14465 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14466 "Unsupported RegClass.");
14467 }
14468
14469 MachineFrameInfo &MFI = F->getFrameInfo();
14470 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14471
14472 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14473 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14474 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14475 MFI.getObjectAlign(FrameIdx));
14476
14477 // Store the SrcReg into the stack.
14478 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14479 .addReg(SrcReg)
14480 .addImm(0)
14481 .addFrameIndex(FrameIdx)
14482 .addMemOperand(MMOStore);
14483
14484 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14485 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14486 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14487 MFI.getObjectAlign(FrameIdx));
14488
14489 // Load from the stack where SrcReg is stored, and save to DestReg,
14490 // so we have done the RegClass conversion from RegClass::SrcReg to
14491 // RegClass::DestReg.
14492 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14493 .addImm(0)
14494 .addFrameIndex(FrameIdx)
14495 .addMemOperand(MMOLoad);
14496 }
14497 };
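// Without DirectMove the copy degenerates into a store/reload through the
// stack slot created above; for the FPR-to-GPR direction this is roughly
// (registers and offset chosen for illustration only):
//   stfd f1, off(r1)
//   ld   r3, off(r1)
// with the std/lfd pair used for the opposite direction.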
14498
14499 Register OldFPSCRReg = MI.getOperand(0).getReg();
14500
14501 // Save FPSCR value.
14502 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14503
14504 // When the operand is a gprc register, use its two least significant bits
14505 // and the mtfsf instruction to set bits 62:63 of FPSCR.
14506 //
14507 // copy OldFPSCRTmpReg, OldFPSCRReg
14508 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14509 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14510 // copy NewFPSCRReg, NewFPSCRTmpReg
14511 // mtfsf 255, NewFPSCRReg
14512 MachineOperand SrcOp = MI.getOperand(1);
14513 MachineRegisterInfo &RegInfo = F->getRegInfo();
14514 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14515
14516 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14517
14518 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14519 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14520
14521 // The first operand of INSERT_SUBREG should be a register which has
14522 // subregisters; since we only care about its RegClass, we use an
14523 // IMPLICIT_DEF register.
14524 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14525 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14526 .addReg(ImDefReg)
14527 .add(SrcOp)
14528 .addImm(1);
14529
14530 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14531 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14532 .addReg(OldFPSCRTmpReg)
14533 .addReg(ExtSrcReg)
14534 .addImm(0)
14535 .addImm(62);
14536
14537 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14538 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14539
14540 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
14541 // 32:63 of FPSCR.
14542 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14543 .addImm(255)
14544 .addReg(NewFPSCRReg)
14545 .addImm(0)
14546 .addImm(0);
14547 } else if (MI.getOpcode() == PPC::SETFLM) {
14548 DebugLoc Dl = MI.getDebugLoc();
14549
14550 // Result of setflm is previous FPSCR content, so we need to save it first.
14551 Register OldFPSCRReg = MI.getOperand(0).getReg();
14552 if (MRI.use_empty(OldFPSCRReg))
14553 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14554 else
14555 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14556
14557 // Put bits 32:63 of NewFPSCRReg into FPSCR.
14558 Register NewFPSCRReg = MI.getOperand(1).getReg();
14559 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14560 .addImm(255)
14561 .addReg(NewFPSCRReg)
14562 .addImm(0)
14563 .addImm(0);
14564 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14565 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14566 return emitProbedAlloca(MI, BB);
14567 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14568 DebugLoc DL = MI.getDebugLoc();
14569 Register Src = MI.getOperand(2).getReg();
14570 Register Lo = MI.getOperand(0).getReg();
14571 Register Hi = MI.getOperand(1).getReg();
14572 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14573 .addDef(Lo)
14574 .addUse(Src, 0, PPC::sub_gp8_x1);
14575 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14576 .addDef(Hi)
14577 .addUse(Src, 0, PPC::sub_gp8_x0);
14578 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14579 MI.getOpcode() == PPC::STQX_PSEUDO) {
14580 DebugLoc DL = MI.getDebugLoc();
14581 // Ptr is used as the ptr_rc_no_r0 part
14582 // of LQ/STQ's memory operand and holds the result of adding RA and RB,
14583 // so it has to be g8rc_and_g8rc_nox0.
14584 Register Ptr =
14585 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14586 Register Val = MI.getOperand(0).getReg();
14587 Register RA = MI.getOperand(1).getReg();
14588 Register RB = MI.getOperand(2).getReg();
14589 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14590 BuildMI(*BB, MI, DL,
14591 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14592 : TII->get(PPC::STQ))
14593 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14594 .addImm(0)
14595 .addReg(Ptr);
14596 } else {
14597 llvm_unreachable("Unexpected instr type to insert");
14598 }
14599
14600 MI.eraseFromParent(); // The pseudo instruction is gone now.
14601 return BB;
14602}
14603
14604//===----------------------------------------------------------------------===//
14605// Target Optimization Hooks
14606//===----------------------------------------------------------------------===//
14607
14608static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14609 // For the estimates, convergence is quadratic, so we essentially double the
14610 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14611 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14612 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
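// Concretely: squaring the error each step, 2^-14 becomes ~2^-28 after one
// step (enough for f32's 24-bit significand) and ~2^-56 after two (enough
// for f64's 53 bits); starting from 2^-5 instead requires three steps
// (~2^-40) for f32 and four (~2^-80) for f64, which is what the values
// computed below amount to.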
14613 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14614 if (VT.getScalarType() == MVT::f64)
14615 RefinementSteps++;
14616 return RefinementSteps;
14617}
14618
14619SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14620 const DenormalMode &Mode) const {
14621 // We only have VSX Vector Test for software Square Root.
14622 EVT VT = Op.getValueType();
14623 if (!isTypeLegal(MVT::i1) ||
14624 (VT != MVT::f64 &&
14625 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14626 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14627
14628 SDLoc DL(Op);
14629 // The output register of FTSQRT is CR field.
14630 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14631 // ftsqrt BF,FRB
14632 // Let e_b be the unbiased exponent of the double-precision
14633 // floating-point operand in register FRB.
14634 // fe_flag is set to 1 if either of the following conditions occurs.
14635 // - The double-precision floating-point operand in register FRB is a zero,
14636 // a NaN, or an infinity, or a negative value.
14637 // - e_b is less than or equal to -970.
14638 // Otherwise fe_flag is set to 0.
14639 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14640 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14641 // exponent is less than -970)
14642 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14643 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14644 FTSQRT, SRIdxVal),
14645 0);
14646}
14647
14648SDValue
14649PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14650 SelectionDAG &DAG) const {
14651 // We only have VSX Vector Square Root.
14652 EVT VT = Op.getValueType();
14653 if (VT != MVT::f64 &&
14654 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14655 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14656
14657 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14658}
14659
14660SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14661 int Enabled, int &RefinementSteps,
14662 bool &UseOneConstNR,
14663 bool Reciprocal) const {
14664 EVT VT = Operand.getValueType();
14665 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14666 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14667 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14668 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14669 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14670 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14671
14672 // The Newton-Raphson computation with a single constant does not provide
14673 // enough accuracy on some CPUs.
14674 UseOneConstNR = !Subtarget.needsTwoConstNR();
14675 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14676 }
14677 return SDValue();
14678}
14679
14680SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14681 int Enabled,
14682 int &RefinementSteps) const {
14683 EVT VT = Operand.getValueType();
14684 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14685 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14686 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14687 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14688 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14689 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14690 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14691 }
14692 return SDValue();
14693}
14694
14695 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14696 // Note: This functionality is used only when unsafe-fp-math is enabled, and
14697 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14698 // enabled for division), this functionality is redundant with the default
14699 // combiner logic (once the division -> reciprocal/multiply transformation
14700 // has taken place). As a result, this matters more for older cores than for
14701 // newer ones.
14702
14703 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14704 // reciprocal if there are two or more FDIVs (for embedded cores with only
14705 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
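// As an illustration of these thresholds (hypothetical values a, b, c, d):
// on a generic OOO core a/d together with b/d is left alone, while
// a/d, b/d, c/d is rewritten as r = 1/d; a*r; b*r; c*r. An embedded core
// with a single FP pipeline already profits from the two-division case.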
14706 switch (Subtarget.getCPUDirective()) {
14707 default:
14708 return 3;
14709 case PPC::DIR_440:
14710 case PPC::DIR_A2:
14711 case PPC::DIR_E500:
14712 case PPC::DIR_E500mc:
14713 case PPC::DIR_E5500:
14714 return 2;
14715 }
14716}
14717
14718// isConsecutiveLSLoc needs to work even if all adds have not yet been
14719// collapsed, and so we need to look through chains of them.
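// For example, a location of the form (add (add FI#1, 16), 8) resolves to
// Base = FI#1 with Offset increased by 24 (values chosen for illustration).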
14720 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14721 int64_t& Offset, SelectionDAG &DAG) {
14722 if (DAG.isBaseWithConstantOffset(Loc)) {
14723 Base = Loc.getOperand(0);
14724 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14725
14726 // The base might itself be a base plus an offset, and if so, accumulate
14727 // that as well.
14728 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14729 }
14730}
14731
14732 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14733 unsigned Bytes, int Dist,
14734 SelectionDAG &DAG) {
14735 if (VT.getSizeInBits() / 8 != Bytes)
14736 return false;
14737
14738 SDValue BaseLoc = Base->getBasePtr();
14739 if (Loc.getOpcode() == ISD::FrameIndex) {
14740 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14741 return false;
14742 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14743 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14744 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14745 int FS = MFI.getObjectSize(FI);
14746 int BFS = MFI.getObjectSize(BFI);
14747 if (FS != BFS || FS != (int)Bytes) return false;
14748 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14749 }
14750
14751 SDValue Base1 = Loc, Base2 = BaseLoc;
14752 int64_t Offset1 = 0, Offset2 = 0;
14753 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14754 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14755 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14756 return true;
14757
14758 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14759 const GlobalValue *GV1 = nullptr;
14760 const GlobalValue *GV2 = nullptr;
14761 Offset1 = 0;
14762 Offset2 = 0;
14763 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14764 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14765 if (isGA1 && isGA2 && GV1 == GV2)
14766 return Offset1 == (Offset2 + Dist*Bytes);
14767 return false;
14768}
14769
14770// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14771// not enforce equality of the chain operands.
14772 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14773 unsigned Bytes, int Dist,
14774 SelectionDAG &DAG) {
14775 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14776 EVT VT = LS->getMemoryVT();
14777 SDValue Loc = LS->getBasePtr();
14778 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14779 }
14780
14781 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14782 EVT VT;
14783 switch (N->getConstantOperandVal(1)) {
14784 default: return false;
14785 case Intrinsic::ppc_altivec_lvx:
14786 case Intrinsic::ppc_altivec_lvxl:
14787 case Intrinsic::ppc_vsx_lxvw4x:
14788 case Intrinsic::ppc_vsx_lxvw4x_be:
14789 VT = MVT::v4i32;
14790 break;
14791 case Intrinsic::ppc_vsx_lxvd2x:
14792 case Intrinsic::ppc_vsx_lxvd2x_be:
14793 VT = MVT::v2f64;
14794 break;
14795 case Intrinsic::ppc_altivec_lvebx:
14796 VT = MVT::i8;
14797 break;
14798 case Intrinsic::ppc_altivec_lvehx:
14799 VT = MVT::i16;
14800 break;
14801 case Intrinsic::ppc_altivec_lvewx:
14802 VT = MVT::i32;
14803 break;
14804 }
14805
14806 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14807 }
14808
14809 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14810 EVT VT;
14811 switch (N->getConstantOperandVal(1)) {
14812 default: return false;
14813 case Intrinsic::ppc_altivec_stvx:
14814 case Intrinsic::ppc_altivec_stvxl:
14815 case Intrinsic::ppc_vsx_stxvw4x:
14816 VT = MVT::v4i32;
14817 break;
14818 case Intrinsic::ppc_vsx_stxvd2x:
14819 VT = MVT::v2f64;
14820 break;
14821 case Intrinsic::ppc_vsx_stxvw4x_be:
14822 VT = MVT::v4i32;
14823 break;
14824 case Intrinsic::ppc_vsx_stxvd2x_be:
14825 VT = MVT::v2f64;
14826 break;
14827 case Intrinsic::ppc_altivec_stvebx:
14828 VT = MVT::i8;
14829 break;
14830 case Intrinsic::ppc_altivec_stvehx:
14831 VT = MVT::i16;
14832 break;
14833 case Intrinsic::ppc_altivec_stvewx:
14834 VT = MVT::i32;
14835 break;
14836 }
14837
14838 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14839 }
14840
14841 return false;
14842}
14843
14844 // Return true if there is a nearby consecutive load to the one provided
14845 // (regardless of alignment). We search up and down the chain, looking through
14846// token factors and other loads (but nothing else). As a result, a true result
14847// indicates that it is safe to create a new consecutive load adjacent to the
14848// load provided.
14849 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14850 SDValue Chain = LD->getChain();
14851 EVT VT = LD->getMemoryVT();
14852
14853 SmallPtrSet<SDNode *, 16> LoadRoots;
14854 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14855 SmallPtrSet<SDNode *, 16> Visited;
14856
14857 // First, search up the chain, branching to follow all token-factor operands.
14858 // If we find a consecutive load, then we're done, otherwise, record all
14859 // nodes just above the top-level loads and token factors.
14860 while (!Queue.empty()) {
14861 SDNode *ChainNext = Queue.pop_back_val();
14862 if (!Visited.insert(ChainNext).second)
14863 continue;
14864
14865 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14866 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14867 return true;
14868
14869 if (!Visited.count(ChainLD->getChain().getNode()))
14870 Queue.push_back(ChainLD->getChain().getNode());
14871 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14872 for (const SDUse &O : ChainNext->ops())
14873 if (!Visited.count(O.getNode()))
14874 Queue.push_back(O.getNode());
14875 } else
14876 LoadRoots.insert(ChainNext);
14877 }
14878
14879 // Second, search down the chain, starting from the top-level nodes recorded
14880 // in the first phase. These top-level nodes are the nodes just above all
14881 // loads and token factors. Starting with their uses, recursively look through
14882 // all loads (just the chain uses) and token factors to find a consecutive
14883 // load.
14884 Visited.clear();
14885 Queue.clear();
14886
14887 for (SDNode *I : LoadRoots) {
14888 Queue.push_back(I);
14889
14890 while (!Queue.empty()) {
14891 SDNode *LoadRoot = Queue.pop_back_val();
14892 if (!Visited.insert(LoadRoot).second)
14893 continue;
14894
14895 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14896 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14897 return true;
14898
14899 for (SDNode *U : LoadRoot->users())
14900 if (((isa<MemSDNode>(U) &&
14901 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14902 U->getOpcode() == ISD::TokenFactor) &&
14903 !Visited.count(U))
14904 Queue.push_back(U);
14905 }
14906 }
14907
14908 return false;
14909}
14910
14911/// This function is called when we have proved that a SETCC node can be replaced
14912/// by subtraction (and other supporting instructions) so that the result of
14913/// comparison is kept in a GPR instead of CR. This function is purely for
14914/// codegen purposes and has some flags to guide the codegen process.
14915static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14916 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14917 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14918
14919 // Zero extend the operands to the largest legal integer. Originally, they
14920 // must be of a strictly smaller size.
14921 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14922 DAG.getConstant(Size, DL, MVT::i32));
14923 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14924 DAG.getConstant(Size, DL, MVT::i32));
14925
14926 // Swap if needed. Depends on the condition code.
14927 if (Swap)
14928 std::swap(Op0, Op1);
14929
14930 // Subtract extended integers.
14931 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14932
14933 // Move the sign bit to the least significant position and zero out the rest.
14934 // Now the least significant bit carries the result of original comparison.
14935 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14936 DAG.getConstant(Size - 1, DL, MVT::i32));
14937 auto Final = Shifted;
14938
14939 // Complement the result if needed. Based on the condition code.
14940 if (Complement)
14941 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14942 DAG.getConstant(1, DL, MVT::i64));
14943
14944 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14945}
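// A sketch of how the subtraction above encodes the comparison, assuming
// 32-bit operands widened to i64 (Size = 64): for setult, sub = zext(a) -
// zext(b) has bit 63 set exactly when a < b unsigned, so srl by 63 leaves
// the answer in bit 0; setuge complements that bit, and setule/setugt are
// obtained by swapping the operands first.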
14946
14947SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14948 DAGCombinerInfo &DCI) const {
14949 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14950
14951 SelectionDAG &DAG = DCI.DAG;
14952 SDLoc DL(N);
14953
14954 // The size of the integers being compared has a critical role in the
14955 // following analysis, so we prefer to do this when all types are legal.
14956 if (!DCI.isAfterLegalizeDAG())
14957 return SDValue();
14958
14959 // If all users of SETCC extend its value to a legal integer type
14960 // then we replace SETCC with a subtraction
14961 for (const SDNode *U : N->users())
14962 if (U->getOpcode() != ISD::ZERO_EXTEND)
14963 return SDValue();
14964
14965 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14966 auto OpSize = N->getOperand(0).getValueSizeInBits();
14967
14968 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14969
14970 if (OpSize < Size) {
14971 switch (CC) {
14972 default: break;
14973 case ISD::SETULT:
14974 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14975 case ISD::SETULE:
14976 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14977 case ISD::SETUGT:
14978 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14979 case ISD::SETUGE:
14980 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14981 }
14982 }
14983
14984 return SDValue();
14985}
14986
14987SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14988 DAGCombinerInfo &DCI) const {
14989 SelectionDAG &DAG = DCI.DAG;
14990 SDLoc dl(N);
14991
14992 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14993 // If we're tracking CR bits, we need to be careful that we don't have:
14994 // trunc(binary-ops(zext(x), zext(y)))
14995 // or
14996 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14997 // such that we're unnecessarily moving things into GPRs when it would be
14998 // better to keep them in CR bits.
14999
15000 // Note that trunc here can be an actual i1 trunc, or can be the effective
15001 // truncation that comes from a setcc or select_cc.
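// For instance (illustrative IR, not from a test case), i1 (trunc (xor
// (zext i1 %a), (zext i1 %b))) can be computed as (xor %a, %b) directly on
// CR bits, avoiding the round trip through a GPR.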
15002 if (N->getOpcode() == ISD::TRUNCATE &&
15003 N->getValueType(0) != MVT::i1)
15004 return SDValue();
15005
15006 if (N->getOperand(0).getValueType() != MVT::i32 &&
15007 N->getOperand(0).getValueType() != MVT::i64)
15008 return SDValue();
15009
15010 if (N->getOpcode() == ISD::SETCC ||
15011 N->getOpcode() == ISD::SELECT_CC) {
15012 // If we're looking at a comparison, then we need to make sure that the
15013 // high bits (all except for the first) don't affect the result.
15014 ISD::CondCode CC =
15015 cast<CondCodeSDNode>(N->getOperand(
15016 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15017 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15018
15019 if (ISD::isSignedIntSetCC(CC)) {
15020 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15021 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15022 return SDValue();
15023 } else if (ISD::isUnsignedIntSetCC(CC)) {
15024 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15025 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15026 !DAG.MaskedValueIsZero(N->getOperand(1),
15027 APInt::getHighBitsSet(OpBits, OpBits-1)))
15028 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15029 : SDValue());
15030 } else {
15031 // This is neither a signed nor an unsigned comparison, just make sure
15032 // that the high bits are equal.
15033 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15034 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15035
15036 // We don't really care about what is known about the first bit (if
15037 // anything), so pretend that it is known zero for both to ensure they can
15038 // be compared as constants.
15039 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15040 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15041
15042 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15043 Op1Known.getConstant() != Op2Known.getConstant())
15044 return SDValue();
15045 }
15046 }
15047
15048 // We now know that the higher-order bits are irrelevant, we just need to
15049 // make sure that all of the intermediate operations are bit operations, and
15050 // all inputs are extensions.
15051 if (N->getOperand(0).getOpcode() != ISD::AND &&
15052 N->getOperand(0).getOpcode() != ISD::OR &&
15053 N->getOperand(0).getOpcode() != ISD::XOR &&
15054 N->getOperand(0).getOpcode() != ISD::SELECT &&
15055 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15056 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15057 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15058 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15059 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15060 return SDValue();
15061
15062 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15063 N->getOperand(1).getOpcode() != ISD::AND &&
15064 N->getOperand(1).getOpcode() != ISD::OR &&
15065 N->getOperand(1).getOpcode() != ISD::XOR &&
15066 N->getOperand(1).getOpcode() != ISD::SELECT &&
15067 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15068 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15069 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15070 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15071 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15072 return SDValue();
15073
15074 SmallVector<SDValue, 4> Inputs;
15075 SmallVector<SDValue, 8> BinOps, PromOps;
15076 SmallPtrSet<SDNode *, 16> Visited;
15077
15078 for (unsigned i = 0; i < 2; ++i) {
15079 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15080 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15081 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15082 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15083 isa<ConstantSDNode>(N->getOperand(i)))
15084 Inputs.push_back(N->getOperand(i));
15085 else
15086 BinOps.push_back(N->getOperand(i));
15087
15088 if (N->getOpcode() == ISD::TRUNCATE)
15089 break;
15090 }
15091
15092 // Visit all inputs, collect all binary operations (and, or, xor and
15093 // select) that are all fed by extensions.
15094 while (!BinOps.empty()) {
15095 SDValue BinOp = BinOps.pop_back_val();
15096
15097 if (!Visited.insert(BinOp.getNode()).second)
15098 continue;
15099
15100 PromOps.push_back(BinOp);
15101
15102 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15103 // The condition of the select is not promoted.
15104 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15105 continue;
15106 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15107 continue;
15108
15109 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15110 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15111 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15112 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15113 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15114 Inputs.push_back(BinOp.getOperand(i));
15115 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15116 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15117 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15118 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15119 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15120 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15121 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15122 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15123 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15124 BinOps.push_back(BinOp.getOperand(i));
15125 } else {
15126 // We have an input that is not an extension or another binary
15127 // operation; we'll abort this transformation.
15128 return SDValue();
15129 }
15130 }
15131 }
15132
15133 // Make sure that this is a self-contained cluster of operations (which
15134 // is not quite the same thing as saying that everything has only one
15135 // use).
15136 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15137 if (isa<ConstantSDNode>(Inputs[i]))
15138 continue;
15139
15140 for (const SDNode *User : Inputs[i].getNode()->users()) {
15141 if (User != N && !Visited.count(User))
15142 return SDValue();
15143
15144 // Make sure that we're not going to promote the non-output-value
15145 // operand(s) or SELECT or SELECT_CC.
15146 // FIXME: Although we could sometimes handle this, and it does occur in
15147 // practice that one of the condition inputs to the select is also one of
15148 // the outputs, we currently can't deal with this.
15149 if (User->getOpcode() == ISD::SELECT) {
15150 if (User->getOperand(0) == Inputs[i])
15151 return SDValue();
15152 } else if (User->getOpcode() == ISD::SELECT_CC) {
15153 if (User->getOperand(0) == Inputs[i] ||
15154 User->getOperand(1) == Inputs[i])
15155 return SDValue();
15156 }
15157 }
15158 }
15159
15160 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15161 for (const SDNode *User : PromOps[i].getNode()->users()) {
15162 if (User != N && !Visited.count(User))
15163 return SDValue();
15164
15165 // Make sure that we're not going to promote the non-output-value
15166 // operand(s) or SELECT or SELECT_CC.
15167 // FIXME: Although we could sometimes handle this, and it does occur in
15168 // practice that one of the condition inputs to the select is also one of
15169 // the outputs, we currently can't deal with this.
15170 if (User->getOpcode() == ISD::SELECT) {
15171 if (User->getOperand(0) == PromOps[i])
15172 return SDValue();
15173 } else if (User->getOpcode() == ISD::SELECT_CC) {
15174 if (User->getOperand(0) == PromOps[i] ||
15175 User->getOperand(1) == PromOps[i])
15176 return SDValue();
15177 }
15178 }
15179 }
15180
15181 // Replace all inputs with the extension operand.
15182 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15183 // Constants may have users outside the cluster of to-be-promoted nodes,
15184 // and so we need to replace those as we do the promotions.
15185 if (isa<ConstantSDNode>(Inputs[i]))
15186 continue;
15187 else
15188 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15189 }
15190
15191 std::list<HandleSDNode> PromOpHandles;
15192 for (auto &PromOp : PromOps)
15193 PromOpHandles.emplace_back(PromOp);
15194
15195 // Replace all operations (these are all the same, but have a different
15196 // (i1) return type). DAG.getNode will validate that the types of
15197 // a binary operator match, so go through the list in reverse so that
15198 // we've likely promoted both operands first. Any intermediate truncations or
15199 // extensions disappear.
15200 while (!PromOpHandles.empty()) {
15201 SDValue PromOp = PromOpHandles.back().getValue();
15202 PromOpHandles.pop_back();
15203
15204 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15205 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15206 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15207 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15208 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15209 PromOp.getOperand(0).getValueType() != MVT::i1) {
15210 // The operand is not yet ready (see comment below).
15211 PromOpHandles.emplace_front(PromOp);
15212 continue;
15213 }
15214
15215 SDValue RepValue = PromOp.getOperand(0);
15216 if (isa<ConstantSDNode>(RepValue))
15217 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15218
15219 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15220 continue;
15221 }
15222
15223 unsigned C;
15224 switch (PromOp.getOpcode()) {
15225 default: C = 0; break;
15226 case ISD::SELECT: C = 1; break;
15227 case ISD::SELECT_CC: C = 2; break;
15228 }
15229
15230 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15231 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15232 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15233 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15234 // The to-be-promoted operands of this node have not yet been
15235 // promoted (this should be rare because we're going through the
15236 // list backward, but if one of the operands has several users in
15237 // this cluster of to-be-promoted nodes, it is possible).
15238 PromOpHandles.emplace_front(PromOp);
15239 continue;
15240 }
15241
15242 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15243
15244 // If there are any constant inputs, make sure they're replaced now.
15245 for (unsigned i = 0; i < 2; ++i)
15246 if (isa<ConstantSDNode>(Ops[C+i]))
15247 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15248
15249 DAG.ReplaceAllUsesOfValueWith(PromOp,
15250 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15251 }
15252
15253 // Now we're left with the initial truncation itself.
15254 if (N->getOpcode() == ISD::TRUNCATE)
15255 return N->getOperand(0);
15256
15257 // Otherwise, this is a comparison. The operands to be compared have just
15258 // changed type (to i1), but everything else is the same.
15259 return SDValue(N, 0);
15260}
15261
15262SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15263 DAGCombinerInfo &DCI) const {
15264 SelectionDAG &DAG = DCI.DAG;
15265 SDLoc dl(N);
15266
15267 // If we're tracking CR bits, we need to be careful that we don't have:
15268 // zext(binary-ops(trunc(x), trunc(y)))
15269 // or
15270 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15271 // such that we're unnecessarily moving things into CR bits that can more
15272 // efficiently stay in GPRs. Note that if we're not certain that the high
15273 // bits are set as required by the final extension, we still may need to do
15274 // some masking to get the proper behavior.
15275
15276 // This same functionality is important on PPC64 when dealing with
15277 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15278 // the return values of functions. Because it is so similar, it is handled
15279 // here as well.
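// For instance (illustrative IR, not from a test case), i64 (zext (and
// (trunc i64 %x to i32), (trunc i64 %y to i32))) can stay in 64-bit GPRs as
// (and %x, %y), with an explicit mask of the high bits only when the
// extension semantics require it.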
15280
15281 if (N->getValueType(0) != MVT::i32 &&
15282 N->getValueType(0) != MVT::i64)
15283 return SDValue();
15284
15285 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15286 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15287 return SDValue();
15288
15289 if (N->getOperand(0).getOpcode() != ISD::AND &&
15290 N->getOperand(0).getOpcode() != ISD::OR &&
15291 N->getOperand(0).getOpcode() != ISD::XOR &&
15292 N->getOperand(0).getOpcode() != ISD::SELECT &&
15293 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15294 return SDValue();
15295
15296 SmallVector<SDValue, 4> Inputs;
15297 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15298 SmallPtrSet<SDNode *, 16> Visited;
15299
15300 // Visit all inputs, collect all binary operations (and, or, xor and
15301 // select) that are all fed by truncations.
15302 while (!BinOps.empty()) {
15303 SDValue BinOp = BinOps.pop_back_val();
15304
15305 if (!Visited.insert(BinOp.getNode()).second)
15306 continue;
15307
15308 PromOps.push_back(BinOp);
15309
15310 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15311 // The condition of the select is not promoted.
15312 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15313 continue;
15314 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15315 continue;
15316
15317 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15318 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15319 Inputs.push_back(BinOp.getOperand(i));
15320 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15321 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15322 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15323 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15324 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15325 BinOps.push_back(BinOp.getOperand(i));
15326 } else {
15327 // We have an input that is not a truncation or another binary
15328 // operation; we'll abort this transformation.
15329 return SDValue();
15330 }
15331 }
15332 }
15333
15334 // The operands of a select that must be truncated when the select is
15335 // promoted because the operand is actually part of the to-be-promoted set.
15336 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15337
15338 // Make sure that this is a self-contained cluster of operations (which
15339 // is not quite the same thing as saying that everything has only one
15340 // use).
15341 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15342 if (isa<ConstantSDNode>(Inputs[i]))
15343 continue;
15344
15345 for (SDNode *User : Inputs[i].getNode()->users()) {
15346 if (User != N && !Visited.count(User))
15347 return SDValue();
15348
15349 // If we're going to promote the non-output-value operand(s) or SELECT or
15350 // SELECT_CC, record them for truncation.
15351 if (User->getOpcode() == ISD::SELECT) {
15352 if (User->getOperand(0) == Inputs[i])
15353 SelectTruncOp[0].insert(std::make_pair(User,
15354 User->getOperand(0).getValueType()));
15355 } else if (User->getOpcode() == ISD::SELECT_CC) {
15356 if (User->getOperand(0) == Inputs[i])
15357 SelectTruncOp[0].insert(std::make_pair(User,
15358 User->getOperand(0).getValueType()));
15359 if (User->getOperand(1) == Inputs[i])
15360 SelectTruncOp[1].insert(std::make_pair(User,
15361 User->getOperand(1).getValueType()));
15362 }
15363 }
15364 }
15365
15366 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15367 for (SDNode *User : PromOps[i].getNode()->users()) {
15368 if (User != N && !Visited.count(User))
15369 return SDValue();
15370
15371 // If we're going to promote the non-output-value operand(s) of SELECT or
15372 // SELECT_CC, record them for truncation.
15373 if (User->getOpcode() == ISD::SELECT) {
15374 if (User->getOperand(0) == PromOps[i])
15375 SelectTruncOp[0].insert(std::make_pair(User,
15376 User->getOperand(0).getValueType()));
15377 } else if (User->getOpcode() == ISD::SELECT_CC) {
15378 if (User->getOperand(0) == PromOps[i])
15379 SelectTruncOp[0].insert(std::make_pair(User,
15380 User->getOperand(0).getValueType()));
15381 if (User->getOperand(1) == PromOps[i])
15382 SelectTruncOp[1].insert(std::make_pair(User,
15383 User->getOperand(1).getValueType()));
15384 }
15385 }
15386 }
15387
15388 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15389 bool ReallyNeedsExt = false;
15390 if (N->getOpcode() != ISD::ANY_EXTEND) {
15391 // If not all of the inputs are already sign/zero-extended, then
15392 // we'll still need to do that at the end.
15393 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15394 if (isa<ConstantSDNode>(Inputs[i]))
15395 continue;
15396
15397 unsigned OpBits =
15398 Inputs[i].getOperand(0).getValueSizeInBits();
15399 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15400
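// For a zero extend, the bits above the promoted width must already be known
// zero; for a sign extend, there must be enough known sign bits that
// truncating and re-extending would not change the value.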
15401 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15402 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15403 APInt::getHighBitsSet(OpBits,
15404 OpBits-PromBits))) ||
15405 (N->getOpcode() == ISD::SIGN_EXTEND &&
15406 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15407 (OpBits-(PromBits-1)))) {
15408 ReallyNeedsExt = true;
15409 break;
15410 }
15411 }
15412 }
15413
15414 // Replace all inputs, either with the truncation operand, or a
15415 // truncation or extension to the final output type.
15416 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15417 // Constant inputs need to be replaced with the to-be-promoted nodes that
15418 // use them because they might have users outside of the cluster of
15419 // promoted nodes.
15420 if (isa<ConstantSDNode>(Inputs[i]))
15421 continue;
15422
15423 SDValue InSrc = Inputs[i].getOperand(0);
15424 if (Inputs[i].getValueType() == N->getValueType(0))
15425 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15426 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15427 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15428 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15429 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15430 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15431 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15432 else
15433 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15434 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15435 }
15436
15437 std::list<HandleSDNode> PromOpHandles;
15438 for (auto &PromOp : PromOps)
15439 PromOpHandles.emplace_back(PromOp);
15440
15441 // Replace all operations (these are all the same, but have a different
15442 // (promoted) return type). DAG.getNode will validate that the types of
15443 // a binary operator match, so go through the list in reverse so that
15444 // we've likely promoted both operands first.
15445 while (!PromOpHandles.empty()) {
15446 SDValue PromOp = PromOpHandles.back().getValue();
15447 PromOpHandles.pop_back();
15448
15449 unsigned C;
15450 switch (PromOp.getOpcode()) {
15451 default: C = 0; break;
15452 case ISD::SELECT: C = 1; break;
15453 case ISD::SELECT_CC: C = 2; break;
15454 }
15455
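// Operands C and C+1 are the to-be-promoted value operands; for SELECT the
// condition (operand 0) and for SELECT_CC the compared operands (0 and 1)
// keep their original types.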
15456 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15457 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15458 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15459 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15460 // The to-be-promoted operands of this node have not yet been
15461 // promoted (this should be rare because we're going through the
15462 // list backward, but if one of the operands has several users in
15463 // this cluster of to-be-promoted nodes, it is possible).
15464 PromOpHandles.emplace_front(PromOp);
15465 continue;
15466 }
15467
15468 // For SELECT and SELECT_CC nodes, we do a similar check for any
15469 // to-be-promoted comparison inputs.
15470 if (PromOp.getOpcode() == ISD::SELECT ||
15471 PromOp.getOpcode() == ISD::SELECT_CC) {
15472 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15473 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15474 (SelectTruncOp[1].count(PromOp.getNode()) &&
15475 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15476 PromOpHandles.emplace_front(PromOp);
15477 continue;
15478 }
15479 }
15480
15481 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15482
15483 // If this node has constant inputs, then they'll need to be promoted here.
15484 for (unsigned i = 0; i < 2; ++i) {
15485 if (!isa<ConstantSDNode>(Ops[C+i]))
15486 continue;
15487 if (Ops[C+i].getValueType() == N->getValueType(0))
15488 continue;
15489
15490 if (N->getOpcode() == ISD::SIGN_EXTEND)
15491 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15492 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15493 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15494 else
15495 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15496 }
15497
15498 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15499 // truncate them again to the original value type.
15500 if (PromOp.getOpcode() == ISD::SELECT ||
15501 PromOp.getOpcode() == ISD::SELECT_CC) {
15502 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15503 if (SI0 != SelectTruncOp[0].end())
15504 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15505 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15506 if (SI1 != SelectTruncOp[1].end())
15507 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15508 }
15509
15510 DAG.ReplaceAllUsesOfValueWith(PromOp,
15511 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15512 }
15513
15514 // Now we're left with the initial extension itself.
15515 if (!ReallyNeedsExt)
15516 return N->getOperand(0);
15517
15518 // To zero extend, just mask off everything except for the first bit (in the
15519 // i1 case).
15520 if (N->getOpcode() == ISD::ZERO_EXTEND)
15521 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15522 DAG.getConstant(APInt::getLowBitsSet(
15523 N->getValueSizeInBits(0), PromBits),
15524 dl, N->getValueType(0)));
15525
15526 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15527 "Invalid extension type");
15528 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15529 SDValue ShiftCst =
15530 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15531 return DAG.getNode(
15532 ISD::SRA, dl, N->getValueType(0),
15533 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15534 ShiftCst);
15535}
15536
15537SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15538 DAGCombinerInfo &DCI) const {
15539 assert(N->getOpcode() == ISD::SETCC &&
15540 "Should be called with a SETCC node");
15541
15542 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15543 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15544 SDValue LHS = N->getOperand(0);
15545 SDValue RHS = N->getOperand(1);
15546
15547 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15548 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15549 LHS.hasOneUse())
15550 std::swap(LHS, RHS);
15551
15552 // x == 0-y --> x+y == 0
15553 // x != 0-y --> x+y != 0
15554 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15555 RHS.hasOneUse()) {
15556 SDLoc DL(N);
15557 SelectionDAG &DAG = DCI.DAG;
15558 EVT VT = N->getValueType(0);
15559 EVT OpVT = LHS.getValueType();
15560 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15561 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15562 }
15563 }
15564
15565 return DAGCombineTruncBoolExt(N, DCI);
15566}
15567
15568// Is this an extending load from an f32 to an f64?
15569static bool isFPExtLoad(SDValue Op) {
15570 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15571 return LD->getExtensionType() == ISD::EXTLOAD &&
15572 Op.getValueType() == MVT::f64;
15573 return false;
15574}
15575
15576/// Reduces the number of fp-to-int conversion when building a vector.
15577///
15578/// If this vector is built out of floating to integer conversions,
15579/// transform it to a vector built out of floating point values followed by a
15580/// single floating to integer conversion of the vector.
15581/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15582/// becomes (fptosi (build_vector ($A, $B, ...)))
15583SDValue PPCTargetLowering::
15584combineElementTruncationToVectorTruncation(SDNode *N,
15585 DAGCombinerInfo &DCI) const {
15586 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15587 "Should be called with a BUILD_VECTOR node");
15588
15589 SelectionDAG &DAG = DCI.DAG;
15590 SDLoc dl(N);
15591
15592 SDValue FirstInput = N->getOperand(0);
15593 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15594 "The input operand must be an fp-to-int conversion.");
15595
15596 // This combine happens after legalization so the fp_to_[su]i nodes are
15597 // already converted to PPCSISD nodes.
15598 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15599 if (FirstConversion == PPCISD::FCTIDZ ||
15600 FirstConversion == PPCISD::FCTIDUZ ||
15601 FirstConversion == PPCISD::FCTIWZ ||
15602 FirstConversion == PPCISD::FCTIWUZ) {
15603 bool IsSplat = true;
15604 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15605 FirstConversion == PPCISD::FCTIWUZ;
15606 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15607 SmallVector<SDValue, 4> Ops;
15608 EVT TargetVT = N->getValueType(0);
15609 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15610 SDValue NextOp = N->getOperand(i);
15611 if (NextOp.getOpcode() != PPCISD::MFVSR)
15612 return SDValue();
15613 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15614 if (NextConversion != FirstConversion)
15615 return SDValue();
15616 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15617 // This is not valid if the input was originally double precision. It is
15618 // also not profitable to do unless this is an extending load in which
15619 // case doing this combine will allow us to combine consecutive loads.
15620 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15621 return SDValue();
15622 if (N->getOperand(i) != FirstInput)
15623 IsSplat = false;
15624 }
15625
15626 // If this is a splat, we leave it as-is since there will be only a single
15627 // fp-to-int conversion followed by a splat of the integer. This is better
15628 // for 32-bit and smaller ints and neutral for 64-bit ints.
15629 if (IsSplat)
15630 return SDValue();
15631
15632 // Now that we know we have the right type of node, get its operands
15633 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15634 SDValue In = N->getOperand(i).getOperand(0);
15635 if (Is32Bit) {
15636 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15637 // here, we know that all inputs are extending loads so this is safe).
15638 if (In.isUndef())
15639 Ops.push_back(DAG.getUNDEF(SrcVT));
15640 else {
15641 SDValue Trunc =
15642 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15643 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15644 Ops.push_back(Trunc);
15645 }
15646 } else
15647 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15648 }
15649
15650 unsigned Opcode;
15651 if (FirstConversion == PPCISD::FCTIDZ ||
15652 FirstConversion == PPCISD::FCTIWZ)
15653 Opcode = ISD::FP_TO_SINT;
15654 else
15655 Opcode = ISD::FP_TO_UINT;
15656
15657 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15658 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15659 return DAG.getNode(Opcode, dl, TargetVT, BV);
15660 }
15661 return SDValue();
15662}
15663
15664/// Reduce the number of loads when building a vector.
15665///
15666/// Building a vector out of multiple loads can be converted to a load
15667/// of the vector type if the loads are consecutive. If the loads are
15668/// consecutive but in descending order, a shuffle is added at the end
15669/// to reorder the vector.
15670 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15671 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15672 "Should be called with a BUILD_VECTOR node");
15673
15674 SDLoc dl(N);
15675
15676 // Return early for non-byte-sized types, as they can't be consecutive.
15677 if (!N->getValueType(0).getVectorElementType().isByteSized())
15678 return SDValue();
15679
15680 bool InputsAreConsecutiveLoads = true;
15681 bool InputsAreReverseConsecutive = true;
15682 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15683 SDValue FirstInput = N->getOperand(0);
15684 bool IsRoundOfExtLoad = false;
15685 LoadSDNode *FirstLoad = nullptr;
15686
15687 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15688 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15689 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15690 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15691 }
15692 // Not a build vector of (possibly fp_rounded) loads.
15693 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15694 N->getNumOperands() == 1)
15695 return SDValue();
15696
15697 if (!IsRoundOfExtLoad)
15698 FirstLoad = cast<LoadSDNode>(FirstInput);
15699
15700 SmallVector<LoadSDNode *, 4> InputLoads;
15701 InputLoads.push_back(FirstLoad);
15702 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15703 // If any inputs are fp_round(extload), they all must be.
15704 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15705 return SDValue();
15706
15707 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15708 N->getOperand(i);
15709 if (NextInput.getOpcode() != ISD::LOAD)
15710 return SDValue();
15711
15712 SDValue PreviousInput =
15713 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15714 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15715 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15716
15717 // If any inputs are fp_round(extload), they all must be.
15718 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15719 return SDValue();
15720
15721 // We only care about regular loads. The PPC-specific load intrinsics
15722 // will not lead to a merge opportunity.
15723 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15724 InputsAreConsecutiveLoads = false;
15725 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15726 InputsAreReverseConsecutive = false;
15727
15728 // Exit early if the loads are neither consecutive nor reverse consecutive.
15729 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15730 return SDValue();
15731 InputLoads.push_back(LD2);
15732 }
15733
15734 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15735 "The loads cannot be both consecutive and reverse consecutive.");
15736
15737 SDValue WideLoad;
15738 SDValue ReturnSDVal;
15739 if (InputsAreConsecutiveLoads) {
15740 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15741 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15742 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15743 FirstLoad->getAlign());
15744 ReturnSDVal = WideLoad;
15745 } else if (InputsAreReverseConsecutive) {
15746 LoadSDNode *LastLoad = InputLoads.back();
15747 assert(LastLoad && "Input needs to be a LoadSDNode.");
15748 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15749 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15750 LastLoad->getAlign());
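// Build a reversing shuffle mask <N-1, ..., 1, 0> so the elements of the
// wide load come back in the order the BUILD_VECTOR requested.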
15751 SmallVector<int, 16> Ops;
15752 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15753 Ops.push_back(i);
15754
15755 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15756 DAG.getUNDEF(N->getValueType(0)), Ops);
15757 } else
15758 return SDValue();
15759
15760 for (auto *LD : InputLoads)
15761 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15762 return ReturnSDVal;
15763}
15764
15765// This function adds the required vector_shuffle needed to get
15766// the elements of the vector extract in the correct position
15767// as specified by the CorrectElems encoding.
15768 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15769 SDValue Input, uint64_t Elems,
15770 uint64_t CorrectElems) {
15771 SDLoc dl(N);
15772
15773 unsigned NumElems = Input.getValueType().getVectorNumElements();
15774 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15775
15776 // Knowing the element indices being extracted from the original
15777 // vector and the order in which they're being inserted, just put
15778 // them at element indices required for the instruction.
15779 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15780 if (DAG.getDataLayout().isLittleEndian())
15781 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15782 else
15783 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15784 CorrectElems = CorrectElems >> 8;
15785 Elems = Elems >> 8;
15786 }
15787
15788 SDValue Shuffle =
15789 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15790 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15791
15792 EVT VT = N->getValueType(0);
15793 SDValue Conv = DAG.getBitcast(VT, Shuffle);
15794
15795 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15796 Input.getValueType().getVectorElementType(),
15797 N->getNumOperands());
15798 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15799 DAG.getValueType(ExtVT));
15800}
15801
15802// Look for build vector patterns where input operands come from sign
15803// extended vector_extract elements of specific indices. If the correct indices
15804// aren't used, add a vector shuffle to fix up the indices and create
15805// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15806// during instruction selection.
15807 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15808 // This array encodes the indices that the vector sign extend instructions
15809 // extract from when extending from one type to another for both BE and LE.
15810 // The right nibble of each byte corresponds to the LE indices,
15811 // and the left nibble of each byte corresponds to the BE indices.
15812 // For example: 0x3074B8FC byte->word
15813 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15814 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15815 // For example: 0x000070F8 byte->double word
15816 // For LE: the allowed indices are: 0x0,0x8
15817 // For BE: the allowed indices are: 0x7,0xF
15818 uint64_t TargetElems[] = {
15819 0x3074B8FC, // b->w
15820 0x000070F8, // b->d
15821 0x10325476, // h->w
15822 0x00003074, // h->d
15823 0x00001032, // w->d
15824 };
15825
15826 uint64_t Elems = 0;
15827 int Index;
15828 SDValue Input;
15829
15830 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15831 if (!Op)
15832 return false;
15833 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15834 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15835 return false;
15836
15837 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15838 // of the right width.
15839 SDValue Extract = Op.getOperand(0);
15840 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15841 Extract = Extract.getOperand(0);
15842 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15843 return false;
15844
15845 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
15846 if (!ExtOp)
15847 return false;
15848
15849 Index = ExtOp->getZExtValue();
15850 if (Input && Input != Extract.getOperand(0))
15851 return false;
15852
15853 if (!Input)
15854 Input = Extract.getOperand(0);
15855
15856 Elems = Elems << 8;
15857 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15858 Elems |= Index;
15859
15860 return true;
15861 };
15862
15863 // If the build vector operands aren't sign-extended vector extracts
15864 // of the same input vector, then return.
15865 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15866 if (!isSExtOfVecExtract(N->getOperand(i))) {
15867 return SDValue();
15868 }
15869 }
15870
15871 // If the vector extract indices are not correct, add the appropriate
15872 // vector_shuffle.
15873 int TgtElemArrayIdx;
15874 int InputSize = Input.getValueType().getScalarSizeInBits();
15875 int OutputSize = N->getValueType(0).getScalarSizeInBits();
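// The sum of the input and output scalar sizes uniquely identifies the
// extension: 8+32=40 (b->w), 8+64=72 (b->d), 16+32=48 (h->w),
// 16+64=80 (h->d) and 32+64=96 (w->d).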
15876 if (InputSize + OutputSize == 40)
15877 TgtElemArrayIdx = 0;
15878 else if (InputSize + OutputSize == 72)
15879 TgtElemArrayIdx = 1;
15880 else if (InputSize + OutputSize == 48)
15881 TgtElemArrayIdx = 2;
15882 else if (InputSize + OutputSize == 80)
15883 TgtElemArrayIdx = 3;
15884 else if (InputSize + OutputSize == 96)
15885 TgtElemArrayIdx = 4;
15886 else
15887 return SDValue();
15888
15889 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15890 CorrectElems = DAG.getDataLayout().isLittleEndian()
15891 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15892 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15893 if (Elems != CorrectElems) {
15894 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15895 }
15896
15897 // Regular lowering will catch cases where a shuffle is not needed.
15898 return SDValue();
15899}
15900
15901// Look for the pattern of a load from a narrow width to i128, feeding
15902// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15903// (LXVRZX). This node represents a zero extending load that will be matched
15904// to the Load VSX Vector Rightmost instructions.
15905 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15906 SDLoc DL(N);
15907
15908 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15909 if (N->getValueType(0) != MVT::v1i128)
15910 return SDValue();
15911
15912 SDValue Operand = N->getOperand(0);
15913 // Proceed with the transformation if the operand to the BUILD_VECTOR
15914 // is a load instruction.
15915 if (Operand.getOpcode() != ISD::LOAD)
15916 return SDValue();
15917
15918 auto *LD = cast<LoadSDNode>(Operand);
15919 EVT MemoryType = LD->getMemoryVT();
15920
15921 // This transformation is only valid if we are loading either a byte,
15922 // halfword, word, or doubleword.
15923 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15924 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15925
15926 // Ensure that the load from the narrow width is being zero extended to i128.
15927 if (!ValidLDType ||
15928 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15929 LD->getExtensionType() != ISD::EXTLOAD))
15930 return SDValue();
15931
15932 SDValue LoadOps[] = {
15933 LD->getChain(), LD->getBasePtr(),
15934 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15935
15936 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15937 DAG.getVTList(MVT::v1i128, MVT::Other),
15938 LoadOps, MemoryType, LD->getMemOperand());
15939}
15940
15941SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15942 DAGCombinerInfo &DCI) const {
15943 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15944 "Should be called with a BUILD_VECTOR node");
15945
15946 SelectionDAG &DAG = DCI.DAG;
15947 SDLoc dl(N);
15948
15949 if (!Subtarget.hasVSX())
15950 return SDValue();
15951
15952 // The target independent DAG combiner will leave a build_vector of
15953 // float-to-int conversions intact. We can generate MUCH better code for
15954 // a float-to-int conversion of a vector of floats.
15955 SDValue FirstInput = N->getOperand(0);
15956 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15957 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15958 if (Reduced)
15959 return Reduced;
15960 }
15961
15962 // If we're building a vector out of consecutive loads, just load that
15963 // vector type.
15964 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15965 if (Reduced)
15966 return Reduced;
15967
15968 // If we're building a vector out of extended elements from another vector
15969 // we have P9 vector integer extend instructions. The code assumes legal
15970 // input types (i.e. it can't handle things like v4i16) so do not run before
15971 // legalization.
15972 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15973 Reduced = combineBVOfVecSExt(N, DAG);
15974 if (Reduced)
15975 return Reduced;
15976 }
15977
15978 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15979 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15980 // is a load from <valid narrow width> to i128.
15981 if (Subtarget.isISA3_1()) {
15982 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15983 if (BVOfZLoad)
15984 return BVOfZLoad;
15985 }
15986
15987 if (N->getValueType(0) != MVT::v2f64)
15988 return SDValue();
15989
15990 // Looking for:
15991 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15992 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15993 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15994 return SDValue();
15995 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15996 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15997 return SDValue();
15998 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15999 return SDValue();
16000
16001 SDValue Ext1 = FirstInput.getOperand(0);
16002 SDValue Ext2 = N->getOperand(1).getOperand(0);
16003 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16004 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16005 return SDValue();
16006
16007 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16008 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16009 if (!Ext1Op || !Ext2Op)
16010 return SDValue();
16011 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16012 Ext1.getOperand(0) != Ext2.getOperand(0))
16013 return SDValue();
16014
16015 int FirstElem = Ext1Op->getZExtValue();
16016 int SecondElem = Ext2Op->getZExtValue();
16017 int SubvecIdx;
16018 if (FirstElem == 0 && SecondElem == 1)
16019 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16020 else if (FirstElem == 2 && SecondElem == 3)
16021 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16022 else
16023 return SDValue();
16024
16025 SDValue SrcVec = Ext1.getOperand(0);
16026 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16027 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16028 return DAG.getNode(NodeType, dl, MVT::v2f64,
16029 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16030}
16031
16032SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16033 DAGCombinerInfo &DCI) const {
16034 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16035 N->getOpcode() == ISD::UINT_TO_FP) &&
16036 "Need an int -> FP conversion node here");
16037
16038 if (useSoftFloat() || !Subtarget.has64BitSupport())
16039 return SDValue();
16040
16041 SelectionDAG &DAG = DCI.DAG;
16042 SDLoc dl(N);
16043 SDValue Op(N, 0);
16044
16045 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16046 // from the hardware.
16047 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16048 return SDValue();
16049 if (!Op.getOperand(0).getValueType().isSimple())
16050 return SDValue();
16051 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16052 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16053 return SDValue();
16054
16055 SDValue FirstOperand(Op.getOperand(0));
16056 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16057 (FirstOperand.getValueType() == MVT::i8 ||
16058 FirstOperand.getValueType() == MVT::i16);
16059 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16060 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16061 bool DstDouble = Op.getValueType() == MVT::f64;
16062 unsigned ConvOp = Signed ?
16063 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16064 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16065 SDValue WidthConst =
16066 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16067 dl, false);
16068 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
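// The sub-word value is loaded directly into a VSR; for a signed conversion
// it is then sign-extended in the register (VEXTS) before the int-to-FP
// conversion.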
16069 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16070 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16071 DAG.getVTList(MVT::f64, MVT::Other),
16072 Ops, MVT::i8, LDN->getMemOperand());
16073 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16074
16075 // For signed conversion, we need to sign-extend the value in the VSR
16076 if (Signed) {
16077 SDValue ExtOps[] = { Ld, WidthConst };
16078 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16079 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16080 } else
16081 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16082 }
16083
16084
16085 // For i32 intermediate values, unfortunately, the conversion functions
16086 // leave the upper 32 bits of the value undefined. Within the set of
16087 // scalar instructions, we have no method for zero- or sign-extending the
16088 // value. Thus, we cannot handle i32 intermediate values here.
16089 if (Op.getOperand(0).getValueType() == MVT::i32)
16090 return SDValue();
16091
16092 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16093 "UINT_TO_FP is supported only with FPCVT");
16094
16095 // If we have FCFIDS, then use it when converting to single-precision.
16096 // Otherwise, convert to double-precision and then round.
16097 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16098 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16099 : PPCISD::FCFIDS)
16100 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16101 : PPCISD::FCFID);
16102 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16103 ? MVT::f32
16104 : MVT::f64;
16105
16106 // If we're converting from a float to an int, and back to a float again,
16107 // then we don't need the store/load pair at all.
16108 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16109 Subtarget.hasFPCVT()) ||
16110 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16111 SDValue Src = Op.getOperand(0).getOperand(0);
16112 if (Src.getValueType() == MVT::f32) {
16113 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16114 DCI.AddToWorklist(Src.getNode());
16115 } else if (Src.getValueType() != MVT::f64) {
16116 // Make sure that we don't pick up a ppc_fp128 source value.
16117 return SDValue();
16118 }
16119
16120 unsigned FCTOp =
16121 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16122 PPCISD::FCTIDUZ;
16123
16124 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16125 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16126
16127 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16128 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16129 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16130 DCI.AddToWorklist(FP.getNode());
16131 }
16132
16133 return FP;
16134 }
16135
16136 return SDValue();
16137}
16138
16139// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16140// builtins) into loads with swaps.
16141 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16142 DAGCombinerInfo &DCI) const {
16143 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16144 // load combines.
16145 if (DCI.isBeforeLegalizeOps())
16146 return SDValue();
16147
16148 SelectionDAG &DAG = DCI.DAG;
16149 SDLoc dl(N);
16150 SDValue Chain;
16151 SDValue Base;
16152 MachineMemOperand *MMO;
16153
16154 switch (N->getOpcode()) {
16155 default:
16156 llvm_unreachable("Unexpected opcode for little endian VSX load");
16157 case ISD::LOAD: {
16158 LoadSDNode *LD = cast<LoadSDNode>(N);
16159 Chain = LD->getChain();
16160 Base = LD->getBasePtr();
16161 MMO = LD->getMemOperand();
16162 // If the MMO suggests this isn't a load of a full vector, leave
16163 // things alone. For a built-in, we have to make the change for
16164 // correctness, so if there is a size problem that will be a bug.
16165 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16166 return SDValue();
16167 break;
16168 }
16169 case ISD::INTRINSIC_W_CHAIN: {
16170 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16171 Chain = Intrin->getChain();
16172 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16173 // us what we want. Get operand 2 instead.
16174 Base = Intrin->getOperand(2);
16175 MMO = Intrin->getMemOperand();
16176 break;
16177 }
16178 }
16179
16180 MVT VecTy = N->getValueType(0).getSimpleVT();
16181
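// Perform the load as v2f64 and follow it with an XXSWAPD so the elements
// end up in the expected order on little endian; a bitcast below restores
// the original vector type if needed.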
16182 SDValue LoadOps[] = { Chain, Base };
16183 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16184 DAG.getVTList(MVT::v2f64, MVT::Other),
16185 LoadOps, MVT::v2f64, MMO);
16186
16187 DCI.AddToWorklist(Load.getNode());
16188 Chain = Load.getValue(1);
16189 SDValue Swap = DAG.getNode(
16190 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16191 DCI.AddToWorklist(Swap.getNode());
16192
16193 // Add a bitcast if the resulting load type doesn't match v2f64.
16194 if (VecTy != MVT::v2f64) {
16195 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16196 DCI.AddToWorklist(N.getNode());
16197 // Package {bitcast value, swap's chain} to match Load's shape.
16198 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16199 N, Swap.getValue(1));
16200 }
16201
16202 return Swap;
16203}
16204
16205// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16206// builtins) into stores with swaps.
16207 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16208 DAGCombinerInfo &DCI) const {
16209 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16210 // store combines.
16211 if (DCI.isBeforeLegalizeOps())
16212 return SDValue();
16213
16214 SelectionDAG &DAG = DCI.DAG;
16215 SDLoc dl(N);
16216 SDValue Chain;
16217 SDValue Base;
16218 unsigned SrcOpnd;
16219 MachineMemOperand *MMO;
16220
16221 switch (N->getOpcode()) {
16222 default:
16223 llvm_unreachable("Unexpected opcode for little endian VSX store");
16224 case ISD::STORE: {
16225 StoreSDNode *ST = cast<StoreSDNode>(N);
16226 Chain = ST->getChain();
16227 Base = ST->getBasePtr();
16228 MMO = ST->getMemOperand();
16229 SrcOpnd = 1;
16230 // If the MMO suggests this isn't a store of a full vector, leave
16231 // things alone. For a built-in, we have to make the change for
16232 // correctness, so if there is a size problem that will be a bug.
16233 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16234 return SDValue();
16235 break;
16236 }
16237 case ISD::INTRINSIC_VOID: {
16238 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16239 Chain = Intrin->getChain();
16240 // Intrin->getBasePtr() oddly does not get what we want.
16241 Base = Intrin->getOperand(3);
16242 MMO = Intrin->getMemOperand();
16243 SrcOpnd = 2;
16244 break;
16245 }
16246 }
16247
16248 SDValue Src = N->getOperand(SrcOpnd);
16249 MVT VecTy = Src.getValueType().getSimpleVT();
16250
16251 // All stores are done as v2f64 and possible bit cast.
16252 if (VecTy != MVT::v2f64) {
16253 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16254 DCI.AddToWorklist(Src.getNode());
16255 }
16256
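// Swap the elements before the store so that the VSX store writes them
// back in the expected order on little endian.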
16257 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16258 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16259 DCI.AddToWorklist(Swap.getNode());
16260 Chain = Swap.getValue(1);
16261 SDValue StoreOps[] = { Chain, Swap, Base };
16262 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16263 DAG.getVTList(MVT::Other),
16264 StoreOps, VecTy, MMO);
16265 DCI.AddToWorklist(Store.getNode());
16266 return Store;
16267}
16268
16269// Handle DAG combine for STORE (FP_TO_INT F).
16270SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16271 DAGCombinerInfo &DCI) const {
16272 SelectionDAG &DAG = DCI.DAG;
16273 SDLoc dl(N);
16274 unsigned Opcode = N->getOperand(1).getOpcode();
16275 (void)Opcode;
16276 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16277
16278 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16279 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16280 && "Not a FP_TO_INT Instruction!");
16281
16282 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16283 EVT Op1VT = N->getOperand(1).getValueType();
16284 EVT ResVT = Val.getValueType();
16285
16286 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16287 return SDValue();
16288
16289 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16290 bool ValidTypeForStoreFltAsInt =
16291 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16292 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16293
16294 // TODO: Lower conversion from f128 on all VSX targets
16295 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16296 return SDValue();
16297
16298 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16299 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16300 return SDValue();
16301
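// Convert the FP value to an integer in a VSR and store it directly from
// there; the byte-size operand below tells the target store node how wide
// the integer store is.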
16302 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16303
16304 // Set number of bytes being converted.
16305 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16306 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16307 DAG.getIntPtrConstant(ByteSize, dl, false),
16308 DAG.getValueType(Op1VT)};
16309
16310 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16311 DAG.getVTList(MVT::Other), Ops,
16312 cast<StoreSDNode>(N)->getMemoryVT(),
16313 cast<StoreSDNode>(N)->getMemOperand());
16314
16315 return Val;
16316}
16317
16318static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16319 // Check that the source of the element keeps flipping
16320 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
16321 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16322 for (int i = 1, e = Mask.size(); i < e; i++) {
16323 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16324 return false;
16325 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16326 return false;
16327 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16328 }
16329 return true;
16330}
16331
16332static bool isSplatBV(SDValue Op) {
16333 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16334 return false;
16335 SDValue FirstOp;
16336
16337 // Find first non-undef input.
16338 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16339 FirstOp = Op.getOperand(i);
16340 if (!FirstOp.isUndef())
16341 break;
16342 }
16343
16344 // All inputs are undef or the same as the first non-undef input.
16345 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16346 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16347 return false;
16348 return true;
16349}
16350
16351 static SDValue isScalarToVec(SDValue Op) {
16352 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16353 return Op;
16354 if (Op.getOpcode() != ISD::BITCAST)
16355 return SDValue();
16356 Op = Op.getOperand(0);
16357 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16358 return Op;
16359 return SDValue();
16360}
16361
16362// Fix up the shuffle mask to account for the fact that the result of
16363// scalar_to_vector is not in lane zero. This just takes all values in
16364// the ranges specified by the min/max indices and adds the number of
16365// elements required to ensure each element comes from the respective
16366// position in the valid lane.
16367// On little endian, that's just the corresponding element in the other
16368// half of the vector. On big endian, it is in the same half but right
16369// justified rather than left justified in that half.
16370 static void fixupShuffleMaskForPermutedSToV(
16371 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16372 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16373 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16374 int LHSEltFixup =
16375 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16376 int RHSEltFixup =
16377 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16378 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16379 int Idx = ShuffV[I];
16380 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16381 ShuffV[I] += LHSEltFixup;
16382 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16383 ShuffV[I] += RHSEltFixup;
16384 }
16385}
16386
16387// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16388// the original is:
16389// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16390// In such a case, just change the shuffle mask to extract the element
16391// from the permuted index.
16392 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16393 const PPCSubtarget &Subtarget) {
16394 SDLoc dl(OrigSToV);
16395 EVT VT = OrigSToV.getValueType();
16396 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16397 "Expecting a SCALAR_TO_VECTOR here");
16398 SDValue Input = OrigSToV.getOperand(0);
16399
16400 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16401 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16402 SDValue OrigVector = Input.getOperand(0);
16403
16404 // Can't handle non-const element indices or different vector types
16405 // for the input to the extract and the output of the scalar_to_vector.
16406 if (Idx && VT == OrigVector.getValueType()) {
16407 unsigned NumElts = VT.getVectorNumElements();
16408 assert(
16409 NumElts > 1 &&
16410 "Cannot produce a permuted scalar_to_vector for one element vector");
16411 SmallVector<int, 16> NewMask(NumElts, -1);
16412 unsigned ResultInElt = NumElts / 2;
16413 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16414 NewMask[ResultInElt] = Idx->getZExtValue();
16415 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16416 }
16417 }
16418 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16419 OrigSToV.getOperand(0));
16420}
16421
16422 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16423 int HalfVec, int LHSLastElementDefined,
16424 int RHSLastElementDefined) {
16425 for (int Index : ShuffV) {
16426 if (Index < 0) // Skip explicitly undefined mask indices.
16427 continue;
16428 // Handle first input vector of the vector_shuffle.
16429 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16430 (Index > LHSLastElementDefined))
16431 return false;
16432 // Handle second input vector of the vector_shuffle.
16433 if ((RHSLastElementDefined >= 0) &&
16434 (Index > HalfVec + RHSLastElementDefined))
16435 return false;
16436 }
16437 return true;
16438}
16439
16440 static SDValue generateSToVPermutedForVecShuffle(
16441 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16442 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16443 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16444 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16445 // Set up the values for the shuffle vector fixup.
16446 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16447 // The last element depends on if the input comes from the LHS or RHS.
16448 //
16449 // For example:
16450 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16451 //
16452 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16453 // because elements 1 and higher of a scalar_to_vector are undefined.
16454 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16455 // because elements 1 and higher of a scalar_to_vector are undefined.
16456 // It is also not 4 because the original scalar_to_vector is wider and
16457 // actually contains two i32 elements.
16458 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16459 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16460 : FirstElt;
16461 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16462 if (SToVPermuted.getValueType() != VecShuffOperandType)
16463 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16464 return SToVPermuted;
16465}
16466
16467// On little endian subtargets, combine shuffles such as:
16468// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16469// into:
16470// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16471// because the latter can be matched to a single instruction merge.
16472// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16473// to put the value into element zero. Adjust the shuffle mask so that the
16474// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16475// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16476// nodes with elements smaller than doubleword because all the ways
16477// of getting scalar data into a vector register put the value in the
16478// rightmost element of the left half of the vector.
16479SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16480 SelectionDAG &DAG) const {
16481 SDValue LHS = SVN->getOperand(0);
16482 SDValue RHS = SVN->getOperand(1);
16483 auto Mask = SVN->getMask();
16484 int NumElts = LHS.getValueType().getVectorNumElements();
16485 SDValue Res(SVN, 0);
16486 SDLoc dl(SVN);
16487 bool IsLittleEndian = Subtarget.isLittleEndian();
16488
16489 // On big endian targets this is only useful for subtargets with direct moves.
16490 // On little endian targets it would be useful for all subtargets with VSX.
16491 // However adding special handling for LE subtargets without direct moves
16492 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16493 // which includes direct moves.
16494 if (!Subtarget.hasDirectMove())
16495 return Res;
16496
16497 // If this is not a shuffle of a shuffle and the first element comes from
16498 // the second vector, canonicalize to the commuted form. This will make it
16499 // more likely to match one of the single instruction patterns.
16500 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16501 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16502 std::swap(LHS, RHS);
16503 Res = DAG.getCommutedVectorShuffle(*SVN);
16504 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16505 }
16506
16507 // Adjust the shuffle mask if either input vector comes from a
16508 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16509 // form (to prevent the need for a swap).
16510 SmallVector<int, 16> ShuffV(Mask);
16511 SDValue SToVLHS = isScalarToVec(LHS);
16512 SDValue SToVRHS = isScalarToVec(RHS);
16513 if (SToVLHS || SToVRHS) {
16514 EVT VT = SVN->getValueType(0);
16515 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16516 int ShuffleNumElts = ShuffV.size();
16517 int HalfVec = ShuffleNumElts / 2;
16518 // The width of the "valid lane" (i.e. the lane that contains the value that
16519 // is vectorized) needs to be expressed in terms of the number of elements
16520 // of the shuffle. It is thereby the ratio of the values before and after
16521 // any bitcast, which will be set later on if the LHS or RHS are
16522 // SCALAR_TO_VECTOR nodes.
16523 unsigned LHSNumValidElts = HalfVec;
16524 unsigned RHSNumValidElts = HalfVec;
16525
16526 // Initially assume that neither input is permuted. These will be adjusted
16527 // accordingly if either input is. Note that -1 means that all elements
16528 // are undefined.
16529 int LHSFirstElt = 0;
16530 int RHSFirstElt = ShuffleNumElts;
16531 int LHSLastElt = -1;
16532 int RHSLastElt = -1;
16533
16534 // Get the permuted scalar to vector nodes for the source(s) that come from
16535 // ISD::SCALAR_TO_VECTOR.
16536 // On big endian systems, this only makes sense for element sizes smaller
16537 // than 64 bits since for 64-bit elements, all instructions already put
16538 // the value into element zero. Since scalar size of LHS and RHS may differ
16539 // after isScalarToVec, this should be checked using their own sizes.
16540 int LHSScalarSize = 0;
16541 int RHSScalarSize = 0;
16542 if (SToVLHS) {
16543 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16544 if (!IsLittleEndian && LHSScalarSize >= 64)
16545 return Res;
16546 }
16547 if (SToVRHS) {
16548 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16549 if (!IsLittleEndian && RHSScalarSize >= 64)
16550 return Res;
16551 }
16552 if (LHSScalarSize != 0)
16553 LHS = generateSToVPermutedForVecShuffle(
16554 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16555 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
16556 if (RHSScalarSize != 0)
16557 RHS = generateSToVPermutedForVecShuffle(
16558 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
16559 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
16560
16561 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
16562 return Res;
16563
16564 // Fix up the shuffle mask to reflect where the desired element actually is.
16565 // The minimum and maximum indices that correspond to element zero for both
16566 // the LHS and RHS are computed and will control which shuffle mask entries
16567 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16568 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16569 fixupShuffleMaskForPermutedSToV(
16570 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16571 LHSNumValidElts, RHSNumValidElts, Subtarget);
16572 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16573
16574 // We may have simplified away the shuffle. We won't be able to do anything
16575 // further with it here.
16576 if (!isa<ShuffleVectorSDNode>(Res))
16577 return Res;
16578 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16579 }
16580
16581 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16582 // The common case after we commuted the shuffle is that the RHS is a splat
16583 // and we have elements coming in from the splat at indices that are not
16584 // conducive to using a merge.
16585 // Example:
16586 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16587 if (!isSplatBV(TheSplat))
16588 return Res;
16589
16590 // We are looking for a mask such that all even elements are from
16591 // one vector and all odd elements from the other.
16592 if (!isAlternatingShuffMask(Mask, NumElts))
16593 return Res;
16594
16595 // Adjust the mask so we are pulling in the same index from the splat
16596 // as the index from the interesting vector in consecutive elements.
16597 if (IsLittleEndian) {
16598 // Example (even elements from first vector):
16599 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16600 if (Mask[0] < NumElts)
16601 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16602 if (ShuffV[i] < 0)
16603 continue;
16604 // If element from non-splat is undef, pick first element from splat.
16605 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16606 }
16607 // Example (odd elements from first vector):
16608 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16609 else
16610 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16611 if (ShuffV[i] < 0)
16612 continue;
16613 // If element from non-splat is undef, pick first element from splat.
16614 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16615 }
16616 } else {
16617 // Example (even elements from first vector):
16618 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16619 if (Mask[0] < NumElts)
16620 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16621 if (ShuffV[i] < 0)
16622 continue;
16623 // If element from non-splat is undef, pick first element from splat.
16624 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16625 }
16626 // Example (odd elements from first vector):
16627 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16628 else
16629 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16630 if (ShuffV[i] < 0)
16631 continue;
16632 // If element from non-splat is undef, pick first element from splat.
16633 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16634 }
16635 }
16636
16637 // If the RHS has undefs, we need to remove them since we may have created
16638 // a shuffle that adds those instead of the splat value.
16639 SDValue SplatVal =
16640 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16641 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16642
16643 if (IsLittleEndian)
16644 RHS = TheSplat;
16645 else
16646 LHS = TheSplat;
16647 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16648}
16649
16650SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16651 LSBaseSDNode *LSBase,
16652 DAGCombinerInfo &DCI) const {
16653 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16654 "Not a reverse memop pattern!");
16655
16656 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16657 auto Mask = SVN->getMask();
16658 int i = 0;
16659 auto I = Mask.rbegin();
16660 auto E = Mask.rend();
16661
16662 for (; I != E; ++I) {
16663 if (*I != i)
16664 return false;
16665 i++;
16666 }
16667 return true;
16668 };
16669
16670 SelectionDAG &DAG = DCI.DAG;
16671 EVT VT = SVN->getValueType(0);
16672
16673 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16674 return SDValue();
16675
16676 // Before P9, the PPCVSXSwapRemoval pass adjusts the element order instead.
16677 // See comment in PPCVSXSwapRemoval.cpp.
16678 // This combine conflicts with that pass, so we don't do it on pre-P9 subtargets.
16679 if (!Subtarget.hasP9Vector())
16680 return SDValue();
16681
16682 if (!IsElementReverse(SVN))
16683 return SDValue();
16684
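// A reversing shuffle paired with a normal load or store can instead use an
// element-reversed VSX memory access (LOAD_VEC_BE / STORE_VEC_BE),
// eliminating the shuffle.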
16685 if (LSBase->getOpcode() == ISD::LOAD) {
16686 // If result 0 of the load has any user other than the
16687 // shufflevector instruction, it is not profitable to replace the
16688 // shufflevector with a reverse load.
16689 for (SDUse &Use : LSBase->uses())
16690 if (Use.getResNo() == 0 &&
16691 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16692 return SDValue();
16693
16694 SDLoc dl(LSBase);
16695 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16696 return DAG.getMemIntrinsicNode(
16697 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16698 LSBase->getMemoryVT(), LSBase->getMemOperand());
16699 }
16700
16701 if (LSBase->getOpcode() == ISD::STORE) {
16702 // If there are other uses of the shuffle, the swap cannot be avoided.
16703 // Forcing the use of an X-Form (since swapped stores only have
16704 // X-Forms) without removing the swap is unprofitable.
16705 if (!SVN->hasOneUse())
16706 return SDValue();
16707
16708 SDLoc dl(LSBase);
16709 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16710 LSBase->getBasePtr()};
16711 return DAG.getMemIntrinsicNode(
16712 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16713 LSBase->getMemoryVT(), LSBase->getMemOperand());
16714 }
16715
16716 llvm_unreachable("Expected a load or store node here");
16717}
16718
16719static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16720 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16721 if (IntrinsicID == Intrinsic::ppc_stdcx)
16722 StoreWidth = 8;
16723 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16724 StoreWidth = 4;
16725 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16726 StoreWidth = 2;
16727 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16728 StoreWidth = 1;
16729 else
16730 return false;
16731 return true;
16732}
16733
16734 static SDValue DAGCombineAddc(SDNode *N,
16735 llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
16736 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
16737 // (ADDC (ADDE 0, 0, C), -1) -> C
16738 SDValue LHS = N->getOperand(0);
16739 SDValue RHS = N->getOperand(1);
16740 if (LHS->getOpcode() == PPCISD::ADDE &&
16741 isNullConstant(LHS->getOperand(0)) &&
16742 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
16743 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
16744 }
16745 }
16746 return SDValue();
16747}
16748
16749 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16750 DAGCombinerInfo &DCI) const {
16751 SelectionDAG &DAG = DCI.DAG;
16752 SDLoc dl(N);
16753 switch (N->getOpcode()) {
16754 default: break;
16755 case ISD::ADD:
16756 return combineADD(N, DCI);
16757 case ISD::AND: {
16758 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16759 // original input as that will prevent us from selecting optimal rotates.
16760 // This only matters if the input to the extend is i32 widened to i64.
16761 SDValue Op1 = N->getOperand(0);
16762 SDValue Op2 = N->getOperand(1);
16763 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16764 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16765 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
16766 Op1.getOperand(0).getValueType() != MVT::i32)
16767 break;
16768 SDValue NarrowOp = Op1.getOperand(0);
16769 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16770 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16771 break;
16772
16773 uint64_t Imm = Op2->getAsZExtVal();
16774 // Make sure that the constant is narrow enough to fit in the narrow type.
16775 if (!isUInt<32>(Imm))
16776 break;
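// Redo the AND on the original i32 value and zero-extend the result so the
// shift/rotate can still be selected as a rotate-and-mask.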
16777 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
16778 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
16779 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
16780 }
16781 case ISD::SHL:
16782 return combineSHL(N, DCI);
16783 case ISD::SRA:
16784 return combineSRA(N, DCI);
16785 case ISD::SRL:
16786 return combineSRL(N, DCI);
16787 case ISD::MUL:
16788 return combineMUL(N, DCI);
16789 case ISD::FMA:
16790 case PPCISD::FNMSUB:
16791 return combineFMALike(N, DCI);
16792 case PPCISD::SHL:
16793 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16794 return N->getOperand(0);
16795 break;
16796 case PPCISD::SRL:
16797 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16798 return N->getOperand(0);
16799 break;
16800 case PPCISD::SRA:
16801 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16802 if (C->isZero() || // 0 >>s V -> 0.
16803 C->isAllOnes()) // -1 >>s V -> -1.
16804 return N->getOperand(0);
16805 }
16806 break;
16807 case ISD::SIGN_EXTEND:
16808 case ISD::ZERO_EXTEND:
16809 case ISD::ANY_EXTEND:
16810 return DAGCombineExtBoolTrunc(N, DCI);
16811 case ISD::TRUNCATE:
16812 return combineTRUNCATE(N, DCI);
16813 case ISD::SETCC:
16814 if (SDValue CSCC = combineSetCC(N, DCI))
16815 return CSCC;
16816 [[fallthrough]];
16817 case ISD::SELECT_CC:
16818 return DAGCombineTruncBoolExt(N, DCI);
16819 case ISD::SINT_TO_FP:
16820 case ISD::UINT_TO_FP:
16821 return combineFPToIntToFP(N, DCI);
16822 case ISD::VECTOR_SHUFFLE:
16823 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
16824 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
16825 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
16826 }
16827 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
16828 case ISD::STORE: {
16829
16830 EVT Op1VT = N->getOperand(1).getValueType();
16831 unsigned Opcode = N->getOperand(1).getOpcode();
16832
16833 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16834 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16835 SDValue Val = combineStoreFPToInt(N, DCI);
16836 if (Val)
16837 return Val;
16838 }
16839
16840 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16841 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
16842 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
16843 if (Val)
16844 return Val;
16845 }
16846
16847 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
16848 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
16849 N->getOperand(1).getNode()->hasOneUse() &&
16850 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16851 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16852
16853 // STBRX can only handle simple types and it makes no sense to store
16854 // fewer than two bytes in byte-reversed order.
16855 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
16856 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16857 break;
16858
16859 SDValue BSwapOp = N->getOperand(1).getOperand(0);
16860 // Do an any-extend to 32-bits if this is a half-word input.
16861 if (BSwapOp.getValueType() == MVT::i16)
16862 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
16863
16864 // If the BSWAP operand is wider than the stored memory width, it needs
16865 // to be shifted right before the STBRX.
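      // For example, a bswap(i64) value stored as an i16 is shifted right by
      // 48 and then truncated to i32 before feeding the sthbrx pattern.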
16866 if (Op1VT.bitsGT(mVT)) {
16867 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16868 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
16869 DAG.getConstant(Shift, dl, MVT::i32));
16870 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16871 if (Op1VT == MVT::i64)
16872 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
16873 }
16874
16875 SDValue Ops[] = {
16876 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
16877 };
16878 return
16879 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
16880 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
16881 cast<StoreSDNode>(N)->getMemOperand());
16882 }
16883
16884 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16885 // so that the constant construction has a better chance of being CSE'd.
16886 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16887 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
16888 // Need to sign-extend to 64 bits to handle negative values.
16889 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
16890 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
16891 MemVT.getSizeInBits());
16892 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
16893
16894 auto *ST = cast<StoreSDNode>(N);
16895 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
16896 ST->getBasePtr(), ST->getOffset(), MemVT,
16897 ST->getMemOperand(), ST->getAddressingMode(),
16898 /*IsTruncating=*/true);
16899 // Note we use CombineTo here to prevent DAGCombiner from visiting the
16900 // new store, which would change the constant by removing non-demanded bits.
16901 return ST->isUnindexed()
16902 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
16903 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
16904 }
16905
16906 // For little endian, VSX stores require generating xxswapd/lxvd2x.
16907 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16908 if (Op1VT.isSimple()) {
16909 MVT StoreVT = Op1VT.getSimpleVT();
16910 if (Subtarget.needsSwapsForVSXMemOps() &&
16911 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16912 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16913 return expandVSXStoreForLE(N, DCI);
16914 }
16915 break;
16916 }
16917 case ISD::LOAD: {
16918 LoadSDNode *LD = cast<LoadSDNode>(N);
16919 EVT VT = LD->getValueType(0);
16920
16921 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16922 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16923 if (VT.isSimple()) {
16924 MVT LoadVT = VT.getSimpleVT();
16925 if (Subtarget.needsSwapsForVSXMemOps() &&
16926 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16927 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16928 return expandVSXLoadForLE(N, DCI);
16929 }
16930
16931 // We sometimes end up with a 64-bit integer load, from which we extract
16932 // two single-precision floating-point numbers. This happens with
16933 // std::complex<float>, and other similar structures, because of the way we
16934 // canonicalize structure copies. However, if we lack direct moves,
16935 // then the final bitcasts from the extracted integer values to the
16936 // floating-point numbers turn into store/load pairs. Even with direct moves,
16937 // just loading the two floating-point numbers is likely better.
16938 auto ReplaceTwoFloatLoad = [&]() {
16939 if (VT != MVT::i64)
16940 return false;
16941
16942 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16943 LD->isVolatile())
16944 return false;
16945
16946 // We're looking for a sequence like this:
16947 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16948 // t16: i64 = srl t13, Constant:i32<32>
16949 // t17: i32 = truncate t16
16950 // t18: f32 = bitcast t17
16951 // t19: i32 = truncate t13
16952 // t20: f32 = bitcast t19
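      //
      // When the pattern matches, the i64 load is replaced below by two f32
      // loads (at offsets 0 and 4) that feed the bitcast users directly,
      // avoiding the integer load plus shift/truncate/bitcast sequence.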
16953
16954 if (!LD->hasNUsesOfValue(2, 0))
16955 return false;
16956
16957 auto UI = LD->user_begin();
16958 while (UI.getUse().getResNo() != 0) ++UI;
16959 SDNode *Trunc = *UI++;
16960 while (UI.getUse().getResNo() != 0) ++UI;
16961 SDNode *RightShift = *UI;
16962 if (Trunc->getOpcode() != ISD::TRUNCATE)
16963 std::swap(Trunc, RightShift);
16964
16965 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16966 Trunc->getValueType(0) != MVT::i32 ||
16967 !Trunc->hasOneUse())
16968 return false;
16969 if (RightShift->getOpcode() != ISD::SRL ||
16970 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16971 RightShift->getConstantOperandVal(1) != 32 ||
16972 !RightShift->hasOneUse())
16973 return false;
16974
16975 SDNode *Trunc2 = *RightShift->user_begin();
16976 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16977 Trunc2->getValueType(0) != MVT::i32 ||
16978 !Trunc2->hasOneUse())
16979 return false;
16980
16981 SDNode *Bitcast = *Trunc->user_begin();
16982 SDNode *Bitcast2 = *Trunc2->user_begin();
16983
16984 if (Bitcast->getOpcode() != ISD::BITCAST ||
16985 Bitcast->getValueType(0) != MVT::f32)
16986 return false;
16987 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16988 Bitcast2->getValueType(0) != MVT::f32)
16989 return false;
16990
16991 if (Subtarget.isLittleEndian())
16992 std::swap(Bitcast, Bitcast2);
16993
16994 // Bitcast has the second float (in memory-layout order) and Bitcast2
16995 // has the first one.
16996
16997 SDValue BasePtr = LD->getBasePtr();
16998 if (LD->isIndexed()) {
16999 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17000 "Non-pre-inc AM on PPC?");
17001 BasePtr =
17002 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17003 LD->getOffset());
17004 }
17005
17006 auto MMOFlags =
17007 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17008 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17009 LD->getPointerInfo(), LD->getAlign(),
17010 MMOFlags, LD->getAAInfo());
17011 SDValue AddPtr =
17012 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17013 BasePtr, DAG.getIntPtrConstant(4, dl));
17014 SDValue FloatLoad2 = DAG.getLoad(
17015 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17016 LD->getPointerInfo().getWithOffset(4),
17017 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17018
17019 if (LD->isIndexed()) {
17020 // Note that DAGCombine should re-form any pre-increment load(s) from
17021 // what is produced here if that makes sense.
17022 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17023 }
17024
17025 DCI.CombineTo(Bitcast2, FloatLoad);
17026 DCI.CombineTo(Bitcast, FloatLoad2);
17027
17028 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17029 SDValue(FloatLoad2.getNode(), 1));
17030 return true;
17031 };
17032
17033 if (ReplaceTwoFloatLoad())
17034 return SDValue(N, 0);
17035
17036 EVT MemVT = LD->getMemoryVT();
17037 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17038 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17039 if (LD->isUnindexed() && VT.isVector() &&
17040 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17041 // P8 and later hardware should just use LOAD.
17042 !Subtarget.hasP8Vector() &&
17043 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17044 VT == MVT::v4f32))) &&
17045 LD->getAlign() < ABIAlignment) {
17046 // This is a type-legal unaligned Altivec load.
17047 SDValue Chain = LD->getChain();
17048 SDValue Ptr = LD->getBasePtr();
17049 bool isLittleEndian = Subtarget.isLittleEndian();
17050
17051 // This implements the loading of unaligned vectors as described in
17052 // the venerable Apple Velocity Engine overview. Specifically:
17053 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17054 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17055 //
17056 // The general idea is to expand a sequence of one or more unaligned
17057 // loads into an alignment-based permutation-control instruction (lvsl
17058 // or lvsr), a series of regular vector loads (which always truncate
17059 // their input address to an aligned address), and a series of
17060 // permutations. The results of these permutations are the requested
17061 // loaded values. The trick is that the last "extra" load is not taken
17062 // from the address you might suspect (sizeof(vector) bytes after the
17063 // last requested load), but rather sizeof(vector) - 1 bytes after the
17064 // last requested vector. The point of this is to avoid a page fault if
17065 // the base address happened to be aligned. This works because if the
17066 // base address is aligned, then adding less than a full vector length
17067 // will cause the last vector in the sequence to be (re)loaded.
17068 // Otherwise, the next vector will be fetched as you might suspect was
17069 // necessary.
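      //
      // Roughly, for an unaligned v4i32 load from Ptr this expands to:
      //   PermCntl = lvsl(Ptr)         ; lvsr for little endian
      //   Base     = lvx(Ptr)          ; lvx ignores the low four address bits
      //   Extra    = lvx(Ptr + 15)     ; +15 rather than +16, as explained above
      //   Result   = vperm(Base, Extra, PermCntl)   ; operands swapped on LE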
17070
17071 // We might be able to reuse the permutation generation from
17072 // a different base address offset from this one by an aligned amount.
17073 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17074 // optimization later.
17075 Intrinsic::ID Intr, IntrLD, IntrPerm;
17076 MVT PermCntlTy, PermTy, LDTy;
17077 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17078 : Intrinsic::ppc_altivec_lvsl;
17079 IntrLD = Intrinsic::ppc_altivec_lvx;
17080 IntrPerm = Intrinsic::ppc_altivec_vperm;
17081 PermCntlTy = MVT::v16i8;
17082 PermTy = MVT::v4i32;
17083 LDTy = MVT::v4i32;
17084
17085 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17086
17087 // Create the new MMO for the new base load. It is like the original MMO,
17088 // but represents an area in memory almost twice the vector size centered
17089 // on the original address. If the address is unaligned, we might start
17090 // reading up to (sizeof(vector)-1) bytes below the address of the
17091 // original unaligned load.
17092 MachineFunction &MF = DAG.getMachineFunction();
17093 MachineMemOperand *BaseMMO =
17094 MF.getMachineMemOperand(LD->getMemOperand(),
17095 -(int64_t)MemVT.getStoreSize()+1,
17096 2*MemVT.getStoreSize()-1);
17097
17098 // Create the new base load.
17099 SDValue LDXIntID =
17100 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17101 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17102 SDValue BaseLoad =
17103 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17104 DAG.getVTList(PermTy, MVT::Other),
17105 BaseLoadOps, LDTy, BaseMMO);
17106
17107 // Note that the value of IncOffset (which is provided to the next
17108 // load's pointer info offset value, and thus used to calculate the
17109 // alignment), and the value of IncValue (which is actually used to
17110 // increment the pointer value) are different! This is because we
17111 // require the next load to appear to be aligned, even though it
17112 // is actually offset from the base pointer by a lesser amount.
17113 int IncOffset = VT.getSizeInBits() / 8;
17114 int IncValue = IncOffset;
17115
17116 // Walk (both up and down) the chain looking for another load at the real
17117 // (aligned) offset (the alignment of the other load does not matter in
17118 // this case). If found, then do not use the offset reduction trick, as
17119 // that will prevent the loads from being later combined (as they would
17120 // otherwise be duplicates).
17121 if (!findConsecutiveLoad(LD, DAG))
17122 --IncValue;
17123
17124 SDValue Increment =
17125 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17126 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17127
17128 MachineMemOperand *ExtraMMO =
17129 MF.getMachineMemOperand(LD->getMemOperand(),
17130 1, 2*MemVT.getStoreSize()-1);
17131 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17132 SDValue ExtraLoad =
17133 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17134 DAG.getVTList(PermTy, MVT::Other),
17135 ExtraLoadOps, LDTy, ExtraMMO);
17136
17137 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17138 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17139
17140 // Because vperm has a big-endian bias, we must reverse the order
17141 // of the input vectors and complement the permute control vector
17142 // when generating little endian code. We have already handled the
17143 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17144 // and ExtraLoad here.
17145 SDValue Perm;
17146 if (isLittleEndian)
17147 Perm = BuildIntrinsicOp(IntrPerm,
17148 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17149 else
17150 Perm = BuildIntrinsicOp(IntrPerm,
17151 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17152
17153 if (VT != PermTy)
17154 Perm = Subtarget.hasAltivec()
17155 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17156 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17157 DAG.getTargetConstant(1, dl, MVT::i64));
17158 // second argument is 1 because this rounding
17159 // is always exact.
17160
17161 // The output of the permutation is our loaded result, the TokenFactor is
17162 // our new chain.
17163 DCI.CombineTo(N, Perm, TF);
17164 return SDValue(N, 0);
17165 }
17166 }
17167 break;
17168 case ISD::INTRINSIC_WO_CHAIN: {
17169 bool isLittleEndian = Subtarget.isLittleEndian();
17170 unsigned IID = N->getConstantOperandVal(0);
17171 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17172 : Intrinsic::ppc_altivec_lvsl);
17173 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17174 SDValue Add = N->getOperand(1);
17175
17176 int Bits = 4 /* 16 byte alignment */;
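    // lvsl/lvsr depend only on the low four bits of the address, so any base
    // address that differs from this one by a multiple of 16 yields the same
    // permute control vector.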
17177
17178 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17179 APInt::getAllOnes(Bits /* alignment */)
17180 .zext(Add.getScalarValueSizeInBits()))) {
17181 SDNode *BasePtr = Add->getOperand(0).getNode();
17182 for (SDNode *U : BasePtr->users()) {
17183 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17184 U->getConstantOperandVal(0) == IID) {
17185 // We've found another LVSL/LVSR, and this address is an aligned
17186 // multiple of that one. The results will be the same, so use the
17187 // one we've just found instead.
17188
17189 return SDValue(U, 0);
17190 }
17191 }
17192 }
17193
17194 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17195 SDNode *BasePtr = Add->getOperand(0).getNode();
17196 for (SDNode *U : BasePtr->users()) {
17197 if (U->getOpcode() == ISD::ADD &&
17198 isa<ConstantSDNode>(U->getOperand(1)) &&
17199 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17200 (1ULL << Bits) ==
17201 0) {
17202 SDNode *OtherAdd = U;
17203 for (SDNode *V : OtherAdd->users()) {
17204 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17205 V->getConstantOperandVal(0) == IID) {
17206 return SDValue(V, 0);
17207 }
17208 }
17209 }
17210 }
17211 }
17212 }
17213
17214 // Combine vmaxsw/h/b(a, negation of a) to abs(a).
17215 // This exposes the vabsduw/h/b opportunity downstream.
17216 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17217 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17218 IID == Intrinsic::ppc_altivec_vmaxsh ||
17219 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17220 SDValue V1 = N->getOperand(1);
17221 SDValue V2 = N->getOperand(2);
17222 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17223 V1.getSimpleValueType() == MVT::v8i16 ||
17224 V1.getSimpleValueType() == MVT::v16i8) &&
17225 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17226 // (0-a, a)
17227 if (V1.getOpcode() == ISD::SUB &&
17228 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
17229 V1.getOperand(1) == V2) {
17230 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17231 }
17232 // (a, 0-a)
17233 if (V2.getOpcode() == ISD::SUB &&
17234 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
17235 V2.getOperand(1) == V1) {
17236 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17237 }
17238 // (x-y, y-x)
17239 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17240 V1.getOperand(0) == V2.getOperand(1) &&
17241 V1.getOperand(1) == V2.getOperand(0)) {
17242 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17243 }
17244 }
17245 }
17246 }
17247
17248 break;
17249 case ISD::INTRINSIC_W_CHAIN:
17250 switch (N->getConstantOperandVal(1)) {
17251 default:
17252 break;
17253 case Intrinsic::ppc_altivec_vsum4sbs:
17254 case Intrinsic::ppc_altivec_vsum4shs:
17255 case Intrinsic::ppc_altivec_vsum4ubs: {
17256 // These sum-across intrinsics only have a chain due to the side effect
17257 // that they may set the SAT bit. If we know the SAT bit will not be set
17258 // for some inputs, we can replace any uses of their chain with the
17259 // input chain.
17260 if (BuildVectorSDNode *BVN =
17261 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17262 APInt APSplatBits, APSplatUndef;
17263 unsigned SplatBitSize;
17264 bool HasAnyUndefs;
17265 bool BVNIsConstantSplat = BVN->isConstantSplat(
17266 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17267 !Subtarget.isLittleEndian());
17268 // If the constant splat vector is 0, the SAT bit will not be set.
17269 if (BVNIsConstantSplat && APSplatBits == 0)
17270 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17271 }
17272 return SDValue();
17273 }
17274 case Intrinsic::ppc_vsx_lxvw4x:
17275 case Intrinsic::ppc_vsx_lxvd2x:
17276 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17277 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17278 if (Subtarget.needsSwapsForVSXMemOps())
17279 return expandVSXLoadForLE(N, DCI);
17280 break;
17281 }
17282 break;
17283 case ISD::INTRINSIC_VOID:
17284 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17285 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17286 if (Subtarget.needsSwapsForVSXMemOps()) {
17287 switch (N->getConstantOperandVal(1)) {
17288 default:
17289 break;
17290 case Intrinsic::ppc_vsx_stxvw4x:
17291 case Intrinsic::ppc_vsx_stxvd2x:
17292 return expandVSXStoreForLE(N, DCI);
17293 }
17294 }
17295 break;
17296 case ISD::BSWAP: {
17297 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17298 // For subtargets without LDBRX, we can still do better than the default
17299 // expansion even for 64-bit BSWAP (LOAD).
17300 bool Is64BitBswapOn64BitTgt =
17301 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17302 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17303 N->getOperand(0).hasOneUse();
17304 if (IsSingleUseNormalLd &&
17305 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17306 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17307 SDValue Load = N->getOperand(0);
17308 LoadSDNode *LD = cast<LoadSDNode>(Load);
17309 // Create the byte-swapping load.
17310 SDValue Ops[] = {
17311 LD->getChain(), // Chain
17312 LD->getBasePtr(), // Ptr
17313 DAG.getValueType(N->getValueType(0)) // VT
17314 };
17315 SDValue BSLoad =
17316 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17317 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17318 MVT::i64 : MVT::i32, MVT::Other),
17319 Ops, LD->getMemoryVT(), LD->getMemOperand());
17320
17321 // If this is an i16 load, insert the truncate.
17322 SDValue ResVal = BSLoad;
17323 if (N->getValueType(0) == MVT::i16)
17324 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17325
17326 // First, combine the bswap away. This makes the value produced by the
17327 // load dead.
17328 DCI.CombineTo(N, ResVal);
17329
17330 // Next, combine the load away; we give it a bogus result value but a
17331 // real chain result. The result value is dead because the bswap is dead.
17332 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17333
17334 // Return N so it doesn't get rechecked!
17335 return SDValue(N, 0);
17336 }
17337 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17338 // before legalization so that the BUILD_PAIR is handled correctly.
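    // The result is rebuilt from two byte-reversed i32 halves loaded at offsets
    // 0 and 4; the two halves are swapped in the BUILD_PAIR for little endian.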
17339 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17340 !IsSingleUseNormalLd)
17341 return SDValue();
17342 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17343
17344 // Can't split volatile or atomic loads.
17345 if (!LD->isSimple())
17346 return SDValue();
17347 SDValue BasePtr = LD->getBasePtr();
17348 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17349 LD->getPointerInfo(), LD->getAlign());
17350 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17351 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17352 DAG.getIntPtrConstant(4, dl));
17353 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17354 LD->getMemOperand(), 4, 4);
17355 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17356 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17357 SDValue Res;
17358 if (Subtarget.isLittleEndian())
17359 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17360 else
17361 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17362 SDValue TF =
17363 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17364 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17365 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17366 return Res;
17367 }
17368 case PPCISD::VCMP:
17369 // If a VCMP_rec node already exists with exactly the same operands as this
17370 // node, use its result instead of this node (VCMP_rec computes both a CR6
17371 // and a normal output).
17372 //
17373 if (!N->getOperand(0).hasOneUse() &&
17374 !N->getOperand(1).hasOneUse() &&
17375 !N->getOperand(2).hasOneUse()) {
17376
17377 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17378 SDNode *VCMPrecNode = nullptr;
17379
17380 SDNode *LHSN = N->getOperand(0).getNode();
17381 for (SDNode *User : LHSN->users())
17382 if (User->getOpcode() == PPCISD::VCMP_rec &&
17383 User->getOperand(1) == N->getOperand(1) &&
17384 User->getOperand(2) == N->getOperand(2) &&
17385 User->getOperand(0) == N->getOperand(0)) {
17386 VCMPrecNode = User;
17387 break;
17388 }
17389
17390 // If there is no VCMP_rec node, or if the flag value has a single use,
17391 // don't transform this.
17392 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17393 break;
17394
17395 // Look at the (necessarily single) use of the flag value. If it has a
17396 // chain, this transformation is more complex. Note that multiple things
17397 // could use the value result, which we should ignore.
17398 SDNode *FlagUser = nullptr;
17399 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17400 FlagUser == nullptr; ++UI) {
17401 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17402 SDNode *User = UI->getUser();
17403 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17404 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17405 FlagUser = User;
17406 break;
17407 }
17408 }
17409 }
17410
17411 // If the user is a MFOCRF instruction, we know this is safe.
17412 // Otherwise we give up for right now.
17413 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17414 return SDValue(VCMPrecNode, 0);
17415 }
17416 break;
17417 case ISD::BR_CC: {
17418 // If this is a branch on an altivec predicate comparison, lower this so
17419 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17420 // lowering is done pre-legalize, because the legalizer lowers the predicate
17421 // compare down to code that is difficult to reassemble.
17422 // This code also handles branches that depend on the result of a store
17423 // conditional.
17424 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17425 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17426
17427 int CompareOpc;
17428 bool isDot;
17429
17430 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17431 break;
17432
17433 // Since we are doing this pre-legalize, the RHS can be a constant of
17434 // arbitrary bitwidth which may cause issues when trying to get the value
17435 // from the underlying APInt.
17436 auto RHSAPInt = RHS->getAsAPIntVal();
17437 if (!RHSAPInt.isIntN(64))
17438 break;
17439
17440 unsigned Val = RHSAPInt.getZExtValue();
17441 auto isImpossibleCompare = [&]() {
17442 // If this is a comparison against something other than 0/1, then we know
17443 // that the condition is never/always true.
17444 if (Val != 0 && Val != 1) {
17445 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17446 return N->getOperand(0);
17447 // Always !=, turn it into an unconditional branch.
17448 return DAG.getNode(ISD::BR, dl, MVT::Other,
17449 N->getOperand(0), N->getOperand(4));
17450 }
17451 return SDValue();
17452 };
17453 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17454 unsigned StoreWidth = 0;
17455 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17456 isStoreConditional(LHS, StoreWidth)) {
17457 if (SDValue Impossible = isImpossibleCompare())
17458 return Impossible;
17459 PPC::Predicate CompOpc;
17460 // eq 0 => ne
17461 // ne 0 => eq
17462 // eq 1 => eq
17463 // ne 1 => ne
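      // Instead of materializing the intrinsic's boolean result and comparing
      // it, emit a STORE_COND node below and branch directly on CR0 using the
      // predicate chosen here.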
17464 if (Val == 0)
17465 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17466 else
17467 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17468
17469 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17470 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17471 auto *MemNode = cast<MemSDNode>(LHS);
17472 SDValue ConstSt = DAG.getMemIntrinsicNode(
17473 PPCISD::STORE_COND, dl,
17474 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17475 MemNode->getMemoryVT(), MemNode->getMemOperand());
17476
17477 SDValue InChain;
17478 // Unchain the branch from the original store conditional.
17479 if (N->getOperand(0) == LHS.getValue(1))
17480 InChain = LHS.getOperand(0);
17481 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17482 SmallVector<SDValue, 4> InChains;
17483 SDValue InTF = N->getOperand(0);
17484 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17485 if (InTF.getOperand(i) != LHS.getValue(1))
17486 InChains.push_back(InTF.getOperand(i));
17487 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17488 }
17489
17490 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17491 DAG.getConstant(CompOpc, dl, MVT::i32),
17492 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17493 ConstSt.getValue(2));
17494 }
17495
17496 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17497 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17498 assert(isDot && "Can't compare against a vector result!");
17499
17500 if (SDValue Impossible = isImpossibleCompare())
17501 return Impossible;
17502
17503 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17504 // Create the PPCISD altivec 'dot' comparison node.
17505 SDValue Ops[] = {
17506 LHS.getOperand(2), // LHS of compare
17507 LHS.getOperand(3), // RHS of compare
17508 DAG.getConstant(CompareOpc, dl, MVT::i32)
17509 };
17510 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17511 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17512
17513 // Unpack the result based on how the target uses it.
17514 PPC::Predicate CompOpc;
17515 switch (LHS.getConstantOperandVal(1)) {
17516 default: // Can't happen, don't crash on invalid number though.
17517 case 0: // Branch on the value of the EQ bit of CR6.
17518 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17519 break;
17520 case 1: // Branch on the inverted value of the EQ bit of CR6.
17521 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17522 break;
17523 case 2: // Branch on the value of the LT bit of CR6.
17524 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17525 break;
17526 case 3: // Branch on the inverted value of the LT bit of CR6.
17527 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17528 break;
17529 }
17530
17531 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17532 DAG.getConstant(CompOpc, dl, MVT::i32),
17533 DAG.getRegister(PPC::CR6, MVT::i32),
17534 N->getOperand(4), CompNode.getValue(1));
17535 }
17536 break;
17537 }
17538 case ISD::BUILD_VECTOR:
17539 return DAGCombineBuildVector(N, DCI);
17540 case PPCISD::ADDC:
17541 return DAGCombineAddc(N, DCI);
17542 }
17543
17544 return SDValue();
17545}
17546
17547SDValue
17548PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17549 SelectionDAG &DAG,
17550 SmallVectorImpl<SDNode *> &Created) const {
17551 // fold (sdiv X, pow2)
17552 EVT VT = N->getValueType(0);
17553 if (VT == MVT::i64 && !Subtarget.isPPC64())
17554 return SDValue();
17555 if ((VT != MVT::i32 && VT != MVT::i64) ||
17556 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17557 return SDValue();
17558
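  // Lower the division to an arithmetic shift right whose rounding toward zero
  // is fixed up via addze (PPCISD::SRA_ADDZE); for a negated power of two the
  // shifted result is then subtracted from zero.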
17559 SDLoc DL(N);
17560 SDValue N0 = N->getOperand(0);
17561
17562 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17563 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17564 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
17565
17566 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
17567 Created.push_back(Op.getNode());
17568
17569 if (IsNegPow2) {
17570 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
17571 Created.push_back(Op.getNode());
17572 }
17573
17574 return Op;
17575}
17576
17577//===----------------------------------------------------------------------===//
17578// Inline Assembly Support
17579//===----------------------------------------------------------------------===//
17580
17581void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17582 KnownBits &Known,
17583 const APInt &DemandedElts,
17584 const SelectionDAG &DAG,
17585 unsigned Depth) const {
17586 Known.resetAll();
17587 switch (Op.getOpcode()) {
17588 default: break;
17589 case PPCISD::LBRX: {
17590 // lhbrx is known to have the top bits cleared out.
17591 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
17592 Known.Zero = 0xFFFF0000;
17593 break;
17594 }
17595 case PPCISD::ADDE: {
17596 if (Op.getResNo() == 0) {
17597 // (0|1), _ = ADDE 0, 0, CARRY
17598 SDValue LHS = Op.getOperand(0);
17599 SDValue RHS = Op.getOperand(1);
17600 if (isNullConstant(LHS) && isNullConstant(RHS))
17601 Known.Zero = ~1ULL;
17602 }
17603 break;
17604 }
17605 case ISD::INTRINSIC_WO_CHAIN: {
17606 switch (Op.getConstantOperandVal(0)) {
17607 default: break;
17608 case Intrinsic::ppc_altivec_vcmpbfp_p:
17609 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17610 case Intrinsic::ppc_altivec_vcmpequb_p:
17611 case Intrinsic::ppc_altivec_vcmpequh_p:
17612 case Intrinsic::ppc_altivec_vcmpequw_p:
17613 case Intrinsic::ppc_altivec_vcmpequd_p:
17614 case Intrinsic::ppc_altivec_vcmpequq_p:
17615 case Intrinsic::ppc_altivec_vcmpgefp_p:
17616 case Intrinsic::ppc_altivec_vcmpgtfp_p:
17617 case Intrinsic::ppc_altivec_vcmpgtsb_p:
17618 case Intrinsic::ppc_altivec_vcmpgtsh_p:
17619 case Intrinsic::ppc_altivec_vcmpgtsw_p:
17620 case Intrinsic::ppc_altivec_vcmpgtsd_p:
17621 case Intrinsic::ppc_altivec_vcmpgtsq_p:
17622 case Intrinsic::ppc_altivec_vcmpgtub_p:
17623 case Intrinsic::ppc_altivec_vcmpgtuh_p:
17624 case Intrinsic::ppc_altivec_vcmpgtuw_p:
17625 case Intrinsic::ppc_altivec_vcmpgtud_p:
17626 case Intrinsic::ppc_altivec_vcmpgtuq_p:
17627 Known.Zero = ~1U; // All bits but the low one are known to be zero.
17628 break;
17629 }
17630 break;
17631 }
17632 case ISD::INTRINSIC_W_CHAIN: {
17633 switch (Op.getConstantOperandVal(1)) {
17634 default:
17635 break;
17636 case Intrinsic::ppc_load2r:
17637 // Top bits are cleared for load2r (which is the same as lhbrx).
17638 Known.Zero = 0xFFFF0000;
17639 break;
17640 }
17641 break;
17642 }
17643 }
17644}
17645
17646Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17647 switch (Subtarget.getCPUDirective()) {
17648 default: break;
17649 case PPC::DIR_970:
17650 case PPC::DIR_PWR4:
17651 case PPC::DIR_PWR5:
17652 case PPC::DIR_PWR5X:
17653 case PPC::DIR_PWR6:
17654 case PPC::DIR_PWR6X:
17655 case PPC::DIR_PWR7:
17656 case PPC::DIR_PWR8:
17657 case PPC::DIR_PWR9:
17658 case PPC::DIR_PWR10:
17659 case PPC::DIR_PWR11:
17660 case PPC::DIR_PWR_FUTURE: {
17661 if (!ML)
17662 break;
17663
17664 if (!DisableInnermostLoopAlign32) {
17665 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17666 // so that we can decrease cache misses and branch-prediction misses.
17667 // Actual alignment of the loop will depend on the hotness check and other
17668 // logic in alignBlocks.
17669 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17670 return Align(32);
17671 }
17672
17673 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17674
17675 // For small loops (between 5 and 8 instructions), align to a 32-byte
17676 // boundary so that the entire loop fits in one instruction-cache line.
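  // (PPC instructions are 4 bytes, so the 17..32-byte range checked below
  // corresponds to 5-8 instructions.)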
17677 uint64_t LoopSize = 0;
17678 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17679 for (const MachineInstr &J : **I) {
17680 LoopSize += TII->getInstSizeInBytes(J);
17681 if (LoopSize > 32)
17682 break;
17683 }
17684
17685 if (LoopSize > 16 && LoopSize <= 32)
17686 return Align(32);
17687
17688 break;
17689 }
17690 }
17691
17692 return TargetLowering::getPrefLoopAlignment(ML);
17693}
17694
17695/// getConstraintType - Given a constraint, return the type of
17696/// constraint it is for this target.
17697PPCTargetLowering::ConstraintType
17698PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17699 if (Constraint.size() == 1) {
17700 switch (Constraint[0]) {
17701 default: break;
17702 case 'b':
17703 case 'r':
17704 case 'f':
17705 case 'd':
17706 case 'v':
17707 case 'y':
17708 return C_RegisterClass;
17709 case 'Z':
17710 // FIXME: While Z does indicate a memory constraint, it specifically
17711 // indicates an r+r address (used in conjunction with the 'y' modifier
17712 // in the replacement string). Currently, we're forcing the base
17713 // register to be r0 in the asm printer (which is interpreted as zero)
17714 // and forming the complete address in the second register. This is
17715 // suboptimal.
17716 return C_Memory;
17717 }
17718 } else if (Constraint == "wc") { // individual CR bits.
17719 return C_RegisterClass;
17720 } else if (Constraint == "wa" || Constraint == "wd" ||
17721 Constraint == "wf" || Constraint == "ws" ||
17722 Constraint == "wi" || Constraint == "ww") {
17723 return C_RegisterClass; // VSX registers.
17724 }
17725 return TargetLowering::getConstraintType(Constraint);
17726}
17727
17728/// Examine constraint type and operand type and determine a weight value.
17729/// This object must already have been set up with the operand type
17730/// and the current alternative constraint selected.
17731TargetLowering::ConstraintWeight
17732PPCTargetLowering::getSingleConstraintMatchWeight(
17733 AsmOperandInfo &info, const char *constraint) const {
17734 ConstraintWeight weight = CW_Invalid;
17735 Value *CallOperandVal = info.CallOperandVal;
17736 // If we don't have a value, we can't do a match,
17737 // but allow it at the lowest weight.
17738 if (!CallOperandVal)
17739 return CW_Default;
17740 Type *type = CallOperandVal->getType();
17741
17742 // Look at the constraint type.
17743 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17744 return CW_Register; // an individual CR bit.
17745 else if ((StringRef(constraint) == "wa" ||
17746 StringRef(constraint) == "wd" ||
17747 StringRef(constraint) == "wf") &&
17748 type->isVectorTy())
17749 return CW_Register;
17750 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17751 return CW_Register; // holds 64-bit integer data only.
17752 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17753 return CW_Register;
17754 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17755 return CW_Register;
17756
17757 switch (*constraint) {
17758 default:
17759 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17760 break;
17761 case 'b':
17762 if (type->isIntegerTy())
17763 weight = CW_Register;
17764 break;
17765 case 'f':
17766 if (type->isFloatTy())
17767 weight = CW_Register;
17768 break;
17769 case 'd':
17770 if (type->isDoubleTy())
17771 weight = CW_Register;
17772 break;
17773 case 'v':
17774 if (type->isVectorTy())
17775 weight = CW_Register;
17776 break;
17777 case 'y':
17778 weight = CW_Register;
17779 break;
17780 case 'Z':
17781 weight = CW_Memory;
17782 break;
17783 }
17784 return weight;
17785}
17786
17787std::pair<unsigned, const TargetRegisterClass *>
17788PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17789 StringRef Constraint,
17790 MVT VT) const {
17791 if (Constraint.size() == 1) {
17792 // GCC RS6000 Constraint Letters
17793 switch (Constraint[0]) {
17794 case 'b': // R1-R31
17795 if (VT == MVT::i64 && Subtarget.isPPC64())
17796 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
17797 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
17798 case 'r': // R0-R31
17799 if (VT == MVT::i64 && Subtarget.isPPC64())
17800 return std::make_pair(0U, &PPC::G8RCRegClass);
17801 return std::make_pair(0U, &PPC::GPRCRegClass);
17802 // 'd' and 'f' constraints are both defined to be "the floating point
17803 // registers", where one is for 32-bit and the other for 64-bit. We don't
17804 // really care overly much here so just give them all the same reg classes.
17805 case 'd':
17806 case 'f':
17807 if (Subtarget.hasSPE()) {
17808 if (VT == MVT::f32 || VT == MVT::i32)
17809 return std::make_pair(0U, &PPC::GPRCRegClass);
17810 if (VT == MVT::f64 || VT == MVT::i64)
17811 return std::make_pair(0U, &PPC::SPERCRegClass);
17812 } else {
17813 if (VT == MVT::f32 || VT == MVT::i32)
17814 return std::make_pair(0U, &PPC::F4RCRegClass);
17815 if (VT == MVT::f64 || VT == MVT::i64)
17816 return std::make_pair(0U, &PPC::F8RCRegClass);
17817 }
17818 break;
17819 case 'v':
17820 if (Subtarget.hasAltivec() && VT.isVector())
17821 return std::make_pair(0U, &PPC::VRRCRegClass);
17822 else if (Subtarget.hasVSX())
17823 // Scalars in Altivec registers only make sense with VSX.
17824 return std::make_pair(0U, &PPC::VFRCRegClass);
17825 break;
17826 case 'y': // crrc
17827 return std::make_pair(0U, &PPC::CRRCRegClass);
17828 }
17829 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17830 // An individual CR bit.
17831 return std::make_pair(0U, &PPC::CRBITRCRegClass);
17832 } else if ((Constraint == "wa" || Constraint == "wd" ||
17833 Constraint == "wf" || Constraint == "wi") &&
17834 Subtarget.hasVSX()) {
17835 // A VSX register for either a scalar (FP) or vector. There is no
17836 // support for single precision scalars on subtargets prior to Power8.
17837 if (VT.isVector())
17838 return std::make_pair(0U, &PPC::VSRCRegClass);
17839 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17840 return std::make_pair(0U, &PPC::VSSRCRegClass);
17841 return std::make_pair(0U, &PPC::VSFRCRegClass);
17842 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17843 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17844 return std::make_pair(0U, &PPC::VSSRCRegClass);
17845 else
17846 return std::make_pair(0U, &PPC::VSFRCRegClass);
17847 } else if (Constraint == "lr") {
17848 if (VT == MVT::i64)
17849 return std::make_pair(0U, &PPC::LR8RCRegClass);
17850 else
17851 return std::make_pair(0U, &PPC::LRRCRegClass);
17852 }
17853
17854 // Handle special cases of physical registers that are not properly handled
17855 // by the base class.
17856 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17857 // If we name a VSX register, we can't defer to the base class because it
17858 // will not recognize the correct register (their names will be VSL{0-31}
17859 // and V{0-31} so they won't match). So we match them here.
17860 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17861 int VSNum = atoi(Constraint.data() + 3);
17862 assert(VSNum >= 0 && VSNum <= 63 &&
17863 "Attempted to access a vsr out of range");
17864 if (VSNum < 32)
17865 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
17866 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
17867 }
17868
17869 // For float registers, we can't defer to the base class as it will match
17870 // the SPILLTOVSRRC class.
17871 if (Constraint.size() > 3 && Constraint[1] == 'f') {
17872 int RegNum = atoi(Constraint.data() + 2);
17873 if (RegNum > 31 || RegNum < 0)
17874 report_fatal_error("Invalid floating point register number");
17875 if (VT == MVT::f32 || VT == MVT::i32)
17876 return Subtarget.hasSPE()
17877 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
17878 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
17879 if (VT == MVT::f64 || VT == MVT::i64)
17880 return Subtarget.hasSPE()
17881 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
17882 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
17883 }
17884 }
17885
17886 std::pair<unsigned, const TargetRegisterClass *> R =
17887 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17888
17889 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17890 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17891 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17892 // register.
17893 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17894 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17895 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17896 PPC::GPRCRegClass.contains(R.first))
17897 return std::make_pair(TRI->getMatchingSuperReg(R.first,
17898 PPC::sub_32, &PPC::G8RCRegClass),
17899 &PPC::G8RCRegClass);
17900
17901 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17902 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
17903 R.first = PPC::CR0;
17904 R.second = &PPC::CRRCRegClass;
17905 }
17906 // FIXME: This warning should ideally be emitted in the front end.
17907 const auto &TM = getTargetMachine();
17908 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17909 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17910 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17911 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
17912 errs() << "warning: vector registers 20 to 32 are reserved in the "
17913 "default AIX AltiVec ABI and cannot be used\n";
17914 }
17915
17916 return R;
17917}
17918
17919/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17920/// vector. If it is invalid, don't add anything to Ops.
17921void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17922 StringRef Constraint,
17923 std::vector<SDValue> &Ops,
17924 SelectionDAG &DAG) const {
17925 SDValue Result;
17926
17927 // Only support length 1 constraints.
17928 if (Constraint.size() > 1)
17929 return;
17930
17931 char Letter = Constraint[0];
17932 switch (Letter) {
17933 default: break;
17934 case 'I':
17935 case 'J':
17936 case 'K':
17937 case 'L':
17938 case 'M':
17939 case 'N':
17940 case 'O':
17941 case 'P': {
17942 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17943 if (!CST) return; // Must be an immediate to match.
17944 SDLoc dl(Op);
17945 int64_t Value = CST->getSExtValue();
17946 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17947 // numbers are printed as such.
17948 switch (Letter) {
17949 default: llvm_unreachable("Unknown constraint letter!");
17950 case 'I': // "I" is a signed 16-bit constant.
17951 if (isInt<16>(Value))
17952 Result = DAG.getTargetConstant(Value, dl, TCVT);
17953 break;
17954 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17955 if (isShiftedUInt<16, 16>(Value))
17956 Result = DAG.getTargetConstant(Value, dl, TCVT);
17957 break;
17958 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17959 if (isShiftedInt<16, 16>(Value))
17960 Result = DAG.getTargetConstant(Value, dl, TCVT);
17961 break;
17962 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17963 if (isUInt<16>(Value))
17964 Result = DAG.getTargetConstant(Value, dl, TCVT);
17965 break;
17966 case 'M': // "M" is a constant that is greater than 31.
17967 if (Value > 31)
17968 Result = DAG.getTargetConstant(Value, dl, TCVT);
17969 break;
17970 case 'N': // "N" is a positive constant that is an exact power of two.
17971 if (Value > 0 && isPowerOf2_64(Value))
17972 Result = DAG.getTargetConstant(Value, dl, TCVT);
17973 break;
17974 case 'O': // "O" is the constant zero.
17975 if (Value == 0)
17976 Result = DAG.getTargetConstant(Value, dl, TCVT);
17977 break;
17978 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17979 if (isInt<16>(-Value))
17980 Result = DAG.getTargetConstant(Value, dl, TCVT);
17981 break;
17982 }
17983 break;
17984 }
17985 }
17986
17987 if (Result.getNode()) {
17988 Ops.push_back(Result);
17989 return;
17990 }
17991
17992 // Handle standard constraint letters.
17993 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17994}
17995
17996void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17997 SmallVectorImpl<SDValue> &Ops,
17998 SelectionDAG &DAG) const {
17999 if (I.getNumOperands() <= 1)
18000 return;
18001 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18002 return;
18003 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18004 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18005 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18006 return;
18007
18008 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18009 Ops.push_back(DAG.getMDNode(MDN));
18010}
18011
18012// isLegalAddressingMode - Return true if the addressing mode represented
18013// by AM is legal for this target, for a load/store of the specified type.
18014bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18015 const AddrMode &AM, Type *Ty,
18016 unsigned AS,
18017 Instruction *I) const {
18018 // The vector-type r+i form is supported since Power9 as the DQ form. We don't
18019 // check the DQ-form offset requirement (offset % 16 == 0) because, on PowerPC,
18020 // the imm form is preferred and the offset can be adjusted to use the imm form
18021 // later in the PPCLoopInstrFormPrep pass. Also, in LSR, one LSRUse uses its
18022 // min and max offsets to check for a legal addressing mode, so we should be a
18023 // little aggressive here to accommodate the other offsets of that LSRUse.
18024 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18025 return false;
18026
18027 // PPC allows a sign-extended 16-bit immediate field.
18028 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18029 return false;
18030
18031 // No global is ever allowed as a base.
18032 if (AM.BaseGV)
18033 return false;
18034
18035 // PPC only supports r+r:
18036 switch (AM.Scale) {
18037 case 0: // "r+i" or just "i", depending on HasBaseReg.
18038 break;
18039 case 1:
18040 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18041 return false;
18042 // Otherwise we have r+r or r+i.
18043 break;
18044 case 2:
18045 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18046 return false;
18047 // Allow 2*r as r+r.
18048 break;
18049 default:
18050 // No other scales are supported.
18051 return false;
18052 }
18053
18054 return true;
18055}
18056
18057SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18058 SelectionDAG &DAG) const {
18059 MachineFunction &MF = DAG.getMachineFunction();
18060 MachineFrameInfo &MFI = MF.getFrameInfo();
18061 MFI.setReturnAddressIsTaken(true);
18062
18063 SDLoc dl(Op);
18064 unsigned Depth = Op.getConstantOperandVal(0);
18065
18066 // Make sure the function does not optimize away the store of the RA to
18067 // the stack.
18068 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18069 FuncInfo->setLRStoreRequired();
18070 auto PtrVT = getPointerTy(MF.getDataLayout());
18071
18072 if (Depth > 0) {
18073 // The link register (return address) is saved in the caller's frame
18074 // not the callee's stack frame. So we must get the caller's frame
18075 // address and load the return address at the LR offset from there.
18076 SDValue FrameAddr =
18077 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18078 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
18079 SDValue Offset =
18080 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18081 Subtarget.getScalarIntVT());
18082 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18083 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18084 MachinePointerInfo());
18085 }
18086
18087 // Just load the return address off the stack.
18088 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18089 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18090 MachinePointerInfo());
18091}
18092
18093SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18094 SelectionDAG &DAG) const {
18095 SDLoc dl(Op);
18096 unsigned Depth = Op.getConstantOperandVal(0);
18097
18098 MachineFunction &MF = DAG.getMachineFunction();
18099 MachineFrameInfo &MFI = MF.getFrameInfo();
18100 MFI.setFrameAddressIsTaken(true);
18101
18102 EVT PtrVT = getPointerTy(MF.getDataLayout());
18103 bool isPPC64 = PtrVT == MVT::i64;
18104
18105 // Naked functions never have a frame pointer, and so we use r1. For all
18106 // other functions, this decision must be delayed until PEI.
18107 unsigned FrameReg;
18108 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18109 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18110 else
18111 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18112
18113 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18114 PtrVT);
18115 while (Depth--)
18116 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18117 FrameAddr, MachinePointerInfo());
18118 return FrameAddr;
18119}
18120
18121#define GET_REGISTER_MATCHER
18122#include "PPCGenAsmMatcher.inc"
18123
18124Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
18125 const MachineFunction &MF) const {
18126 bool IsPPC64 = Subtarget.isPPC64();
18127
18128 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18129 if (!Is64Bit && VT != LLT::scalar(32))
18130 report_fatal_error("Invalid register global variable type");
18131
18132 Register Reg = MatchRegisterName(RegName);
18133 if (!Reg)
18134 return Reg;
18135
18136 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18137 // Need followup investigation as to why.
18138 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18139 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18140 StringRef(RegName) + "\"."));
18141
18142 // Convert a GPR to the corresponding GP8R register for 64-bit.
18143 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18144 Reg = Reg.id() - PPC::R0 + PPC::X0;
18145
18146 return Reg;
18147}
18148
18149bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18150 // The 32-bit SVR4 ABI accesses everything as got-indirect.
18151 if (Subtarget.is32BitELFABI())
18152 return true;
18153
18154 // AIX accesses everything indirectly through the TOC, which is similar to
18155 // the GOT.
18156 if (Subtarget.isAIXABI())
18157 return true;
18158
18159 CodeModel::Model CModel = getTargetMachine().getCodeModel();
18160 // Under the small or large code model, module locals are accessed
18161 // indirectly by loading their address from the .toc/.got.
18162 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18163 return true;
18164
18165 // JumpTable and BlockAddress are accessed as got-indirect.
18166 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
18167 return true;
18168
18169 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
18170 return Subtarget.isGVIndirectSymbol(G->getGlobal());
18171
18172 return false;
18173}
18174
18175bool
18176PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18177 // The PowerPC target isn't yet aware of offsets.
18178 return false;
18179}
18180
18181bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18182 const CallInst &I,
18183 MachineFunction &MF,
18184 unsigned Intrinsic) const {
18185 switch (Intrinsic) {
18186 case Intrinsic::ppc_atomicrmw_xchg_i128:
18187 case Intrinsic::ppc_atomicrmw_add_i128:
18188 case Intrinsic::ppc_atomicrmw_sub_i128:
18189 case Intrinsic::ppc_atomicrmw_nand_i128:
18190 case Intrinsic::ppc_atomicrmw_and_i128:
18191 case Intrinsic::ppc_atomicrmw_or_i128:
18192 case Intrinsic::ppc_atomicrmw_xor_i128:
18193 case Intrinsic::ppc_cmpxchg_i128:
18194 Info.opc = ISD::INTRINSIC_W_CHAIN;
18195 Info.memVT = MVT::i128;
18196 Info.ptrVal = I.getArgOperand(0);
18197 Info.offset = 0;
18198 Info.align = Align(16);
18199 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
18200 MachineMemOperand::MOVolatile;
18201 return true;
18202 case Intrinsic::ppc_atomic_load_i128:
18203 Info.opc = ISD::INTRINSIC_W_CHAIN;
18204 Info.memVT = MVT::i128;
18205 Info.ptrVal = I.getArgOperand(0);
18206 Info.offset = 0;
18207 Info.align = Align(16);
18208 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18209 return true;
18210 case Intrinsic::ppc_atomic_store_i128:
18211 Info.opc = ISD::INTRINSIC_VOID;
18212 Info.memVT = MVT::i128;
18213 Info.ptrVal = I.getArgOperand(2);
18214 Info.offset = 0;
18215 Info.align = Align(16);
18216 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18217 return true;
18218 case Intrinsic::ppc_altivec_lvx:
18219 case Intrinsic::ppc_altivec_lvxl:
18220 case Intrinsic::ppc_altivec_lvebx:
18221 case Intrinsic::ppc_altivec_lvehx:
18222 case Intrinsic::ppc_altivec_lvewx:
18223 case Intrinsic::ppc_vsx_lxvd2x:
18224 case Intrinsic::ppc_vsx_lxvw4x:
18225 case Intrinsic::ppc_vsx_lxvd2x_be:
18226 case Intrinsic::ppc_vsx_lxvw4x_be:
18227 case Intrinsic::ppc_vsx_lxvl:
18228 case Intrinsic::ppc_vsx_lxvll: {
18229 EVT VT;
18230 switch (Intrinsic) {
18231 case Intrinsic::ppc_altivec_lvebx:
18232 VT = MVT::i8;
18233 break;
18234 case Intrinsic::ppc_altivec_lvehx:
18235 VT = MVT::i16;
18236 break;
18237 case Intrinsic::ppc_altivec_lvewx:
18238 VT = MVT::i32;
18239 break;
18240 case Intrinsic::ppc_vsx_lxvd2x:
18241 case Intrinsic::ppc_vsx_lxvd2x_be:
18242 VT = MVT::v2f64;
18243 break;
18244 default:
18245 VT = MVT::v4i32;
18246 break;
18247 }
18248
18249 Info.opc = ISD::INTRINSIC_W_CHAIN;
18250 Info.memVT = VT;
18251 Info.ptrVal = I.getArgOperand(0);
18252 Info.offset = -VT.getStoreSize()+1;
18253 Info.size = 2*VT.getStoreSize()-1;
18254 Info.align = Align(1);
18255 Info.flags = MachineMemOperand::MOLoad;
18256 return true;
18257 }
18258 case Intrinsic::ppc_altivec_stvx:
18259 case Intrinsic::ppc_altivec_stvxl:
18260 case Intrinsic::ppc_altivec_stvebx:
18261 case Intrinsic::ppc_altivec_stvehx:
18262 case Intrinsic::ppc_altivec_stvewx:
18263 case Intrinsic::ppc_vsx_stxvd2x:
18264 case Intrinsic::ppc_vsx_stxvw4x:
18265 case Intrinsic::ppc_vsx_stxvd2x_be:
18266 case Intrinsic::ppc_vsx_stxvw4x_be:
18267 case Intrinsic::ppc_vsx_stxvl:
18268 case Intrinsic::ppc_vsx_stxvll: {
18269 EVT VT;
18270 switch (Intrinsic) {
18271 case Intrinsic::ppc_altivec_stvebx:
18272 VT = MVT::i8;
18273 break;
18274 case Intrinsic::ppc_altivec_stvehx:
18275 VT = MVT::i16;
18276 break;
18277 case Intrinsic::ppc_altivec_stvewx:
18278 VT = MVT::i32;
18279 break;
18280 case Intrinsic::ppc_vsx_stxvd2x:
18281 case Intrinsic::ppc_vsx_stxvd2x_be:
18282 VT = MVT::v2f64;
18283 break;
18284 default:
18285 VT = MVT::v4i32;
18286 break;
18287 }
18288
18289 Info.opc = ISD::INTRINSIC_VOID;
18290 Info.memVT = VT;
18291 Info.ptrVal = I.getArgOperand(1);
18292 Info.offset = -VT.getStoreSize()+1;
18293 Info.size = 2*VT.getStoreSize()-1;
18294 Info.align = Align(1);
18295 Info.flags = MachineMemOperand::MOStore;
18296 return true;
18297 }
18298 case Intrinsic::ppc_stdcx:
18299 case Intrinsic::ppc_stwcx:
18300 case Intrinsic::ppc_sthcx:
18301 case Intrinsic::ppc_stbcx: {
18302 EVT VT;
18303 auto Alignment = Align(8);
18304 switch (Intrinsic) {
18305 case Intrinsic::ppc_stdcx:
18306 VT = MVT::i64;
18307 break;
18308 case Intrinsic::ppc_stwcx:
18309 VT = MVT::i32;
18310 Alignment = Align(4);
18311 break;
18312 case Intrinsic::ppc_sthcx:
18313 VT = MVT::i16;
18314 Alignment = Align(2);
18315 break;
18316 case Intrinsic::ppc_stbcx:
18317 VT = MVT::i8;
18318 Alignment = Align(1);
18319 break;
18320 }
18321 Info.opc = ISD::INTRINSIC_W_CHAIN;
18322 Info.memVT = VT;
18323 Info.ptrVal = I.getArgOperand(0);
18324 Info.offset = 0;
18325 Info.align = Alignment;
18326 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18327 return true;
18328 }
18329 default:
18330 break;
18331 }
18332
18333 return false;
18334}
18335
18336/// It returns EVT::Other if the type should be determined using generic
18337/// target-independent logic.
18338EVT PPCTargetLowering::getOptimalMemOpType(
18339 LLVMContext &Context, const MemOp &Op,
18340 const AttributeList &FuncAttributes) const {
18341 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18342 // We should use Altivec/VSX loads and stores when available. For unaligned
18343 // addresses, unaligned VSX loads are only fast starting with the P8.
18344 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18345 if (Op.isMemset() && Subtarget.hasVSX()) {
18346 uint64_t TailSize = Op.size() % 16;
18347 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
18348 // element if the vector element type matches the tail store type. For tail
18349 // size 3/4 the tail store is i32, so v4i32 cannot be used; use another legal type.
18350 if (TailSize > 2 && TailSize <= 4) {
18351 return MVT::v8i16;
18352 }
18353 return MVT::v4i32;
18354 }
18355 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18356 return MVT::v4i32;
18357 }
18358 }
18359
18360 if (Subtarget.isPPC64()) {
18361 return MVT::i64;
18362 }
18363
18364 return MVT::i32;
18365}
18366
18367/// Returns true if it is beneficial to convert a load of a constant
18368/// to just the constant itself.
18369bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18370 Type *Ty) const {
18371 assert(Ty->isIntegerTy());
18372
18373 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18374 return !(BitSize == 0 || BitSize > 64);
18375}
18376
18377bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
18378 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18379 return false;
18380 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18381 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18382 return NumBits1 == 64 && NumBits2 == 32;
18383}
18384
18386 if (!VT1.isInteger() || !VT2.isInteger())
18387 return false;
18388 unsigned NumBits1 = VT1.getSizeInBits();
18389 unsigned NumBits2 = VT2.getSizeInBits();
18390 return NumBits1 == 64 && NumBits2 == 32;
18391}
18392
18394 // Generally speaking, zexts are not free, but they are free when they can be
18395 // folded with other operations.
18396 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18397 EVT MemVT = LD->getMemoryVT();
18398 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18399 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18400 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18401 LD->getExtensionType() == ISD::ZEXTLOAD))
18402 return true;
18403 }
18404
18405 // FIXME: Add other cases...
18406 // - 32-bit shifts with a zext to i64
18407 // - zext after ctlz, bswap, etc.
18408 // - zext after and by a constant mask
18409
18410 return TargetLowering::isZExtFree(Val, VT2);
18411}
18412
18413bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18414 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18415 "invalid fpext types");
18416 // Extending to float128 is not free.
18417 if (DestVT == MVT::f128)
18418 return false;
18419 return true;
18420}
18421
18423 return isInt<16>(Imm) || isUInt<16>(Imm);
18424}
18425
18427 return isInt<16>(Imm) || isUInt<16>(Imm);
18428}
18429
18432 unsigned *Fast) const {
18434 return false;
18435
18436 // PowerPC supports unaligned memory access for simple non-vector types.
18437 // Although accessing unaligned addresses is not as efficient as accessing
18438 // aligned addresses, it is generally more efficient than manual expansion,
18439 // and generally only traps for software emulation when crossing page
18440 // boundaries.
18441
18442 if (!VT.isSimple())
18443 return false;
18444
18445 if (VT.isFloatingPoint() && !VT.isVector() &&
18446 !Subtarget.allowsUnalignedFPAccess())
18447 return false;
18448
18449 if (VT.getSimpleVT().isVector()) {
18450 if (Subtarget.hasVSX()) {
18451 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18452 VT != MVT::v4f32 && VT != MVT::v4i32)
18453 return false;
18454 } else {
18455 return false;
18456 }
18457 }
18458
18459 if (VT == MVT::ppcf128)
18460 return false;
18461
18462 if (Fast)
18463 *Fast = 1;
18464
18465 return true;
18466}
18467
18469 SDValue C) const {
18470 // Check integral scalar types.
18471 if (!VT.isScalarInteger())
18472 return false;
18473 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18474 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18475 return false;
18476    // This transformation will generate >= 2 operations, but the following
18477    // cases will generate <= 2 instructions during ISEL, so exclude them:
18478    // 1. If the constant multiplier fits in 16 bits, it can be handled by a
18479    //    single HW instruction, i.e. MULLI.
18480    // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
18481    //    only one extra shift instruction is needed, i.e. MULLI and RLDICR.
18482 int64_t Imm = ConstNode->getSExtValue();
18483 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18484 Imm >>= Shift;
18485 if (isInt<16>(Imm))
18486 return false;
18487 uint64_t UImm = static_cast<uint64_t>(Imm);
18488 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18489 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18490 return true;
18491 }
18492 return false;
18493}
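// A minimal standalone sketch (plain C++, no LLVM dependencies) of the
// constant-multiplier screening above; the function names are illustrative
// assumptions, and only the checks are mirrored, not the DAG rewrite itself.
#include <cstdint>

static inline bool isPow2U64(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }
static inline bool fitsSInt16(int64_t V) { return V >= -32768 && V <= 32767; }

// Returns true when (mul x, Imm) is worth decomposing into shifts and adds:
// after stripping trailing zeros, Imm must not fit MULLI's 16-bit immediate
// and must be one away from a (possibly negated) power of two.
bool worthDecomposingMulByConstant(int64_t Imm) {
  if (Imm == 0)
    return false;
  while ((Imm & 1) == 0)
    Imm >>= 1; // same effect as countr_zero + shift in the code above
  if (fitsSInt16(Imm))
    return false; // MULLI (plus possibly RLDICR) already handles this
  uint64_t UImm = static_cast<uint64_t>(Imm);
  return isPow2U64(UImm + 1) || isPow2U64(UImm - 1) ||
         isPow2U64(1 - UImm) || isPow2U64(-1 - UImm);
}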
18494
18500
18502 Type *Ty) const {
18503 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18504 return false;
18505 switch (Ty->getScalarType()->getTypeID()) {
18506 case Type::FloatTyID:
18507 case Type::DoubleTyID:
18508 return true;
18509 case Type::FP128TyID:
18510 return Subtarget.hasP9Vector();
18511 default:
18512 return false;
18513 }
18514}
18515
18516// FIXME: add more patterns which are not profitable to hoist.
18518 if (!I->hasOneUse())
18519 return true;
18520
18521 Instruction *User = I->user_back();
18522 assert(User && "A single use instruction with no uses.");
18523
18524 switch (I->getOpcode()) {
18525 case Instruction::FMul: {
18526 // Don't break FMA, PowerPC prefers FMA.
18527 if (User->getOpcode() != Instruction::FSub &&
18528 User->getOpcode() != Instruction::FAdd)
18529 return true;
18530
18532 const Function *F = I->getFunction();
18533 const DataLayout &DL = F->getDataLayout();
18534 Type *Ty = User->getOperand(0)->getType();
18535
18536 return !(
18539 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
18540 }
18541 case Instruction::Load: {
18542    // Don't break the "store (load float*)" pattern; it will be combined
18543    // to "store (load int32)" in a later InstCombine pass. See function
18544    // combineLoadToOperationType. On PowerPC, loading a floating-point value
18545    // takes more cycles than loading a 32-bit integer.
18546 LoadInst *LI = cast<LoadInst>(I);
18547 // For the loads that combineLoadToOperationType does nothing, like
18548 // ordered load, it should be profitable to hoist them.
18549 // For swifterror load, it can only be used for pointer to pointer type, so
18550 // later type check should get rid of this case.
18551 if (!LI->isUnordered())
18552 return true;
18553
18554 if (User->getOpcode() != Instruction::Store)
18555 return true;
18556
18557 if (I->getType()->getTypeID() != Type::FloatTyID)
18558 return true;
18559
18560 return false;
18561 }
18562 default:
18563 return true;
18564 }
18565 return true;
18566}
18567
18568const MCPhysReg *
18570 // LR is a callee-save register, but we must treat it as clobbered by any call
18571 // site. Hence we include LR in the scratch registers, which are in turn added
18572 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18573 // to CTR, which is used by any indirect call.
18574 static const MCPhysReg ScratchRegs[] = {
18575 PPC::X12, PPC::LR8, PPC::CTR8, 0
18576 };
18577
18578 return ScratchRegs;
18579}
18580
18582 const Constant *PersonalityFn) const {
18583 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18584}
18585
18587 const Constant *PersonalityFn) const {
18588 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18589}
18590
18591bool
18593 EVT VT , unsigned DefinedValues) const {
18594 if (VT == MVT::v2i64)
18595 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18596
18597 if (Subtarget.hasVSX())
18598 return true;
18599
18601}
18602
18604 if (DisableILPPref || Subtarget.enableMachineScheduler())
18606
18607 return Sched::ILP;
18608}
18609
18610// Create a fast isel object.
18611FastISel *
18613 const TargetLibraryInfo *LibInfo) const {
18614 return PPC::createFastISel(FuncInfo, LibInfo);
18615}
18616
18617// 'Inverted' means the FMA opcode after negating one multiplicand.
18618// For example, (fma -a b c) = (fnmsub a b c)
18619static unsigned invertFMAOpcode(unsigned Opc) {
18620 switch (Opc) {
18621 default:
18622 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18623 case ISD::FMA:
18624 return PPCISD::FNMSUB;
18625 case PPCISD::FNMSUB:
18626 return ISD::FMA;
18627 }
18628}
18629
18631 bool LegalOps, bool OptForSize,
18633 unsigned Depth) const {
18635 return SDValue();
18636
18637 unsigned Opc = Op.getOpcode();
18638 EVT VT = Op.getValueType();
18639 SDNodeFlags Flags = Op.getNode()->getFlags();
18640
18641 switch (Opc) {
18642 case PPCISD::FNMSUB:
18643 if (!Op.hasOneUse() || !isTypeLegal(VT))
18644 break;
18645
18647 SDValue N0 = Op.getOperand(0);
18648 SDValue N1 = Op.getOperand(1);
18649 SDValue N2 = Op.getOperand(2);
18650 SDLoc Loc(Op);
18651
18653 SDValue NegN2 =
18654 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18655
18656 if (!NegN2)
18657 return SDValue();
18658
18659 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18660 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18661      // These transformations may change the sign of zero. For example,
18662      // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18663 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18664 // Try and choose the cheaper one to negate.
18666 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18667 N0Cost, Depth + 1);
18668
18670 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18671 N1Cost, Depth + 1);
18672
18673 if (NegN0 && N0Cost <= N1Cost) {
18674 Cost = std::min(N0Cost, N2Cost);
18675 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18676 } else if (NegN1) {
18677 Cost = std::min(N1Cost, N2Cost);
18678 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18679 }
18680 }
18681
18682 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18683 if (isOperationLegal(ISD::FMA, VT)) {
18684 Cost = N2Cost;
18685 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18686 }
18687
18688 break;
18689 }
18690
18691 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18692 Cost, Depth);
18693}
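// A minimal standalone sketch (plain C++, no LLVM dependencies) of the
// algebra behind the FNMSUB negation above, defining fnmsub(a, b, c) as
// -(a*b - c) via std::fma; the names are illustrative assumptions and the
// check ignores NaN inputs.
#include <cassert>
#include <cmath>

static double fnmsub(double A, double B, double C) {
  return -std::fma(A, B, -C); // -(a*b - c)
}

void checkFnmsubNegation(double A, double B, double C) {
  // (fneg (fnmsub a b c)) == (fma a b (fneg c)), modulo the sign-of-zero
  // caveat described in the comments above.
  assert(-fnmsub(A, B, C) == std::fma(A, B, -C));
}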
18694
18695// Override to enable LOAD_STACK_GUARD lowering on Linux.
18697 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18698 return true;
18700}
18701
18703 bool ForCodeSize) const {
18704 if (!VT.isSimple() || !Subtarget.hasVSX())
18705 return false;
18706
18707 switch(VT.getSimpleVT().SimpleTy) {
18708 default:
18709    // For FP types that are currently not supported by the PPC backend,
18710    // return false. Examples: f16, f80.
18711 return false;
18712 case MVT::f32:
18713 case MVT::f64: {
18714 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18715      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18716 return true;
18717 }
18718 bool IsExact;
18719 APSInt IntResult(16, false);
18720 // The rounding mode doesn't really matter because we only care about floats
18721 // that can be converted to integers exactly.
18722 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18723 // For exact values in the range [-16, 15] we can materialize the float.
18724 if (IsExact && IntResult <= 15 && IntResult >= -16)
18725 return true;
18726 return Imm.isZero();
18727 }
18728 case MVT::ppcf128:
18729 return Imm.isPosZero();
18730 }
18731}
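// A minimal standalone sketch (plain C++, no APFloat) of the f32/f64
// immediate check above for subtargets without prefix instructions; the
// function name is an illustrative assumption.
#include <cmath>

// A scalar FP immediate is considered cheap when it is +/-0.0 or an exact
// integer in [-16, 15] (a 5-bit signed splat immediate).
bool isCheapScalarFPImm(double Imm) {
  if (Imm == 0.0)
    return true; // matches both +0.0 and -0.0
  bool IsExactInteger = (std::trunc(Imm) == Imm);
  return IsExactInteger && Imm >= -16.0 && Imm <= 15.0;
}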
18732
18733// For vector shift operation op, fold
18734// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
18736 SelectionDAG &DAG) {
18737 SDValue N0 = N->getOperand(0);
18738 SDValue N1 = N->getOperand(1);
18739 EVT VT = N0.getValueType();
18740 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18741 unsigned Opcode = N->getOpcode();
18742 unsigned TargetOpcode;
18743
18744 switch (Opcode) {
18745 default:
18746 llvm_unreachable("Unexpected shift operation");
18747 case ISD::SHL:
18749 break;
18750 case ISD::SRL:
18752 break;
18753 case ISD::SRA:
18755 break;
18756 }
18757
18758 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
18759 N1->getOpcode() == ISD::AND)
18760 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
18761 if (Mask->getZExtValue() == OpSizeInBits - 1)
18762 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
18763
18764 return SDValue();
18765}
18766
18767SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
18768 DAGCombinerInfo &DCI) const {
18769 EVT VT = N->getValueType(0);
18770 assert(VT.isVector() && "Vector type expected.");
18771
18772 unsigned Opc = N->getOpcode();
18773 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
18774 "Unexpected opcode.");
18775
18776 if (!isOperationLegal(Opc, VT))
18777 return SDValue();
18778
18779 EVT EltTy = VT.getScalarType();
18780 unsigned EltBits = EltTy.getSizeInBits();
18781 if (EltTy != MVT::i64 && EltTy != MVT::i32)
18782 return SDValue();
18783
18784 SDValue N1 = N->getOperand(1);
18785 uint64_t SplatBits = 0;
18786 bool AddSplatCase = false;
18787 unsigned OpcN1 = N1.getOpcode();
18788 if (OpcN1 == PPCISD::VADD_SPLAT &&
18790 AddSplatCase = true;
18791 SplatBits = N1.getConstantOperandVal(0);
18792 }
18793
18794 if (!AddSplatCase) {
18795 if (OpcN1 != ISD::BUILD_VECTOR)
18796 return SDValue();
18797
18798 unsigned SplatBitSize;
18799 bool HasAnyUndefs;
18800 APInt APSplatBits, APSplatUndef;
18801 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
18802 bool BVNIsConstantSplat =
18803 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
18804 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
18805 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
18806 return SDValue();
18807 SplatBits = APSplatBits.getZExtValue();
18808 }
18809
18810 SDLoc DL(N);
18811 SDValue N0 = N->getOperand(0);
18812  // PPC vector shifts by word/doubleword look at only the low 5/6 bits of
18813  // the shift amount, which means the max value is 31/63. A shift amount of
18814  // all 1s will be truncated to 31/63, which is useful because vspltiw is
18815  // limited to the -16 to 15 range.
18816 if (SplatBits == (EltBits - 1)) {
18817 unsigned NewOpc;
18818 switch (Opc) {
18819 case ISD::SHL:
18820 NewOpc = PPCISD::SHL;
18821 break;
18822 case ISD::SRL:
18823 NewOpc = PPCISD::SRL;
18824 break;
18825 case ISD::SRA:
18826 NewOpc = PPCISD::SRA;
18827 break;
18828 }
18829 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
18830 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
18831 }
18832
18833 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
18834 return SDValue();
18835
18836 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
18837 // before the BUILD_VECTOR is replaced by a load.
18838 if (EltTy != MVT::i64 || SplatBits != 1)
18839 return SDValue();
18840
18841 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
18842}
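// A minimal standalone sketch (plain C++, no LLVM dependencies) of the facts
// the vector-shift combine above relies on. The shift-amount masking (only
// the low 5/6 bits of each element are used, so an all-ones amount acts like
// 31/63) is a hardware property and is only described here; the shl-by-1
// rewrite is checked directly. The function name is an illustrative assumption.
#include <cassert>
#include <cstdint>

void checkShiftByOneIsAdd(uint64_t Y) {
  // The i64 splat-of-1 case is rewritten to an ADD because x << 1 == x + x.
  assert((Y << 1) == Y + Y);
}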
18843
18844SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18845 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18846 return Value;
18847
18848 if (N->getValueType(0).isVector())
18849 return combineVectorShift(N, DCI);
18850
18851 SDValue N0 = N->getOperand(0);
18852 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18853 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18854 N0.getOpcode() != ISD::SIGN_EXTEND ||
18855 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
18856 N->getValueType(0) != MVT::i64)
18857 return SDValue();
18858
18859 // We can't save an operation here if the value is already extended, and
18860 // the existing shift is easier to combine.
18861 SDValue ExtsSrc = N0.getOperand(0);
18862 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18863 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
18864 return SDValue();
18865
18866 SDLoc DL(N0);
18867 SDValue ShiftBy = SDValue(CN1, 0);
18868 // We want the shift amount to be i32 on the extswli, but the shift could
18869 // have an i64.
18870 if (ShiftBy.getValueType() == MVT::i64)
18871 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
18872
18873 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
18874 ShiftBy);
18875}
18876
18877SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18878 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18879 return Value;
18880
18881 if (N->getValueType(0).isVector())
18882 return combineVectorShift(N, DCI);
18883
18884 return SDValue();
18885}
18886
18887SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18888 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18889 return Value;
18890
18891 if (N->getValueType(0).isVector())
18892 return combineVectorShift(N, DCI);
18893
18894 return SDValue();
18895}
18896
18897// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18898// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
18899// When C is zero, the equation (addi Z, -C) can be simplified to Z
18900// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
18902 const PPCSubtarget &Subtarget) {
18903 if (!Subtarget.isPPC64())
18904 return SDValue();
18905
18906 SDValue LHS = N->getOperand(0);
18907 SDValue RHS = N->getOperand(1);
18908
18909 auto isZextOfCompareWithConstant = [](SDValue Op) {
18910 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18911 Op.getValueType() != MVT::i64)
18912 return false;
18913
18914 SDValue Cmp = Op.getOperand(0);
18915 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18916 Cmp.getOperand(0).getValueType() != MVT::i64)
18917 return false;
18918
18919 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
18920 int64_t NegConstant = 0 - Constant->getSExtValue();
18921      // Due to the limitations of the addi instruction,
18922      // -C is required to be in the range [-32768, 32767].
18923 return isInt<16>(NegConstant);
18924 }
18925
18926 return false;
18927 };
18928
18929 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18930 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18931
18932 // If there is a pattern, canonicalize a zext operand to the RHS.
18933 if (LHSHasPattern && !RHSHasPattern)
18934 std::swap(LHS, RHS);
18935 else if (!LHSHasPattern && !RHSHasPattern)
18936 return SDValue();
18937
18938 SDLoc DL(N);
18939 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
18940 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
18941 SDValue Cmp = RHS.getOperand(0);
18942 SDValue Z = Cmp.getOperand(0);
18943 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
18944 int64_t NegConstant = 0 - Constant->getSExtValue();
18945
18946 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
18947 default: break;
18948 case ISD::SETNE: {
18949 // when C == 0
18950 // --> addze X, (addic Z, -1).carry
18951 // /
18952 // add X, (zext(setne Z, C))--
18953 // \ when -32768 <= -C <= 32767 && C != 0
18954 // --> addze X, (addic (addi Z, -C), -1).carry
18955 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18956 DAG.getConstant(NegConstant, DL, MVT::i64));
18957 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18958 SDValue Addc =
18959 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
18960 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
18961 DAG.getConstant(0, DL, CarryType));
18962 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
18963 DAG.getConstant(0, DL, MVT::i64),
18964 SDValue(Addc.getNode(), 1));
18965 }
18966 case ISD::SETEQ: {
18967 // when C == 0
18968 // --> addze X, (subfic Z, 0).carry
18969 // /
18970 // add X, (zext(sete Z, C))--
18971 // \ when -32768 <= -C <= 32767 && C != 0
18972 // --> addze X, (subfic (addi Z, -C), 0).carry
18973 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18974 DAG.getConstant(NegConstant, DL, MVT::i64));
18975 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18976 SDValue Subc =
18977 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
18978 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
18979 DAG.getConstant(0, DL, CarryType));
18980 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
18981 DAG.getConstant(1UL, DL, CarryType));
18982 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
18983 DAG.getConstant(0, DL, MVT::i64), Invert);
18984 }
18985 }
18986
18987 return SDValue();
18988}
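// A minimal standalone sketch (plain C++, no LLVM dependencies) of the carry
// identities behind the addze rewrite above; the function name is an
// illustrative assumption.
#include <cassert>
#include <cstdint>

void checkAddzeIdentities(uint64_t X, uint64_t Z, uint64_t C) {
  uint64_t A = Z - C; // the (addi Z, -C) part; A == Z when C == 0
  // addic A, -1 sets CA exactly when A != 0 (adding all-ones carries out
  // unless A is zero), which is the zext(setne Z, C) value.
  uint64_t CarryNE = (A != 0) ? 1 : 0;
  // subfic A, 0 sets CA exactly when A == 0 (0 - A borrows unless A is zero),
  // which is the zext(sete Z, C) value.
  uint64_t CarryEQ = (A == 0) ? 1 : 0;
  assert(X + (uint64_t)(Z != C) == X + CarryNE);
  assert(X + (uint64_t)(Z == C) == X + CarryEQ);
}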
18989
18990// Transform
18991// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18992// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18993// In this case both C1 and C2 must be known constants.
18994// C1+C2 must fit into a 34 bit signed integer.
18996 const PPCSubtarget &Subtarget) {
18997 if (!Subtarget.isUsingPCRelativeCalls())
18998 return SDValue();
18999
19000 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19001 // If we find that node try to cast the Global Address and the Constant.
19002 SDValue LHS = N->getOperand(0);
19003 SDValue RHS = N->getOperand(1);
19004
19005 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19006 std::swap(LHS, RHS);
19007
19008 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19009 return SDValue();
19010
19011 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19014
19015 // Check that both casts succeeded.
19016 if (!GSDN || !ConstNode)
19017 return SDValue();
19018
19019 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19020 SDLoc DL(GSDN);
19021
19022 // The signed int offset needs to fit in 34 bits.
19023 if (!isInt<34>(NewOffset))
19024 return SDValue();
19025
19026 // The new global address is a copy of the old global address except
19027 // that it has the updated Offset.
19028 SDValue GA =
19029 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19030 NewOffset, GSDN->getTargetFlags());
19031 SDValue MatPCRel =
19032 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19033 return MatPCRel;
19034}
19035
19036SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19037 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19038 return Value;
19039
19040 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19041 return Value;
19042
19043 return SDValue();
19044}
19045
19046// Detect TRUNCATE operations on bitcasts of float128 values.
19047// What we are looking for here is the situation where we extract a subset
19048// of bits from a 128-bit float.
19049// This can take two forms:
19050// 1) BITCAST of f128 feeding TRUNCATE
19051// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19052// This is required because we do not have a legal i128 type, and so we
19053// want to avoid having to store the f128 and then reload part
19054// of it.
19055SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19056 DAGCombinerInfo &DCI) const {
19057 // If we are using CRBits then try that first.
19058 if (Subtarget.useCRBits()) {
19059 // Check if CRBits did anything and return that if it did.
19060 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19061 return CRTruncValue;
19062 }
19063
19064 SDLoc dl(N);
19065 SDValue Op0 = N->getOperand(0);
19066
19067 // Looking for a truncate of i128 to i64.
19068 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19069 return SDValue();
19070
19071 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19072
19073 // SRL feeding TRUNCATE.
19074 if (Op0.getOpcode() == ISD::SRL) {
19075 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19076 // The right shift has to be by 64 bits.
19077 if (!ConstNode || ConstNode->getZExtValue() != 64)
19078 return SDValue();
19079
19080 // Switch the element number to extract.
19081 EltToExtract = EltToExtract ? 0 : 1;
19082 // Update Op0 past the SRL.
19083 Op0 = Op0.getOperand(0);
19084 }
19085
19086 // BITCAST feeding a TRUNCATE possibly via SRL.
19087 if (Op0.getOpcode() == ISD::BITCAST &&
19088 Op0.getValueType() == MVT::i128 &&
19089 Op0.getOperand(0).getValueType() == MVT::f128) {
19090 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19091 return DCI.DAG.getNode(
19092 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19093 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19094 }
19095 return SDValue();
19096}
19097
19098SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19099 SelectionDAG &DAG = DCI.DAG;
19100
19101 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19102 if (!ConstOpOrElement)
19103 return SDValue();
19104
19105  // An imul is usually smaller than the alternative sequence for a legal type.
19107 isOperationLegal(ISD::MUL, N->getValueType(0)))
19108 return SDValue();
19109
19110 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19111 switch (this->Subtarget.getCPUDirective()) {
19112 default:
19113 // TODO: enhance the condition for subtarget before pwr8
19114 return false;
19115 case PPC::DIR_PWR8:
19116 // type mul add shl
19117 // scalar 4 1 1
19118 // vector 7 2 2
19119 return true;
19120 case PPC::DIR_PWR9:
19121 case PPC::DIR_PWR10:
19122 case PPC::DIR_PWR11:
19124 // type mul add shl
19125 // scalar 5 2 2
19126 // vector 7 2 2
19127
19128      // The cycle ratios of the related operations are shown in the table above.
19129      // Because mul is 5 (scalar) / 7 (vector) and add/sub/shl are all 2 for
19130      // both scalar and vector types, the 2-instruction patterns (add/sub + shl)
19131      // cost 4 and are always profitable; but for the 3-instruction pattern
19132      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl cost 6.
19133      // So we should only do it for vector types.
19134 return IsAddOne && IsNeg ? VT.isVector() : true;
19135 }
19136 };
19137
19138 EVT VT = N->getValueType(0);
19139 SDLoc DL(N);
19140
19141 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19142 bool IsNeg = MulAmt.isNegative();
19143 APInt MulAmtAbs = MulAmt.abs();
19144
19145 if ((MulAmtAbs - 1).isPowerOf2()) {
19146 // (mul x, 2^N + 1) => (add (shl x, N), x)
19147 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19148
19149 if (!IsProfitable(IsNeg, true, VT))
19150 return SDValue();
19151
19152 SDValue Op0 = N->getOperand(0);
19153 SDValue Op1 =
19154 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19155 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19156 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19157
19158 if (!IsNeg)
19159 return Res;
19160
19161 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19162 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19163 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19164 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19165
19166 if (!IsProfitable(IsNeg, false, VT))
19167 return SDValue();
19168
19169 SDValue Op0 = N->getOperand(0);
19170 SDValue Op1 =
19171 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19172 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19173
19174 if (!IsNeg)
19175 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19176 else
19177 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19178
19179 } else {
19180 return SDValue();
19181 }
19182}
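// A minimal standalone sketch (plain C++, no LLVM dependencies) checking the
// strength-reduction identities used above, with unsigned wrap-around
// arithmetic standing in for the DAG's two's-complement behavior; the
// function name is an illustrative assumption.
#include <cassert>
#include <cstdint>

void checkMulDecomposition(uint64_t X, unsigned N) {
  assert(N < 64);
  uint64_t P = uint64_t(1) << N;
  assert(X * (P + 1) == (X << N) + X);             // (mul x, 2^N + 1)
  assert(X * (P - 1) == (X << N) - X);             // (mul x, 2^N - 1)
  assert(X * (0 - (P + 1)) == 0 - ((X << N) + X)); // (mul x, -(2^N + 1))
  assert(X * (0 - (P - 1)) == X - (X << N));       // (mul x, -(2^N - 1))
}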
19183
19184// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19185// in combiner since we need to check SD flags and other subtarget features.
19186SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19187 DAGCombinerInfo &DCI) const {
19188 SDValue N0 = N->getOperand(0);
19189 SDValue N1 = N->getOperand(1);
19190 SDValue N2 = N->getOperand(2);
19191 SDNodeFlags Flags = N->getFlags();
19192 EVT VT = N->getValueType(0);
19193 SelectionDAG &DAG = DCI.DAG;
19194 const TargetOptions &Options = getTargetMachine().Options;
19195 unsigned Opc = N->getOpcode();
19197 bool LegalOps = !DCI.isBeforeLegalizeOps();
19198 SDLoc Loc(N);
19199
19200 if (!isOperationLegal(ISD::FMA, VT))
19201 return SDValue();
19202
19203 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19204 // since (fnmsub a b c)=-0 while c-ab=+0.
19205 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19206 return SDValue();
19207
19208 // (fma (fneg a) b c) => (fnmsub a b c)
19209 // (fnmsub (fneg a) b c) => (fma a b c)
19210 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19211 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19212
19213 // (fma a (fneg b) c) => (fnmsub a b c)
19214 // (fnmsub a (fneg b) c) => (fma a b c)
19215 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19216 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19217
19218 return SDValue();
19219}
19220
19221bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19222  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
19223 if (!Subtarget.is64BitELFABI())
19224 return false;
19225
19226 // If not a tail call then no need to proceed.
19227 if (!CI->isTailCall())
19228 return false;
19229
19230 // If sibling calls have been disabled and tail-calls aren't guaranteed
19231 // there is no reason to duplicate.
19232 auto &TM = getTargetMachine();
19233 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19234 return false;
19235
19236 // Can't tail call a function called indirectly, or if it has variadic args.
19237 const Function *Callee = CI->getCalledFunction();
19238 if (!Callee || Callee->isVarArg())
19239 return false;
19240
19241 // Make sure the callee and caller calling conventions are eligible for tco.
19242 const Function *Caller = CI->getParent()->getParent();
19243 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19244 CI->getCallingConv()))
19245 return false;
19246
19247 // If the function is local then we have a good chance at tail-calling it
19248 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19249}
19250
19251bool PPCTargetLowering::
19252isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19253 const Value *Mask = AndI.getOperand(1);
19254 // If the mask is suitable for andi. or andis. we should sink the and.
19255 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19256 // Can't handle constants wider than 64-bits.
19257 if (CI->getBitWidth() > 64)
19258 return false;
19259 int64_t ConstVal = CI->getZExtValue();
19260 return isUInt<16>(ConstVal) ||
19261 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19262 }
19263
19264 // For non-constant masks, we can always use the record-form and.
19265 return true;
19266}
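// A minimal standalone sketch (plain C++, no LLVM dependencies) of the mask
// test above: a constant AND mask is cheap when it fits the 16-bit unsigned
// immediate of andi. (low halfword) or of andis. (high halfword only, with
// the low halfword clear). The function name is an illustrative assumption.
#include <cstdint>

bool maskFitsRecordFormAndImmediate(uint64_t Mask) {
  bool FitsAndi = Mask <= 0xFFFFu;
  bool FitsAndis = (Mask & 0xFFFFu) == 0 && (Mask >> 16) <= 0xFFFFu;
  return FitsAndi || FitsAndis;
}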
19267
19268/// getAddrModeForFlags - Based on the set of address flags, select the most
19269/// optimal instruction format to match by.
19270PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19271 // This is not a node we should be handling here.
19272 if (Flags == PPC::MOF_None)
19273 return PPC::AM_None;
19274 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19275 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19276 if ((Flags & FlagSet) == FlagSet)
19277 return PPC::AM_DForm;
19278 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19279 if ((Flags & FlagSet) == FlagSet)
19280 return PPC::AM_DSForm;
19281 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19282 if ((Flags & FlagSet) == FlagSet)
19283 return PPC::AM_DQForm;
19284 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19285 if ((Flags & FlagSet) == FlagSet)
19286 return PPC::AM_PrefixDForm;
19287 // If no other forms are selected, return an X-Form as it is the most
19288 // general addressing mode.
19289 return PPC::AM_XForm;
19290}
19291
19292/// Set alignment flags based on whether or not the Frame Index is aligned.
19293/// Utilized when computing flags for address computation when selecting
19294/// load and store instructions.
19295static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19296 SelectionDAG &DAG) {
19297 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19298 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19299 if (!FI)
19300 return;
19302 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19303 // If this is (add $FI, $S16Imm), the alignment flags are already set
19304 // based on the immediate. We just need to clear the alignment flags
19305 // if the FI alignment is weaker.
19306 if ((FrameIndexAlign % 4) != 0)
19307 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19308 if ((FrameIndexAlign % 16) != 0)
19309 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19310 // If the address is a plain FrameIndex, set alignment flags based on
19311 // FI alignment.
19312 if (!IsAdd) {
19313 if ((FrameIndexAlign % 4) == 0)
19314 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19315 if ((FrameIndexAlign % 16) == 0)
19316 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19317 }
19318}
19319
19320/// Given a node, compute flags that are used for address computation when
19321/// selecting load and store instructions. The flags computed are stored in
19322/// FlagSet. This function takes into account whether the node is a constant,
19323/// an ADD or OR, or neither, and computes the address flags accordingly.
19324static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19325 SelectionDAG &DAG) {
19326 // Set the alignment flags for the node depending on if the node is
19327 // 4-byte or 16-byte aligned.
19328 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19329 if ((Imm & 0x3) == 0)
19330 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19331 if ((Imm & 0xf) == 0)
19332 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19333 };
19334
19336 // All 32-bit constants can be computed as LIS + Disp.
19337 const APInt &ConstImm = CN->getAPIntValue();
19338 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19339 FlagSet |= PPC::MOF_AddrIsSImm32;
19340 SetAlignFlagsForImm(ConstImm.getZExtValue());
19341 setAlignFlagsForFI(N, FlagSet, DAG);
19342 }
19343 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19344 FlagSet |= PPC::MOF_RPlusSImm34;
19345 else // Let constant materialization handle large constants.
19346 FlagSet |= PPC::MOF_NotAddNorCst;
19347 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19348 // This address can be represented as an addition of:
19349 // - Register + Imm16 (possibly a multiple of 4/16)
19350 // - Register + Imm34
19351 // - Register + PPCISD::Lo
19352 // - Register + Register
19353 // In any case, we won't have to match this as Base + Zero.
19354 SDValue RHS = N.getOperand(1);
19356 const APInt &ConstImm = CN->getAPIntValue();
19357 if (ConstImm.isSignedIntN(16)) {
19358 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19359 SetAlignFlagsForImm(ConstImm.getZExtValue());
19360 setAlignFlagsForFI(N, FlagSet, DAG);
19361 }
19362 if (ConstImm.isSignedIntN(34))
19363 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19364 else
19365 FlagSet |= PPC::MOF_RPlusR; // Register.
19366 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19367 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19368 else
19369 FlagSet |= PPC::MOF_RPlusR;
19370 } else { // The address computation is not a constant or an addition.
19371 setAlignFlagsForFI(N, FlagSet, DAG);
19372 FlagSet |= PPC::MOF_NotAddNorCst;
19373 }
19374}
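// A minimal standalone sketch (plain C++, no LLVM dependencies) of the
// immediate alignment classification above; the flag names here are
// illustrative stand-ins for the real PPC::MOF_* enumerators.
#include <cstdint>

enum : unsigned { ImmMult4 = 1u << 0, ImmMult16 = 1u << 1 };

// DS-Form displacements must be a multiple of 4 and DQ-Form displacements a
// multiple of 16, so both properties of the immediate are recorded as flags.
unsigned alignFlagsForImmediate(uint64_t Imm) {
  unsigned Flags = 0;
  if ((Imm & 0x3) == 0)
    Flags |= ImmMult4;
  if ((Imm & 0xF) == 0)
    Flags |= ImmMult16;
  return Flags;
}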
19375
19383
19384/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
19385/// the address flags of the load/store instruction that is to be matched.
19386unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19387 SelectionDAG &DAG) const {
19388 unsigned FlagSet = PPC::MOF_None;
19389
19390 // Compute subtarget flags.
19391 if (!Subtarget.hasP9Vector())
19392 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19393 else
19394 FlagSet |= PPC::MOF_SubtargetP9;
19395
19396 if (Subtarget.hasPrefixInstrs())
19397 FlagSet |= PPC::MOF_SubtargetP10;
19398
19399 if (Subtarget.hasSPE())
19400 FlagSet |= PPC::MOF_SubtargetSPE;
19401
19402 // Check if we have a PCRel node and return early.
19403 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19404 return FlagSet;
19405
19406 // If the node is the paired load/store intrinsics, compute flags for
19407 // address computation and return early.
19408 unsigned ParentOp = Parent->getOpcode();
19409 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19410 (ParentOp == ISD::INTRINSIC_VOID))) {
19411 unsigned ID = Parent->getConstantOperandVal(1);
19412 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19413 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19414 ? Parent->getOperand(2)
19415 : Parent->getOperand(3);
19416 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19417 FlagSet |= PPC::MOF_Vector;
19418 return FlagSet;
19419 }
19420 }
19421
19422 // Mark this as something we don't want to handle here if it is atomic
19423 // or pre-increment instruction.
19424 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19425 if (LSB->isIndexed())
19426 return PPC::MOF_None;
19427
19428 // Compute in-memory type flags. This is based on if there are scalars,
19429 // floats or vectors.
19430 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19431 assert(MN && "Parent should be a MemSDNode!");
19432 EVT MemVT = MN->getMemoryVT();
19433 unsigned Size = MemVT.getSizeInBits();
19434 if (MemVT.isScalarInteger()) {
19435 assert(Size <= 128 &&
19436 "Not expecting scalar integers larger than 16 bytes!");
19437 if (Size < 32)
19438 FlagSet |= PPC::MOF_SubWordInt;
19439 else if (Size == 32)
19440 FlagSet |= PPC::MOF_WordInt;
19441 else
19442 FlagSet |= PPC::MOF_DoubleWordInt;
19443 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19444 if (Size == 128)
19445 FlagSet |= PPC::MOF_Vector;
19446 else if (Size == 256) {
19447 assert(Subtarget.pairedVectorMemops() &&
19448 "256-bit vectors are only available when paired vector memops is "
19449 "enabled!");
19450 FlagSet |= PPC::MOF_Vector;
19451 } else
19452 llvm_unreachable("Not expecting illegal vectors!");
19453 } else { // Floating point type: can be scalar, f128 or vector types.
19454 if (Size == 32 || Size == 64)
19455 FlagSet |= PPC::MOF_ScalarFloat;
19456 else if (MemVT == MVT::f128 || MemVT.isVector())
19457 FlagSet |= PPC::MOF_Vector;
19458 else
19459 llvm_unreachable("Not expecting illegal scalar floats!");
19460 }
19461
19462 // Compute flags for address computation.
19463 computeFlagsForAddressComputation(N, FlagSet, DAG);
19464
19465 // Compute type extension flags.
19466 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19467 switch (LN->getExtensionType()) {
19468 case ISD::SEXTLOAD:
19469 FlagSet |= PPC::MOF_SExt;
19470 break;
19471 case ISD::EXTLOAD:
19472 case ISD::ZEXTLOAD:
19473 FlagSet |= PPC::MOF_ZExt;
19474 break;
19475 case ISD::NON_EXTLOAD:
19476 FlagSet |= PPC::MOF_NoExt;
19477 break;
19478 }
19479 } else
19480 FlagSet |= PPC::MOF_NoExt;
19481
19482 // For integers, no extension is the same as zero extension.
19483 // We set the extension mode to zero extension so we don't have
19484 // to add separate entries in AddrModesMap for loads and stores.
19485 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19486 FlagSet |= PPC::MOF_ZExt;
19487 FlagSet &= ~PPC::MOF_NoExt;
19488 }
19489
19490 // If we don't have prefixed instructions, 34-bit constants should be
19491 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19492 bool IsNonP1034BitConst =
19494 FlagSet) == PPC::MOF_RPlusSImm34;
19495 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19496 IsNonP1034BitConst)
19497 FlagSet |= PPC::MOF_NotAddNorCst;
19498
19499 return FlagSet;
19500}
19501
19502/// SelectForceXFormMode - Given the specified address, force it to be
19503/// represented as an indexed [r+r] operation (an XForm instruction).
19505 SDValue &Base,
19506 SelectionDAG &DAG) const {
19507
19509 int16_t ForceXFormImm = 0;
19510 if (provablyDisjointOr(DAG, N) &&
19511 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
19512 Disp = N.getOperand(0);
19513 Base = N.getOperand(1);
19514 return Mode;
19515 }
19516
19517 // If the address is the result of an add, we will utilize the fact that the
19518 // address calculation includes an implicit add. However, we can reduce
19519 // register pressure if we do not materialize a constant just for use as the
19520 // index register. We only get rid of the add if it is not an add of a
19521 // value and a 16-bit signed constant and both have a single use.
19522 if (N.getOpcode() == ISD::ADD &&
19523 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
19524 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
19525 Disp = N.getOperand(0);
19526 Base = N.getOperand(1);
19527 return Mode;
19528 }
19529
19530 // Otherwise, use R0 as the base register.
19531 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19532 N.getValueType());
19533 Base = N;
19534
19535 return Mode;
19536}
19537
19539 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19540 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19541 EVT ValVT = Val.getValueType();
19542 // If we are splitting a scalar integer into f64 parts (i.e. so they
19543 // can be placed into VFRC registers), we need to zero extend and
19544 // bitcast the values. This will ensure the value is placed into a
19545 // VSR using direct moves or stack operations as needed.
19546 if (PartVT == MVT::f64 &&
19547 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19548 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
19549 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
19550 Parts[0] = Val;
19551 return true;
19552 }
19553 return false;
19554}
19555
19556SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19557 SelectionDAG &DAG) const {
19558 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19560 EVT RetVT = Op.getValueType();
19561 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
19562 SDValue Callee =
19563 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
19564 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
19566 for (const SDValue &N : Op->op_values()) {
19567 EVT ArgVT = N.getValueType();
19568 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
19569 TargetLowering::ArgListEntry Entry(N, ArgTy);
19570 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
19571 Entry.IsZExt = !Entry.IsSExt;
19572 Args.push_back(Entry);
19573 }
19574
19575 SDValue InChain = DAG.getEntryNode();
19576 SDValue TCChain = InChain;
19577 const Function &F = DAG.getMachineFunction().getFunction();
19578 bool isTailCall =
19579 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
19580 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19581 if (isTailCall)
19582 InChain = TCChain;
19583 CLI.setDebugLoc(SDLoc(Op))
19584 .setChain(InChain)
19585 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
19586 .setTailCall(isTailCall)
19587 .setSExtResult(SignExtend)
19588 .setZExtResult(!SignExtend)
19590 return TLI.LowerCallTo(CLI).first;
19591}
19592
19593SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19594 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19595 SelectionDAG &DAG) const {
19596 if (Op.getValueType() == MVT::f32)
19597 return lowerToLibCall(LibCallFloatName, Op, DAG);
19598
19599 if (Op.getValueType() == MVT::f64)
19600 return lowerToLibCall(LibCallDoubleName, Op, DAG);
19601
19602 return SDValue();
19603}
19604
19605bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
19606 SDNodeFlags Flags = Op.getNode()->getFlags();
19607 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
19608 Flags.hasNoNaNs() && Flags.hasNoInfs();
19609}
19610
19611bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
19612 return Op.getNode()->getFlags().hasApproximateFuncs();
19613}
19614
19615bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
19617}
19618
19619SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
19620 const char *LibCallFloatName,
19621 const char *LibCallDoubleNameFinite,
19622 const char *LibCallFloatNameFinite,
19623 SDValue Op,
19624 SelectionDAG &DAG) const {
19625 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
19626 return SDValue();
19627
19628 if (!isLowringToMASSFiniteSafe(Op))
19629 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
19630 DAG);
19631
19632 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
19633 LibCallDoubleNameFinite, Op, DAG);
19634}
19635
19636SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
19637 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
19638 "__xl_powf_finite", Op, DAG);
19639}
19640
19641SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
19642 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
19643 "__xl_sinf_finite", Op, DAG);
19644}
19645
19646SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
19647 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
19648 "__xl_cosf_finite", Op, DAG);
19649}
19650
19651SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
19652 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
19653 "__xl_logf_finite", Op, DAG);
19654}
19655
19656SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
19657 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
19658 "__xl_log10f_finite", Op, DAG);
19659}
19660
19661SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
19662 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
19663 "__xl_expf_finite", Op, DAG);
19664}
19665
19666// If we happen to match to an aligned D-Form, check if the Frame Index is
19667// adequately aligned. If it is not, reset the mode to match to X-Form.
19668static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
19671 return;
19672 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
19675}
19676
19677/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
19678/// compute the address flags of the node, get the optimal address mode based
19679/// on the flags, and set the Base and Disp based on the address mode.
19681 SDValue N, SDValue &Disp,
19682 SDValue &Base,
19683 SelectionDAG &DAG,
19684 MaybeAlign Align) const {
19685 SDLoc DL(Parent);
19686
19687 // Compute the address flags.
19688 unsigned Flags = computeMOFlags(Parent, N, DAG);
19689
19690 // Get the optimal address mode based on the Flags.
19691 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
19692
19693 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
19694 // Select an X-Form load if it is not.
19695 setXFormForUnalignedFI(N, Flags, Mode);
19696
19697 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
19698 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
19699 assert(Subtarget.isUsingPCRelativeCalls() &&
19700 "Must be using PC-Relative calls when a valid PC-Relative node is "
19701 "present!");
19702 Mode = PPC::AM_PCRel;
19703 }
19704
19705 // Set Base and Disp accordingly depending on the address mode.
19706 switch (Mode) {
19707 case PPC::AM_DForm:
19708 case PPC::AM_DSForm:
19709 case PPC::AM_DQForm: {
19710 // This is a register plus a 16-bit immediate. The base will be the
19711 // register and the displacement will be the immediate unless it
19712 // isn't sufficiently aligned.
19713 if (Flags & PPC::MOF_RPlusSImm16) {
19714 SDValue Op0 = N.getOperand(0);
19715 SDValue Op1 = N.getOperand(1);
19716 int16_t Imm = Op1->getAsZExtVal();
19717 if (!Align || isAligned(*Align, Imm)) {
19718 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
19719 Base = Op0;
19721 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19722 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19723 }
19724 break;
19725 }
19726 }
19727 // This is a register plus the @lo relocation. The base is the register
19728 // and the displacement is the global address.
19729 else if (Flags & PPC::MOF_RPlusLo) {
19730 Disp = N.getOperand(1).getOperand(0); // The global address.
19735 Base = N.getOperand(0);
19736 break;
19737 }
19738 // This is a constant address at most 32 bits. The base will be
19739 // zero or load-immediate-shifted and the displacement will be
19740 // the low 16 bits of the address.
19741 else if (Flags & PPC::MOF_AddrIsSImm32) {
19742 auto *CN = cast<ConstantSDNode>(N);
19743 EVT CNType = CN->getValueType(0);
19744 uint64_t CNImm = CN->getZExtValue();
19745 // If this address fits entirely in a 16-bit sext immediate field, codegen
19746 // this as "d, 0".
19747 int16_t Imm;
19748 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
19749 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
19750 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19751 CNType);
19752 break;
19753 }
19754 // Handle 32-bit sext immediate with LIS + Addr mode.
19755 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
19756 (!Align || isAligned(*Align, CNImm))) {
19757 int32_t Addr = (int32_t)CNImm;
19758 // Otherwise, break this down into LIS + Disp.
19759 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
19760 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
19761 MVT::i32);
19762 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19763 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
19764 break;
19765 }
19766 }
19767 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
19768 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
19770 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19771 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19772 } else
19773 Base = N;
19774 break;
19775 }
19776 case PPC::AM_PrefixDForm: {
19777 int64_t Imm34 = 0;
19778 unsigned Opcode = N.getOpcode();
19779 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19780 (isIntS34Immediate(N.getOperand(1), Imm34))) {
19781      // N is an ADD/OR node, and its second operand is a 34-bit signed immediate.
19782 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19783 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
19784 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19785 else
19786 Base = N.getOperand(0);
19787 } else if (isIntS34Immediate(N, Imm34)) {
19788 // The address is a 34-bit signed immediate.
19789 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19790 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
19791 }
19792 break;
19793 }
19794 case PPC::AM_PCRel: {
19795 // When selecting PC-Relative instructions, "Base" is not utilized as
19796 // we select the address as [PC+imm].
19797 Disp = N;
19798 break;
19799 }
19800 case PPC::AM_None:
19801 break;
19802 default: { // By default, X-Form is always available to be selected.
19803 // When a frame index is not aligned, we also match by XForm.
19805 Base = FI ? N : N.getOperand(1);
19806 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19807 N.getValueType())
19808 : N.getOperand(0);
19809 break;
19810 }
19811 }
19812 return Mode;
19813}
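// A minimal standalone sketch (plain C++, no LLVM dependencies) of the
// LIS + displacement split used above for 32-bit constant addresses: the
// high half is chosen so that the sign extension of the low 16 bits is
// compensated. The function name is an illustrative assumption.
#include <cassert>
#include <cstdint>

void checkLisDispSplit(int32_t Addr) {
  int32_t Lo = (int16_t)Addr;                         // sign-extended low 16 bits
  int32_t Hi = (int32_t)(((int64_t)Addr - Lo) >> 16); // the value LIS materializes
  assert((int32_t)(((int64_t)Hi << 16) + Lo) == Addr);
}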
19814
19816 bool Return,
19817 bool IsVarArg) const {
19818 switch (CC) {
19819 case CallingConv::Cold:
19820 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19821 default:
19822 return CC_PPC64_ELF;
19823 }
19824}
19825
19827 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19828}
19829
19832 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19833 if (shouldInlineQuadwordAtomics() && Size == 128)
19835
19836 switch (AI->getOperation()) {
19842 default:
19844 }
19845
19846 llvm_unreachable("unreachable atomicrmw operation");
19847}
19848
19856
19857static Intrinsic::ID
19859 switch (BinOp) {
19860 default:
19861 llvm_unreachable("Unexpected AtomicRMW BinOp");
19863 return Intrinsic::ppc_atomicrmw_xchg_i128;
19864 case AtomicRMWInst::Add:
19865 return Intrinsic::ppc_atomicrmw_add_i128;
19866 case AtomicRMWInst::Sub:
19867 return Intrinsic::ppc_atomicrmw_sub_i128;
19868 case AtomicRMWInst::And:
19869 return Intrinsic::ppc_atomicrmw_and_i128;
19870 case AtomicRMWInst::Or:
19871 return Intrinsic::ppc_atomicrmw_or_i128;
19872 case AtomicRMWInst::Xor:
19873 return Intrinsic::ppc_atomicrmw_xor_i128;
19875 return Intrinsic::ppc_atomicrmw_nand_i128;
19876 }
19877}
19878
19880 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19881 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19882 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19883 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19884 Type *ValTy = Incr->getType();
19885 assert(ValTy->getPrimitiveSizeInBits() == 128);
19886 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19887 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
19888 Value *IncrHi =
19889 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
19890 Value *LoHi = Builder.CreateIntrinsic(
19892 {AlignedAddr, IncrLo, IncrHi});
19893 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19894 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19895 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19896 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19897 return Builder.CreateOr(
19898 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19899}
19900
19902 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19903 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19904 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19905 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19906 Type *ValTy = CmpVal->getType();
19907 assert(ValTy->getPrimitiveSizeInBits() == 128);
19908 Function *IntCmpXchg =
19909 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
19910 Type *Int64Ty = Type::getInt64Ty(M->getContext());
19911 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
19912 Value *CmpHi =
19913 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
19914 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
19915 Value *NewHi =
19916 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
19917 emitLeadingFence(Builder, CI, Ord);
19918 Value *LoHi =
19919 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
19920 emitTrailingFence(Builder, CI, Ord);
19921 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19922 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19923 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19924 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19925 return Builder.CreateOr(
19926 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19927}
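// A minimal standalone sketch (plain C++ using the common __int128 extension,
// no LLVM dependencies) of the lo/hi split and recombine that both quadword
// atomic expansions above perform around the i128 intrinsics; the names are
// illustrative assumptions.
#include <cstdint>

struct LoHiParts {
  uint64_t Lo;
  uint64_t Hi;
};

LoHiParts splitQuadword(unsigned __int128 V) {
  // Mirrors the trunc / lshr-by-64 pair used for Incr, CmpVal and NewVal.
  return {static_cast<uint64_t>(V), static_cast<uint64_t>(V >> 64)};
}

unsigned __int128 joinQuadword(LoHiParts Parts) {
  // Mirrors: or(zext(lo), shl(zext(hi), 64)).
  return (static_cast<unsigned __int128>(Parts.Hi) << 64) |
         static_cast<unsigned __int128>(Parts.Lo);
}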
19928
19930 return Subtarget.useCRBits();
19931}
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
static bool Enabled
Definition Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
bool isDenormal() const
Definition APFloat.h:1450
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
APInt abs() const
Get the absolute value.
Definition APInt.h:1795
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:471
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1722
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:198
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
arg_iterator arg_begin()
Definition Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:635
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:191
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1077
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
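A minimal sketch, assuming DAG and dl are in scope: splat a scalar constant across every lane of a v4i32 BUILD_VECTOR.
  SDValue One = DAG.getConstant(1, dl, MVT::i32);
  SDValue Splat = DAG.getSplatBuildVector(MVT::v4i32, dl, One);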
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
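A minimal sketch, assuming DAG, dl and a pointer-typed SDValue Ptr are in scope: address a location 8 bytes into the pointed-to object while keeping the pointer-arithmetic flags this helper attaches.
  SDValue HiPtr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(8));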
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:148
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
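An illustrative sketch (not quoted from this file) of how a target constructor uses this hook: each (opcode, type) pair is tagged with the legalization strategy the common code should apply.
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);      // let the legalizer rewrite it
  setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); // handled in LowerOperation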
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
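An illustrative sketch (not quoted verbatim from this file) of the usual pattern, assuming Subtarget is the subtarget reference available in the constructor: register classes are registered per value type, then derived register properties are computed once.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  computeRegisterProperties(Subtarget.getRegisterInfo());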
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
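An illustrative sketch (not quoted from this file) of the usual pairing: reject an extending load for a type together with the matching truncating store, so the legalizer expands both.
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);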
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:181
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:130
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ TargetConstantPool
Definition ISDOpcodes.h:184
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ TargetExternalSymbol
Definition ISDOpcodes.h:185
@ TargetJumpTable
Definition ISDOpcodes.h:183
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:180
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:181
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:134
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - On AIX, the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:200
@ MO_TPREL_HA
Definition PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TPREL_LO
Definition PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:160
@ MO_HA
Definition PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load from memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ SETBC
SETBC - The ISA 3.1 (P10) SETBC instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ SETBCR
SETBCR - The ISA 3.1 (P10) SETBCR instruction.
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ VSRQ
VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load from memory that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ ADDC
These nodes represent PPC arithmetic operations with carry.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ Define
Register definition.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
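A self-contained sketch of this predicate, e.g. for checking that a displacement fits a signed 16-bit field (the 16-bit width here is an illustrative choice, not taken from this file):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::isInt<16>(-32768), "fits in a signed 16-bit field");
  static_assert(!llvm::isInt<16>(40000), "does not fit in 16 bits");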
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
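A minimal sketch, assuming an SDValue N inside a PPC lowering routine: only fold the operand as an immediate when it fits the signed 16-bit form.
  int16_t Imm = 0;
  if (isIntS16Immediate(N.getNode(), Imm)) {
    // Imm now holds the value, so it is safe to encode in a 16-bit immediate field.
  }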
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:682
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count the number of 0s from the least significant bit towards the most significant bit, stopping at the first 1.
Definition bit.h:186
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:159
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:126
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if the value of the given node can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
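A self-contained sketch of this helper (the 16-byte alignment is an illustrative value, not taken from this file):
  #include "llvm/Support/Alignment.h"
  #include <cstdint>
  uint64_t roundUpTo16(uint64_t Bytes) {
    return llvm::alignTo(Bytes, llvm::Align(16)); // e.g. 20 -> 32
  }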
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:565
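A self-contained sketch of this helper, sign-extending the low 16 bits of a wider value (the 16-bit width is an illustrative choice):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::SignExtend32<16>(0xFFFFu) == -1, "all-ones low half is -1");
  static_assert(llvm::SignExtend32<16>(0x7FFFu) == 32767, "positive values pass through");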
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:299
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:207
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition APFloat.cpp:269
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.