LLVM 22.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
16#include "PPC.h"
17#include "PPCISelLowering.h"
19#include "PPCSubtarget.h"
20#include "PPCTargetMachine.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/ADT/APSInt.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/Statistic.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/InlineAsm.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/IntrinsicsPowerPC.h"
50#include "llvm/IR/Module.h"
55#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <cstdint>
63#include <iterator>
64#include <limits>
65#include <memory>
66#include <new>
67#include <tuple>
68#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "ppc-isel"
73#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
74
75STATISTIC(NumSextSetcc,
76 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
77STATISTIC(NumZextSetcc,
78 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
79STATISTIC(SignExtensionsAdded,
80 "Number of sign extensions for compare inputs added.");
81STATISTIC(ZeroExtensionsAdded,
82 "Number of zero extensions for compare inputs added.");
83STATISTIC(NumLogicOpsOnComparison,
84 "Number of logical ops on i1 values calculated in GPR.");
85STATISTIC(OmittedForNonExtendUses,
86 "Number of compares not eliminated as they have non-extending uses.");
87STATISTIC(NumP9Setb,
88 "Number of compares lowered to setb.");
89
90// FIXME: Remove this once the bug has been fixed!
91cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
92cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
93
94static cl::opt<bool>
95 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
96 cl::desc("use aggressive ppc isel for bit permutations"),
99 "ppc-bit-perm-rewriter-stress-rotates",
100 cl::desc("stress rotate selection in aggressive ppc isel for "
101 "bit permutations"),
102 cl::Hidden);
103
105 "ppc-use-branch-hint", cl::init(true),
106 cl::desc("Enable static hinting of branches on ppc"),
107 cl::Hidden);
108
110 "ppc-tls-opt", cl::init(true),
111 cl::desc("Enable tls optimization peephole"),
112 cl::Hidden);
113
117
119 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
120 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
121 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
122 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
123 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
124 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
125 clEnumValN(ICGPR_NonExtIn, "nonextin",
126 "Only comparisons where inputs don't need [sz]ext."),
127 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
128 clEnumValN(ICGPR_ZextI32, "zexti32",
129 "Only i32 comparisons with zext result."),
130 clEnumValN(ICGPR_ZextI64, "zexti64",
131 "Only i64 comparisons with zext result."),
132 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
133 clEnumValN(ICGPR_SextI32, "sexti32",
134 "Only i32 comparisons with sext result."),
135 clEnumValN(ICGPR_SextI64, "sexti64",
136 "Only i64 comparisons with sext result.")));
137namespace {
138
139 //===--------------------------------------------------------------------===//
140 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
141 /// instructions for SelectionDAG operations.
142 ///
143 class PPCDAGToDAGISel : public SelectionDAGISel {
144 const PPCTargetMachine &TM;
145 const PPCSubtarget *Subtarget = nullptr;
146 const PPCTargetLowering *PPCLowering = nullptr;
147 unsigned GlobalBaseReg = 0;
148
149 public:
150 PPCDAGToDAGISel() = delete;
151
152 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
153 : SelectionDAGISel(tm, OptLevel), TM(tm) {}
154
155 bool runOnMachineFunction(MachineFunction &MF) override {
156 // Make sure we re-emit a set of the global base reg if necessary
157 GlobalBaseReg = 0;
158 Subtarget = &MF.getSubtarget<PPCSubtarget>();
159 PPCLowering = Subtarget->getTargetLowering();
160 if (Subtarget->hasROPProtect()) {
161 // Create a place on the stack for the ROP Protection Hash.
162 // The ROP Protection Hash will always be 8 bytes and aligned to 8
163 // bytes.
164 MachineFrameInfo &MFI = MF.getFrameInfo();
166 const int Result = MFI.CreateStackObject(8, Align(8), false);
168 }
170
171 return true;
172 }
173
174 void PreprocessISelDAG() override;
175 void PostprocessISelDAG() override;
176
177 /// getI16Imm - Return a target constant with the specified value, of type
178 /// i16.
179 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
180 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
181 }
182
183 /// getI32Imm - Return a target constant with the specified value, of type
184 /// i32.
185 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
186 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
187 }
188
189 /// getI64Imm - Return a target constant with the specified value, of type
190 /// i64.
191 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
192 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
193 }
194
195 /// getSmallIPtrImm - Return a target constant of pointer type.
196 inline SDValue getSmallIPtrImm(int64_t Imm, const SDLoc &dl) {
197 return CurDAG->getSignedTargetConstant(
198 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
199 }
200
201 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
202 /// rotate and mask opcode and mask operation.
203 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
204 unsigned &SH, unsigned &MB, unsigned &ME);
205
206 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
207 /// base register. Return the virtual register that holds this value.
208 SDNode *getGlobalBaseReg();
209
210 void selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset = 0);
211
212 // Select - Convert the specified operand from a target-independent to a
213 // target-specific node if it hasn't already been changed.
214 void Select(SDNode *N) override;
215
216 bool tryBitfieldInsert(SDNode *N);
217 bool tryBitPermutation(SDNode *N);
218 bool tryIntCompareInGPR(SDNode *N);
219
220 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
221 // an X-Form load instruction with the offset being a relocation coming from
222 // the PPCISD::ADD_TLS.
223 bool tryTLSXFormLoad(LoadSDNode *N);
224 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
225 // an X-Form store instruction with the offset being a relocation coming from
226 // the PPCISD::ADD_TLS.
227 bool tryTLSXFormStore(StoreSDNode *N);
228 /// SelectCC - Select a comparison of the specified values with the
229 /// specified condition code, returning the CR# of the expression.
230 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
231 const SDLoc &dl, SDValue Chain = SDValue());
232
233 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
234 /// immediate field. Note that the operand at this point is already the
235 /// result of a prior SelectAddressRegImm call.
236 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
237 if (N.getOpcode() == ISD::TargetConstant ||
238 N.getOpcode() == ISD::TargetGlobalAddress) {
239 Out = N;
240 return true;
241 }
242
243 return false;
244 }
245
246 /// SelectDSForm - Returns true if address N can be represented by the
247 /// addressing mode of DSForm instructions (a base register, plus a signed
248 /// 16-bit displacement that is a multiple of 4.
249 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
250 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
251 Align(4)) == PPC::AM_DSForm;
252 }
253
254 /// SelectDQForm - Returns true if address N can be represented by the
255 /// addressing mode of DQForm instructions (a base register, plus a signed
256 /// 16-bit displacement that is a multiple of 16.
257 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
258 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
259 Align(16)) == PPC::AM_DQForm;
260 }
261
262 /// SelectDForm - Returns true if address N can be represented by
263 /// the addressing mode of DForm instructions (a base register, plus a
264 /// signed 16-bit immediate.
265 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
266 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
267 std::nullopt) == PPC::AM_DForm;
268 }
269
270 /// SelectPCRelForm - Returns true if address N can be represented by
271 /// PC-Relative addressing mode.
272 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
273 SDValue &Base) {
274 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
275 std::nullopt) == PPC::AM_PCRel;
276 }
277
278 /// SelectPDForm - Returns true if address N can be represented by Prefixed
279 /// DForm addressing mode (a base register, plus a signed 34-bit immediate.
280 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
281 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
282 std::nullopt) ==
284 }
285
286 /// SelectXForm - Returns true if address N can be represented by the
287 /// addressing mode of XForm instructions (an indexed [r+r] operation).
288 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
289 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
290 std::nullopt) == PPC::AM_XForm;
291 }
292
293 /// SelectForceXForm - Given the specified address, force it to be
294 /// represented as an indexed [r+r] operation (an XForm instruction).
295 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
296 SDValue &Base) {
297 return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
299 }
300
301 /// SelectAddrIdx - Given the specified address, check to see if it can be
302 /// represented as an indexed [r+r] operation.
303 /// This is for xform instructions whose associated displacement form is D.
304 /// The last parameter \p 0 means associated D form has no requirment for 16
305 /// bit signed displacement.
306 /// Returns false if it can be represented by [r+imm], which are preferred.
307 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
308 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
309 std::nullopt);
310 }
311
312 /// SelectAddrIdx4 - Given the specified address, check to see if it can be
313 /// represented as an indexed [r+r] operation.
314 /// This is for xform instructions whose associated displacement form is DS.
315 /// The last parameter \p 4 means associated DS form 16 bit signed
316 /// displacement must be a multiple of 4.
317 /// Returns false if it can be represented by [r+imm], which are preferred.
318 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
319 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
320 Align(4));
321 }
322
323 /// SelectAddrIdx16 - Given the specified address, check to see if it can be
324 /// represented as an indexed [r+r] operation.
325 /// This is for xform instructions whose associated displacement form is DQ.
326 /// The last parameter \p 16 means associated DQ form 16 bit signed
327 /// displacement must be a multiple of 16.
328 /// Returns false if it can be represented by [r+imm], which are preferred.
329 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
330 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
331 Align(16));
332 }
333
334 /// SelectAddrIdxOnly - Given the specified address, force it to be
335 /// represented as an indexed [r+r] operation.
336 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
337 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
338 }
339
340 /// SelectAddrImm - Returns true if the address N can be represented by
341 /// a base register plus a signed 16-bit displacement [r+imm].
342 /// The last parameter \p 0 means D form has no requirment for 16 bit signed
343 /// displacement.
344 bool SelectAddrImm(SDValue N, SDValue &Disp,
345 SDValue &Base) {
346 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
347 std::nullopt);
348 }
349
350 /// SelectAddrImmX4 - Returns true if the address N can be represented by
351 /// a base register plus a signed 16-bit displacement that is a multiple of
352 /// 4 (last parameter). Suitable for use by STD and friends.
353 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
354 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
355 }
356
357 /// SelectAddrImmX16 - Returns true if the address N can be represented by
358 /// a base register plus a signed 16-bit displacement that is a multiple of
359 /// 16(last parameter). Suitable for use by STXV and friends.
360 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
361 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
362 Align(16));
363 }
364
365 /// SelectAddrImmX34 - Returns true if the address N can be represented by
366 /// a base register plus a signed 34-bit displacement. Suitable for use by
367 /// PSTXVP and friends.
368 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
369 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
370 }
371
372 // Select an address into a single register.
373 bool SelectAddr(SDValue N, SDValue &Base) {
374 Base = N;
375 return true;
376 }
377
378 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
379 return PPCLowering->SelectAddressPCRel(N, Base);
380 }
381
382 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
383 /// inline asm expressions. It is always correct to compute the value into
384 /// a register. The case of adding a (possibly relocatable) constant to a
385 /// register can be improved, but it is wrong to substitute Reg+Reg for
386 /// Reg in an asm, because the load or store opcode would have to change.
388 InlineAsm::ConstraintCode ConstraintID,
389 std::vector<SDValue> &OutOps) override {
390 switch(ConstraintID) {
391 default:
392 errs() << "ConstraintID: "
393 << InlineAsm::getMemConstraintName(ConstraintID) << "\n";
394 llvm_unreachable("Unexpected asm memory constraint");
401 // We need to make sure that this one operand does not end up in r0
402 // (because we might end up lowering this as 0(%op)).
403 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
404 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
405 SDLoc dl(Op);
406 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
407 SDValue NewOp =
408 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
409 dl, Op.getValueType(),
410 Op, RC), 0);
411
412 OutOps.push_back(NewOp);
413 return false;
414 }
415 return true;
416 }
417
418// Include the pieces autogenerated from the target description.
419#include "PPCGenDAGISel.inc"
420
421private:
422 bool trySETCC(SDNode *N);
423 bool tryFoldSWTestBRCC(SDNode *N);
424 bool trySelectLoopCountIntrinsic(SDNode *N);
425 bool tryAsSingleRLDICL(SDNode *N);
426 bool tryAsSingleRLDCL(SDNode *N);
427 bool tryAsSingleRLDICR(SDNode *N);
428 bool tryAsSingleRLWINM(SDNode *N);
429 bool tryAsSingleRLWINM8(SDNode *N);
430 bool tryAsSingleRLWIMI(SDNode *N);
431 bool tryAsPairOfRLDICL(SDNode *N);
432 bool tryAsSingleRLDIMI(SDNode *N);
433
434 void PeepholePPC64();
435 void PeepholePPC64ZExt();
436 void PeepholeCROps();
437
438 SDValue combineToCMPB(SDNode *N);
439 void foldBoolExts(SDValue &Res, SDNode *&N);
440
441 bool AllUsersSelectZero(SDNode *N);
442 void SwapAllSelectUsers(SDNode *N);
443
444 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
445 void transferMemOperands(SDNode *N, SDNode *Result);
446 };
447
448 class PPCDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
449 public:
450 static char ID;
451 explicit PPCDAGToDAGISelLegacy(PPCTargetMachine &tm,
452 CodeGenOptLevel OptLevel)
454 ID, std::make_unique<PPCDAGToDAGISel>(tm, OptLevel)) {}
455 };
456} // end anonymous namespace
457
458char PPCDAGToDAGISelLegacy::ID = 0;
459
460INITIALIZE_PASS(PPCDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
461
462/// getGlobalBaseReg - Output the instructions required to put the
463/// base address to use for accessing globals into a register.
464///
465SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
466 if (!GlobalBaseReg) {
467 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
468 // Insert the set of GlobalBaseReg into the first MBB of the function
469 MachineBasicBlock &FirstMBB = MF->front();
471 const Module *M = MF->getFunction().getParent();
472 DebugLoc dl;
473
474 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
475 if (Subtarget->isTargetELF()) {
476 GlobalBaseReg = PPC::R30;
477 if (!Subtarget->isSecurePlt() &&
478 M->getPICLevel() == PICLevel::SmallPIC) {
479 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
480 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
481 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
482 } else {
483 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
484 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
485 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
486 BuildMI(FirstMBB, MBBI, dl,
487 TII.get(PPC::UpdateGBR), GlobalBaseReg)
488 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
489 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
490 }
491 } else {
493 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
494 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
495 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
496 }
497 } else {
498 // We must ensure that this sequence is dominated by the prologue.
499 // FIXME: This is a bit of a big hammer since we don't get the benefits
500 // of shrink-wrapping whenever we emit this instruction. Considering
501 // this is used in any function where we emit a jump table, this may be
502 // a significant limitation. We should consider inserting this in the
503 // block where it is used and then commoning this sequence up if it
504 // appears in multiple places.
505 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
506 // MovePCtoLR8.
507 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
508 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
509 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
510 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
511 }
512 }
513 return CurDAG->getRegister(GlobalBaseReg,
514 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
515 .getNode();
516}
517
518// Check if a SDValue has the toc-data attribute.
519static bool hasTocDataAttr(SDValue Val) {
520 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
521 if (!GA)
522 return false;
523
524 const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
525 if (!GV)
526 return false;
527
528 if (!GV->hasAttribute("toc-data"))
529 return false;
530 return true;
531}
532
534 const TargetMachine &TM,
535 const SDNode *Node) {
536 // If there isn't an attribute to override the module code model
537 // this will be the effective code model.
538 CodeModel::Model ModuleModel = TM.getCodeModel();
539
540 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Node->getOperand(0));
541 if (!GA)
542 return ModuleModel;
543
544 const GlobalValue *GV = GA->getGlobal();
545 if (!GV)
546 return ModuleModel;
547
548 return Subtarget.getCodeModel(TM, GV);
549}
550
551/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
552/// operand. If so Imm will receive the 32-bit value.
553static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
554 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
555 Imm = N->getAsZExtVal();
556 return true;
557 }
558 return false;
559}
560
561/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
562/// operand. If so Imm will receive the 64-bit value.
563static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
564 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
565 Imm = N->getAsZExtVal();
566 return true;
567 }
568 return false;
569}
570
571// isInt32Immediate - This method tests to see if a constant operand.
572// If so Imm will receive the 32 bit value.
573static bool isInt32Immediate(SDValue N, unsigned &Imm) {
574 return isInt32Immediate(N.getNode(), Imm);
575}
576
577/// isInt64Immediate - This method tests to see if the value is a 64-bit
578/// constant operand. If so Imm will receive the 64-bit value.
579static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
580 return isInt64Immediate(N.getNode(), Imm);
581}
582
583static unsigned getBranchHint(unsigned PCC,
584 const FunctionLoweringInfo &FuncInfo,
585 const SDValue &DestMBB) {
586 assert(isa<BasicBlockSDNode>(DestMBB));
587
588 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
589
590 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
591 const Instruction *BBTerm = BB->getTerminator();
592
593 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
594
595 const BasicBlock *TBB = BBTerm->getSuccessor(0);
596 const BasicBlock *FBB = BBTerm->getSuccessor(1);
597
598 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
599 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
600
601 // We only want to handle cases which are easy to predict at static time, e.g.
602 // C++ throw statement, that is very likely not taken, or calling never
603 // returned function, e.g. stdlib exit(). So we set Threshold to filter
604 // unwanted cases.
605 //
606 // Below is LLVM branch weight table, we only want to handle case 1, 2
607 //
608 // Case Taken:Nontaken Example
609 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
610 // 2. Invoke-terminating 1:1048575
611 // 3. Coldblock 4:64 __builtin_expect
612 // 4. Loop Branch 124:4 For loop
613 // 5. PH/ZH/FPH 20:12
614 const uint32_t Threshold = 10000;
615
616 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
617 return PPC::BR_NO_HINT;
618
619 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
620 << "::" << BB->getName() << "'\n"
621 << " -> " << TBB->getName() << ": " << TProb << "\n"
622 << " -> " << FBB->getName() << ": " << FProb << "\n");
623
624 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
625
626 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
627 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
628 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
629 std::swap(TProb, FProb);
630
631 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
632}
633
634// isOpcWithIntImmediate - This method tests to see if the node is a specific
635// opcode and that it has a immediate integer right operand.
636// If so Imm will receive the 32 bit value.
637static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
638 return N->getOpcode() == Opc
639 && isInt32Immediate(N->getOperand(1).getNode(), Imm);
640}
641
642void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset) {
643 SDLoc dl(SN);
644 int FI = cast<FrameIndexSDNode>(N)->getIndex();
645 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
646 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
647 if (SN->hasOneUse())
648 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
649 getSmallIPtrImm(Offset, dl));
650 else
651 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
652 getSmallIPtrImm(Offset, dl)));
653}
654
655bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
656 bool isShiftMask, unsigned &SH,
657 unsigned &MB, unsigned &ME) {
658 // Don't even go down this path for i64, since different logic will be
659 // necessary for rldicl/rldicr/rldimi.
660 if (N->getValueType(0) != MVT::i32)
661 return false;
662
663 unsigned Shift = 32;
664 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
665 unsigned Opcode = N->getOpcode();
666 if (N->getNumOperands() != 2 ||
667 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
668 return false;
669
670 if (Opcode == ISD::SHL) {
671 // apply shift left to mask if it comes first
672 if (isShiftMask) Mask = Mask << Shift;
673 // determine which bits are made indeterminant by shift
674 Indeterminant = ~(0xFFFFFFFFu << Shift);
675 } else if (Opcode == ISD::SRL) {
676 // apply shift right to mask if it comes first
677 if (isShiftMask) Mask = Mask >> Shift;
678 // determine which bits are made indeterminant by shift
679 Indeterminant = ~(0xFFFFFFFFu >> Shift);
680 // adjust for the left rotate
681 Shift = 32 - Shift;
682 } else if (Opcode == ISD::ROTL) {
683 Indeterminant = 0;
684 } else {
685 return false;
686 }
687
688 // if the mask doesn't intersect any Indeterminant bits
689 if (Mask && !(Mask & Indeterminant)) {
690 SH = Shift & 31;
691 // make sure the mask is still a mask (wrap arounds may not be)
692 return isRunOfOnes(Mask, MB, ME);
693 }
694 return false;
695}
696
697// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
698// instruction use the thread pointer.
700 assert(
701 Base.getOpcode() == PPCISD::ADD_TLS &&
702 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
703 const PPCSubtarget &Subtarget =
705 SDValue ADDTLSOp1 = Base.getOperand(0);
706 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
707
708 // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
709 //
710 // Although ADD_TLS does not explicitly use the thread pointer
711 // register when LD_GOT_TPREL_L is one of it's operands, the LD_GOT_TPREL_L
712 // instruction will have a relocation specifier, @got@tprel, that is used to
713 // generate a GOT entry. The linker replaces this entry with an offset for a
714 // for a thread local variable, which will be relative to the thread pointer.
715 if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
716 return true;
717 // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
718 // node is produced instead to represent the aforementioned situation.
719 LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
720 if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
721 return true;
722
723 // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
724 // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
725 // later returning it into R3.
726 if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
727 return true;
728
729 // The ADD_TLS note is explicitly acquiring the thread pointer (X13/R13).
730 RegisterSDNode *AddFirstOpReg =
731 dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
732 if (AddFirstOpReg &&
733 AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
734 return true;
735
736 return false;
737}
738
739// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
740// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
741// operation, can be optimized to use an X-Form load or store, allowing the
742// ADD_TLS node to be removed completely.
744
745 // Do not do this transformation at -O0.
746 if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None)
747 return false;
748
749 // In order to perform this optimization inside tryTLSXForm[Load|Store],
750 // Base is expected to be an ADD_TLS node.
751 if (Base.getOpcode() != PPCISD::ADD_TLS)
752 return false;
753 for (auto *ADDTLSUse : Base.getNode()->users()) {
754 // The optimization to convert the D-Form load/store into its X-Form
755 // counterpart should only occur if the source value offset of the load/
756 // store is 0. This also means that The offset should always be undefined.
757 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
758 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
759 return false;
760 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
761 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
762 return false;
763 } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
764 return false;
765 }
766
767 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
768 return false;
769
770 // Does the ADD_TLS node of the load/store use the thread pointer?
771 // If the thread pointer is not used as one of the operands of ADD_TLS,
772 // then this optimization is not valid.
773 return isThreadPointerAcquisitionNode(Base, CurDAG);
774}
775
776bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
777 SDValue Base = ST->getBasePtr();
778 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
779 return false;
780
781 SDLoc dl(ST);
782 EVT MemVT = ST->getMemoryVT();
783 EVT RegVT = ST->getValue().getValueType();
784
785 unsigned Opcode;
786 switch (MemVT.getSimpleVT().SimpleTy) {
787 default:
788 return false;
789 case MVT::i8: {
790 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
791 break;
792 }
793 case MVT::i16: {
794 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
795 break;
796 }
797 case MVT::i32: {
798 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
799 break;
800 }
801 case MVT::i64: {
802 Opcode = PPC::STDXTLS;
803 break;
804 }
805 case MVT::f32: {
806 Opcode = PPC::STFSXTLS;
807 break;
808 }
809 case MVT::f64: {
810 Opcode = PPC::STFDXTLS;
811 break;
812 }
813 }
814 SDValue Chain = ST->getChain();
815 SDVTList VTs = ST->getVTList();
816 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
817 Chain};
818 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
819 transferMemOperands(ST, MN);
820 ReplaceNode(ST, MN);
821 return true;
822}
823
824bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
825 SDValue Base = LD->getBasePtr();
826 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
827 return false;
828
829 SDLoc dl(LD);
830 EVT MemVT = LD->getMemoryVT();
831 EVT RegVT = LD->getValueType(0);
832 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
833 unsigned Opcode;
834 switch (MemVT.getSimpleVT().SimpleTy) {
835 default:
836 return false;
837 case MVT::i8: {
838 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
839 break;
840 }
841 case MVT::i16: {
842 if (RegVT == MVT::i32)
843 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
844 else
845 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
846 break;
847 }
848 case MVT::i32: {
849 if (RegVT == MVT::i32)
850 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
851 else
852 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
853 break;
854 }
855 case MVT::i64: {
856 Opcode = PPC::LDXTLS;
857 break;
858 }
859 case MVT::f32: {
860 Opcode = PPC::LFSXTLS;
861 break;
862 }
863 case MVT::f64: {
864 Opcode = PPC::LFDXTLS;
865 break;
866 }
867 }
868 SDValue Chain = LD->getChain();
869 SDVTList VTs = LD->getVTList();
870 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
871 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
872 transferMemOperands(LD, MN);
873 ReplaceNode(LD, MN);
874 return true;
875}
876
877/// Turn an or of two masked values into the rotate left word immediate then
878/// mask insert (rlwimi) instruction.
879bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
880 SDValue Op0 = N->getOperand(0);
881 SDValue Op1 = N->getOperand(1);
882 SDLoc dl(N);
883
884 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
885 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
886
887 unsigned TargetMask = LKnown.Zero.getZExtValue();
888 unsigned InsertMask = RKnown.Zero.getZExtValue();
889
890 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
891 unsigned Op0Opc = Op0.getOpcode();
892 unsigned Op1Opc = Op1.getOpcode();
893 unsigned Value, SH = 0;
894 TargetMask = ~TargetMask;
895 InsertMask = ~InsertMask;
896
897 // If the LHS has a foldable shift and the RHS does not, then swap it to the
898 // RHS so that we can fold the shift into the insert.
899 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
900 if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
901 Op0.getOperand(0).getOpcode() == ISD::SRL) {
902 if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
903 Op1.getOperand(0).getOpcode() != ISD::SRL) {
904 std::swap(Op0, Op1);
905 std::swap(Op0Opc, Op1Opc);
906 std::swap(TargetMask, InsertMask);
907 }
908 }
909 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
910 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
911 Op1.getOperand(0).getOpcode() != ISD::SRL) {
912 std::swap(Op0, Op1);
913 std::swap(Op0Opc, Op1Opc);
914 std::swap(TargetMask, InsertMask);
915 }
916 }
917
918 unsigned MB, ME;
919 if (isRunOfOnes(InsertMask, MB, ME)) {
920 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
922 Op1 = Op1.getOperand(0);
923 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
924 }
925 if (Op1Opc == ISD::AND) {
926 // The AND mask might not be a constant, and we need to make sure that
927 // if we're going to fold the masking with the insert, all bits not
928 // know to be zero in the mask are known to be one.
929 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
930 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
931
932 unsigned SHOpc = Op1.getOperand(0).getOpcode();
933 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
935 // Note that Value must be in range here (less than 32) because
936 // otherwise there would not be any bits set in InsertMask.
937 Op1 = Op1.getOperand(0).getOperand(0);
938 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
939 }
940 }
941
942 SH &= 31;
943 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
944 getI32Imm(ME, dl) };
945 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
946 return true;
947 }
948 }
949 return false;
950}
951
952static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
953 unsigned MaxTruncation = 0;
954 // Cannot use range-based for loop here as we need the actual use (i.e. we
955 // need the operand number corresponding to the use). A range-based for
956 // will unbox the use and provide an SDNode*.
957 for (SDUse &Use : N->uses()) {
958 SDNode *User = Use.getUser();
959 unsigned Opc =
960 User->isMachineOpcode() ? User->getMachineOpcode() : User->getOpcode();
961 switch (Opc) {
962 default: return 0;
963 case ISD::TRUNCATE:
964 if (User->isMachineOpcode())
965 return 0;
966 MaxTruncation = std::max(MaxTruncation,
967 (unsigned)User->getValueType(0).getSizeInBits());
968 continue;
969 case ISD::STORE: {
970 if (User->isMachineOpcode())
971 return 0;
972 StoreSDNode *STN = cast<StoreSDNode>(User);
973 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
974 if (MemVTSize == 64 || Use.getOperandNo() != 0)
975 return 0;
976 MaxTruncation = std::max(MaxTruncation, MemVTSize);
977 continue;
978 }
979 case PPC::STW8:
980 case PPC::STWX8:
981 case PPC::STWU8:
982 case PPC::STWUX8:
983 if (Use.getOperandNo() != 0)
984 return 0;
985 MaxTruncation = std::max(MaxTruncation, 32u);
986 continue;
987 case PPC::STH8:
988 case PPC::STHX8:
989 case PPC::STHU8:
990 case PPC::STHUX8:
991 if (Use.getOperandNo() != 0)
992 return 0;
993 MaxTruncation = std::max(MaxTruncation, 16u);
994 continue;
995 case PPC::STB8:
996 case PPC::STBX8:
997 case PPC::STBU8:
998 case PPC::STBUX8:
999 if (Use.getOperandNo() != 0)
1000 return 0;
1001 MaxTruncation = std::max(MaxTruncation, 8u);
1002 continue;
1003 }
1004 }
1005 return MaxTruncation;
1006}
1007
1008// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1009// zeros and return the number of bits by the left of these consecutive zeros.
1010static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1011 unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
1012 unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
1013 if ((HiTZ + LoLZ) >= Num)
1014 return (32 + HiTZ);
1015 return 0;
1016}
1017
1018// Direct materialization of 64-bit constants by enumerated patterns.
1019static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
1020 uint64_t Imm, unsigned &InstCnt) {
1021 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1022 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1023 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1024 unsigned LO = llvm::countl_one<uint64_t>(Imm);
1025 unsigned Hi32 = Hi_32(Imm);
1026 unsigned Lo32 = Lo_32(Imm);
1027 SDNode *Result = nullptr;
1028 unsigned Shift = 0;
1029
1030 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1031 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1032 };
1033
1034 // Following patterns use 1 instructions to materialize the Imm.
1035 InstCnt = 1;
1036 // 1-1) Patterns : {zeros}{15-bit valve}
1037 // {ones}{15-bit valve}
1038 if (isInt<16>(Imm)) {
1039 SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1040 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1041 }
1042 // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
1043 // {ones}{15-bit valve}{16 zeros}
1044 if (TZ > 15 && (LZ > 32 || LO > 32))
1045 return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1046 getI32Imm((Imm >> 16) & 0xffff));
1047
1048 // Following patterns use 2 instructions to materialize the Imm.
1049 InstCnt = 2;
1050 assert(LZ < 64 && "Unexpected leading zeros here.");
1051 // Count of ones follwing the leading zeros.
1052 unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
1053 // 2-1) Patterns : {zeros}{31-bit value}
1054 // {ones}{31-bit value}
1055 if (isInt<32>(Imm)) {
1056 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1057 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1058 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1059 return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1060 getI32Imm(Imm & 0xffff));
1061 }
1062 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1063 // {zeros}{15-bit value}{zeros}
1064 // {zeros}{ones}{15-bit value}
1065 // {ones}{15-bit value}{zeros}
1066 // We can take advantage of LI's sign-extension semantics to generate leading
1067 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
1068 if ((LZ + FO + TZ) > 48) {
1069 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1070 getI32Imm((Imm >> TZ) & 0xffff));
1071 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1072 getI32Imm(TZ), getI32Imm(LZ));
1073 }
1074 // 2-3) Pattern : {zeros}{15-bit value}{ones}
1075 // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
1076 // therefore we can take advantage of LI's sign-extension semantics, and then
1077 // mask them off after rotation.
1078 //
1079 // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
1080 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1081 // +------------------------+ +------------------------+
1082 // 63 0 63 0
1083 // Imm (Imm >> (48 - LZ) & 0xffff)
1084 // +----sext-----|--16-bit--+ +clear-|-----------------+
1085 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1086 // +------------------------+ +------------------------+
1087 // 63 0 63 0
1088 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
1089 if ((LZ + TO) > 48) {
1090 // Since the immediates with (LZ > 32) have been handled by previous
1091 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1092 // the Imm by a negative value.
1093 assert(LZ <= 32 && "Unexpected shift value.");
1094 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1095 getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1096 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1097 getI32Imm(48 - LZ), getI32Imm(LZ));
1098 }
1099 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1100 // {ones}{15-bit value}{ones}
1101 // We can take advantage of LI's sign-extension semantics to generate leading
1102 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1103 // after rotation.
1104 //
1105 // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1106 // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1107 // +------------------------+ +------------------------+
1108 // 63 0 63 0
1109 // Imm (Imm >> TO) & 0xffff
1110 // +----sext-----|--16-bit--+ +LZ|---------------------+
1111 // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1112 // +------------------------+ +------------------------+
1113 // 63 0 63 0
1114 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1115 if ((LZ + FO + TO) > 48) {
1116 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1117 getI32Imm((Imm >> TO) & 0xffff));
1118 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1119 getI32Imm(TO), getI32Imm(LZ));
1120 }
1121 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1122 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1123 // value, we can use LI for Lo16 without generating leading ones then add the
1124 // Hi16(in Lo32).
1125 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1126 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1127 getI32Imm(Lo32 & 0xffff));
1128 return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1129 getI32Imm(Lo32 >> 16));
1130 }
1131 // 2-6) Patterns : {******}{49 zeros}{******}
1132 // {******}{49 ones}{******}
1133 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1134 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1135 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1136 // it back.
1137 //
1138 // 1) findContiguousZerosAtLeast(Imm, 49)
1139 // +------|--zeros-|------+ +---ones--||---15 bit--+
1140 // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1141 // +----------------------+ +----------------------+
1142 // 63 0 63 0
1143 //
1144 // 2) findContiguousZerosAtLeast(~Imm, 49)
1145 // +------|--ones--|------+ +---ones--||---15 bit--+
1146 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1147 // +----------------------+ +----------------------+
1148 // 63 0 63 0
1149 if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1150 (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1151 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1152 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1153 getI32Imm(RotImm & 0xffff));
1154 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1155 getI32Imm(Shift), getI32Imm(0));
1156 }
1157 // 2-7) Patterns : High word == Low word
1158 // This may require 2 to 3 instructions, depending on whether Lo32 can be
1159 // materialized in 1 instruction.
1160 if (Hi32 == Lo32) {
1161 // Handle the first 32 bits.
1162 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1163 uint64_t ImmLo16 = Lo32 & 0xffff;
1164 if (isInt<16>(Lo32))
1165 Result =
1166 CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1167 else if (!ImmLo16)
1168 Result =
1169 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1170 else {
1171 InstCnt = 3;
1172 Result =
1173 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1174 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1175 SDValue(Result, 0), getI32Imm(ImmLo16));
1176 }
1177 // Use rldimi to insert the Low word into High word.
1178 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1179 getI32Imm(0)};
1180 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1181 }
1182
1183 // Following patterns use 3 instructions to materialize the Imm.
1184 InstCnt = 3;
1185 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1186 // {zeros}{31-bit value}{zeros}
1187 // {zeros}{ones}{31-bit value}
1188 // {ones}{31-bit value}{zeros}
1189 // We can take advantage of LIS's sign-extension semantics to generate leading
1190 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1191 // ones in both sides after rotation.
1192 if ((LZ + FO + TZ) > 32) {
1193 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1194 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1195 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1196 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1197 getI32Imm((Imm >> TZ) & 0xffff));
1198 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1199 getI32Imm(TZ), getI32Imm(LZ));
1200 }
1201 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1202 // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
1203 // value, therefore we can take advantage of LIS's sign-extension semantics,
1204 // add the remaining bits with ORI, and then mask them off after rotation.
1205 // This is similar to Pattern 2-3, please refer to the diagram there.
1206 if ((LZ + TO) > 32) {
1207 // Since the immediates with (LZ > 32) have been handled by previous
1208 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1209 // the Imm by a negative value.
1210 assert(LZ <= 32 && "Unexpected shift value.");
1211 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1212 getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1213 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1214 getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1215 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1216 getI32Imm(32 - LZ), getI32Imm(LZ));
1217 }
1218 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1219 // {ones}{31-bit value}{ones}
1220 // We can take advantage of LIS's sign-extension semantics to generate leading
1221 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1222 // ones in left sides (if required) after rotation.
1223 // This is similar to Pattern 2-4, please refer to the diagram there.
1224 if ((LZ + FO + TO) > 32) {
1225 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1226 getI32Imm((Imm >> (TO + 16)) & 0xffff));
1227 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1228 getI32Imm((Imm >> TO) & 0xffff));
1229 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1230 getI32Imm(TO), getI32Imm(LZ));
1231 }
1232 // 3-4) Patterns : {******}{33 zeros}{******}
1233 // {******}{33 ones}{******}
1234 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1235 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1236 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1237 // rotate it back.
1238 // This is similar to Pattern 2-6, please refer to the diagram there.
1239 if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1240 (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1241 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1242 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1243 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1244 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1245 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1246 getI32Imm(RotImm & 0xffff));
1247 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1248 getI32Imm(Shift), getI32Imm(0));
1249 }
1250
1251 InstCnt = 0;
1252 return nullptr;
1253}
1254
1255// Try to select instructions to generate a 64 bit immediate using prefix as
1256// well as non prefix instructions. The function will return the SDNode
1257// to materialize that constant or it will return nullptr if it does not
1258// find one. The variable InstCnt is set to the number of instructions that
1259// were selected.
1261 uint64_t Imm, unsigned &InstCnt) {
1262 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1263 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1264 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1265 unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1266 unsigned Hi32 = Hi_32(Imm);
1267 unsigned Lo32 = Lo_32(Imm);
1268
1269 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1270 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1271 };
1272
1273 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1274 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1275 };
1276
1277 // Following patterns use 1 instruction to materialize Imm.
1278 InstCnt = 1;
1279
1280 // The pli instruction can materialize up to 34 bits directly.
1281 // If a constant fits within 34-bits, emit the pli instruction here directly.
1282 if (isInt<34>(Imm))
1283 return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1284 CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1285
1286 // Require at least two instructions.
1287 InstCnt = 2;
1288 SDNode *Result = nullptr;
1289 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1290 // {zeros}{33-bit value}{zeros}
1291 // {zeros}{ones}{33-bit value}
1292 // {ones}{33-bit value}{zeros}
1293 // We can take advantage of PLI's sign-extension semantics to generate leading
1294 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1295 if ((LZ + FO + TZ) > 30) {
1296 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1297 APInt Extended = SignedInt34.sext(64);
1298 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1299 getI64Imm(Extended.getZExtValue()));
1300 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1301 getI32Imm(TZ), getI32Imm(LZ));
1302 }
1303 // Pattern : {zeros}{33-bit value}{ones}
1304 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1305 // therefore we can take advantage of PLI's sign-extension semantics, and then
1306 // mask them off after rotation.
1307 //
1308 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1309 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1310 // +------------------------+ +------------------------+
1311 // 63 0 63 0
1312 //
1313 // +----sext-----|--34-bit--+ +clear-|-----------------+
1314 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1315 // +------------------------+ +------------------------+
1316 // 63 0 63 0
1317 if ((LZ + TO) > 30) {
1318 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1319 APInt Extended = SignedInt34.sext(64);
1320 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1321 getI64Imm(Extended.getZExtValue()));
1322 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1323 getI32Imm(30 - LZ), getI32Imm(LZ));
1324 }
1325 // Patterns : {zeros}{ones}{33-bit value}{ones}
1326 // {ones}{33-bit value}{ones}
1327 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1328 // generate leading ones, and then use RLDICL to mask off the ones in left
1329 // sides (if required) after rotation.
1330 if ((LZ + FO + TO) > 30) {
1331 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1332 APInt Extended = SignedInt34.sext(64);
1333 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1334 getI64Imm(Extended.getZExtValue()));
1335 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1336 getI32Imm(TO), getI32Imm(LZ));
1337 }
1338 // Patterns : {******}{31 zeros}{******}
1339 // : {******}{31 ones}{******}
1340 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1341 // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1342 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1343 //
1344 // +------|--ones--|------+ +---ones--||---33 bit--+
1345 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1346 // +----------------------+ +----------------------+
1347 // 63 0 63 0
1348 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1349 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1350 if (isInt<34>(RotImm)) {
1351 Result =
1352 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1353 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1354 SDValue(Result, 0), getI32Imm(Shift),
1355 getI32Imm(0));
1356 }
1357 }
1358
1359 // Patterns : High word == Low word
1360 // This is basically a splat of a 32 bit immediate.
1361 if (Hi32 == Lo32) {
1362 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1363 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1364 getI32Imm(0)};
1365 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1366 }
1367
1368 InstCnt = 3;
1369 // Catch-all
1370 // This pattern can form any 64 bit immediate in 3 instructions.
1371 SDNode *ResultHi =
1372 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1373 SDNode *ResultLo =
1374 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1375 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1376 getI32Imm(0)};
1377 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1378}
1379
1380static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1381 unsigned *InstCnt = nullptr) {
1382 unsigned InstCntDirect = 0;
1383 // No more than 3 instructions are used if we can select the i64 immediate
1384 // directly.
1385 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1386
1387 const PPCSubtarget &Subtarget =
1389
1390 // If we have prefixed instructions and there is a chance we can
1391 // materialize the constant with fewer prefixed instructions than
1392 // non-prefixed, try that.
1393 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1394 unsigned InstCntDirectP = 0;
1395 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1396 // Use the prefix case in either of two cases:
1397 // 1) We have no result from the non-prefix case to use.
1398 // 2) The non-prefix case uses more instructions than the prefix case.
1399 // If the prefix and non-prefix cases use the same number of instructions
1400 // we will prefer the non-prefix case.
1401 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1402 if (InstCnt)
1403 *InstCnt = InstCntDirectP;
1404 return ResultP;
1405 }
1406 }
1407
1408 if (Result) {
1409 if (InstCnt)
1410 *InstCnt = InstCntDirect;
1411 return Result;
1412 }
1413 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1414 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1415 };
1416
1417 uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
1418 uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
1419
1420 // Try to use 4 instructions to materialize the immediate which is "almost" a
1421 // splat of a 32 bit immediate.
1422 if (Hi16OfLo32 && Lo16OfLo32) {
1423 uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
1424 uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
1425 bool IsSelected = false;
1426
1427 auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1428 SDNode *Result =
1429 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1430 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1431 SDValue(Result, 0), getI32Imm(Lo16));
1432 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1433 getI32Imm(0)};
1434 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1435 };
1436
1437 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1438 IsSelected = true;
1439 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1440 // Modify Hi16OfHi32.
1441 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1442 getI32Imm(0)};
1443 Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1444 } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1445 IsSelected = true;
1446 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1447 // Modify Lo16OfLo32.
1448 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1449 getI32Imm(16), getI32Imm(31)};
1450 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1451 } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1452 IsSelected = true;
1453 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1454 // Modify Hi16OfLo32.
1455 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1456 getI32Imm(0), getI32Imm(15)};
1457 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1458 }
1459 if (IsSelected == true) {
1460 if (InstCnt)
1461 *InstCnt = 4;
1462 return Result;
1463 }
1464 }
1465
1466 // Handle the upper 32 bit value.
1467 Result =
1468 selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1469 // Add in the last bits as required.
1470 if (Hi16OfLo32) {
1471 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1472 SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1473 ++InstCntDirect;
1474 }
1475 if (Lo16OfLo32) {
1476 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1477 getI32Imm(Lo16OfLo32));
1478 ++InstCntDirect;
1479 }
1480 if (InstCnt)
1481 *InstCnt = InstCntDirect;
1482 return Result;
1483}
1484
1485// Select a 64-bit constant.
1487 SDLoc dl(N);
1488
1489 // Get 64 bit value.
1490 int64_t Imm = N->getAsZExtVal();
1491 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1492 uint64_t SextImm = SignExtend64(Imm, MinSize);
1493 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1494 if (isInt<16>(SextImm))
1495 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1496 }
1497 return selectI64Imm(CurDAG, dl, Imm);
1498}
1499
1500namespace {
1501
1502class BitPermutationSelector {
1503 struct ValueBit {
1504 SDValue V;
1505
1506 // The bit number in the value, using a convention where bit 0 is the
1507 // lowest-order bit.
1508 unsigned Idx;
1509
1510 // ConstZero means a bit we need to mask off.
1511 // Variable is a bit comes from an input variable.
1512 // VariableKnownToBeZero is also a bit comes from an input variable,
1513 // but it is known to be already zero. So we do not need to mask them.
1514 enum Kind {
1515 ConstZero,
1516 Variable,
1517 VariableKnownToBeZero
1518 } K;
1519
1520 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1521 : V(V), Idx(I), K(K) {}
1522 ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1523
1524 bool isZero() const {
1525 return K == ConstZero || K == VariableKnownToBeZero;
1526 }
1527
1528 bool hasValue() const {
1529 return K == Variable || K == VariableKnownToBeZero;
1530 }
1531
1532 SDValue getValue() const {
1533 assert(hasValue() && "Cannot get the value of a constant bit");
1534 return V;
1535 }
1536
1537 unsigned getValueBitIndex() const {
1538 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1539 return Idx;
1540 }
1541 };
1542
1543 // A bit group has the same underlying value and the same rotate factor.
1544 struct BitGroup {
1545 SDValue V;
1546 unsigned RLAmt;
1547 unsigned StartIdx, EndIdx;
1548
1549 // This rotation amount assumes that the lower 32 bits of the quantity are
1550 // replicated in the high 32 bits by the rotation operator (which is done
1551 // by rlwinm and friends in 64-bit mode).
1552 bool Repl32;
1553 // Did converting to Repl32 == true change the rotation factor? If it did,
1554 // it decreased it by 32.
1555 bool Repl32CR;
1556 // Was this group coalesced after setting Repl32 to true?
1557 bool Repl32Coalesced;
1558
1559 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1560 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1561 Repl32Coalesced(false) {
1562 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1563 << " [" << S << ", " << E << "]\n");
1564 }
1565 };
1566
1567 // Information on each (Value, RLAmt) pair (like the number of groups
1568 // associated with each) used to choose the lowering method.
1569 struct ValueRotInfo {
1570 SDValue V;
1571 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1572 unsigned NumGroups = 0;
1573 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1574 bool Repl32 = false;
1575
1576 ValueRotInfo() = default;
1577
1578 // For sorting (in reverse order) by NumGroups, and then by
1579 // FirstGroupStartIdx.
1580 bool operator < (const ValueRotInfo &Other) const {
1581 // We need to sort so that the non-Repl32 come first because, when we're
1582 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1583 // masking operation.
1584 if (Repl32 < Other.Repl32)
1585 return true;
1586 else if (Repl32 > Other.Repl32)
1587 return false;
1588 else if (NumGroups > Other.NumGroups)
1589 return true;
1590 else if (NumGroups < Other.NumGroups)
1591 return false;
1592 else if (RLAmt == 0 && Other.RLAmt != 0)
1593 return true;
1594 else if (RLAmt != 0 && Other.RLAmt == 0)
1595 return false;
1596 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1597 return true;
1598 return false;
1599 }
1600 };
1601
1602 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1603 using ValueBitsMemoizer =
1605 ValueBitsMemoizer Memoizer;
1606
1607 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1608 // The bool is true if something interesting was deduced, otherwise if we're
1609 // providing only a generic representation of V (or something else likewise
1610 // uninteresting for instruction selection) through the SmallVector.
1611 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1612 unsigned NumBits) {
1613 auto &ValueEntry = Memoizer[V];
1614 if (ValueEntry)
1615 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1616 ValueEntry.reset(new ValueBitsMemoizedValue());
1617 bool &Interesting = ValueEntry->first;
1618 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1619 Bits.resize(NumBits);
1620
1621 switch (V.getOpcode()) {
1622 default: break;
1623 case ISD::ROTL:
1624 if (isa<ConstantSDNode>(V.getOperand(1))) {
1625 assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1626 unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
1627
1628 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1629
1630 for (unsigned i = 0; i < NumBits; ++i)
1631 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1632
1633 return std::make_pair(Interesting = true, &Bits);
1634 }
1635 break;
1636 case ISD::SHL:
1637 case PPCISD::SHL:
1638 if (isa<ConstantSDNode>(V.getOperand(1))) {
1639 // sld takes 7 bits, slw takes 6.
1640 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1641
1642 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1643
1644 if (ShiftAmt >= NumBits) {
1645 for (unsigned i = 0; i < NumBits; ++i)
1646 Bits[i] = ValueBit(ValueBit::ConstZero);
1647 } else {
1648 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1649 Bits[i] = LHSBits[i - ShiftAmt];
1650 for (unsigned i = 0; i < ShiftAmt; ++i)
1651 Bits[i] = ValueBit(ValueBit::ConstZero);
1652 }
1653
1654 return std::make_pair(Interesting = true, &Bits);
1655 }
1656 break;
1657 case ISD::SRL:
1658 case PPCISD::SRL:
1659 if (isa<ConstantSDNode>(V.getOperand(1))) {
1660 // srd takes lowest 7 bits, srw takes 6.
1661 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1662
1663 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1664
1665 if (ShiftAmt >= NumBits) {
1666 for (unsigned i = 0; i < NumBits; ++i)
1667 Bits[i] = ValueBit(ValueBit::ConstZero);
1668 } else {
1669 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1670 Bits[i] = LHSBits[i + ShiftAmt];
1671 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1672 Bits[i] = ValueBit(ValueBit::ConstZero);
1673 }
1674
1675 return std::make_pair(Interesting = true, &Bits);
1676 }
1677 break;
1678 case ISD::AND:
1679 if (isa<ConstantSDNode>(V.getOperand(1))) {
1680 uint64_t Mask = V.getConstantOperandVal(1);
1681
1682 const SmallVector<ValueBit, 64> *LHSBits;
1683 // Mark this as interesting, only if the LHS was also interesting. This
1684 // prevents the overall procedure from matching a single immediate 'and'
1685 // (which is non-optimal because such an and might be folded with other
1686 // things if we don't select it here).
1687 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1688
1689 for (unsigned i = 0; i < NumBits; ++i)
1690 if (((Mask >> i) & 1) == 1)
1691 Bits[i] = (*LHSBits)[i];
1692 else {
1693 // AND instruction masks this bit. If the input is already zero,
1694 // we have nothing to do here. Otherwise, make the bit ConstZero.
1695 if ((*LHSBits)[i].isZero())
1696 Bits[i] = (*LHSBits)[i];
1697 else
1698 Bits[i] = ValueBit(ValueBit::ConstZero);
1699 }
1700
1701 return std::make_pair(Interesting, &Bits);
1702 }
1703 break;
1704 case ISD::OR: {
1705 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1706 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1707
1708 bool AllDisjoint = true;
1709 SDValue LastVal = SDValue();
1710 unsigned LastIdx = 0;
1711 for (unsigned i = 0; i < NumBits; ++i) {
1712 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1713 // If both inputs are known to be zero and one is ConstZero and
1714 // another is VariableKnownToBeZero, we can select whichever
1715 // we like. To minimize the number of bit groups, we select
1716 // VariableKnownToBeZero if this bit is the next bit of the same
1717 // input variable from the previous bit. Otherwise, we select
1718 // ConstZero.
1719 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1720 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1721 Bits[i] = LHSBits[i];
1722 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1723 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1724 Bits[i] = RHSBits[i];
1725 else
1726 Bits[i] = ValueBit(ValueBit::ConstZero);
1727 }
1728 else if (LHSBits[i].isZero())
1729 Bits[i] = RHSBits[i];
1730 else if (RHSBits[i].isZero())
1731 Bits[i] = LHSBits[i];
1732 else {
1733 AllDisjoint = false;
1734 break;
1735 }
1736 // We remember the value and bit index of this bit.
1737 if (Bits[i].hasValue()) {
1738 LastVal = Bits[i].getValue();
1739 LastIdx = Bits[i].getValueBitIndex();
1740 }
1741 else {
1742 if (LastVal) LastVal = SDValue();
1743 LastIdx = 0;
1744 }
1745 }
1746
1747 if (!AllDisjoint)
1748 break;
1749
1750 return std::make_pair(Interesting = true, &Bits);
1751 }
1752 case ISD::ZERO_EXTEND: {
1753 // We support only the case with zero extension from i32 to i64 so far.
1754 if (V.getValueType() != MVT::i64 ||
1755 V.getOperand(0).getValueType() != MVT::i32)
1756 break;
1757
1758 const SmallVector<ValueBit, 64> *LHSBits;
1759 const unsigned NumOperandBits = 32;
1760 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1761 NumOperandBits);
1762
1763 for (unsigned i = 0; i < NumOperandBits; ++i)
1764 Bits[i] = (*LHSBits)[i];
1765
1766 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1767 Bits[i] = ValueBit(ValueBit::ConstZero);
1768
1769 return std::make_pair(Interesting, &Bits);
1770 }
1771 case ISD::TRUNCATE: {
1772 EVT FromType = V.getOperand(0).getValueType();
1773 EVT ToType = V.getValueType();
1774 // We support only the case with truncate from i64 to i32.
1775 if (FromType != MVT::i64 || ToType != MVT::i32)
1776 break;
1777 const unsigned NumAllBits = FromType.getSizeInBits();
1779 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1780 NumAllBits);
1781 const unsigned NumValidBits = ToType.getSizeInBits();
1782
1783 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1784 // So, we cannot include this truncate.
1785 bool UseUpper32bit = false;
1786 for (unsigned i = 0; i < NumValidBits; ++i)
1787 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1788 UseUpper32bit = true;
1789 break;
1790 }
1791 if (UseUpper32bit)
1792 break;
1793
1794 for (unsigned i = 0; i < NumValidBits; ++i)
1795 Bits[i] = (*InBits)[i];
1796
1797 return std::make_pair(Interesting, &Bits);
1798 }
1799 case ISD::AssertZext: {
1800 // For AssertZext, we look through the operand and
1801 // mark the bits known to be zero.
1802 const SmallVector<ValueBit, 64> *LHSBits;
1803 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1804 NumBits);
1805
1806 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1807 const unsigned NumValidBits = FromType.getSizeInBits();
1808 for (unsigned i = 0; i < NumValidBits; ++i)
1809 Bits[i] = (*LHSBits)[i];
1810
1811 // These bits are known to be zero but the AssertZext may be from a value
1812 // that already has some constant zero bits (i.e. from a masking and).
1813 for (unsigned i = NumValidBits; i < NumBits; ++i)
1814 Bits[i] = (*LHSBits)[i].hasValue()
1815 ? ValueBit((*LHSBits)[i].getValue(),
1816 (*LHSBits)[i].getValueBitIndex(),
1817 ValueBit::VariableKnownToBeZero)
1818 : ValueBit(ValueBit::ConstZero);
1819
1820 return std::make_pair(Interesting, &Bits);
1821 }
1822 case ISD::LOAD:
1823 LoadSDNode *LD = cast<LoadSDNode>(V);
1824 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1825 EVT VT = LD->getMemoryVT();
1826 const unsigned NumValidBits = VT.getSizeInBits();
1827
1828 for (unsigned i = 0; i < NumValidBits; ++i)
1829 Bits[i] = ValueBit(V, i);
1830
1831 // These bits are known to be zero.
1832 for (unsigned i = NumValidBits; i < NumBits; ++i)
1833 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1834
1835 // Zero-extending load itself cannot be optimized. So, it is not
1836 // interesting by itself though it gives useful information.
1837 return std::make_pair(Interesting = false, &Bits);
1838 }
1839 break;
1840 }
1841
1842 for (unsigned i = 0; i < NumBits; ++i)
1843 Bits[i] = ValueBit(V, i);
1844
1845 return std::make_pair(Interesting = false, &Bits);
1846 }
1847
1848 // For each value (except the constant ones), compute the left-rotate amount
1849 // to get it from its original to final position.
1850 void computeRotationAmounts() {
1851 NeedMask = false;
1852 RLAmt.resize(Bits.size());
1853 for (unsigned i = 0; i < Bits.size(); ++i)
1854 if (Bits[i].hasValue()) {
1855 unsigned VBI = Bits[i].getValueBitIndex();
1856 if (i >= VBI)
1857 RLAmt[i] = i - VBI;
1858 else
1859 RLAmt[i] = Bits.size() - (VBI - i);
1860 } else if (Bits[i].isZero()) {
1861 NeedMask = true;
1862 RLAmt[i] = UINT32_MAX;
1863 } else {
1864 llvm_unreachable("Unknown value bit type");
1865 }
1866 }
1867
1868 // Collect groups of consecutive bits with the same underlying value and
1869 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1870 // they break up groups.
// Rebuilds BitGroups from Bits and RLAmt. A group is a run of consecutive
// result bits that share the same underlying value and rotate amount; groups
// whose value is null (pure zeros) are not recorded.
// LateMask: when true, bits without a value inherit the value and rotate
// amount of the preceding bit instead of splitting the group.
1871 void collectBitGroups(bool LateMask) {
1872 BitGroups.clear();
1873
// State of the group currently being extended; seeded from bit 0.
1874 unsigned LastRLAmt = RLAmt[0];
1875 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1876 unsigned LastGroupStartIdx = 0;
1877 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1878 for (unsigned i = 1; i < Bits.size(); ++i) {
1879 unsigned ThisRLAmt = RLAmt[i];
1880 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1881 if (LateMask && !ThisValue) {
1882 ThisValue = LastValue;
1883 ThisRLAmt = LastRLAmt;
1884 // If we're doing late masking, then the first bit group always starts
1885 // at zero (even if the first bits were zero).
1886 if (BitGroups.empty())
1887 LastGroupStartIdx = 0;
1888 }
1889
1890 // If this bit is known to be zero and the current group is a bit group
1891 // of zeros, we do not need to terminate the current bit group even if the
1892 // Value or RLAmt does not match here. Instead, we terminate this group
1893 // when the first non-zero bit appears later.
1894 if (IsGroupOfZeros && Bits[i].isZero())
1895 continue;
1896
1897 // If this bit has the same underlying value and the same rotate factor as
1898 // the last one, then they're part of the same group.
1899 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1900 // We cannot continue the current group if this bit is not known to
1901 // be zero in a bit group of zeros.
1902 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1903 continue;
1904
// The current group ends at i-1; it is recorded only when it has a value.
1905 if (LastValue.getNode())
1906 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1907 i-1));
1908 LastRLAmt = ThisRLAmt;
1909 LastValue = ThisValue;
1910 LastGroupStartIdx = i;
1911 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1912 }
// Close the group that runs to the most significant bit.
1913 if (LastValue.getNode())
1914 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1915 Bits.size()-1));
1916
1917 if (BitGroups.empty())
1918 return;
1919
1920 // We might be able to combine the first and last groups.
1921 if (BitGroups.size() > 1) {
1922 // If the first and last groups are the same, then remove the first group
1923 // in favor of the last group, making the ending index of the last group
1924 // equal to the ending index of the to-be-removed first group.
// After the merge the surviving group wraps around (StartIdx > EndIdx).
1925 if (BitGroups[0].StartIdx == 0 &&
1926 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1927 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1928 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1929 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1930 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1931 BitGroups.erase(BitGroups.begin());
1932 }
1933 }
1934 }
1935
1936 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1937 // associated with each. If the number of groups are same, we prefer a group
1938 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1939 // instruction. If there is a degeneracy, pick the one that occurs
1940 // first (in the final value).
1941 void collectValueRotInfo() {
1942 ValueRots.clear();
1943
1944 for (auto &BG : BitGroups) {
1945 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1946 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1947 VRI.V = BG.V;
1948 VRI.RLAmt = BG.RLAmt;
1949 VRI.Repl32 = BG.Repl32;
1950 VRI.NumGroups += 1;
1951 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1952 }
1953
1954 // Now that we've collected the various ValueRotInfo instances, we need to
1955 // sort them.
1956 ValueRotsVec.clear();
1957 for (auto &I : ValueRots) {
1958 ValueRotsVec.push_back(I.second);
1959 }
1960 llvm::sort(ValueRotsVec);
1961 }
1962
1963 // In 64-bit mode, rlwinm and friends have a rotation operator that
1964 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1965 // indices of these instructions can only be in the lower 32 bits, so they
1966 // can only represent some 64-bit bit groups. However, when they can be used,
1967 // the 32-bit replication can be used to represent, as a single bit group,
1968 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1969 // groups when possible. The bit groups are updated in place; nothing is
1970 // returned.
// Marks bit groups that can use the 32-bit replicating rotate (Repl32) and
// then merges adjacent Repl32 groups of the same value and rotate amount.
// BitGroups is modified in place.
1971 void assignRepl32BitGroups() {
1972 // If we have bits like this:
1973 //
1974 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1975 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1976 // Groups: | RLAmt = 8 | RLAmt = 40 |
1977 //
1978 // But, making use of a 32-bit operation that replicates the low-order 32
1979 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1980 // of 8.
1981
// True when every bit of BG that has an underlying value takes it from a
// source bit index below 32. Wrapped groups (StartIdx > EndIdx) are scanned
// as two ranges: [StartIdx, Bits.size()) and [0, EndIdx].
1982 auto IsAllLow32 = [this](BitGroup & BG) {
1983 if (BG.StartIdx <= BG.EndIdx) {
1984 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1985 if (!Bits[i].hasValue())
1986 continue;
1987 if (Bits[i].getValueBitIndex() >= 32)
1988 return false;
1989 }
1990 } else {
1991 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1992 if (!Bits[i].hasValue())
1993 continue;
1994 if (Bits[i].getValueBitIndex() >= 32)
1995 return false;
1996 }
1997 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1998 if (!Bits[i].hasValue())
1999 continue;
2000 if (Bits[i].getValueBitIndex() >= 32)
2001 return false;
2002 }
2003 }
2004
2005 return true;
2006 };
2007
// First pass: decide, group by group, whether to switch to Repl32 form.
2008 for (auto &BG : BitGroups) {
2009 // If this bit group has RLAmt of 0 and will not be merged with
2010 // another bit group, we don't benefit from Repl32. We don't mark
2011 // such group to give more freedom for later instruction selection.
2012 if (BG.RLAmt == 0) {
// A merge partner is any other group of the same value whose rotate amount
// is 0 or 32.
2013 auto PotentiallyMerged = [this](BitGroup & BG) {
2014 for (auto &BG2 : BitGroups)
2015 if (&BG != &BG2 && BG.V == BG2.V &&
2016 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2017 return true;
2018 return false;
2019 };
2020 if (!PotentiallyMerged(BG))
2021 continue;
2022 }
// Only groups lying entirely in the low 32 result bits are candidates.
2023 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2024 if (IsAllLow32(BG)) {
// Rotate amounts of 32 or more are reduced by 32 and flagged via Repl32CR.
2025 if (BG.RLAmt >= 32) {
2026 BG.RLAmt -= 32;
2027 BG.Repl32CR = true;
2028 }
2029
2030 BG.Repl32 = true;
2031
2032 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2033 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2034 << BG.StartIdx << ", " << BG.EndIdx << "]\n");
2035 }
2036 }
2037 }
2038
2039 // Now walk through the bit groups, consolidating where possible.
2040 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2041 // We might want to remove this bit group by merging it with the previous
2042 // group (which might be the ending group).
2043 auto IP = (I == BitGroups.begin()) ?
2044 std::prev(BitGroups.end()) : std::prev(I);
// Merge I into IP when both are Repl32 groups of the same value and rotate
// amount and I starts right after IP ends (modulo 64).
2045 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2046 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2047
2048 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2049 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2050 << I->StartIdx << ", " << I->EndIdx
2051 << "] with group with range [" << IP->StartIdx << ", "
2052 << IP->EndIdx << "]\n");
2053
2054 IP->EndIdx = I->EndIdx;
2055 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2056 IP->Repl32Coalesced = true;
// erase() returns the iterator to the next group, so I is not advanced here.
2057 I = BitGroups.erase(I);
2058 continue;
2059 } else {
2060 // There is a special case worth handling: If there is a single group
2061 // covering the entire upper 32 bits, and it can be merged with both
2062 // the next and previous groups (which might be the same group), then
2063 // do so. If it is the same group (so there will be only one group in
2064 // total), then we need to reverse the order of the range so that it
2065 // covers the entire 64 bits.
2066 if (I->StartIdx == 32 && I->EndIdx == 63) {
2067 assert(std::next(I) == BitGroups.end() &&
2068 "bit group ends at index 63 but there is another?");
// IN is the group that follows I cyclically, i.e. the first group.
2069 auto IN = BitGroups.begin();
2070
2071 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2072 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2073 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2074 IsAllLow32(*I)) {
2075
2076 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2077 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2078 << ", " << I->EndIdx
2079 << "] with 32-bit replicated groups with ranges ["
2080 << IP->StartIdx << ", " << IP->EndIdx << "] and ["
2081 << IN->StartIdx << ", " << IN->EndIdx << "]\n");
2082
2083 if (IP == IN) {
2084 // There is only one other group; change it to cover the whole
2085 // range (backward, so that it can still be Repl32 but cover the
2086 // whole 64-bit range).
2087 IP->StartIdx = 31;
2088 IP->EndIdx = 30;
2089 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2090 IP->Repl32Coalesced = true;
2091 I = BitGroups.erase(I);
2092 } else {
2093 // There are two separate groups, one before this group and one
2094 // after us (at the beginning). We're going to remove this group,
2095 // but also the group at the very beginning.
2096 IP->EndIdx = IN->EndIdx;
2097 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2098 IP->Repl32Coalesced = true;
2099 I = BitGroups.erase(I);
2100 BitGroups.erase(BitGroups.begin());
2101 }
2102
2103 // This must be the last group in the vector (and we might have
2104 // just invalidated the iterator above), so break here.
2105 break;
2106 }
2107 }
2108 }
2109
2110 ++I;
2111 }
2112 }
2113
2114 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2115 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2116 }
2117
2118 uint64_t getZerosMask() {
2119 uint64_t Mask = 0;
2120 for (unsigned i = 0; i < Bits.size(); ++i) {
2121 if (Bits[i].hasValue())
2122 continue;
2123 Mask |= (UINT64_C(1) << i);
2124 }
2125
2126 return ~Mask;
2127 }
2128
2129 // This method extends an input value to 64 bit if input is 32-bit integer.
2130 // While selecting instructions in BitPermutationSelector in 64-bit mode,
2131 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2132 // In such case, we extend it to 64 bit to be consistent with other values.
2133 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2134 if (V.getValueSizeInBits() == 64)
2135 return V;
2136
2137 assert(V.getValueSizeInBits() == 32);
2138 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2139 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2140 MVT::i64), 0);
2141 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2142 MVT::i64, ImDef, V,
2143 SubRegIdx), 0);
2144 return ExtVal;
2145 }
2146
2147 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2148 if (V.getValueSizeInBits() == 32)
2149 return V;
2150
2151 assert(V.getValueSizeInBits() == 64);
2152 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2153 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2154 MVT::i32, V, SubRegIdx), 0);
2155 return SubVal;
2156 }
2157
2158 // Depending on the number of groups for a particular value, it might be
2159 // better to rotate, mask explicitly (using andi/andis), and then or the
2160 // result. Select this part of the result first.
2161 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2163 return;
2164
2165 for (ValueRotInfo &VRI : ValueRotsVec) {
2166 unsigned Mask = 0;
2167 for (unsigned i = 0; i < Bits.size(); ++i) {
2168 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2169 continue;
2170 if (RLAmt[i] != VRI.RLAmt)
2171 continue;
2172 Mask |= (1u << i);
2173 }
2174
2175 // Compute the masks for andi/andis that would be necessary.
2176 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2177 assert((ANDIMask != 0 || ANDISMask != 0) &&
2178 "No set bits in mask for value bit groups");
2179 bool NeedsRotate = VRI.RLAmt != 0;
2180
2181 // We're trying to minimize the number of instructions. If we have one
2182 // group, using one of andi/andis can break even. If we have three
2183 // groups, we can use both andi and andis and break even (to use both
2184 // andi and andis we also need to or the results together). We need four
2185 // groups if we also need to rotate. To use andi/andis we need to do more
2186 // than break even because rotate-and-mask instructions tend to be easier
2187 // to schedule.
2188
2189 // FIXME: We've biased here against using andi/andis, which is right for
2190 // POWER cores, but not optimal everywhere. For example, on the A2,
2191 // andi/andis have single-cycle latency whereas the rotate-and-mask
2192 // instructions take two cycles, and it would be better to bias toward
2193 // andi/andis in break-even cases.
2194
2195 unsigned NumAndInsts = (unsigned) NeedsRotate +
2196 (unsigned) (ANDIMask != 0) +
2197 (unsigned) (ANDISMask != 0) +
2198 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2199 (unsigned) (bool) Res;
2200
2201 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2202 << " RL: " << VRI.RLAmt << ":"
2203 << "\n\t\t\tisel using masking: " << NumAndInsts
2204 << " using rotates: " << VRI.NumGroups << "\n");
2205
2206 if (NumAndInsts >= VRI.NumGroups)
2207 continue;
2208
2209 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2210
2211 if (InstCnt) *InstCnt += NumAndInsts;
2212
2213 SDValue VRot;
2214 if (VRI.RLAmt) {
2215 SDValue Ops[] =
2216 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2217 getI32Imm(0, dl), getI32Imm(31, dl) };
2218 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2219 Ops), 0);
2220 } else {
2221 VRot = TruncateToInt32(VRI.V, dl);
2222 }
2223
2224 SDValue ANDIVal, ANDISVal;
2225 if (ANDIMask != 0)
2226 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2227 VRot, getI32Imm(ANDIMask, dl)),
2228 0);
2229 if (ANDISMask != 0)
2230 ANDISVal =
2231 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2232 getI32Imm(ANDISMask, dl)),
2233 0);
2234
2235 SDValue TotalVal;
2236 if (!ANDIVal)
2237 TotalVal = ANDISVal;
2238 else if (!ANDISVal)
2239 TotalVal = ANDIVal;
2240 else
2241 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2242 ANDIVal, ANDISVal), 0);
2243
2244 if (!Res)
2245 Res = TotalVal;
2246 else
2247 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2248 Res, TotalVal), 0);
2249
2250 // Now, remove all groups with this underlying value and rotation
2251 // factor.
2252 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2253 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2254 });
2255 }
2256 }
2257
2258 // Instruction selection for the 32-bit case.
2259 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2260 SDLoc dl(N);
2261 SDValue Res;
2262
2263 if (InstCnt) *InstCnt = 0;
2264
2265 // Take care of cases that should use andi/andis first.
2266 SelectAndParts32(dl, Res, InstCnt);
2267
2268 // If we've not yet selected a 'starting' instruction, and we have no zeros
2269 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2270 // number of groups), and start with this rotated value.
2271 if ((!NeedMask || LateMask) && !Res) {
2272 ValueRotInfo &VRI = ValueRotsVec[0];
2273 if (VRI.RLAmt) {
2274 if (InstCnt) *InstCnt += 1;
2275 SDValue Ops[] =
2276 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2277 getI32Imm(0, dl), getI32Imm(31, dl) };
2278 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2279 0);
2280 } else {
2281 Res = TruncateToInt32(VRI.V, dl);
2282 }
2283
2284 // Now, remove all groups with this underlying value and rotation factor.
2285 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2286 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2287 });
2288 }
2289
2290 if (InstCnt) *InstCnt += BitGroups.size();
2291
2292 // Insert the other groups (one at a time).
2293 for (auto &BG : BitGroups) {
2294 if (!Res) {
2295 SDValue Ops[] =
2296 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2297 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2298 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2299 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2300 } else {
2301 SDValue Ops[] =
2302 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2303 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2304 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2305 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2306 }
2307 }
2308
2309 if (LateMask) {
2310 unsigned Mask = (unsigned) getZerosMask();
2311
2312 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2313 assert((ANDIMask != 0 || ANDISMask != 0) &&
2314 "No set bits in zeros mask?");
2315
2316 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2317 (unsigned) (ANDISMask != 0) +
2318 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2319
2320 SDValue ANDIVal, ANDISVal;
2321 if (ANDIMask != 0)
2322 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2323 Res, getI32Imm(ANDIMask, dl)),
2324 0);
2325 if (ANDISMask != 0)
2326 ANDISVal =
2327 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2328 getI32Imm(ANDISMask, dl)),
2329 0);
2330
2331 if (!ANDIVal)
2332 Res = ANDISVal;
2333 else if (!ANDISVal)
2334 Res = ANDIVal;
2335 else
2336 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2337 ANDIVal, ANDISVal), 0);
2338 }
2339
2340 return Res.getNode();
2341 }
2342
// Returns how many instructions (1 or 2) a 64-bit rotate-and-mask, or a
// rotate-and-insert when IsIns is true, needs for the given rotate amount and
// mask range. MaskStart and MaskEnd are bit indices and must be below 64.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // Out-of-range indices would silently wrap in the unsigned arithmetic below.
  assert(MaskStart < 64 && MaskEnd < 64 && "Mask index out of range");

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  unsigned InstMaskStart = 64 - MaskEnd - 1,
           InstMaskEnd = 64 - MaskStart - 1;

  // The 32-bit replicating forms always count as one instruction.
  if (Repl32)
    return 1;

  // One instruction suffices when the mask touches either end of the
  // register (non-insert forms only) or ends exactly at bit 63 - RLAmt.
  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
      InstMaskEnd == 63 - RLAmt)
    return 1;

  // Otherwise two instructions are counted.
  return 2;
}
2360
2361 // For 64-bit values, not all combinations of rotates and masks are
2362 // available. Produce one if it is available.
2363 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2364 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2365 unsigned *InstCnt = nullptr) {
2366 // In the notation used by the instructions, 'start' and 'end' are reversed
2367 // because bits are counted from high to low order.
2368 unsigned InstMaskStart = 64 - MaskEnd - 1,
2369 InstMaskEnd = 64 - MaskStart - 1;
2370
2371 if (InstCnt) *InstCnt += 1;
2372
2373 if (Repl32) {
2374 // This rotation amount assumes that the lower 32 bits of the quantity
2375 // are replicated in the high 32 bits by the rotation operator (which is
2376 // done by rlwinm and friends).
2377 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2378 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2379 SDValue Ops[] =
2380 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2381 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2382 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2383 Ops), 0);
2384 }
2385
2386 if (InstMaskEnd == 63) {
2387 SDValue Ops[] =
2388 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2389 getI32Imm(InstMaskStart, dl) };
2390 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2391 }
2392
2393 if (InstMaskStart == 0) {
2394 SDValue Ops[] =
2395 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2396 getI32Imm(InstMaskEnd, dl) };
2397 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2398 }
2399
2400 if (InstMaskEnd == 63 - RLAmt) {
2401 SDValue Ops[] =
2402 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2403 getI32Imm(InstMaskStart, dl) };
2404 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2405 }
2406
2407 // We cannot do this with a single instruction, so we'll use two. The
2408 // problem is that we're not free to choose both a rotation amount and mask
2409 // start and end independently. We can choose an arbitrary mask start and
2410 // end, but then the rotation amount is fixed. Rotation, however, can be
2411 // inverted, and so by applying an "inverse" rotation first, we can get the
2412 // desired result.
2413 if (InstCnt) *InstCnt += 1;
2414
2415 // The rotation mask for the second instruction must be MaskStart.
2416 unsigned RLAmt2 = MaskStart;
2417 // The first instruction must rotate V so that the overall rotation amount
2418 // is RLAmt.
2419 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2420 if (RLAmt1)
2421 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2422 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2423 }
2424
2425 // For 64-bit values, not all combinations of rotates and masks are
2426 // available. Produce a rotate-mask-and-insert if one is available.
2427 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2428 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2429 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2430 // In the notation used by the instructions, 'start' and 'end' are reversed
2431 // because bits are counted from high to low order.
2432 unsigned InstMaskStart = 64 - MaskEnd - 1,
2433 InstMaskEnd = 64 - MaskStart - 1;
2434
2435 if (InstCnt) *InstCnt += 1;
2436
2437 if (Repl32) {
2438 // This rotation amount assumes that the lower 32 bits of the quantity
2439 // are replicated in the high 32 bits by the rotation operator (which is
2440 // done by rlwinm and friends).
2441 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2442 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2443 SDValue Ops[] =
2444 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2445 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2446 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2447 Ops), 0);
2448 }
2449
2450 if (InstMaskEnd == 63 - RLAmt) {
2451 SDValue Ops[] =
2452 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2453 getI32Imm(InstMaskStart, dl) };
2454 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2455 }
2456
2457 // We cannot do this with a single instruction, so we'll use two. The
2458 // problem is that we're not free to choose both a rotation amount and mask
2459 // start and end independently. We can choose an arbitrary mask start and
2460 // end, but then the rotation amount is fixed. Rotation, however, can be
2461 // inverted, and so by applying an "inverse" rotation first, we can get the
2462 // desired result.
2463 if (InstCnt) *InstCnt += 1;
2464
2465 // The rotation mask for the second instruction must be MaskStart.
2466 unsigned RLAmt2 = MaskStart;
2467 // The first instruction must rotate V so that the overall rotation amount
2468 // is RLAmt.
2469 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2470 if (RLAmt1)
2471 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2472 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2473 }
2474
2475 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2477 return;
2478
2479 // The idea here is the same as in the 32-bit version, but with additional
2480 // complications from the fact that Repl32 might be true. Because we
2481 // aggressively convert bit groups to Repl32 form (which, for small
2482 // rotation factors, involves no other change), and then coalesce, it might
2483 // be the case that a single 64-bit masking operation could handle both
2484 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2485 // form allowed coalescing, then we must use a 32-bit rotaton in order to
2486 // completely capture the new combined bit group.
2487
2488 for (ValueRotInfo &VRI : ValueRotsVec) {
2489 uint64_t Mask = 0;
2490
2491 // We need to add to the mask all bits from the associated bit groups.
2492 // If Repl32 is false, we need to add bits from bit groups that have
2493 // Repl32 true, but are trivially convertable to Repl32 false. Such a
2494 // group is trivially convertable if it overlaps only with the lower 32
2495 // bits, and the group has not been coalesced.
2496 auto MatchingBG = [VRI](const BitGroup &BG) {
2497 if (VRI.V != BG.V)
2498 return false;
2499
2500 unsigned EffRLAmt = BG.RLAmt;
2501 if (!VRI.Repl32 && BG.Repl32) {
2502 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2503 !BG.Repl32Coalesced) {
2504 if (BG.Repl32CR)
2505 EffRLAmt += 32;
2506 } else {
2507 return false;
2508 }
2509 } else if (VRI.Repl32 != BG.Repl32) {
2510 return false;
2511 }
2512
2513 return VRI.RLAmt == EffRLAmt;
2514 };
2515
2516 for (auto &BG : BitGroups) {
2517 if (!MatchingBG(BG))
2518 continue;
2519
2520 if (BG.StartIdx <= BG.EndIdx) {
2521 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2522 Mask |= (UINT64_C(1) << i);
2523 } else {
2524 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2525 Mask |= (UINT64_C(1) << i);
2526 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2527 Mask |= (UINT64_C(1) << i);
2528 }
2529 }
2530
2531 // We can use the 32-bit andi/andis technique if the mask does not
2532 // require any higher-order bits. This can save an instruction compared
2533 // to always using the general 64-bit technique.
2534 bool Use32BitInsts = isUInt<32>(Mask);
2535 // Compute the masks for andi/andis that would be necessary.
2536 unsigned ANDIMask = (Mask & UINT16_MAX),
2537 ANDISMask = (Mask >> 16) & UINT16_MAX;
2538
2539 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2540
2541 unsigned NumAndInsts = (unsigned) NeedsRotate +
2542 (unsigned) (bool) Res;
2543 unsigned NumOfSelectInsts = 0;
2544 selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2545 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2546 if (Use32BitInsts)
2547 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2548 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2549 else
2550 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2551
2552 unsigned NumRLInsts = 0;
2553 bool FirstBG = true;
2554 bool MoreBG = false;
2555 for (auto &BG : BitGroups) {
2556 if (!MatchingBG(BG)) {
2557 MoreBG = true;
2558 continue;
2559 }
2560 NumRLInsts +=
2561 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2562 !FirstBG);
2563 FirstBG = false;
2564 }
2565
2566 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2567 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2568 << "\n\t\t\tisel using masking: " << NumAndInsts
2569 << " using rotates: " << NumRLInsts << "\n");
2570
2571 // When we'd use andi/andis, we bias toward using the rotates (andi only
2572 // has a record form, and is cracked on POWER cores). However, when using
2573 // general 64-bit constant formation, bias toward the constant form,
2574 // because that exposes more opportunities for CSE.
2575 if (NumAndInsts > NumRLInsts)
2576 continue;
2577 // When merging multiple bit groups, instruction or is used.
2578 // But when rotate is used, rldimi can inert the rotated value into any
2579 // register, so instruction or can be avoided.
2580 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2581 continue;
2582
2583 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2584
2585 if (InstCnt) *InstCnt += NumAndInsts;
2586
2587 SDValue VRot;
2588 // We actually need to generate a rotation if we have a non-zero rotation
2589 // factor or, in the Repl32 case, if we care about any of the
2590 // higher-order replicated bits. In the latter case, we generate a mask
2591 // backward so that it actually includes the entire 64 bits.
2592 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2593 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2594 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2595 else
2596 VRot = VRI.V;
2597
2598 SDValue TotalVal;
2599 if (Use32BitInsts) {
2600 assert((ANDIMask != 0 || ANDISMask != 0) &&
2601 "No set bits in mask when using 32-bit ands for 64-bit value");
2602
2603 SDValue ANDIVal, ANDISVal;
2604 if (ANDIMask != 0)
2605 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2606 ExtendToInt64(VRot, dl),
2607 getI32Imm(ANDIMask, dl)),
2608 0);
2609 if (ANDISMask != 0)
2610 ANDISVal =
2611 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2612 ExtendToInt64(VRot, dl),
2613 getI32Imm(ANDISMask, dl)),
2614 0);
2615
2616 if (!ANDIVal)
2617 TotalVal = ANDISVal;
2618 else if (!ANDISVal)
2619 TotalVal = ANDIVal;
2620 else
2621 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2622 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2623 } else {
2624 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2625 TotalVal =
2626 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2627 ExtendToInt64(VRot, dl), TotalVal),
2628 0);
2629 }
2630
2631 if (!Res)
2632 Res = TotalVal;
2633 else
2634 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2635 ExtendToInt64(Res, dl), TotalVal),
2636 0);
2637
2638 // Now, remove all groups with this underlying value and rotation
2639 // factor.
2640 eraseMatchingBitGroups(MatchingBG);
2641 }
2642 }
2643
2644 // Instruction selection for the 64-bit case.
2645 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2646 SDLoc dl(N);
2647 SDValue Res;
2648
2649 if (InstCnt) *InstCnt = 0;
2650
2651 // Take care of cases that should use andi/andis first.
2652 SelectAndParts64(dl, Res, InstCnt);
2653
2654 // If we've not yet selected a 'starting' instruction, and we have no zeros
2655 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2656 // number of groups), and start with this rotated value.
2657 if ((!NeedMask || LateMask) && !Res) {
2658 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2659 // groups will come first, and so the VRI representing the largest number
2660 // of groups might not be first (it might be the first Repl32 groups).
2661 unsigned MaxGroupsIdx = 0;
2662 if (!ValueRotsVec[0].Repl32) {
2663 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2664 if (ValueRotsVec[i].Repl32) {
2665 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2666 MaxGroupsIdx = i;
2667 break;
2668 }
2669 }
2670
2671 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2672 bool NeedsRotate = false;
2673 if (VRI.RLAmt) {
2674 NeedsRotate = true;
2675 } else if (VRI.Repl32) {
2676 for (auto &BG : BitGroups) {
2677 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2678 BG.Repl32 != VRI.Repl32)
2679 continue;
2680
2681 // We don't need a rotate if the bit group is confined to the lower
2682 // 32 bits.
2683 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2684 continue;
2685
2686 NeedsRotate = true;
2687 break;
2688 }
2689 }
2690
2691 if (NeedsRotate)
2692 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2693 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2694 InstCnt);
2695 else
2696 Res = VRI.V;
2697
2698 // Now, remove all groups with this underlying value and rotation factor.
2699 if (Res)
2700 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2701 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2702 BG.Repl32 == VRI.Repl32;
2703 });
2704 }
2705
2706 // Because 64-bit rotates are more flexible than inserts, we might have a
2707 // preference regarding which one we do first (to save one instruction).
2708 if (!Res)
2709 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2710 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2711 false) <
2712 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2713 true)) {
2714 if (I != BitGroups.begin()) {
2715 BitGroup BG = *I;
2716 BitGroups.erase(I);
2717 BitGroups.insert(BitGroups.begin(), BG);
2718 }
2719
2720 break;
2721 }
2722 }
2723
2724 // Insert the other groups (one at a time).
2725 for (auto &BG : BitGroups) {
2726 if (!Res)
2727 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2728 BG.EndIdx, InstCnt);
2729 else
2730 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2731 BG.StartIdx, BG.EndIdx, InstCnt);
2732 }
2733
2734 if (LateMask) {
2735 uint64_t Mask = getZerosMask();
2736
2737 // We can use the 32-bit andi/andis technique if the mask does not
2738 // require any higher-order bits. This can save an instruction compared
2739 // to always using the general 64-bit technique.
2740 bool Use32BitInsts = isUInt<32>(Mask);
2741 // Compute the masks for andi/andis that would be necessary.
2742 unsigned ANDIMask = (Mask & UINT16_MAX),
2743 ANDISMask = (Mask >> 16) & UINT16_MAX;
2744
2745 if (Use32BitInsts) {
2746 assert((ANDIMask != 0 || ANDISMask != 0) &&
2747 "No set bits in mask when using 32-bit ands for 64-bit value");
2748
2749 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2750 (unsigned) (ANDISMask != 0) +
2751 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2752
2753 SDValue ANDIVal, ANDISVal;
2754 if (ANDIMask != 0)
2755 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2756 ExtendToInt64(Res, dl),
2757 getI32Imm(ANDIMask, dl)),
2758 0);
2759 if (ANDISMask != 0)
2760 ANDISVal =
2761 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2762 ExtendToInt64(Res, dl),
2763 getI32Imm(ANDISMask, dl)),
2764 0);
2765
2766 if (!ANDIVal)
2767 Res = ANDISVal;
2768 else if (!ANDISVal)
2769 Res = ANDIVal;
2770 else
2771 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2772 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2773 } else {
2774 unsigned NumOfSelectInsts = 0;
2775 SDValue MaskVal =
2776 SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
2777 Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2778 ExtendToInt64(Res, dl), MaskVal),
2779 0);
2780 if (InstCnt)
2781 *InstCnt += NumOfSelectInsts + /* and */ 1;
2782 }
2783 }
2784
2785 return Res.getNode();
2786 }
2787
2788 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2789 // Fill in BitGroups.
2790 collectBitGroups(LateMask);
2791 if (BitGroups.empty())
2792 return nullptr;
2793
2794 // For 64-bit values, figure out when we can use 32-bit instructions.
2795 if (Bits.size() == 64)
2796 assignRepl32BitGroups();
2797
2798 // Fill in ValueRotsVec.
2799 collectValueRotInfo();
2800
2801 if (Bits.size() == 32) {
2802 return Select32(N, LateMask, InstCnt);
2803 } else {
2804 assert(Bits.size() == 64 && "Not 64 bits here?");
2805 return Select64(N, LateMask, InstCnt);
2806 }
2807
2808 return nullptr;
2809 }
2810
2811 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2812 erase_if(BitGroups, F);
2813 }
2814
2816
// NOTE(review): the methods of this class read and assign a member named
// `Bits` (e.g. `Bits.size()`), but its declaration is not visible in this
// member list -- confirm it is declared alongside these fields.

// Set by computeRotationAmounts(). When false, Select64 may start directly
// from a rotated value and the early/late masking comparison is skipped.
2817 bool NeedMask = false;
2819
// The bit groups still to be selected; filled in by collectBitGroups() and
// pruned by eraseMatchingBitGroups().
2820 SmallVector<BitGroup, 16> BitGroups;
2821
// NOTE(review): presumably the per-(value, rotation) map from which
// ValueRotsVec is built -- it is not used by the code visible here; verify
// against collectValueRotInfo().
2822 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
// (Value, rotation) entries iterated during selection; filled in by
// collectValueRotInfo().
2823 SmallVector<ValueRotInfo, 16> ValueRotsVec;
2824
// DAG in which all machine nodes are created; set by the constructor.
2825 SelectionDAG *CurDAG = nullptr;
2826
2827public:
2828 BitPermutationSelector(SelectionDAG *DAG)
2829 : CurDAG(DAG) {}
2830
2831 // Here we try to match complex bit permutations into a set of
2832 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2833 // known to produce optimal code for common cases (like i32 byte swapping).
2834 SDNode *Select(SDNode *N) {
2835 Memoizer.clear();
2836 auto Result =
2837 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2838 if (!Result.first)
2839 return nullptr;
2840 Bits = std::move(*Result.second);
2841
2842 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2843 " selection for: ");
2844 LLVM_DEBUG(N->dump(CurDAG));
2845
2846 // Fill it RLAmt and set NeedMask.
2847 computeRotationAmounts();
2848
2849 if (!NeedMask)
2850 return Select(N, false);
2851
2852 // We currently have two techniques for handling results with zeros: early
2853 // masking (the default) and late masking. Late masking is sometimes more
2854 // efficient, but because the structure of the bit groups is different, it
2855 // is hard to tell without generating both and comparing the results. With
2856 // late masking, we ignore zeros in the resulting value when inserting each
2857 // set of bit groups, and then mask in the zeros at the end. With early
2858 // masking, we only insert the non-zero parts of the result at every step.
2859
2860 unsigned InstCnt = 0, InstCntLateMask = 0;
2861 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2862 SDNode *RN = Select(N, false, &InstCnt);
2863 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2864
2865 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2866 SDNode *RNLM = Select(N, true, &InstCntLateMask);
2867 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2868 << " instructions\n");
2869
2870 if (InstCnt <= InstCntLateMask) {
2871 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2872 return RN;
2873 }
2874
2875 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2876 return RNLM;
2877 }
2878};
2879
2880class IntegerCompareEliminator {
2881 SelectionDAG *CurDAG;
2882 PPCDAGToDAGISel *S;
2883 // Conversion type for interpreting results of a 32-bit instruction as
2884 // a 64-bit value or vice versa.
2885 enum ExtOrTruncConversion { Ext, Trunc };
2886
2887 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2888 // in a GPR.
2889 // ZExtOrig - use the original condition code, zero-extend value
2890 // ZExtInvert - invert the condition code, zero-extend value
2891 // SExtOrig - use the original condition code, sign-extend value
2892 // SExtInvert - invert the condition code, sign-extend value
2893 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2894
2895 // Comparisons against zero to emit GPR code sequences for. Each of these
2896 // sequences may need to be emitted for two or more equivalent patterns.
2897 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2898 // matters as well as the extension type: sext (-1/0), zext (1/0).
2899 // GEZExt - (zext (LHS >= 0))
2900 // GESExt - (sext (LHS >= 0))
2901 // LEZExt - (zext (LHS <= 0))
2902 // LESExt - (sext (LHS <= 0))
2903 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2904
2905 SDNode *tryEXTEND(SDNode *N);
2906 SDNode *tryLogicOpOfCompares(SDNode *N);
2907 SDValue computeLogicOpInGPR(SDValue LogicOp);
2908 SDValue signExtendInputIfNeeded(SDValue Input);
2909 SDValue zeroExtendInputIfNeeded(SDValue Input);
2910 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2911 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2912 ZeroCompare CmpTy);
2913 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2914 int64_t RHSValue, SDLoc dl);
2915 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2916 int64_t RHSValue, SDLoc dl);
2917 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2918 int64_t RHSValue, SDLoc dl);
2919 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2920 int64_t RHSValue, SDLoc dl);
2921 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2922
2923public:
2924 IntegerCompareEliminator(SelectionDAG *DAG,
2925 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2927 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2928 "Only expecting to use this on 64 bit targets.");
2929 }
2930 SDNode *Select(SDNode *N) {
2931 if (CmpInGPR == ICGPR_None)
2932 return nullptr;
2933 switch (N->getOpcode()) {
2934 default: break;
2935 case ISD::ZERO_EXTEND:
2938 return nullptr;
2939 [[fallthrough]];
2940 case ISD::SIGN_EXTEND:
2943 return nullptr;
2944 return tryEXTEND(N);
2945 case ISD::AND:
2946 case ISD::OR:
2947 case ISD::XOR:
2948 return tryLogicOpOfCompares(N);
2949 }
2950 return nullptr;
2951 }
2952};
2953
2954// The obvious case for wanting to keep the value in a GPR. Namely, the
2955// result of the comparison is actually needed in a GPR.
2956SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2957 assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2958 N->getOpcode() == ISD::SIGN_EXTEND) &&
2959 "Expecting a zero/sign extend node!");
2960 SDValue WideRes;
2961 // If we are zero-extending the result of a logical operation on i1
2962 // values, we can keep the values in GPRs.
2963 if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
2964 N->getOperand(0).getValueType() == MVT::i1 &&
2965 N->getOpcode() == ISD::ZERO_EXTEND)
2966 WideRes = computeLogicOpInGPR(N->getOperand(0));
2967 else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2968 return nullptr;
2969 else
2970 WideRes =
2971 getSETCCInGPR(N->getOperand(0),
2972 N->getOpcode() == ISD::SIGN_EXTEND ?
2973 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2974
2975 if (!WideRes)
2976 return nullptr;
2977
2978 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2979 bool Output32Bit = N->getValueType(0) == MVT::i32;
2980
2981 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2982 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2983
2984 SDValue ConvOp = WideRes;
2985 if (Input32Bit != Output32Bit)
2986 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2987 ExtOrTruncConversion::Trunc);
2988 return ConvOp.getNode();
2989}
2990
2991// Attempt to perform logical operations on the results of comparisons while
2992// keeping the values in GPRs. Without doing so, these would end up being
2993// lowered to CR-logical operations which suffer from significant latency and
2994// low ILP.
2995SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2996 if (N->getValueType(0) != MVT::i1)
2997 return nullptr;
2998 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
2999 "Expected a logic operation on setcc results.");
3000 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
3001 if (!LoweredLogical)
3002 return nullptr;
3003
3004 SDLoc dl(N);
3005 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3006 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3007 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3008 SDValue LHS = LoweredLogical.getOperand(0);
3009 SDValue RHS = LoweredLogical.getOperand(1);
3010 SDValue WideOp;
3011 SDValue OpToConvToRecForm;
3012
3013 // Look through any 32-bit to 64-bit implicit extend nodes to find the
3014 // opcode that is input to the XORI.
3015 if (IsBitwiseNegate &&
3016 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
3017 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
3018 else if (IsBitwiseNegate)
3019 // If the input to the XORI isn't an extension, that's what we're after.
3020 OpToConvToRecForm = LoweredLogical.getOperand(0);
3021 else
3022 // If this is not an XORI, it is a reg-reg logical op and we can convert
3023 // it to record-form.
3024 OpToConvToRecForm = LoweredLogical;
3025
3026 // Get the record-form version of the node we're looking to use to get the
3027 // CR result from.
3028 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3029 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
3030
3031 // Convert the right node to record-form. This is either the logical we're
3032 // looking at or it is the input node to the negation (if we're looking at
3033 // a bitwise negation).
3034 if (NewOpc != -1 && IsBitwiseNegate) {
3035 // The input to the XORI has a record-form. Use it.
3036 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3037 "Expected a PPC::XORI8 only for bitwise negation.");
3038 // Emit the record-form instruction.
3039 std::vector<SDValue> Ops;
3040 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3041 Ops.push_back(OpToConvToRecForm.getOperand(i));
3042
3043 WideOp =
3044 SDValue(CurDAG->getMachineNode(NewOpc, dl,
3045 OpToConvToRecForm.getValueType(),
3046 MVT::Glue, Ops), 0);
3047 } else {
3048 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3049 "No record form available for AND8/OR8/XOR8?");
3050 WideOp =
3051 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3052 dl, MVT::i64, MVT::Glue, LHS, RHS),
3053 0);
3054 }
3055
3056 // Select this node to a single bit from CR0 set by the record-form node
3057 // just created. For bitwise negation, use the EQ bit which is the equivalent
3058 // of negating the result (i.e. it is a bit set when the result of the
3059 // operation is zero).
3060 SDValue SRIdxVal =
3061 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3062 SDValue CRBit =
3063 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3064 MVT::i1, CR0Reg, SRIdxVal,
3065 WideOp.getValue(1)), 0);
3066 return CRBit.getNode();
3067}
3068
3069// Lower a logical operation on i1 values into a GPR sequence if possible.
3070// The result can be kept in a GPR if requested.
3071// Three types of inputs can be handled:
3072// - SETCC
3073// - TRUNCATE
3074// - Logical operation (AND/OR/XOR)
3075// There is also a special case that is handled (namely a complement operation
3076// achieved with xor %a, -1).
3077SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
3079 "Can only handle logic operations here.");
3080 assert(LogicOp.getValueType() == MVT::i1 &&
3081 "Can only handle logic operations on i1 values here.");
3082 SDLoc dl(LogicOp);
3083 SDValue LHS, RHS;
3084
3085 // Special case: xor %a, -1
3086 bool IsBitwiseNegation = isBitwiseNot(LogicOp);
3087
3088 // Produces a GPR sequence for each operand of the binary logic operation.
3089 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3090 // the value in a GPR and for logic operations, it will recursively produce
3091 // a GPR sequence for the operation.
3092 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3093 unsigned OperandOpcode = Operand.getOpcode();
3094 if (OperandOpcode == ISD::SETCC)
3095 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
3096 else if (OperandOpcode == ISD::TRUNCATE) {
3097 SDValue InputOp = Operand.getOperand(0);
3098 EVT InVT = InputOp.getValueType();
3099 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
3100 PPC::RLDICL, dl, InVT, InputOp,
3101 S->getI64Imm(0, dl),
3102 S->getI64Imm(63, dl)), 0);
3103 } else if (ISD::isBitwiseLogicOp(OperandOpcode))
3104 return computeLogicOpInGPR(Operand);
3105 return SDValue();
3106 };
3107 LHS = getLogicOperand(LogicOp.getOperand(0));
3108 RHS = getLogicOperand(LogicOp.getOperand(1));
3109
3110 // If a GPR sequence can't be produced for the LHS we can't proceed.
3111 // Not producing a GPR sequence for the RHS is only a problem if this isn't
3112 // a bitwise negation operation.
3113 if (!LHS || (!RHS && !IsBitwiseNegation))
3114 return SDValue();
3115
3116 NumLogicOpsOnComparison++;
3117
3118 // We will use the inputs as 64-bit values.
3119 if (LHS.getValueType() == MVT::i32)
3120 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
3121 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3122 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
3123
3124 unsigned NewOpc;
3125 switch (LogicOp.getOpcode()) {
3126 default: llvm_unreachable("Unknown logic operation.");
3127 case ISD::AND: NewOpc = PPC::AND8; break;
3128 case ISD::OR: NewOpc = PPC::OR8; break;
3129 case ISD::XOR: NewOpc = PPC::XOR8; break;
3130 }
3131
3132 if (IsBitwiseNegation) {
3133 RHS = S->getI64Imm(1, dl);
3134 NewOpc = PPC::XORI8;
3135 }
3136
3137 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
3138
3139}
3140
3141/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3142/// Otherwise just reinterpret it as a 64-bit value.
3143/// Useful when emitting comparison code for 32-bit values without using
3144/// the compare instruction (which only considers the lower 32-bits).
3145SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3146 assert(Input.getValueType() == MVT::i32 &&
3147 "Can only sign-extend 32-bit values here.");
3148 unsigned Opc = Input.getOpcode();
3149
3150 // The value was sign extended and then truncated to 32-bits. No need to
3151 // sign extend it again.
3152 if (Opc == ISD::TRUNCATE &&
3153 (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
3154 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
3155 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3156
3157 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3158 // The input is a sign-extending load. All ppc sign-extending loads
3159 // sign-extend to the full 64-bits.
3160 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3161 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3162
3163 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3164 // We don't sign-extend constants.
3165 if (InputConst)
3166 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3167
3168 SDLoc dl(Input);
3169 SignExtensionsAdded++;
3170 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3171 MVT::i64, Input), 0);
3172}
3173
3174/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3175/// Otherwise just reinterpret it as a 64-bit value.
3176/// Useful when emitting comparison code for 32-bit values without using
3177/// the compare instruction (which only considers the lower 32-bits).
3178SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3179 assert(Input.getValueType() == MVT::i32 &&
3180 "Can only zero-extend 32-bit values here.");
3181 unsigned Opc = Input.getOpcode();
3182
3183 // The only condition under which we can omit the actual extend instruction:
3184 // - The value is a positive constant
3185 // - The value comes from a load that isn't a sign-extending load
3186 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3187 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3188 (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3189 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3190 if (IsTruncateOfZExt)
3191 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3192
3193 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3194 if (InputConst && InputConst->getSExtValue() >= 0)
3195 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3196
3197 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3198 // The input is a load that doesn't sign-extend (it will be zero-extended).
3199 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3200 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3201
3202 // None of the above, need to zero-extend.
3203 SDLoc dl(Input);
3204 ZeroExtensionsAdded++;
3205 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3206 S->getI64Imm(0, dl),
3207 S->getI64Imm(32, dl)), 0);
3208}
3209
3210// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3211// course not actual zero/sign extensions that will generate machine code,
3212// they're just a way to reinterpret a 32 bit value in a register as a
3213// 64 bit value and vice-versa.
3214SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3215 ExtOrTruncConversion Conv) {
3216 SDLoc dl(NatWidthRes);
3217
3218 // For reinterpreting 32-bit values as 64 bit values, we generate
3219 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3220 if (Conv == ExtOrTruncConversion::Ext) {
3221 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3222 SDValue SubRegIdx =
3223 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3224 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3225 ImDef, NatWidthRes, SubRegIdx), 0);
3226 }
3227
3228 assert(Conv == ExtOrTruncConversion::Trunc &&
3229 "Unknown convertion between 32 and 64 bit values.");
3230 // For reinterpreting 64-bit values as 32-bit values, we just need to
3231 // EXTRACT_SUBREG (i.e. extract the low word).
3232 SDValue SubRegIdx =
3233 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3234 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3235 NatWidthRes, SubRegIdx), 0);
3236}
3237
3238// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3239// Handle both zero-extensions and sign-extensions.
3240SDValue
3241IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3242 ZeroCompare CmpTy) {
3243 EVT InVT = LHS.getValueType();
3244 bool Is32Bit = InVT == MVT::i32;
3245 SDValue ToExtend;
3246
3247 // Produce the value that needs to be either zero or sign extended.
3248 switch (CmpTy) {
3249 case ZeroCompare::GEZExt:
3250 case ZeroCompare::GESExt:
3251 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3252 dl, InVT, LHS, LHS), 0);
3253 break;
3254 case ZeroCompare::LEZExt:
3255 case ZeroCompare::LESExt: {
3256 if (Is32Bit) {
3257 // Upper 32 bits cannot be undefined for this sequence.
3258 LHS = signExtendInputIfNeeded(LHS);
3259 SDValue Neg =
3260 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3261 ToExtend =
3262 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3263 Neg, S->getI64Imm(1, dl),
3264 S->getI64Imm(63, dl)), 0);
3265 } else {
3266 SDValue Addi =
3267 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3268 S->getI64Imm(~0ULL, dl)), 0);
3269 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3270 Addi, LHS), 0);
3271 }
3272 break;
3273 }
3274 }
3275
3276 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3277 if (!Is32Bit &&
3278 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3279 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3280 ToExtend, S->getI64Imm(1, dl),
3281 S->getI64Imm(63, dl)), 0);
3282 if (!Is32Bit &&
3283 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3284 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3285 S->getI64Imm(63, dl)), 0);
3286
3287 assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3288 // For 32-bit sequences, the extensions differ between GE/LE cases.
3289 switch (CmpTy) {
3290 case ZeroCompare::GEZExt: {
3291 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3292 S->getI32Imm(31, dl) };
3293 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3294 ShiftOps), 0);
3295 }
3296 case ZeroCompare::GESExt:
3297 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3298 S->getI32Imm(31, dl)), 0);
3299 case ZeroCompare::LEZExt:
3300 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3301 S->getI32Imm(1, dl)), 0);
3302 case ZeroCompare::LESExt:
3303 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3304 S->getI32Imm(-1, dl)), 0);
3305 }
3306
3307 // The above case covers all the enumerators so it can't have a default clause
3308 // to avoid compiler warnings.
3309 llvm_unreachable("Unknown zero-comparison type.");
3310}
3311
3312/// Produces a zero-extended result of comparing two 32-bit values according to
3313/// the passed condition code.
///
/// \p RHSValue is tested against 0, 1 and -1 to pick shorter special-case
/// sequences. An empty SDValue is returned for condition codes that have no
/// case below, and for sequences that need extended inputs when
/// CmpInGPR == ICGPR_NonExtIn.
3314SDValue
3315IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3316 ISD::CondCode CC,
3317 int64_t RHSValue, SDLoc dl) {
 // NOTE(review): the condition guarding this early return is not visible in
 // this listing; confirm it against the complete file.
3320 return SDValue();
3321 bool IsRHSZero = RHSValue == 0;
3322 bool IsRHSOne = RHSValue == 1;
3323 bool IsRHSNegOne = RHSValue == -1LL;
3324 switch (CC) {
3325 default: return SDValue();
3326 case ISD::SETEQ: {
3327 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3328 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3329 SDValue Xor = IsRHSZero ? LHS :
3330 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3331 SDValue Clz =
3332 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3333 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3334 S->getI32Imm(31, dl) };
3335 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3336 ShiftOps), 0);
3337 }
3338 case ISD::SETNE: {
3339 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3340 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3341 SDValue Xor = IsRHSZero ? LHS :
3342 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3343 SDValue Clz =
3344 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3345 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3346 S->getI32Imm(31, dl) };
3347 SDValue Shift =
3348 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3349 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3350 S->getI32Imm(1, dl)), 0);
3351 }
3352 case ISD::SETGE: {
3353 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3354 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3355 if(IsRHSZero)
3356 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3357
3358 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3359 // by swapping inputs and falling through.
3360 std::swap(LHS, RHS);
 // The operands were swapped, so IsRHSZero has to be recomputed for the
 // new RHS before entering the SETLE code.
3361 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3362 IsRHSZero = RHSConst && RHSConst->isZero();
3363 [[fallthrough]];
3364 }
3365 case ISD::SETLE: {
3366 if (CmpInGPR == ICGPR_NonExtIn)
3367 return SDValue();
3368 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3369 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3370 if(IsRHSZero) {
 // This repeats the ICGPR_NonExtIn check made on entry to this case.
3371 if (CmpInGPR == ICGPR_NonExtIn)
3372 return SDValue();
3373 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3374 }
3375
3376 // The upper 32-bits of the register can't be undefined for this sequence.
3377 LHS = signExtendInputIfNeeded(LHS);
3378 RHS = signExtendInputIfNeeded(RHS);
3379 SDValue Sub =
3380 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3381 SDValue Shift =
3382 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3383 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3384 0);
3385 return
3386 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3387 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3388 }
3389 case ISD::SETGT: {
3390 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3391 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3392 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3393 // Handle SETGT -1 (which is equivalent to SETGE 0).
3394 if (IsRHSNegOne)
3395 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3396
3397 if (IsRHSZero) {
3398 if (CmpInGPR == ICGPR_NonExtIn)
3399 return SDValue();
3400 // The upper 32-bits of the register can't be undefined for this sequence.
3401 LHS = signExtendInputIfNeeded(LHS);
 // Only LHS feeds the nodes built below; RHS is not referenced again
 // before this branch returns.
3402 RHS = signExtendInputIfNeeded(RHS);
3403 SDValue Neg =
3404 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3405 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3406 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3407 }
3408 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3409 // (%b < %a) by swapping inputs and falling through.
3410 std::swap(LHS, RHS);
 // Recompute the special-case flags for the swapped operands.
3411 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3412 IsRHSZero = RHSConst && RHSConst->isZero();
3413 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3414 [[fallthrough]];
3415 }
3416 case ISD::SETLT: {
3417 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3418 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3419 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3420 // Handle SETLT 1 (which is equivalent to SETLE 0).
3421 if (IsRHSOne) {
3422 if (CmpInGPR == ICGPR_NonExtIn)
3423 return SDValue();
3424 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3425 }
3426
3427 if (IsRHSZero) {
3428 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3429 S->getI32Imm(31, dl) };
3430 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3431 ShiftOps), 0);
3432 }
3433
3434 if (CmpInGPR == ICGPR_NonExtIn)
3435 return SDValue();
3436 // The upper 32-bits of the register can't be undefined for this sequence.
3437 LHS = signExtendInputIfNeeded(LHS);
3438 RHS = signExtendInputIfNeeded(RHS);
3439 SDValue SUBFNode =
3440 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3441 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3442 SUBFNode, S->getI64Imm(1, dl),
3443 S->getI64Imm(63, dl)), 0);
3444 }
3445 case ISD::SETUGE:
3446 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %a, %b), 63), 1)
3447 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %b, %a), 63), 1)
3448 std::swap(LHS, RHS);
3449 [[fallthrough]];
3450 case ISD::SETULE: {
3451 if (CmpInGPR == ICGPR_NonExtIn)
3452 return SDValue();
3453 // The upper 32-bits of the register can't be undefined for this sequence.
3454 LHS = zeroExtendInputIfNeeded(LHS);
3455 RHS = zeroExtendInputIfNeeded(RHS);
 // NOTE(review): the declaration that binds the following expression to
 // `Subtract` is not visible in this listing; confirm against the
 // complete file.
3457 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3458 SDValue SrdiNode =
3459 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3460 Subtract, S->getI64Imm(1, dl),
3461 S->getI64Imm(63, dl)), 0);
3462 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3463 S->getI32Imm(1, dl)), 0);
3464 }
3465 case ISD::SETUGT:
3466 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3467 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3468 std::swap(LHS, RHS);
3469 [[fallthrough]];
3470 case ISD::SETULT: {
3471 if (CmpInGPR == ICGPR_NonExtIn)
3472 return SDValue();
3473 // The upper 32-bits of the register can't be undefined for this sequence.
3474 LHS = zeroExtendInputIfNeeded(LHS);
3475 RHS = zeroExtendInputIfNeeded(RHS);
 // NOTE(review): the declaration that binds the following expression to
 // `Subtract` is not visible in this listing; confirm against the
 // complete file.
3477 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3478 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3479 Subtract, S->getI64Imm(1, dl),
3480 S->getI64Imm(63, dl)), 0);
3481 }
3482 }
3483}
3484
3485/// Produces a sign-extended result of comparing two 32-bit values according to
3486/// the passed condition code.
///
/// \p RHSValue is tested against 0, 1 and -1 to pick shorter special-case
/// sequences. An empty SDValue is returned for condition codes that have no
/// case below, and for sequences that need extended inputs when
/// CmpInGPR == ICGPR_NonExtIn.
3487SDValue
3488IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3489 ISD::CondCode CC,
3490 int64_t RHSValue, SDLoc dl) {
 // NOTE(review): the condition guarding this early return is not visible in
 // this listing; confirm it against the complete file.
3493 return SDValue();
3494 bool IsRHSZero = RHSValue == 0;
3495 bool IsRHSOne = RHSValue == 1;
3496 bool IsRHSNegOne = RHSValue == -1LL;
3497
3498 switch (CC) {
3499 default: return SDValue();
3500 case ISD::SETEQ: {
3501 // (sext (setcc %a, %b, seteq)) ->
3502 // (neg (lshr (ctlz (xor %a, %b)), 5))
3503 // (sext (setcc %a, 0, seteq)) ->
3504 // (neg (lshr (ctlz %a), 5))
3505 SDValue CountInput = IsRHSZero ? LHS :
3506 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3507 SDValue Cntlzw =
3508 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
 // Same RLWINM operands (27, 5, 31) as the shift-right-by-5 used for SETNE
 // below, despite the SHLOps/Slwi names.
3509 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3510 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3511 SDValue Slwi =
3512 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3513 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3514 }
3515 case ISD::SETNE: {
3516 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3517 // flip the bit, finally take 2's complement.
3518 // (sext (setcc %a, %b, setne)) ->
3519 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3520 // Same as above, but the first xor is not needed.
3521 // (sext (setcc %a, 0, setne)) ->
3522 // (neg (xor (lshr (ctlz %a), 5), 1))
3523 SDValue Xor = IsRHSZero ? LHS :
3524 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3525 SDValue Clz =
3526 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3527 SDValue ShiftOps[] =
3528 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3529 SDValue Shift =
3530 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3531 SDValue Xori =
3532 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3533 S->getI32Imm(1, dl)), 0);
3534 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3535 }
3536 case ISD::SETGE: {
3537 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3538 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3539 if (IsRHSZero)
3540 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3541
3542 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3543 // by swapping inputs and falling through.
3544 std::swap(LHS, RHS);
 // The operands were swapped, so IsRHSZero has to be recomputed for the
 // new RHS before entering the SETLE code.
3545 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3546 IsRHSZero = RHSConst && RHSConst->isZero();
3547 [[fallthrough]];
3548 }
3549 case ISD::SETLE: {
3550 if (CmpInGPR == ICGPR_NonExtIn)
3551 return SDValue();
3552 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3553 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3554 if (IsRHSZero)
3555 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3556
3557 // The upper 32-bits of the register can't be undefined for this sequence.
3558 LHS = signExtendInputIfNeeded(LHS);
3559 RHS = signExtendInputIfNeeded(RHS);
3560 SDValue SUBFNode =
3561 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3562 LHS, RHS), 0);
3563 SDValue Srdi =
3564 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3565 SUBFNode, S->getI64Imm(1, dl),
3566 S->getI64Imm(63, dl)), 0);
3567 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3568 S->getI32Imm(-1, dl)), 0);
3569 }
3570 case ISD::SETGT: {
3571 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3572 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3573 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3574 if (IsRHSNegOne)
3575 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3576 if (IsRHSZero) {
3577 if (CmpInGPR == ICGPR_NonExtIn)
3578 return SDValue();
3579 // The upper 32-bits of the register can't be undefined for this sequence.
3580 LHS = signExtendInputIfNeeded(LHS);
 // Only LHS feeds the nodes built below; RHS is not referenced again
 // before this branch returns.
3581 RHS = signExtendInputIfNeeded(RHS);
3582 SDValue Neg =
3583 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3584 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3585 S->getI64Imm(63, dl)), 0);
3586 }
3587 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3588 // (%b < %a) by swapping inputs and falling through.
3589 std::swap(LHS, RHS);
 // Recompute the special-case flags for the swapped operands.
3590 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3591 IsRHSZero = RHSConst && RHSConst->isZero();
3592 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3593 [[fallthrough]];
3594 }
3595 case ISD::SETLT: {
3596 // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3597 // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3598 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
3599 if (IsRHSOne) {
3600 if (CmpInGPR == ICGPR_NonExtIn)
3601 return SDValue();
3602 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3603 }
3604 if (IsRHSZero)
3605 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3606 S->getI32Imm(31, dl)), 0);
3607
3608 if (CmpInGPR == ICGPR_NonExtIn)
3609 return SDValue();
3610 // The upper 32-bits of the register can't be undefined for this sequence.
3611 LHS = signExtendInputIfNeeded(LHS);
3612 RHS = signExtendInputIfNeeded(RHS);
3613 SDValue SUBFNode =
3614 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3615 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3616 SUBFNode, S->getI64Imm(63, dl)), 0);
3617 }
3618 case ISD::SETUGE:
3619 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3620 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3621 std::swap(LHS, RHS);
3622 [[fallthrough]];
3623 case ISD::SETULE: {
3624 if (CmpInGPR == ICGPR_NonExtIn)
3625 return SDValue();
3626 // The upper 32-bits of the register can't be undefined for this sequence.
3627 LHS = zeroExtendInputIfNeeded(LHS);
3628 RHS = zeroExtendInputIfNeeded(RHS);
 // NOTE(review): the declaration that binds the following expression to
 // `Subtract` is not visible in this listing; confirm against the
 // complete file.
3630 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3631 SDValue Shift =
3632 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3633 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3634 0);
3635 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3636 S->getI32Imm(-1, dl)), 0);
3637 }
3638 case ISD::SETUGT:
3639 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3640 // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3641 std::swap(LHS, RHS);
3642 [[fallthrough]];
3643 case ISD::SETULT: {
3644 if (CmpInGPR == ICGPR_NonExtIn)
3645 return SDValue();
3646 // The upper 32-bits of the register can't be undefined for this sequence.
3647 LHS = zeroExtendInputIfNeeded(LHS);
3648 RHS = zeroExtendInputIfNeeded(RHS);
 // NOTE(review): the declaration that binds the following expression to
 // `Subtract` is not visible in this listing; confirm against the
 // complete file.
3650 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3651 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3652 Subtract, S->getI64Imm(63, dl)), 0);
3653 }
3654 }
3655}
3656
3657/// Produces a zero-extended result of comparing two 64-bit values according to
3658/// the passed condition code.
///
/// \p RHSValue is tested against 0, 1 and -1 to pick shorter special-case
/// sequences. An empty SDValue is returned for condition codes that have no
/// case below.
3659SDValue
3660IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3661 ISD::CondCode CC,
3662 int64_t RHSValue, SDLoc dl) {
 // NOTE(review): the condition guarding this early return is not visible in
 // this listing; confirm it against the complete file.
3665 return SDValue();
3666 bool IsRHSZero = RHSValue == 0;
3667 bool IsRHSOne = RHSValue == 1;
3668 bool IsRHSNegOne = RHSValue == -1LL;
3669 switch (CC) {
3670 default: return SDValue();
3671 case ISD::SETEQ: {
3672 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3673 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3674 SDValue Xor = IsRHSZero ? LHS :
3675 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3676 SDValue Clz =
3677 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3678 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3679 S->getI64Imm(58, dl),
3680 S->getI64Imm(63, dl)), 0);
3681 }
3682 case ISD::SETNE: {
3683 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3684 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, (xor %a, %b), addc.CA)
3685 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3686 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, %a, addcz.CA)
3687 SDValue Xor = IsRHSZero ? LHS :
3688 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3689 SDValue AC =
3690 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3691 Xor, S->getI32Imm(~0U, dl)), 0);
 // AC.getValue(1) is the glue result of the ADDIC8 node, which ties the
 // SUBFE8 to it.
3692 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3693 Xor, AC.getValue(1)), 0);
3694 }
3695 case ISD::SETGE: {
3696 // {subc.reg, subc.CA} = (subcarry %a, %b)
3697 // (zext (setcc %a, %b, setge)) ->
3698 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3699 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3700 if (IsRHSZero)
3701 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
 // Handle (%a >= %b) as (%b <= %a): swap the operands, recompute
 // IsRHSZero for the new RHS and fall through to SETLE.
3702 std::swap(LHS, RHS);
3703 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3704 IsRHSZero = RHSConst && RHSConst->isZero();
3705 [[fallthrough]];
3706 }
3707 case ISD::SETLE: {
3708 // {subc.reg, subc.CA} = (subcarry %b, %a)
3709 // (zext (setcc %a, %b, setle)) ->
3710 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3711 // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3712 if (IsRHSZero)
3713 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
 // ShiftL is the logical right shift of LHS by 63; ShiftR is the
 // arithmetic right shift of RHS by 63.
3714 SDValue ShiftL =
3715 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3716 S->getI64Imm(1, dl),
3717 S->getI64Imm(63, dl)), 0);
3718 SDValue ShiftR =
3719 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3720 S->getI64Imm(63, dl)), 0);
 // Only result 1 (the glue) of the SUBFC8 node is consumed, by the ADDE8.
3721 SDValue SubtractCarry =
3722 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3723 LHS, RHS), 1);
3724 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3725 ShiftR, ShiftL, SubtractCarry), 0);
3726 }
3727 case ISD::SETGT: {
3728 // {subc.reg, subc.CA} = (subcarry %b, %a)
3729 // (zext (setcc %a, %b, setgt)) ->
3730 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3731 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
 // SETGT -1 is handled as SETGE 0.
3732 if (IsRHSNegOne)
3733 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3734 if (IsRHSZero) {
3735 SDValue Addi =
3736 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3737 S->getI64Imm(~0ULL, dl)), 0);
3738 SDValue Nor =
3739 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3740 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3741 S->getI64Imm(1, dl),
3742 S->getI64Imm(63, dl)), 0);
3743 }
 // Handle (%a > %b) as (%b < %a): swap the operands, recompute the
 // special-case flags and fall through to SETLT.
3744 std::swap(LHS, RHS);
3745 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3746 IsRHSZero = RHSConst && RHSConst->isZero();
3747 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3748 [[fallthrough]];
3749 }
3750 case ISD::SETLT: {
3751 // {subc.reg, subc.CA} = (subcarry %a, %b)
3752 // (zext (setcc %a, %b, setlt)) ->
3753 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3754 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
 // SETLT 1 is handled as SETLE 0.
3755 if (IsRHSOne)
3756 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3757 if (IsRHSZero)
3758 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3759 S->getI64Imm(1, dl),
3760 S->getI64Imm(63, dl)), 0);
3761 SDValue SRADINode =
3762 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3763 LHS, S->getI64Imm(63, dl)), 0);
3764 SDValue SRDINode =
3765 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3766 RHS, S->getI64Imm(1, dl),
3767 S->getI64Imm(63, dl)), 0);
3768 SDValue SUBFC8Carry =
3769 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3770 RHS, LHS), 1);
3771 SDValue ADDE8Node =
3772 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3773 SRDINode, SRADINode, SUBFC8Carry), 0);
3774 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3775 ADDE8Node, S->getI64Imm(1, dl)), 0);
3776 }
3777 case ISD::SETUGE:
3778 // {subc.reg, subc.CA} = (subcarry %a, %b)
3779 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3780 std::swap(LHS, RHS);
3781 [[fallthrough]];
3782 case ISD::SETULE: {
3783 // {subc.reg, subc.CA} = (subcarry %b, %a)
3784 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3785 SDValue SUBFC8Carry =
3786 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3787 LHS, RHS), 1);
3788 SDValue SUBFE8Node =
3789 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3790 LHS, LHS, SUBFC8Carry), 0);
3791 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3792 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3793 }
3794 case ISD::SETUGT:
3795 // {subc.reg, subc.CA} = (subcarry %b, %a)
3796 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3797 std::swap(LHS, RHS);
3798 [[fallthrough]];
3799 case ISD::SETULT: {
3800 // {subc.reg, subc.CA} = (subcarry %a, %b)
3801 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3802 SDValue SubtractCarry =
3803 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3804 RHS, LHS), 1);
3805 SDValue ExtSub =
3806 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3807 LHS, LHS, SubtractCarry), 0);
3808 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3809 ExtSub), 0);
3810 }
3811 }
3812}
3813
3814/// Produces a sign-extended result of comparing two 64-bit values according to
3815/// the passed condition code.
///
/// \p RHSValue is tested against 0, 1 and -1 to pick shorter special-case
/// sequences. An empty SDValue is returned for condition codes that have no
/// case below.
3816SDValue
3817IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3818 ISD::CondCode CC,
3819 int64_t RHSValue, SDLoc dl) {
 // NOTE(review): the condition guarding this early return is not visible in
 // this listing; confirm it against the complete file.
3822 return SDValue();
3823 bool IsRHSZero = RHSValue == 0;
3824 bool IsRHSOne = RHSValue == 1;
3825 bool IsRHSNegOne = RHSValue == -1LL;
3826 switch (CC) {
3827 default: return SDValue();
3828 case ISD::SETEQ: {
3829 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3830 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3831 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3832 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3833 SDValue AddInput = IsRHSZero ? LHS :
3834 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3835 SDValue Addic =
3836 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3837 AddInput, S->getI32Imm(~0U, dl)), 0);
 // Addic.getValue(1) is the glue result of the ADDIC8 node, which ties the
 // SUBFE8 to it.
3838 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3839 Addic, Addic.getValue(1)), 0);
3840 }
3841 case ISD::SETNE: {
3842 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3843 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3844 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3845 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3846 SDValue Xor = IsRHSZero ? LHS :
3847 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3848 SDValue SC =
3849 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3850 Xor, S->getI32Imm(0, dl)), 0);
3851 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3852 SC, SC.getValue(1)), 0);
3853 }
3854 case ISD::SETGE: {
3855 // {subc.reg, subc.CA} = (subcarry %a, %b)
3856 // (sext (setcc %a, %b, setge)) ->
3857 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3858 // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3859 if (IsRHSZero)
3860 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
 // Handle (%a >= %b) as (%b <= %a): swap the operands, recompute
 // IsRHSZero for the new RHS and fall through to SETLE.
3861 std::swap(LHS, RHS);
3862 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3863 IsRHSZero = RHSConst && RHSConst->isZero();
3864 [[fallthrough]];
3865 }
3866 case ISD::SETLE: {
3867 // {subc.reg, subc.CA} = (subcarry %b, %a)
3868 // (sext (setcc %a, %b, setle)) ->
3869 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3870 // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3871 if (IsRHSZero)
3872 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
 // ShiftR is the arithmetic right shift of RHS by 63; ShiftL is the
 // logical right shift of LHS by 63.
3873 SDValue ShiftR =
3874 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3875 S->getI64Imm(63, dl)), 0);
3876 SDValue ShiftL =
3877 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3878 S->getI64Imm(1, dl),
3879 S->getI64Imm(63, dl)), 0);
 // Only result 1 (the glue) of the SUBFC8 node is consumed, by the ADDE8.
3880 SDValue SubtractCarry =
3881 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3882 LHS, RHS), 1);
3883 SDValue Adde =
3884 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3885 ShiftR, ShiftL, SubtractCarry), 0);
3886 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3887 }
3888 case ISD::SETGT: {
3889 // {subc.reg, subc.CA} = (subcarry %b, %a)
3890 // (sext (setcc %a, %b, setgt)) ->
3891 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3892 // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
 // SETGT -1 is handled as SETGE 0.
3893 if (IsRHSNegOne)
3894 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3895 if (IsRHSZero) {
3896 SDValue Add =
3897 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3898 S->getI64Imm(-1, dl)), 0);
3899 SDValue Nor =
3900 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3901 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3902 S->getI64Imm(63, dl)), 0);
3903 }
 // Handle (%a > %b) as (%b < %a): swap the operands, recompute the
 // special-case flags and fall through to SETLT.
3904 std::swap(LHS, RHS);
3905 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3906 IsRHSZero = RHSConst && RHSConst->isZero();
3907 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3908 [[fallthrough]];
3909 }
3910 case ISD::SETLT: {
3911 // {subc.reg, subc.CA} = (subcarry %a, %b)
3912 // (sext (setcc %a, %b, setlt)) ->
3913 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3914 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
 // SETLT 1 is handled as SETLE 0.
3915 if (IsRHSOne)
3916 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3917 if (IsRHSZero) {
3918 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3919 S->getI64Imm(63, dl)), 0);
3920 }
3921 SDValue SRADINode =
3922 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3923 LHS, S->getI64Imm(63, dl)), 0);
3924 SDValue SRDINode =
3925 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3926 RHS, S->getI64Imm(1, dl),
3927 S->getI64Imm(63, dl)), 0);
3928 SDValue SUBFC8Carry =
3929 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3930 RHS, LHS), 1);
3931 SDValue ADDE8Node =
3932 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3933 SRDINode, SRADINode, SUBFC8Carry), 0);
3934 SDValue XORI8Node =
3935 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3936 ADDE8Node, S->getI64Imm(1, dl)), 0);
3937 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3938 XORI8Node), 0);
3939 }
3940 case ISD::SETUGE:
3941 // {subc.reg, subc.CA} = (subcarry %a, %b)
3942 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3943 std::swap(LHS, RHS);
3944 [[fallthrough]];
3945 case ISD::SETULE: {
3946 // {subc.reg, subc.CA} = (subcarry %b, %a)
3947 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3948 SDValue SubtractCarry =
3949 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3950 LHS, RHS), 1);
3951 SDValue ExtSub =
3952 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3953 LHS, SubtractCarry), 0);
 // NOR8 of a value with itself yields its bitwise complement.
3954 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3955 ExtSub, ExtSub), 0);
3956 }
3957 case ISD::SETUGT:
3958 // {subc.reg, subc.CA} = (subcarry %b, %a)
3959 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3960 std::swap(LHS, RHS);
3961 [[fallthrough]];
3962 case ISD::SETULT: {
3963 // {subc.reg, subc.CA} = (subcarry %a, %b)
3964 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3965 SDValue SubCarry =
3966 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3967 RHS, LHS), 1);
3968 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3969 LHS, LHS, SubCarry), 0);
3970 }
3971 }
3972}
3973
3974/// Do all uses of this SDValue need the result in a GPR?
3975/// This is meant to be used on values that have type i1 since
3976/// it is somewhat meaningless to ask if values of other types
3977/// should be kept in GPR's.
3978static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3979 assert(Compare.getOpcode() == ISD::SETCC &&
3980 "An ISD::SETCC node required here.");
3981
3982 // For values that have a single use, the caller should obviously already have
3983 // checked if that use is an extending use. We check the other uses here.
3984 if (Compare.hasOneUse())
3985 return true;
3986 // We want the value in a GPR if it is being extended, used for a select, or
3987 // used in logical operations.
3988 for (auto *CompareUse : Compare.getNode()->users())
3989 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3990 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3991 CompareUse->getOpcode() != ISD::SELECT &&
3992 !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
3993 OmittedForNonExtendUses++;
3994 return false;
3995 }
3996 return true;
3997}
3998
3999/// Returns an equivalent of a SETCC node but with the result the same width as
4000/// the inputs. This can also be used for SELECT_CC if either the true or false
4001/// values is a power of two while the other is zero.
4002SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
4003 SetccInGPROpts ConvOpts) {
4004 assert((Compare.getOpcode() == ISD::SETCC ||
4005 Compare.getOpcode() == ISD::SELECT_CC) &&
4006 "An ISD::SETCC node required here.");
4007
4008 // Don't convert this comparison to a GPR sequence because there are uses
4009 // of the i1 result (i.e. uses that require the result in the CR).
4010 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4011 return SDValue();
4012
4013 SDValue LHS = Compare.getOperand(0);
4014 SDValue RHS = Compare.getOperand(1);
4015
4016 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4017 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
4018 ISD::CondCode CC =
4019 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
4020 EVT InputVT = LHS.getValueType();
4021 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4022 return SDValue();
4023
4024 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4025 ConvOpts == SetccInGPROpts::SExtInvert)
4026 CC = ISD::getSetCCInverse(CC, InputVT);
4027
4028 bool Inputs32Bit = InputVT == MVT::i32;
4029
4030 SDLoc dl(Compare);
4031 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4032 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4033 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4034 ConvOpts == SetccInGPROpts::SExtInvert;
4035
4036 if (IsSext && Inputs32Bit)
4037 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4038 else if (Inputs32Bit)
4039 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4040 else if (IsSext)
4041 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4042 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4043}
4044
4045} // end anonymous namespace
4046
4047bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4048 if (N->getValueType(0) != MVT::i32 &&
4049 N->getValueType(0) != MVT::i64)
4050 return false;
4051
4052 // This optimization will emit code that assumes 64-bit registers
4053 // so we don't want to run it in 32-bit mode. Also don't run it
4054 // on functions that are not to be optimized.
4055 if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4056 return false;
4057
4058 // For POWER10, it is more profitable to use the set boolean extension
4059 // instructions rather than the integer compare elimination codegen.
4060 // Users can override this via the command line option, `--ppc-gpr-icmps`.
4061 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4062 return false;
4063
4064 switch (N->getOpcode()) {
4065 default: break;
4066 case ISD::ZERO_EXTEND:
4067 case ISD::SIGN_EXTEND:
4068 case ISD::AND:
4069 case ISD::OR:
4070 case ISD::XOR: {
4071 IntegerCompareEliminator ICmpElim(CurDAG, this);
4072 if (SDNode *New = ICmpElim.Select(N)) {
4073 ReplaceNode(N, New);
4074 return true;
4075 }
4076 }
4077 }
4078 return false;
4079}
4080
4081bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4082 if (N->getValueType(0) != MVT::i32 &&
4083 N->getValueType(0) != MVT::i64)
4084 return false;
4085
4086 if (!UseBitPermRewriter)
4087 return false;
4088
4089 switch (N->getOpcode()) {
4090 default: break;
4091 case ISD::SRL:
4092 // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4093 // uses the BRH instruction.
4094 if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
4095 N->getOperand(0).getOpcode() == ISD::BSWAP) {
4096 auto &OpRight = N->getOperand(1);
4097 ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
4098 if (SRLConst && SRLConst->getSExtValue() == 16)
4099 return false;
4100 }
4101 [[fallthrough]];
4102 case ISD::ROTL:
4103 case ISD::SHL:
4104 case ISD::AND:
4105 case ISD::OR: {
4106 BitPermutationSelector BPS(CurDAG);
4107 if (SDNode *New = BPS.Select(N)) {
4108 ReplaceNode(N, New);
4109 return true;
4110 }
4111 return false;
4112 }
4113 }
4114
4115 return false;
4116}
4117
4118/// SelectCC - Select a comparison of the specified values with the specified
4119/// condition code, returning the CR# of the expression.
4120SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4121 const SDLoc &dl, SDValue Chain) {
4122 // Always select the LHS.
4123 unsigned Opc;
4124
4125 if (LHS.getValueType() == MVT::i32) {
4126 unsigned Imm;
4127 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4128 if (isInt32Immediate(RHS, Imm)) {
4129 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4130 if (isUInt<16>(Imm))
4131 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4132 getI32Imm(Imm & 0xFFFF, dl)),
4133 0);
4134 // If this is a 16-bit signed immediate, fold it.
4135 if (isInt<16>((int)Imm))
4136 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4137 getI32Imm(Imm & 0xFFFF, dl)),
4138 0);
4139
4140 // For non-equality comparisons, the default code would materialize the
4141 // constant, then compare against it, like this:
4142 // lis r2, 4660
4143 // ori r2, r2, 22136
4144 // cmpw cr0, r3, r2
4145 // Since we are just comparing for equality, we can emit this instead:
4146 // xoris r0,r3,0x1234
4147 // cmplwi cr0,r0,0x5678
4148 // beq cr0,L6
4149 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4150 getI32Imm(Imm >> 16, dl)), 0);
4151 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4152 getI32Imm(Imm & 0xFFFF, dl)), 0);
4153 }
4154 Opc = PPC::CMPLW;
4155 } else if (ISD::isUnsignedIntSetCC(CC)) {
4156 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
4157 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4158 getI32Imm(Imm & 0xFFFF, dl)), 0);
4159 Opc = PPC::CMPLW;
4160 } else {
4161 int16_t SImm;
4162 if (isIntS16Immediate(RHS, SImm))
4163 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4164 getI32Imm((int)SImm & 0xFFFF,
4165 dl)),
4166 0);
4167 Opc = PPC::CMPW;
4168 }
4169 } else if (LHS.getValueType() == MVT::i64) {
4170 uint64_t Imm;
4171 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4172 if (isInt64Immediate(RHS.getNode(), Imm)) {
4173 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4174 if (isUInt<16>(Imm))
4175 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4176 getI32Imm(Imm & 0xFFFF, dl)),
4177 0);
4178 // If this is a 16-bit signed immediate, fold it.
4179 if (isInt<16>(Imm))
4180 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4181 getI32Imm(Imm & 0xFFFF, dl)),
4182 0);
4183
4184 // For non-equality comparisons, the default code would materialize the
4185 // constant, then compare against it, like this:
4186 // lis r2, 4660
4187 // ori r2, r2, 22136
4188 // cmpd cr0, r3, r2
4189 // Since we are just comparing for equality, we can emit this instead:
4190 // xoris r0,r3,0x1234
4191 // cmpldi cr0,r0,0x5678
4192 // beq cr0,L6
4193 if (isUInt<32>(Imm)) {
4194 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4195 getI64Imm(Imm >> 16, dl)), 0);
4196 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4197 getI64Imm(Imm & 0xFFFF, dl)),
4198 0);
4199 }
4200 }
4201 Opc = PPC::CMPLD;
4202 } else if (ISD::isUnsignedIntSetCC(CC)) {
4203 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4204 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4205 getI64Imm(Imm & 0xFFFF, dl)), 0);
4206 Opc = PPC::CMPLD;
4207 } else {
4208 int16_t SImm;
4209 if (isIntS16Immediate(RHS, SImm))
4210 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4211 getI64Imm(SImm & 0xFFFF, dl)),
4212 0);
4213 Opc = PPC::CMPD;
4214 }
4215 } else if (LHS.getValueType() == MVT::f32) {
4216 if (Subtarget->hasSPE()) {
4217 switch (CC) {
4218 default:
4219 case ISD::SETEQ:
4220 case ISD::SETNE:
4221 Opc = PPC::EFSCMPEQ;
4222 break;
4223 case ISD::SETLT:
4224 case ISD::SETGE:
4225 case ISD::SETOLT:
4226 case ISD::SETOGE:
4227 case ISD::SETULT:
4228 case ISD::SETUGE:
4229 Opc = PPC::EFSCMPLT;
4230 break;
4231 case ISD::SETGT:
4232 case ISD::SETLE:
4233 case ISD::SETOGT:
4234 case ISD::SETOLE:
4235 case ISD::SETUGT:
4236 case ISD::SETULE:
4237 Opc = PPC::EFSCMPGT;
4238 break;
4239 }
4240 } else
4241 Opc = PPC::FCMPUS;
4242 } else if (LHS.getValueType() == MVT::f64) {
4243 if (Subtarget->hasSPE()) {
4244 switch (CC) {
4245 default:
4246 case ISD::SETEQ:
4247 case ISD::SETNE:
4248 Opc = PPC::EFDCMPEQ;
4249 break;
4250 case ISD::SETLT:
4251 case ISD::SETGE:
4252 case ISD::SETOLT:
4253 case ISD::SETOGE:
4254 case ISD::SETULT:
4255 case ISD::SETUGE:
4256 Opc = PPC::EFDCMPLT;
4257 break;
4258 case ISD::SETGT:
4259 case ISD::SETLE:
4260 case ISD::SETOGT:
4261 case ISD::SETOLE:
4262 case ISD::SETUGT:
4263 case ISD::SETULE:
4264 Opc = PPC::EFDCMPGT;
4265 break;
4266 }
4267 } else
4268 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4269 } else {
4270 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4271 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4272 Opc = PPC::XSCMPUQP;
4273 }
4274 if (Chain)
4275 return SDValue(
4276 CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4277 0);
4278 else
4279 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4280}
4281
4283 const PPCSubtarget *Subtarget) {
4284 // For SPE instructions, the result is in GT bit of the CR
4285 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4286
4287 switch (CC) {
4288 case ISD::SETUEQ:
4289 case ISD::SETONE:
4290 case ISD::SETOLE:
4291 case ISD::SETOGE:
4292 llvm_unreachable("Should be lowered by legalize!");
4293 default: llvm_unreachable("Unknown condition!");
4294 case ISD::SETOEQ:
4295 case ISD::SETEQ:
4296 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4297 case ISD::SETUNE:
4298 case ISD::SETNE:
4299 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4300 case ISD::SETOLT:
4301 case ISD::SETLT:
4302 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4303 case ISD::SETULE:
4304 case ISD::SETLE:
4305 return PPC::PRED_LE;
4306 case ISD::SETOGT:
4307 case ISD::SETGT:
4308 return PPC::PRED_GT;
4309 case ISD::SETUGE:
4310 case ISD::SETGE:
4311 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4312 case ISD::SETO: return PPC::PRED_NU;
4313 case ISD::SETUO: return PPC::PRED_UN;
4314 // These two are invalid for floating point. Assume we have int.
4315 case ISD::SETULT: return PPC::PRED_LT;
4316 case ISD::SETUGT: return PPC::PRED_GT;
4317 }
4318}
4319
4320/// getCRIdxForSetCC - Return the index of the condition register field
4321/// associated with the SetCC condition, and whether or not the field is
4322/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4323static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4324 Invert = false;
4325 switch (CC) {
4326 default: llvm_unreachable("Unknown condition!");
4327 case ISD::SETOLT:
4328 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4329 case ISD::SETOGT:
4330 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4331 case ISD::SETOEQ:
4332 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4333 case ISD::SETUO: return 3; // Bit #3 = SETUO
4334 case ISD::SETUGE:
4335 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4336 case ISD::SETULE:
4337 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4338 case ISD::SETUNE:
4339 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4340 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4341 case ISD::SETUEQ:
4342 case ISD::SETOGE:
4343 case ISD::SETOLE:
4344 case ISD::SETONE:
4345 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4346 // These are invalid for floating point. Assume integer.
4347 case ISD::SETULT: return 0;
4348 case ISD::SETUGT: return 1;
4349 }
4350}
4351
4352// getVCmpInst: return the vector compare instruction for the specified
4353// vector type and condition code. Since this is for altivec specific code,
4354// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4355// and v4f32).
4356static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4357 bool HasVSX, bool &Swap, bool &Negate) {
4358 Swap = false;
4359 Negate = false;
4360
4361 if (VecVT.isFloatingPoint()) {
4362 /* Handle some cases by swapping input operands. */
4363 switch (CC) {
4364 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4365 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4366 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4367 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4368 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4369 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4370 default: break;
4371 }
4372 /* Handle some cases by negating the result. */
4373 switch (CC) {
4374 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4375 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4376 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4377 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4378 default: break;
4379 }
4380 /* We have instructions implementing the remaining cases. */
4381 switch (CC) {
4382 case ISD::SETEQ:
4383 case ISD::SETOEQ:
4384 if (VecVT == MVT::v4f32)
4385 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4386 else if (VecVT == MVT::v2f64)
4387 return PPC::XVCMPEQDP;
4388 break;
4389 case ISD::SETGT:
4390 case ISD::SETOGT:
4391 if (VecVT == MVT::v4f32)
4392 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4393 else if (VecVT == MVT::v2f64)
4394 return PPC::XVCMPGTDP;
4395 break;
4396 case ISD::SETGE:
4397 case ISD::SETOGE:
4398 if (VecVT == MVT::v4f32)
4399 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4400 else if (VecVT == MVT::v2f64)
4401 return PPC::XVCMPGEDP;
4402 break;
4403 default:
4404 break;
4405 }
4406 llvm_unreachable("Invalid floating-point vector compare condition");
4407 } else {
4408 /* Handle some cases by swapping input operands. */
4409 switch (CC) {
4410 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4411 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4412 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4413 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4414 default: break;
4415 }
4416 /* Handle some cases by negating the result. */
4417 switch (CC) {
4418 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4419 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4420 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4421 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4422 default: break;
4423 }
4424 /* We have instructions implementing the remaining cases. */
4425 switch (CC) {
4426 case ISD::SETEQ:
4427 case ISD::SETUEQ:
4428 if (VecVT == MVT::v16i8)
4429 return PPC::VCMPEQUB;
4430 else if (VecVT == MVT::v8i16)
4431 return PPC::VCMPEQUH;
4432 else if (VecVT == MVT::v4i32)
4433 return PPC::VCMPEQUW;
4434 else if (VecVT == MVT::v2i64)
4435 return PPC::VCMPEQUD;
4436 else if (VecVT == MVT::v1i128)
4437 return PPC::VCMPEQUQ;
4438 break;
4439 case ISD::SETGT:
4440 if (VecVT == MVT::v16i8)
4441 return PPC::VCMPGTSB;
4442 else if (VecVT == MVT::v8i16)
4443 return PPC::VCMPGTSH;
4444 else if (VecVT == MVT::v4i32)
4445 return PPC::VCMPGTSW;
4446 else if (VecVT == MVT::v2i64)
4447 return PPC::VCMPGTSD;
4448 else if (VecVT == MVT::v1i128)
4449 return PPC::VCMPGTSQ;
4450 break;
4451 case ISD::SETUGT:
4452 if (VecVT == MVT::v16i8)
4453 return PPC::VCMPGTUB;
4454 else if (VecVT == MVT::v8i16)
4455 return PPC::VCMPGTUH;
4456 else if (VecVT == MVT::v4i32)
4457 return PPC::VCMPGTUW;
4458 else if (VecVT == MVT::v2i64)
4459 return PPC::VCMPGTUD;
4460 else if (VecVT == MVT::v1i128)
4461 return PPC::VCMPGTUQ;
4462 break;
4463 default:
4464 break;
4465 }
4466 llvm_unreachable("Invalid integer vector compare condition");
4467 }
4468}
4469
4470bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4471 SDLoc dl(N);
4472 unsigned Imm;
4473 bool IsStrict = N->isStrictFPOpcode();
4474 ISD::CondCode CC =
4475 cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
4476 EVT PtrVT =
4478 bool isPPC64 = (PtrVT == MVT::i64);
4479 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4480
4481 SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
4482 SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
4483
4484 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
4485 // We can codegen setcc op, imm very efficiently compared to a brcond.
4486 // Check for those cases here.
4487 // setcc op, 0
4488 if (Imm == 0) {
4489 SDValue Op = LHS;
4490 switch (CC) {
4491 default: break;
4492 case ISD::SETEQ: {
4493 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
4494 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
4495 getI32Imm(31, dl) };
4496 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4497 return true;
4498 }
4499 case ISD::SETNE: {
4500 if (isPPC64) break;
4501 SDValue AD =
4502 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4503 Op, getI32Imm(~0U, dl)), 0);
4504 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4505 return true;
4506 }
4507 case ISD::SETLT: {
4508 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4509 getI32Imm(31, dl) };
4510 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4511 return true;
4512 }
4513 case ISD::SETGT: {
4514 SDValue T =
4515 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4516 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4517 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4518 getI32Imm(31, dl) };
4519 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4520 return true;
4521 }
4522 }
4523 } else if (Imm == ~0U) { // setcc op, -1
4524 SDValue Op = LHS;
4525 switch (CC) {
4526 default: break;
4527 case ISD::SETEQ:
4528 if (isPPC64) break;
4529 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4530 Op, getI32Imm(1, dl)), 0);
4531 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4532 SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4533 MVT::i32,
4534 getI32Imm(0, dl)),
4535 0), Op.getValue(1));
4536 return true;
4537 case ISD::SETNE: {
4538 if (isPPC64) break;
4539 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4540 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4541 Op, getI32Imm(~0U, dl));
4542 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4543 SDValue(AD, 1));
4544 return true;
4545 }
4546 case ISD::SETLT: {
4547 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4548 getI32Imm(1, dl)), 0);
4549 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4550 Op), 0);
4551 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4552 getI32Imm(31, dl) };
4553 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4554 return true;
4555 }
4556 case ISD::SETGT: {
4557 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4558 getI32Imm(31, dl) };
4559 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4560 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4561 return true;
4562 }
4563 }
4564 }
4565 }
4566
4567 // Altivec Vector compare instructions do not set any CR register by default and
4568 // vector compare operations return the same type as the operands.
4569 if (!IsStrict && LHS.getValueType().isVector()) {
4570 if (Subtarget->hasSPE())
4571 return false;
4572
4573 EVT VecVT = LHS.getValueType();
4574 bool Swap, Negate;
4575 unsigned int VCmpInst =
4576 getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4577 if (Swap)
4578 std::swap(LHS, RHS);
4579
4580 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4581 if (Negate) {
4582 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4583 CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4584 ResVT, VCmp, VCmp);
4585 return true;
4586 }
4587
4588 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4589 return true;
4590 }
4591
4592 if (Subtarget->useCRBits())
4593 return false;
4594
4595 bool Inv;
4596 unsigned Idx = getCRIdxForSetCC(CC, Inv);
4597 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
4598 if (IsStrict)
4599 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
4600 SDValue IntCR;
4601
4602 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4603 // The correct compare instruction is already set by SelectCC()
4604 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4605 Idx = 1;
4606 }
4607
4608 // Force the ccreg into CR7.
4609 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4610
4611 SDValue InGlue; // Null incoming flag value.
4612 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4613 InGlue).getValue(1);
4614
4615 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4616 CCReg), 0);
4617
4618 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4619 getI32Imm(31, dl), getI32Imm(31, dl) };
4620 if (!Inv) {
4621 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4622 return true;
4623 }
4624
4625 // Get the specified bit.
4626 SDValue Tmp =
4627 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4628 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4629 return true;
4630}
4631
4632/// Does this node represent a load/store node whose address can be represented
4633/// with a register plus an immediate that's a multiple of \p Val:
4634bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4635 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4636 StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4637 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4638 SDValue AddrOp;
4639 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4640 AddrOp = N->getOperand(1);
4641 else if (STN)
4642 AddrOp = STN->getOperand(2);
4643
4644 // If the address points a frame object or a frame object with an offset,
4645 // we need to check the object alignment.
4646 short Imm = 0;
4647 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4648 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4649 AddrOp)) {
4650 // If op0 is a frame index that is under aligned, we can't do it either,
4651 // because it is translated to r31 or r1 + slot + offset. We won't know the
4652 // slot number until the stack frame is finalized.
4653 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4654 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4655 if ((SlotAlign % Val) != 0)
4656 return false;
4657
4658 // If we have an offset, we need further check on the offset.
4659 if (AddrOp.getOpcode() != ISD::ADD)
4660 return true;
4661 }
4662
4663 if (AddrOp.getOpcode() == ISD::ADD)
4664 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4665
4666 // If the address comes from the outside, the offset will be zero.
4667 return AddrOp.getOpcode() == ISD::CopyFromReg;
4668}
4669
4670void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4671 // Transfer memoperands.
4672 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4673 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4674}
4675
4676static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4677 bool &NeedSwapOps, bool &IsUnCmp) {
4678
4679 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4680
4681 SDValue LHS = N->getOperand(0);
4682 SDValue RHS = N->getOperand(1);
4683 SDValue TrueRes = N->getOperand(2);
4684 SDValue FalseRes = N->getOperand(3);
4685 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4686 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4687 N->getSimpleValueType(0) != MVT::i32))
4688 return false;
4689
4690 // We are looking for any of:
4691 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4692 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4693 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4694 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4695 int64_t TrueResVal = TrueConst->getSExtValue();
4696 if ((TrueResVal < -1 || TrueResVal > 1) ||
4697 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4698 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4699 (TrueResVal == 0 &&
4700 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4701 return false;
4702
4703 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4704 ? FalseRes
4705 : FalseRes.getOperand(0);
4706 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4707 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4708 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4709 return false;
4710
4711 // Without this setb optimization, the outer SELECT_CC will be manually
4712 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4713 // transforms pseudo instruction to isel instruction. When there are more than
4714 // one use for result like zext/sext, with current optimization we only see
4715 // isel is replaced by setb but can't see any significant gain. Since
4716 // setb has longer latency than original isel, we should avoid this. Another
4717 // point is that setb requires comparison always kept, it can break the
4718 // opportunity to get the comparison away if we have in future.
4719 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4720 return false;
4721
4722 SDValue InnerLHS = SetOrSelCC.getOperand(0);
4723 SDValue InnerRHS = SetOrSelCC.getOperand(1);
4724 ISD::CondCode InnerCC =
4725 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4726 // If the inner comparison is a select_cc, make sure the true/false values are
4727 // 1/-1 and canonicalize it if needed.
4728 if (InnerIsSel) {
4729 ConstantSDNode *SelCCTrueConst =
4730 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4731 ConstantSDNode *SelCCFalseConst =
4732 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4733 if (!SelCCTrueConst || !SelCCFalseConst)
4734 return false;
4735 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4736 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4737 // The values must be -1/1 (requiring a swap) or 1/-1.
4738 if (SelCCTVal == -1 && SelCCFVal == 1) {
4739 std::swap(InnerLHS, InnerRHS);
4740 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4741 return false;
4742 }
4743
4744 // Canonicalize unsigned case
4745 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4746 IsUnCmp = true;
4747 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4748 }
4749
4750 bool InnerSwapped = false;
4751 if (LHS == InnerRHS && RHS == InnerLHS)
4752 InnerSwapped = true;
4753 else if (LHS != InnerLHS || RHS != InnerRHS)
4754 return false;
4755
4756 switch (CC) {
4757 // (select_cc lhs, rhs, 0, \
4758 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4759 case ISD::SETEQ:
4760 if (!InnerIsSel)
4761 return false;
4762 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4763 return false;
4764 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4765 break;
4766
4767 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4768 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4769 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4770 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4771 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4772 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4773 case ISD::SETULT:
4774 if (!IsUnCmp && InnerCC != ISD::SETNE)
4775 return false;
4776 IsUnCmp = true;
4777 [[fallthrough]];
4778 case ISD::SETLT:
4779 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4780 (InnerCC == ISD::SETLT && InnerSwapped))
4781 NeedSwapOps = (TrueResVal == 1);
4782 else
4783 return false;
4784 break;
4785
4786 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4787 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4788 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4789 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4790 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4791 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4792 case ISD::SETUGT:
4793 if (!IsUnCmp && InnerCC != ISD::SETNE)
4794 return false;
4795 IsUnCmp = true;
4796 [[fallthrough]];
4797 case ISD::SETGT:
4798 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4799 (InnerCC == ISD::SETGT && InnerSwapped))
4800 NeedSwapOps = (TrueResVal == -1);
4801 else
4802 return false;
4803 break;
4804
4805 default:
4806 return false;
4807 }
4808
4809 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4810 LLVM_DEBUG(N->dump());
4811
4812 return true;
4813}
4814
4815// Return true if it's a software square-root/divide operand.
4816static bool isSWTestOp(SDValue N) {
4817 if (N.getOpcode() == PPCISD::FTSQRT)
4818 return true;
4819 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4820 N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4821 return false;
4822 switch (N.getConstantOperandVal(0)) {
4823 case Intrinsic::ppc_vsx_xvtdivdp:
4824 case Intrinsic::ppc_vsx_xvtdivsp:
4825 case Intrinsic::ppc_vsx_xvtsqrtdp:
4826 case Intrinsic::ppc_vsx_xvtsqrtsp:
4827 return true;
4828 }
4829 return false;
4830}
4831
4832bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4833 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4834 // We are looking for following patterns, where `truncate to i1` actually has
4835 // the same semantic with `and 1`.
4836 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4837 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4838 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4839 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4840 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4841 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4842 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4843 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4844 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4845 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4846 return false;
4847
4848 SDValue CmpRHS = N->getOperand(3);
4849 if (!isNullConstant(CmpRHS))
4850 return false;
4851
4852 SDValue CmpLHS = N->getOperand(2);
4853 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4854 return false;
4855
4856 unsigned PCC = 0;
4857 bool IsCCNE = CC == ISD::SETNE;
4858 if (CmpLHS.getOpcode() == ISD::AND &&
4859 isa<ConstantSDNode>(CmpLHS.getOperand(1)))
4860 switch (CmpLHS.getConstantOperandVal(1)) {
4861 case 1:
4862 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4863 break;
4864 case 2:
4865 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4866 break;
4867 case 4:
4868 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4869 break;
4870 case 8:
4871 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4872 break;
4873 default:
4874 return false;
4875 }
4876 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4877 CmpLHS.getValueType() == MVT::i1)
4878 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4879
4880 if (PCC) {
4881 SDLoc dl(N);
4882 SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4883 N->getOperand(0)};
4884 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4885 return true;
4886 }
4887 return false;
4888}
4889
4890bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4891 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4892 // value, for example when crbits is disabled. If so, select the
4893 // loop_decrement intrinsics now.
4894 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4895 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4896
4897 if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
4898 isNullConstant(LHS.getOperand(1)))
4899 return false;
4900
4901 if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4902 LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
4903 return false;
4904
4905 if (!isa<ConstantSDNode>(RHS))
4906 return false;
4907
4908 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4909 "Counter decrement comparison is not EQ or NE");
4910
4911 SDValue OldDecrement = LHS.getOperand(0);
4912 assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4913
4914 SDLoc DecrementLoc(OldDecrement);
4915 SDValue ChainInput = OldDecrement.getOperand(0);
4916 SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
4917 : getI32Imm(1, DecrementLoc)};
4918 unsigned DecrementOpcode =
4919 Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4920 SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4921 MVT::i1, DecrementOps);
4922
4923 unsigned Val = RHS->getAsZExtVal();
4924 bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4925 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4926
4927 ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
4928 CurDAG->RemoveDeadNode(LHS.getNode());
4929
4930 // Mark the old loop_decrement intrinsic as dead.
4931 ReplaceUses(OldDecrement.getValue(1), ChainInput);
4932 CurDAG->RemoveDeadNode(OldDecrement.getNode());
4933
4934 SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4935 ChainInput, N->getOperand(0));
4936
4937 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
4938 N->getOperand(4), Chain);
4939 return true;
4940}
4941
4942bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4943 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4944 unsigned Imm;
4945 if (!isInt32Immediate(N->getOperand(1), Imm))
4946 return false;
4947
4948 SDLoc dl(N);
4949 SDValue Val = N->getOperand(0);
4950 unsigned SH, MB, ME;
4951 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4952 // with a mask, emit rlwinm
4953 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4954 Val = Val.getOperand(0);
4955 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4956 getI32Imm(ME, dl)};
4957 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4958 return true;
4959 }
4960
4961 // If this is just a masked value where the input is not handled, and
4962 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4963 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4964 // The result of LBARX/LHARX do not need to be cleared as the instructions
4965 // implicitly clear the upper bits.
4966 unsigned AlreadyCleared = 0;
4967 if (Val.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4968 auto IntrinsicID = Val.getConstantOperandVal(1);
4969 if (IntrinsicID == Intrinsic::ppc_lbarx)
4970 AlreadyCleared = 24;
4971 else if (IntrinsicID == Intrinsic::ppc_lharx)
4972 AlreadyCleared = 16;
4973 if (AlreadyCleared != 0 && AlreadyCleared == MB && ME == 31) {
4974 ReplaceUses(SDValue(N, 0), N->getOperand(0));
4975 return true;
4976 }
4977 }
4978
4979 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4980 getI32Imm(ME, dl)};
4981 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4982 return true;
4983 }
4984
4985 // AND X, 0 -> 0, not "rlwinm 32".
4986 if (Imm == 0) {
4987 ReplaceUses(SDValue(N, 0), N->getOperand(1));
4988 return true;
4989 }
4990
4991 return false;
4992}
4993
4994bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4995 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4997 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4998 return false;
4999
5000 unsigned MB, ME;
5001 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
5002 // MB ME
5003 // +----------------------+
5004 // |xxxxxxxxxxx00011111000|
5005 // +----------------------+
5006 // 0 32 64
5007 // We can only do it if the MB is larger than 32 and MB <= ME
5008 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
5009 // we didn't rotate it.
5010 SDLoc dl(N);
5011 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
5012 getI64Imm(ME - 32, dl)};
5013 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
5014 return true;
5015 }
5016
5017 return false;
5018}
5019
5020bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5021 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5023 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5024 return false;
5025
5026 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
5027 // it well with "andi.".
5028 if (isUInt<16>(Imm64))
5029 return false;
5030
5031 SDLoc Loc(N);
5032 SDValue Val = N->getOperand(0);
5033
5034 // Optimized with two rldicl's as follows:
5035 // Add missing bits on left to the mask and check that the mask is a
5036 // wrapped run of ones, i.e.
5037 // Change pattern |0001111100000011111111|
5038 // to |1111111100000011111111|.
5039 unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
5040 if (NumOfLeadingZeros != 0)
5041 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
5042
5043 unsigned MB, ME;
5044 if (!isRunOfOnes64(Imm64, MB, ME))
5045 return false;
5046
5047 // ME MB MB-ME+63
5048 // +----------------------+ +----------------------+
5049 // |1111111100000011111111| -> |0000001111111111111111|
5050 // +----------------------+ +----------------------+
5051 // 0 63 0 63
5052 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5053 unsigned OnesOnLeft = ME + 1;
5054 unsigned ZerosInBetween = (MB - ME + 63) & 63;
5055 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5056 // on the left the bits that are already zeros in the mask.
5057 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5058 getI64Imm(OnesOnLeft, Loc),
5059 getI64Imm(ZerosInBetween, Loc)),
5060 0);
5061 // MB-ME+63 ME MB
5062 // +----------------------+ +----------------------+
5063 // |0000001111111111111111| -> |0001111100000011111111|
5064 // +----------------------+ +----------------------+
5065 // 0 63 0 63
5066 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5067 // left the number of ones we previously added.
5068 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
5069 getI64Imm(NumOfLeadingZeros, Loc)};
5070 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5071 return true;
5072}
5073
5074bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5075 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5076 unsigned Imm;
5077 if (!isInt32Immediate(N->getOperand(1), Imm))
5078 return false;
5079
5080 SDValue Val = N->getOperand(0);
5081 unsigned Imm2;
5082 // ISD::OR doesn't get all the bitfield insertion fun.
5083 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5084 // bitfield insert.
5085 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
5086 return false;
5087
5088 // The idea here is to check whether this is equivalent to:
5089 // (c1 & m) | (x & ~m)
5090 // where m is a run-of-ones mask. The logic here is that, for each bit in
5091 // c1 and c2:
5092 // - if both are 1, then the output will be 1.
5093 // - if both are 0, then the output will be 0.
5094 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5095 // come from x.
5096 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5097 // be 0.
5098 // If that last condition is never the case, then we can form m from the
5099 // bits that are the same between c1 and c2.
5100 unsigned MB, ME;
5101 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5102 SDLoc dl(N);
5103 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
5104 getI32Imm(MB, dl), getI32Imm(ME, dl)};
5105 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5106 return true;
5107 }
5108
5109 return false;
5110}
5111
5112bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5113 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5114
5116 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5117 return false;
5118
5119 SDValue Val = N->getOperand(0);
5120
5121 if (Val.getOpcode() != ISD::ROTL)
5122 return false;
5123
5124 // Looking to try to avoid a situation like this one:
5125 // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5126 // %and1 = and i64 %2, 9223372036854775807
5127 // In this function we are looking to try to match RLDCL. However, the above
5128 // DAG would better match RLDICL instead which is not what we are looking
5129 // for here.
5130 SDValue RotateAmt = Val.getOperand(1);
5131 if (RotateAmt.getOpcode() == ISD::Constant)
5132 return false;
5133
5134 unsigned MB = 64 - llvm::countr_one(Imm64);
5135 SDLoc dl(N);
5136 SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5137 CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5138 return true;
5139}
5140
5141bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5142 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5144 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5145 return false;
5146
5147 // If this is a 64-bit zero-extension mask, emit rldicl.
5148 unsigned MB = 64 - llvm::countr_one(Imm64);
5149 unsigned SH = 0;
5150 unsigned Imm;
5151 SDValue Val = N->getOperand(0);
5152 SDLoc dl(N);
5153
5154 if (Val.getOpcode() == ISD::ANY_EXTEND) {
5155 auto Op0 = Val.getOperand(0);
5156 if (Op0.getOpcode() == ISD::SRL &&
5157 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
5158
5159 auto ResultType = Val.getNode()->getValueType(0);
5160 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5161 SDValue IDVal(ImDef, 0);
5162
5163 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5164 IDVal, Op0.getOperand(0),
5165 getI32Imm(1, dl)),
5166 0);
5167 SH = 64 - Imm;
5168 }
5169 }
5170
5171 // If the operand is a logical right shift, we can fold it into this
5172 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5173 // for n <= mb. The right shift is really a left rotate followed by a
5174 // mask, and this mask is a more-restrictive sub-mask of the mask implied
5175 // by the shift.
5176 if (Val.getOpcode() == ISD::SRL &&
5177 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
5178 assert(Imm < 64 && "Illegal shift amount");
5179 Val = Val.getOperand(0);
5180 SH = 64 - Imm;
5181 }
5182
5183 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
5184 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5185 return true;
5186}
5187
5188bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5189 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5191 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5192 !isMask_64(~Imm64))
5193 return false;
5194
5195 // If this is a negated 64-bit zero-extension mask,
5196 // i.e. the immediate is a sequence of ones from most significant side
5197 // and all zero for reminder, we should use rldicr.
5198 unsigned MB = 63 - llvm::countr_one(~Imm64);
5199 unsigned SH = 0;
5200 SDLoc dl(N);
5201 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
5202 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5203 return true;
5204}
5205
5206bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5207 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5209 unsigned MB, ME;
5210 SDValue N0 = N->getOperand(0);
5211
5212 // We won't get fewer instructions if the imm is 32-bit integer.
5213 // rldimi requires the imm to have consecutive ones with both sides zero.
5214 // Also, make sure the first Op has only one use, otherwise this may increase
5215 // register pressure since rldimi is destructive.
5216 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5217 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
5218 return false;
5219
5220 unsigned SH = 63 - ME;
5221 SDLoc Dl(N);
5222 // Use select64Imm for making LI instr instead of directly putting Imm64
5223 SDValue Ops[] = {
5224 N->getOperand(0),
5225 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
5226 getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
5227 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5228 return true;
5229}
5230
5231// Select - Convert the specified operand from a target-independent to a
5232// target-specific node if it hasn't already been changed.
5233void PPCDAGToDAGISel::Select(SDNode *N) {
5234 SDLoc dl(N);
5235 if (N->isMachineOpcode()) {
5236 N->setNodeId(-1);
5237 return; // Already selected.
5238 }
5239
5240 // In case any misguided DAG-level optimizations form an ADD with a
5241 // TargetConstant operand, crash here instead of miscompiling (by selecting
5242 // an r+r add instead of some kind of r+i add).
5243 if (N->getOpcode() == ISD::ADD &&
5244 N->getOperand(1).getOpcode() == ISD::TargetConstant)
5245 llvm_unreachable("Invalid ADD with TargetConstant operand");
5246
5247 // Try matching complex bit permutations before doing anything else.
5248 if (tryBitPermutation(N))
5249 return;
5250
5251 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5252 if (tryIntCompareInGPR(N))
5253 return;
5254
5255 switch (N->getOpcode()) {
5256 default: break;
5257
5258 case ISD::Constant:
5259 if (N->getValueType(0) == MVT::i64) {
5260 ReplaceNode(N, selectI64Imm(CurDAG, N));
5261 return;
5262 }
5263 break;
5264
5265 case ISD::INTRINSIC_VOID: {
5266 auto IntrinsicID = N->getConstantOperandVal(1);
5267 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5268 IntrinsicID != Intrinsic::ppc_trapd &&
5269 IntrinsicID != Intrinsic::ppc_trap)
5270 break;
5271 unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5272 IntrinsicID == Intrinsic::ppc_trapd)
5273 ? PPC::TDI
5274 : PPC::TWI;
5275 SmallVector<SDValue, 4> OpsWithMD;
5276 unsigned MDIndex;
5277 if (IntrinsicID == Intrinsic::ppc_tdw ||
5278 IntrinsicID == Intrinsic::ppc_tw) {
5279 SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5280 int16_t SImmOperand2;
5281 int16_t SImmOperand3;
5282 int16_t SImmOperand4;
5283 bool isOperand2IntS16Immediate =
5284 isIntS16Immediate(N->getOperand(2), SImmOperand2);
5285 bool isOperand3IntS16Immediate =
5286 isIntS16Immediate(N->getOperand(3), SImmOperand3);
5287 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5288 // reg or imm + imm. The imm + imm form will be optimized to either an
5289 // unconditional trap or a nop in a later pass.
5290 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5291 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5292 else if (isOperand3IntS16Immediate)
5293 // The 2nd and 3rd operands are reg + imm.
5294 Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5295 else {
5296 // The 2nd and 3rd operands are imm + reg.
5297 bool isOperand4IntS16Immediate =
5298 isIntS16Immediate(N->getOperand(4), SImmOperand4);
5299 (void)isOperand4IntS16Immediate;
5300 assert(isOperand4IntS16Immediate &&
5301 "The 4th operand is not an Immediate");
5302 // We need to flip the condition immediate TO.
5303 int16_t TO = int(SImmOperand4) & 0x1F;
5304 // We swap the first and second bit of TO if they are not same.
5305 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5306 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5307 // We swap the fourth and fifth bit of TO if they are not same.
5308 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5309 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5310 Ops[0] = getI32Imm(TO, dl);
5311 Ops[1] = N->getOperand(3);
5312 Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5313 }
5314 OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5315 MDIndex = 5;
5316 } else {
5317 OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
5318 MDIndex = 3;
5319 }
5320
5321 if (N->getNumOperands() > MDIndex) {
5322 SDValue MDV = N->getOperand(MDIndex);
5323 const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
5324 assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5325 assert((isa<MDString>(MD->getOperand(0)) &&
5326 cast<MDString>(MD->getOperand(0))->getString() ==
5327 "ppc-trap-reason") &&
5328 "Unsupported annotation data type!");
5329 for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5330 assert(isa<MDString>(MD->getOperand(i)) &&
5331 "Invalid data type for annotation ppc-trap-reason!");
5332 OpsWithMD.push_back(
5333 getI32Imm(std::stoi(cast<MDString>(
5334 MD->getOperand(i))->getString().str()), dl));
5335 }
5336 }
5337 OpsWithMD.push_back(N->getOperand(0)); // chain
5338 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5339 return;
5340 }
5341
5343 // We emit the PPC::FSELS instruction here because of type conflicts with
5344 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5345 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5346 // value for the comparison. When selecting through a .td file, a type
5347 // error is raised. Must check this first so we never break on the
5348 // !Subtarget->isISA3_1() check.
5349 auto IntID = N->getConstantOperandVal(0);
5350 if (IntID == Intrinsic::ppc_fsels) {
5351 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5352 CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5353 return;
5354 }
5355
5356 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5357 auto Pred = N->getConstantOperandVal(1);
5358 unsigned Opcode =
5359 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5360 unsigned SubReg = 0;
5361 unsigned ShiftVal = 0;
5362 bool Reverse = false;
5363 switch (Pred) {
5364 case 0:
5365 SubReg = PPC::sub_eq;
5366 ShiftVal = 1;
5367 break;
5368 case 1:
5369 SubReg = PPC::sub_eq;
5370 ShiftVal = 1;
5371 Reverse = true;
5372 break;
5373 case 2:
5374 SubReg = PPC::sub_lt;
5375 ShiftVal = 3;
5376 break;
5377 case 3:
5378 SubReg = PPC::sub_lt;
5379 ShiftVal = 3;
5380 Reverse = true;
5381 break;
5382 case 4:
5383 SubReg = PPC::sub_gt;
5384 ShiftVal = 2;
5385 break;
5386 case 5:
5387 SubReg = PPC::sub_gt;
5388 ShiftVal = 2;
5389 Reverse = true;
5390 break;
5391 case 6:
5392 SubReg = PPC::sub_un;
5393 break;
5394 case 7:
5395 SubReg = PPC::sub_un;
5396 Reverse = true;
5397 break;
5398 }
5399
5400 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5401 SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5402 CurDAG->getTargetConstant(0, dl, MVT::i32)};
5403 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5404 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5405 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5406 // MFOCRF and shift/negate the value.
5407 if (Subtarget->isISA3_1()) {
5408 SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5409 SDValue CRBit = SDValue(
5410 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5411 CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5412 0);
5413 CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5414 CRBit);
5415 } else {
5416 SDValue Move =
5417 SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5418 BCDOp.getValue(1)),
5419 0);
5420 SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5421 getI32Imm(31, dl), getI32Imm(31, dl)};
5422 if (!Reverse)
5423 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5424 else {
5425 SDValue Shift = SDValue(
5426 CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5427 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5428 }
5429 }
5430 return;
5431 }
5432
5433 if (!Subtarget->isISA3_1())
5434 break;
5435 unsigned Opcode = 0;
5436 switch (IntID) {
5437 default:
5438 break;
5439 case Intrinsic::ppc_altivec_vstribr_p:
5440 Opcode = PPC::VSTRIBR_rec;
5441 break;
5442 case Intrinsic::ppc_altivec_vstribl_p:
5443 Opcode = PPC::VSTRIBL_rec;
5444 break;
5445 case Intrinsic::ppc_altivec_vstrihr_p:
5446 Opcode = PPC::VSTRIHR_rec;
5447 break;
5448 case Intrinsic::ppc_altivec_vstrihl_p:
5449 Opcode = PPC::VSTRIHL_rec;
5450 break;
5451 }
5452 if (!Opcode)
5453 break;
5454
5455 // Generate the appropriate vector string isolate intrinsic to match.
5456 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5457 SDValue VecStrOp =
5458 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5459 // Vector string isolate instructions update the EQ bit of CR6.
5460 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5461 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5462 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5463 SDValue CRBit = SDValue(
5464 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5465 CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5466 0);
5467 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5468 return;
5469 }
5470
5471 case ISD::SETCC:
5472 case ISD::STRICT_FSETCC:
5474 if (trySETCC(N))
5475 return;
5476 break;
5477 // These nodes will be transformed into GETtlsADDR32 node, which
5478 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5481 const Module *Mod = MF->getFunction().getParent();
5482 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5483 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5484 Mod->getPICLevel() == PICLevel::SmallPIC)
5485 break;
5486 // Attach global base pointer on GETtlsADDR32 node in order to
5487 // generate secure plt code for TLS symbols.
5488 getGlobalBaseReg();
5489 } break;
5490 case PPCISD::CALL:
5491 case PPCISD::CALL_RM: {
5492 if (Subtarget->isPPC64() || !TM.isPositionIndependent() ||
5493 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF())
5494 break;
5495
5496 SDValue Op = N->getOperand(1);
5497
5498 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5499 if (GA->getTargetFlags() == PPCII::MO_PLT)
5500 getGlobalBaseReg();
5501 }
5502 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5503 if (ES->getTargetFlags() == PPCII::MO_PLT)
5504 getGlobalBaseReg();
5505 }
5506 } break;
5507
5509 ReplaceNode(N, getGlobalBaseReg());
5510 return;
5511
5512 case ISD::FrameIndex:
5513 selectFrameIndex(N, N);
5514 return;
5515
5516 case PPCISD::MFOCRF: {
5517 SDValue InGlue = N->getOperand(1);
5518 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5519 N->getOperand(0), InGlue));
5520 return;
5521 }
5522
5524 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5525 MVT::Other, N->getOperand(0)));
5526 return;
5527
5528 case PPCISD::SRA_ADDZE: {
5529 SDValue N0 = N->getOperand(0);
5530 SDValue ShiftAmt =
5531 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5532 getConstantIntValue(), dl,
5533 N->getValueType(0));
5534 if (N->getValueType(0) == MVT::i64) {
5535 SDNode *Op =
5536 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5537 N0, ShiftAmt);
5538 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5539 SDValue(Op, 1));
5540 return;
5541 } else {
5542 assert(N->getValueType(0) == MVT::i32 &&
5543 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5544 SDNode *Op =
5545 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5546 N0, ShiftAmt);
5547 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5548 SDValue(Op, 1));
5549 return;
5550 }
5551 }
5552
5553 case ISD::STORE: {
5554 // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5555 // X-form stores.
5556 StoreSDNode *ST = cast<StoreSDNode>(N);
5557 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5558 ST->getAddressingMode() != ISD::PRE_INC)
5559 if (tryTLSXFormStore(ST))
5560 return;
5561 break;
5562 }
5563 case ISD::LOAD: {
5564 // Handle preincrement loads.
5565 LoadSDNode *LD = cast<LoadSDNode>(N);
5566 EVT LoadedVT = LD->getMemoryVT();
5567
5568 // Normal loads are handled by code generated from the .td file.
5569 if (LD->getAddressingMode() != ISD::PRE_INC) {
5570 // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5571 // X-form loads.
5572 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5573 if (tryTLSXFormLoad(LD))
5574 return;
5575 break;
5576 }
5577
5578 SDValue Offset = LD->getOffset();
5579 if (Offset.getOpcode() == ISD::TargetConstant ||
5580 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5581
5582 unsigned Opcode;
5583 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5584 if (LD->getValueType(0) != MVT::i64) {
5585 // Handle PPC32 integer and normal FP loads.
5586 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5587 switch (LoadedVT.getSimpleVT().SimpleTy) {
5588 default: llvm_unreachable("Invalid PPC load type!");
5589 case MVT::f64: Opcode = PPC::LFDU; break;
5590 case MVT::f32: Opcode = PPC::LFSU; break;
5591 case MVT::i32: Opcode = PPC::LWZU; break;
5592 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5593 case MVT::i1:
5594 case MVT::i8: Opcode = PPC::LBZU; break;
5595 }
5596 } else {
5597 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5598 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5599 switch (LoadedVT.getSimpleVT().SimpleTy) {
5600 default: llvm_unreachable("Invalid PPC load type!");
5601 case MVT::i64: Opcode = PPC::LDU; break;
5602 case MVT::i32: Opcode = PPC::LWZU8; break;
5603 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5604 case MVT::i1:
5605 case MVT::i8: Opcode = PPC::LBZU8; break;
5606 }
5607 }
5608
5609 SDValue Chain = LD->getChain();
5610 SDValue Base = LD->getBasePtr();
5611 SDValue Ops[] = { Offset, Base, Chain };
5612 SDNode *MN = CurDAG->getMachineNode(
5613 Opcode, dl, LD->getValueType(0),
5614 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5615 transferMemOperands(N, MN);
5616 ReplaceNode(N, MN);
5617 return;
5618 } else {
5619 unsigned Opcode;
5620 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5621 if (LD->getValueType(0) != MVT::i64) {
5622 // Handle PPC32 integer and normal FP loads.
5623 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5624 switch (LoadedVT.getSimpleVT().SimpleTy) {
5625 default: llvm_unreachable("Invalid PPC load type!");
5626 case MVT::f64: Opcode = PPC::LFDUX; break;
5627 case MVT::f32: Opcode = PPC::LFSUX; break;
5628 case MVT::i32: Opcode = PPC::LWZUX; break;
5629 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5630 case MVT::i1:
5631 case MVT::i8: Opcode = PPC::LBZUX; break;
5632 }
5633 } else {
5634 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5635 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5636 "Invalid sext update load");
5637 switch (LoadedVT.getSimpleVT().SimpleTy) {
5638 default: llvm_unreachable("Invalid PPC load type!");
5639 case MVT::i64: Opcode = PPC::LDUX; break;
5640 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5641 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5642 case MVT::i1:
5643 case MVT::i8: Opcode = PPC::LBZUX8; break;
5644 }
5645 }
5646
5647 SDValue Chain = LD->getChain();
5648 SDValue Base = LD->getBasePtr();
5649 SDValue Ops[] = { Base, Offset, Chain };
5650 SDNode *MN = CurDAG->getMachineNode(
5651 Opcode, dl, LD->getValueType(0),
5652 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5653 transferMemOperands(N, MN);
5654 ReplaceNode(N, MN);
5655 return;
5656 }
5657 }
5658
5659 case ISD::AND:
5660 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5661 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5662 tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5663 tryAsPairOfRLDICL(N))
5664 return;
5665
5666 // Other cases are autogenerated.
5667 break;
5668 case ISD::OR: {
5669 if (N->getValueType(0) == MVT::i32)
5670 if (tryBitfieldInsert(N))
5671 return;
5672
5673 int16_t Imm;
5674 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5675 isIntS16Immediate(N->getOperand(1), Imm)) {
5676 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5677
5678 // If this is equivalent to an add, then we can fold it with the
5679 // FrameIndex calculation.
5680 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5681 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5682 return;
5683 }
5684 }
5685
5686 // If this is 'or' against an imm with consecutive ones and both sides zero,
5687 // try to emit rldimi
5688 if (tryAsSingleRLDIMI(N))
5689 return;
5690
5691 // OR with a 32-bit immediate can be handled by ori + oris
5692 // without creating an immediate in a GPR.
5693 uint64_t Imm64 = 0;
5694 bool IsPPC64 = Subtarget->isPPC64();
5695 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5696 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5697 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
5698 uint64_t ImmHi = Imm64 >> 16;
5699 uint64_t ImmLo = Imm64 & 0xFFFF;
5700 if (ImmHi != 0 && ImmLo != 0) {
5701 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5702 N->getOperand(0),
5703 getI16Imm(ImmLo, dl));
5704 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5705 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5706 return;
5707 }
5708 }
5709
5710 // Other cases are autogenerated.
5711 break;
5712 }
5713 case ISD::XOR: {
5714 // XOR with a 32-bit immediate can be handled by xori + xoris
5715 // without creating an immediate in a GPR.
5716 uint64_t Imm64 = 0;
5717 bool IsPPC64 = Subtarget->isPPC64();
5718 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5719 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5720 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
5721 uint64_t ImmHi = Imm64 >> 16;
5722 uint64_t ImmLo = Imm64 & 0xFFFF;
5723 if (ImmHi != 0 && ImmLo != 0) {
5724 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5725 N->getOperand(0),
5726 getI16Imm(ImmLo, dl));
5727 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5728 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5729 return;
5730 }
5731 }
5732
5733 break;
5734 }
5735 case ISD::ADD: {
5736 int16_t Imm;
5737 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5738 isIntS16Immediate(N->getOperand(1), Imm)) {
5739 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5740 return;
5741 }
5742
5743 break;
5744 }
5745 case ISD::SHL: {
5746 unsigned Imm, SH, MB, ME;
5747 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5748 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5749 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5750 getI32Imm(SH, dl), getI32Imm(MB, dl),
5751 getI32Imm(ME, dl) };
5752 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5753 return;
5754 }
5755
5756 // Other cases are autogenerated.
5757 break;
5758 }
5759 case ISD::SRL: {
5760 unsigned Imm, SH, MB, ME;
5761 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5762 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5763 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5764 getI32Imm(SH, dl), getI32Imm(MB, dl),
5765 getI32Imm(ME, dl) };
5766 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5767 return;
5768 }
5769
5770 // Other cases are autogenerated.
5771 break;
5772 }
5773 case ISD::MUL: {
5774 SDValue Op1 = N->getOperand(1);
5775 if (Op1.getOpcode() != ISD::Constant ||
5776 (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5777 break;
5778
5779 // If the multiplier fits int16, we can handle it with mulli.
5780 int64_t Imm = Op1->getAsZExtVal();
5781 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
5782 if (isInt<16>(Imm) || !Shift)
5783 break;
5784
5785 // If the shifted value fits int16, we can do this transformation:
5786 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5787 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5788 uint64_t ImmSh = Imm >> Shift;
5789 if (!isInt<16>(ImmSh))
5790 break;
5791
5792 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5793 if (Op1.getValueType() == MVT::i64) {
5794 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5795 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5796 N->getOperand(0), SDImm);
5797
5798 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5799 getI32Imm(63 - Shift, dl)};
5800 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5801 return;
5802 } else {
5803 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5804 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5805 N->getOperand(0), SDImm);
5806
5807 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5808 getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
5809 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5810 return;
5811 }
5812 break;
5813 }
5814 // FIXME: Remove this once the ANDI glue bug is fixed:
5817 if (!ANDIGlueBug)
5818 break;
5819
5820 EVT InVT = N->getOperand(0).getValueType();
5821 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5822 "Invalid input type for ANDI_rec_1_EQ_BIT");
5823
5824 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5825 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5826 N->getOperand(0),
5827 CurDAG->getTargetConstant(1, dl, InVT)),
5828 0);
5829 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5830 SDValue SRIdxVal = CurDAG->getTargetConstant(
5831 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5832 dl, MVT::i32);
5833
5834 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5835 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5836 return;
5837 }
5838 case ISD::SELECT_CC: {
5839 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5840 EVT PtrVT =
5842 bool isPPC64 = (PtrVT == MVT::i64);
5843
5844 // If this is a select of i1 operands, we'll pattern match it.
5845 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5846 break;
5847
5848 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5849 bool NeedSwapOps = false;
5850 bool IsUnCmp = false;
5851 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5852 SDValue LHS = N->getOperand(0);
5853 SDValue RHS = N->getOperand(1);
5854 if (NeedSwapOps)
5855 std::swap(LHS, RHS);
5856
5857 // Make use of SelectCC to generate the comparison to set CR bits, for
5858 // equality comparisons having one literal operand, SelectCC probably
5859 // doesn't need to materialize the whole literal and just use xoris to
5860 // check it first, it leads the following comparison result can't
5861 // exactly represent GT/LT relationship. So to avoid this we specify
5862 // SETGT/SETUGT here instead of SETEQ.
5863 SDValue GenCC =
5864 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5865 CurDAG->SelectNodeTo(
5866 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5867 N->getValueType(0), GenCC);
5868 NumP9Setb++;
5869 return;
5870 }
5871 }
5872
5873 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5874 if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
5875 isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
5876 CC == ISD::SETNE &&
5877 // FIXME: Implement this optzn for PPC64.
5878 N->getValueType(0) == MVT::i32) {
5879 SDNode *Tmp =
5880 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5881 N->getOperand(0), getI32Imm(~0U, dl));
5882 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5883 N->getOperand(0), SDValue(Tmp, 1));
5884 return;
5885 }
5886
5887 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5888
5889 if (N->getValueType(0) == MVT::i1) {
5890 // An i1 select is: (c & t) | (!c & f).
5891 bool Inv;
5892 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5893
5894 unsigned SRI;
5895 switch (Idx) {
5896 default: llvm_unreachable("Invalid CC index");
5897 case 0: SRI = PPC::sub_lt; break;
5898 case 1: SRI = PPC::sub_gt; break;
5899 case 2: SRI = PPC::sub_eq; break;
5900 case 3: SRI = PPC::sub_un; break;
5901 }
5902
5903 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5904
5905 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5906 CCBit, CCBit), 0);
5907 SDValue C = Inv ? NotCCBit : CCBit,
5908 NotC = Inv ? CCBit : NotCCBit;
5909
5910 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5911 C, N->getOperand(2)), 0);
5912 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5913 NotC, N->getOperand(3)), 0);
5914
5915 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5916 return;
5917 }
5918
5919 unsigned BROpc =
5920 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5921
5922 unsigned SelectCCOp;
5923 if (N->getValueType(0) == MVT::i32)
5924 SelectCCOp = PPC::SELECT_CC_I4;
5925 else if (N->getValueType(0) == MVT::i64)
5926 SelectCCOp = PPC::SELECT_CC_I8;
5927 else if (N->getValueType(0) == MVT::f32) {
5928 if (Subtarget->hasP8Vector())
5929 SelectCCOp = PPC::SELECT_CC_VSSRC;
5930 else if (Subtarget->hasSPE())
5931 SelectCCOp = PPC::SELECT_CC_SPE4;
5932 else
5933 SelectCCOp = PPC::SELECT_CC_F4;
5934 } else if (N->getValueType(0) == MVT::f64) {
5935 if (Subtarget->hasVSX())
5936 SelectCCOp = PPC::SELECT_CC_VSFRC;
5937 else if (Subtarget->hasSPE())
5938 SelectCCOp = PPC::SELECT_CC_SPE;
5939 else
5940 SelectCCOp = PPC::SELECT_CC_F8;
5941 } else if (N->getValueType(0) == MVT::f128)
5942 SelectCCOp = PPC::SELECT_CC_F16;
5943 else if (Subtarget->hasSPE())
5944 SelectCCOp = PPC::SELECT_CC_SPE;
5945 else if (N->getValueType(0) == MVT::v2f64 ||
5946 N->getValueType(0) == MVT::v2i64)
5947 SelectCCOp = PPC::SELECT_CC_VSRC;
5948 else
5949 SelectCCOp = PPC::SELECT_CC_VRRC;
5950
5951 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5952 getI32Imm(BROpc, dl) };
5953 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5954 return;
5955 }
5957 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5958 N->getValueType(0) == MVT::v2i64)) {
5959 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5960
5961 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5962 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5963 unsigned DM[2];
5964
5965 for (int i = 0; i < 2; ++i)
5966 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5967 DM[i] = 0;
5968 else
5969 DM[i] = 1;
5970
5971 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5973 isa<LoadSDNode>(Op1.getOperand(0))) {
5974 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5976
5977 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5978 (LD->getMemoryVT() == MVT::f64 ||
5979 LD->getMemoryVT() == MVT::i64) &&
5980 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5981 SDValue Chain = LD->getChain();
5982 SDValue Ops[] = { Base, Offset, Chain };
5983 MachineMemOperand *MemOp = LD->getMemOperand();
5984 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5985 N->getValueType(0), Ops);
5986 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
5987 return;
5988 }
5989 }
5990
5991 // For little endian, we must swap the input operands and adjust
5992 // the mask elements (reverse and invert them).
5993 if (Subtarget->isLittleEndian()) {
5994 std::swap(Op1, Op2);
5995 unsigned tmp = DM[0];
5996 DM[0] = 1 - DM[1];
5997 DM[1] = 1 - tmp;
5998 }
5999
6000 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
6001 MVT::i32);
6002 SDValue Ops[] = { Op1, Op2, DMV };
6003 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
6004 return;
6005 }
6006
6007 break;
6008 case PPCISD::BDNZ:
6009 case PPCISD::BDZ: {
6010 bool IsPPC64 = Subtarget->isPPC64();
6011 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
6012 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
6013 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
6014 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
6015 MVT::Other, Ops);
6016 return;
6017 }
6018 case PPCISD::COND_BRANCH: {
6019 // Op #0 is the Chain.
6020 // Op #1 is the PPC::PRED_* number.
6021 // Op #2 is the CR#
6022 // Op #3 is the Dest MBB
6023 // Op #4 is the Flag.
6024 // Prevent PPC::PRED_* from being selected into LI.
6025 unsigned PCC = N->getConstantOperandVal(1);
6026 if (EnableBranchHint)
6027 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
6028
6029 SDValue Pred = getI32Imm(PCC, dl);
6030 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
6031 N->getOperand(0), N->getOperand(4) };
6032 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6033 return;
6034 }
6035 case ISD::BR_CC: {
6036 if (tryFoldSWTestBRCC(N))
6037 return;
6038 if (trySelectLoopCountIntrinsic(N))
6039 return;
6040 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
6041 unsigned PCC =
6042 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
6043
6044 if (N->getOperand(2).getValueType() == MVT::i1) {
6045 unsigned Opc;
6046 bool Swap;
6047 switch (PCC) {
6048 default: llvm_unreachable("Unexpected Boolean-operand predicate");
6049 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6050 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6051 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6052 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6053 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6054 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6055 }
6056
6057 // A signed comparison of i1 values produces the opposite result to an
6058 // unsigned one if the condition code includes less-than or greater-than.
6059 // This is because 1 is the most negative signed i1 number and the most
6060 // positive unsigned i1 number. The CR-logical operations used for such
6061 // comparisons are non-commutative so for signed comparisons vs. unsigned
6062 // ones, the input operands just need to be swapped.
6063 if (ISD::isSignedIntSetCC(CC))
6064 Swap = !Swap;
6065
6066 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6067 N->getOperand(Swap ? 3 : 2),
6068 N->getOperand(Swap ? 2 : 3)), 0);
6069 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
6070 N->getOperand(0));
6071 return;
6072 }
6073
6074 if (EnableBranchHint)
6075 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
6076
6077 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
6078 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
6079 N->getOperand(4), N->getOperand(0) };
6080 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6081 return;
6082 }
6083 case ISD::BRIND: {
6084 // FIXME: Should custom lower this.
6085 SDValue Chain = N->getOperand(0);
6086 SDValue Target = N->getOperand(1);
6087 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6088 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6089 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6090 Chain), 0);
6091 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6092 return;
6093 }
6094 case PPCISD::TOC_ENTRY: {
6095 const bool isPPC64 = Subtarget->isPPC64();
6096 const bool isELFABI = Subtarget->isSVR4ABI();
6097 const bool isAIXABI = Subtarget->isAIXABI();
6098
6099 // PowerPC only support small, medium and large code model.
6100 const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
6101
6102 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6103 "PowerPC doesn't support tiny or kernel code models.");
6104
6105 if (isAIXABI && CModel == CodeModel::Medium)
6106 report_fatal_error("Medium code model is not supported on AIX.");
6107
6108 // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6109 // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6110 // small code model, we need to check for a toc-data attribute.
6111 if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6112 break;
6113
6114 auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6115 EVT OperandTy) {
6116 SDValue GA = TocEntry->getOperand(0);
6117 SDValue TocBase = TocEntry->getOperand(1);
6118 SDNode *MN = nullptr;
6119 if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
6120 // toc-data access doesn't involve in loading from got, no need to
6121 // keep memory operands.
6122 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, TocBase, GA);
6123 else {
6124 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
6125 transferMemOperands(TocEntry, MN);
6126 }
6127 ReplaceNode(TocEntry, MN);
6128 };
6129
6130 // Handle 32-bit small code model.
6131 if (!isPPC64 && CModel == CodeModel::Small) {
6132 // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
6133 // PPC::ADDItoc, or PPC::LWZtoc
6134 if (isELFABI) {
6135 assert(TM.isPositionIndependent() &&
6136 "32-bit ELF can only have TOC entries in position independent"
6137 " code.");
6138 // 32-bit ELF always uses a small code model toc access.
6139 replaceWith(PPC::LWZtoc, N, MVT::i32);
6140 return;
6141 }
6142
6143 assert(isAIXABI && "ELF ABI already handled");
6144
6145 if (hasTocDataAttr(N->getOperand(0))) {
6146 replaceWith(PPC::ADDItoc, N, MVT::i32);
6147 return;
6148 }
6149
6150 replaceWith(PPC::LWZtoc, N, MVT::i32);
6151 return;
6152 }
6153
6154 if (isPPC64 && CModel == CodeModel::Small) {
6155 assert(isAIXABI && "ELF ABI handled in common SelectCode");
6156
6157 if (hasTocDataAttr(N->getOperand(0))) {
6158 replaceWith(PPC::ADDItoc8, N, MVT::i64);
6159 return;
6160 }
6161 // Break if it doesn't have toc data attribute. Proceed with common
6162 // SelectCode.
6163 break;
6164 }
6165
6166 assert(CModel != CodeModel::Small && "All small code models handled.");
6167
6168 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6169 " ELF/AIX or 32-bit AIX in the following.");
6170
6171 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode,
6172 // 64-bit medium (ELF-only), or 64-bit large (ELF and AIX) code model code
6173 // that does not contain TOC data symbols. We generate two instructions as
6174 // described below. The first source operand is a symbol reference. If it
6175 // must be referenced via the TOC according to Subtarget, we generate:
6176 // [32-bit AIX]
6177 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6178 // [64-bit ELF/AIX]
6179 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6180 // Otherwise for medium code model ELF we generate:
6181 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6182
6183 // And finally for AIX with toc-data we generate:
6184 // [32-bit AIX]
6185 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
6186 // [64-bit AIX]
6187 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6188
6189 SDValue GA = N->getOperand(0);
6190 SDValue TOCbase = N->getOperand(1);
6191
6192 EVT VT = Subtarget->getScalarIntVT();
6193 SDNode *Tmp = CurDAG->getMachineNode(
6194 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6195
6196 // On AIX, if the symbol has the toc-data attribute it will be defined
6197 // in the TOC entry, so we use an ADDItocL/ADDItocL8.
6198 if (isAIXABI && hasTocDataAttr(GA)) {
6199 ReplaceNode(
6200 N, CurDAG->getMachineNode(isPPC64 ? PPC::ADDItocL8 : PPC::ADDItocL,
6201 dl, VT, SDValue(Tmp, 0), GA));
6202 return;
6203 }
6204
6205 if (PPCLowering->isAccessedAsGotIndirect(GA)) {
6206 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
6207 // the address.
6208 SDNode *MN = CurDAG->getMachineNode(
6209 isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
6210
6211 transferMemOperands(N, MN);
6212 ReplaceNode(N, MN);
6213 return;
6214 }
6215
6216 assert(isPPC64 && "TOC_ENTRY already handled for 32-bit.");
6217 // Build the address relative to the TOC-pointer.
6218 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6219 SDValue(Tmp, 0), GA));
6220 return;
6221 }
6223 // Generate a PIC-safe GOT reference.
6224 assert(Subtarget->is32BitELFABI() &&
6225 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6226 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6227 PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6228 MVT::i32);
6229 return;
6230
6231 case PPCISD::VADD_SPLAT: {
6232 // This expands into one of three sequences, depending on whether
6233 // the first operand is odd or even, positive or negative.
6234 assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6235 isa<ConstantSDNode>(N->getOperand(1)) &&
6236 "Invalid operand on VADD_SPLAT!");
6237
6238 int Elt = N->getConstantOperandVal(0);
6239 int EltSize = N->getConstantOperandVal(1);
6240 unsigned Opc1, Opc2, Opc3;
6241 EVT VT;
6242
6243 if (EltSize == 1) {
6244 Opc1 = PPC::VSPLTISB;
6245 Opc2 = PPC::VADDUBM;
6246 Opc3 = PPC::VSUBUBM;
6247 VT = MVT::v16i8;
6248 } else if (EltSize == 2) {
6249 Opc1 = PPC::VSPLTISH;
6250 Opc2 = PPC::VADDUHM;
6251 Opc3 = PPC::VSUBUHM;
6252 VT = MVT::v8i16;
6253 } else {
6254 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6255 Opc1 = PPC::VSPLTISW;
6256 Opc2 = PPC::VADDUWM;
6257 Opc3 = PPC::VSUBUWM;
6258 VT = MVT::v4i32;
6259 }
6260
6261 if ((Elt & 1) == 0) {
6262 // Elt is even, in the range [-32,-18] + [16,30].
6263 //
6264 // Convert: VADD_SPLAT elt, size
6265 // Into: tmp = VSPLTIS[BHW] elt
6266 // VADDU[BHW]M tmp, tmp
6267 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6268 SDValue EltVal = getI32Imm(Elt >> 1, dl);
6269 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6270 SDValue TmpVal = SDValue(Tmp, 0);
6271 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
6272 return;
6273 } else if (Elt > 0) {
6274 // Elt is odd and positive, in the range [17,31].
6275 //
6276 // Convert: VADD_SPLAT elt, size
6277 // Into: tmp1 = VSPLTIS[BHW] elt-16
6278 // tmp2 = VSPLTIS[BHW] -16
6279 // VSUBU[BHW]M tmp1, tmp2
6280 SDValue EltVal = getI32Imm(Elt - 16, dl);
6281 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6282 EltVal = getI32Imm(-16, dl);
6283 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6284 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
6285 SDValue(Tmp2, 0)));
6286 return;
6287 } else {
6288 // Elt is odd and negative, in the range [-31,-17].
6289 //
6290 // Convert: VADD_SPLAT elt, size
6291 // Into: tmp1 = VSPLTIS[BHW] elt+16
6292 // tmp2 = VSPLTIS[BHW] -16
6293 // VADDU[BHW]M tmp1, tmp2
6294 SDValue EltVal = getI32Imm(Elt + 16, dl);
6295 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6296 EltVal = getI32Imm(-16, dl);
6297 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6298 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
6299 SDValue(Tmp2, 0)));
6300 return;
6301 }
6302 }
6303 case PPCISD::LD_SPLAT: {
6304 // Here we want to handle splat load for type v16i8 and v8i16 when there is
6305 // no direct move, we don't need to use stack for this case. If target has
6306 // direct move, we should be able to get the best selection in the .td file.
6307 if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6308 break;
6309
6310 EVT Type = N->getValueType(0);
6311 if (Type != MVT::v16i8 && Type != MVT::v8i16)
6312 break;
6313
6314 // If the alignment for the load is 16 or bigger, we don't need the
6315 // permutated mask to get the required value. The value must be the 0
6316 // element in big endian target or 7/15 in little endian target in the
6317 // result vsx register of lvx instruction.
6318 // Select the instruction in the .td file.
6319 if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
6320 isOffsetMultipleOf(N, 16))
6321 break;
6322
6323 SDValue ZeroReg =
6324 CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6325 Subtarget->getScalarIntVT());
6326 unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6327 // v16i8 LD_SPLAT addr
6328 // ======>
6329 // Mask = LVSR/LVSL 0, addr
6330 // LoadLow = LVX 0, addr
6331 // Perm = VPERM LoadLow, LoadLow, Mask
6332 // Splat = VSPLTB 15/0, Perm
6333 //
6334 // v8i16 LD_SPLAT addr
6335 // ======>
6336 // Mask = LVSR/LVSL 0, addr
6337 // LoadLow = LVX 0, addr
6338 // LoadHigh = LVX (LI, 1), addr
6339 // Perm = VPERM LoadLow, LoadHigh, Mask
6340 // Splat = VSPLTH 7/0, Perm
6341 unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6342 unsigned SplatElemIndex =
6343 Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6344
6345 SDNode *Mask = CurDAG->getMachineNode(
6346 Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6347 N->getOperand(1));
6348
6349 SDNode *LoadLow =
6350 CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6351 {ZeroReg, N->getOperand(1), N->getOperand(0)});
6352
6353 SDNode *LoadHigh = LoadLow;
6354 if (Type == MVT::v8i16) {
6355 LoadHigh = CurDAG->getMachineNode(
6356 PPC::LVX, dl, MVT::v16i8, MVT::Other,
6357 {SDValue(CurDAG->getMachineNode(
6358 LIOpcode, dl, MVT::i32,
6359 CurDAG->getTargetConstant(1, dl, MVT::i8)),
6360 0),
6361 N->getOperand(1), SDValue(LoadLow, 1)});
6362 }
6363
6364 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
6365 transferMemOperands(N, LoadHigh);
6366
6367 SDNode *Perm =
6368 CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
6369 SDValue(LoadHigh, 0), SDValue(Mask, 0));
6370 CurDAG->SelectNodeTo(N, SplatOp, Type,
6371 CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6372 SDValue(Perm, 0));
6373 return;
6374 }
6375 }
6376
6377 SelectCode(N);
6378}
6379
6380// If the target supports the cmpb instruction, do the idiom recognition here.
6381// We don't do this as a DAG combine because we don't want to do it as nodes
6382// are being combined (because we might miss part of the eventual idiom). We
6383// don't want to do it during instruction selection because we want to reuse
6384// the logic for lowering the masking operations already part of the
6385// instruction selector.
6386SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6387 SDLoc dl(N);
6388
6389 assert(N->getOpcode() == ISD::OR &&
6390 "Only OR nodes are supported for CMPB");
6391
6392 SDValue Res;
6393 if (!Subtarget->hasCMPB())
6394 return Res;
6395
6396 if (N->getValueType(0) != MVT::i32 &&
6397 N->getValueType(0) != MVT::i64)
6398 return Res;
6399
6400 EVT VT = N->getValueType(0);
6401
6402 SDValue RHS, LHS;
6403 bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6404 uint64_t Mask = 0, Alt = 0;
6405
6406 auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6407 uint64_t &Mask, uint64_t &Alt,
6408 SDValue &LHS, SDValue &RHS) {
6409 if (O.getOpcode() != ISD::SELECT_CC)
6410 return false;
6411 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
6412
6413 if (!isa<ConstantSDNode>(O.getOperand(2)) ||
6414 !isa<ConstantSDNode>(O.getOperand(3)))
6415 return false;
6416
6417 uint64_t PM = O.getConstantOperandVal(2);
6418 uint64_t PAlt = O.getConstantOperandVal(3);
6419 for (b = 0; b < 8; ++b) {
6420 uint64_t Mask = UINT64_C(0xFF) << (8*b);
6421 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6422 break;
6423 }
6424
6425 if (b == 8)
6426 return false;
6427 Mask |= PM;
6428 Alt |= PAlt;
6429
6430 if (!isa<ConstantSDNode>(O.getOperand(1)) ||
6431 O.getConstantOperandVal(1) != 0) {
6432 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
6433 if (Op0.getOpcode() == ISD::TRUNCATE)
6434 Op0 = Op0.getOperand(0);
6435 if (Op1.getOpcode() == ISD::TRUNCATE)
6436 Op1 = Op1.getOperand(0);
6437
6438 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6439 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
6440 isa<ConstantSDNode>(Op0.getOperand(1))) {
6441
6442 unsigned Bits = Op0.getValueSizeInBits();
6443 if (b != Bits/8-1)
6444 return false;
6445 if (Op0.getConstantOperandVal(1) != Bits-8)
6446 return false;
6447
6448 LHS = Op0.getOperand(0);
6449 RHS = Op1.getOperand(0);
6450 return true;
6451 }
6452
6453 // When we have small integers (i16 to be specific), the form present
6454 // post-legalization uses SETULT in the SELECT_CC for the
6455 // higher-order byte, depending on the fact that the
6456 // even-higher-order bytes are known to all be zero, for example:
6457 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6458 // (so when the second byte is the same, because all higher-order
6459 // bits from bytes 3 and 4 are known to be zero, the result of the
6460 // xor can be at most 255)
6461 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6462 isa<ConstantSDNode>(O.getOperand(1))) {
6463
6464 uint64_t ULim = O.getConstantOperandVal(1);
6465 if (ULim != (UINT64_C(1) << b*8))
6466 return false;
6467
6468 // Now we need to make sure that the upper bytes are known to be
6469 // zero.
6470 unsigned Bits = Op0.getValueSizeInBits();
6471 if (!CurDAG->MaskedValueIsZero(
6472 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
6473 return false;
6474
6475 LHS = Op0.getOperand(0);
6476 RHS = Op0.getOperand(1);
6477 return true;
6478 }
6479
6480 return false;
6481 }
6482
6483 if (CC != ISD::SETEQ)
6484 return false;
6485
6486 SDValue Op = O.getOperand(0);
6487 if (Op.getOpcode() == ISD::AND) {
6488 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6489 return false;
6490 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
6491 return false;
6492
6493 SDValue XOR = Op.getOperand(0);
6494 if (XOR.getOpcode() == ISD::TRUNCATE)
6495 XOR = XOR.getOperand(0);
6496 if (XOR.getOpcode() != ISD::XOR)
6497 return false;
6498
6499 LHS = XOR.getOperand(0);
6500 RHS = XOR.getOperand(1);
6501 return true;
6502 } else if (Op.getOpcode() == ISD::SRL) {
6503 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6504 return false;
6505 unsigned Bits = Op.getValueSizeInBits();
6506 if (b != Bits/8-1)
6507 return false;
6508 if (Op.getConstantOperandVal(1) != Bits-8)
6509 return false;
6510
6511 SDValue XOR = Op.getOperand(0);
6512 if (XOR.getOpcode() == ISD::TRUNCATE)
6513 XOR = XOR.getOperand(0);
6514 if (XOR.getOpcode() != ISD::XOR)
6515 return false;
6516
6517 LHS = XOR.getOperand(0);
6518 RHS = XOR.getOperand(1);
6519 return true;
6520 }
6521
6522 return false;
6523 };
6524
6526 while (!Queue.empty()) {
6527 SDValue V = Queue.pop_back_val();
6528
6529 for (const SDValue &O : V.getNode()->ops()) {
6530 unsigned b = 0;
6531 uint64_t M = 0, A = 0;
6532 SDValue OLHS, ORHS;
6533 if (O.getOpcode() == ISD::OR) {
6534 Queue.push_back(O);
6535 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6536 if (!LHS) {
6537 LHS = OLHS;
6538 RHS = ORHS;
6539 BytesFound[b] = true;
6540 Mask |= M;
6541 Alt |= A;
6542 } else if ((LHS == ORHS && RHS == OLHS) ||
6543 (RHS == ORHS && LHS == OLHS)) {
6544 BytesFound[b] = true;
6545 Mask |= M;
6546 Alt |= A;
6547 } else {
6548 return Res;
6549 }
6550 } else {
6551 return Res;
6552 }
6553 }
6554 }
6555
6556 unsigned LastB = 0, BCnt = 0;
6557 for (unsigned i = 0; i < 8; ++i)
6558 if (BytesFound[LastB]) {
6559 ++BCnt;
6560 LastB = i;
6561 }
6562
6563 if (!LastB || BCnt < 2)
6564 return Res;
6565
6566 // Because we'll be zero-extending the output anyway if don't have a specific
6567 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6568 if (LHS.getValueType() != VT) {
6569 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
6570 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
6571 }
6572
6573 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
6574
6575 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6576 if (NonTrivialMask && !Alt) {
6577 // Res = Mask & CMPB
6578 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6579 CurDAG->getConstant(Mask, dl, VT));
6580 } else if (Alt) {
6581 // Res = (CMPB & Mask) | (~CMPB & Alt)
6582 // Which, as suggested here:
6583 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6584 // can be written as:
6585 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6586 // useful because the (Alt ^ Mask) can be pre-computed.
6587 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6588 CurDAG->getConstant(Mask ^ Alt, dl, VT));
6589 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
6590 CurDAG->getConstant(Alt, dl, VT));
6591 }
6592
6593 return Res;
6594}
6595
6596// When CR bit registers are enabled, an extension of an i1 variable to a i32
6597// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6598// involves constant materialization of a 0 or a 1 or both. If the result of
6599// the extension is then operated upon by some operator that can be constant
6600// folded with a constant 0 or 1, and that constant can be materialized using
6601// only one instruction (like a zero or one), then we should fold in those
6602// operations with the select.
6603void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6604 if (!Subtarget->useCRBits())
6605 return;
6606
6607 if (N->getOpcode() != ISD::ZERO_EXTEND &&
6608 N->getOpcode() != ISD::SIGN_EXTEND &&
6609 N->getOpcode() != ISD::ANY_EXTEND)
6610 return;
6611
6612 if (N->getOperand(0).getValueType() != MVT::i1)
6613 return;
6614
6615 if (!N->hasOneUse())
6616 return;
6617
6618 SDLoc dl(N);
6619 EVT VT = N->getValueType(0);
6620 SDValue Cond = N->getOperand(0);
6621 SDValue ConstTrue = CurDAG->getSignedConstant(
6622 N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
6623 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
6624
6625 do {
6626 SDNode *User = *N->user_begin();
6627 if (User->getNumOperands() != 2)
6628 break;
6629
6630 auto TryFold = [this, N, User, dl](SDValue Val) {
6631 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
6632 SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6633 SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6634
6635 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
6636 User->getValueType(0), {O0, O1});
6637 };
6638
6639 // FIXME: When the semantics of the interaction between select and undef
6640 // are clearly defined, it may turn out to be unnecessary to break here.
6641 SDValue TrueRes = TryFold(ConstTrue);
6642 if (!TrueRes || TrueRes.isUndef())
6643 break;
6644 SDValue FalseRes = TryFold(ConstFalse);
6645 if (!FalseRes || FalseRes.isUndef())
6646 break;
6647
6648 // For us to materialize these using one instruction, we must be able to
6649 // represent them as signed 16-bit integers.
6650 uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6651 if (!isInt<16>(True) || !isInt<16>(False))
6652 break;
6653
6654 // We can replace User with a new SELECT node, and try again to see if we
6655 // can fold the select with its user.
6656 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
6657 N = User;
6658 ConstTrue = TrueRes;
6659 ConstFalse = FalseRes;
6660 } while (N->hasOneUse());
6661}
6662
6663void PPCDAGToDAGISel::PreprocessISelDAG() {
6664 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6665
6666 bool MadeChange = false;
6667 while (Position != CurDAG->allnodes_begin()) {
6668 SDNode *N = &*--Position;
6669 if (N->use_empty())
6670 continue;
6671
6672 SDValue Res;
6673 switch (N->getOpcode()) {
6674 default: break;
6675 case ISD::OR:
6676 Res = combineToCMPB(N);
6677 break;
6678 }
6679
6680 if (!Res)
6681 foldBoolExts(Res, N);
6682
6683 if (Res) {
6684 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6685 LLVM_DEBUG(N->dump(CurDAG));
6686 LLVM_DEBUG(dbgs() << "\nNew: ");
6687 LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6688 LLVM_DEBUG(dbgs() << "\n");
6689
6690 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
6691 MadeChange = true;
6692 }
6693 }
6694
6695 if (MadeChange)
6696 CurDAG->RemoveDeadNodes();
6697}
6698
6699/// PostprocessISelDAG - Perform some late peephole optimizations
6700/// on the DAG representation.
6701void PPCDAGToDAGISel::PostprocessISelDAG() {
6702 // Skip peepholes at -O0.
6703 if (TM.getOptLevel() == CodeGenOptLevel::None)
6704 return;
6705
6706 PeepholePPC64();
6707 PeepholeCROps();
6708 PeepholePPC64ZExt();
6709}
6710
6711// Check if all users of this node will become isel where the second operand
6712// is the constant zero. If this is so, and if we can negate the condition,
6713// then we can flip the true and false operands. This will allow the zero to
6714// be folded with the isel so that we don't need to materialize a register
6715// containing zero.
6716bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6717 for (const SDNode *User : N->users()) {
6718 if (!User->isMachineOpcode())
6719 return false;
6720 if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6721 User->getMachineOpcode() != PPC::SELECT_I8)
6722 return false;
6723
6724 SDNode *Op1 = User->getOperand(1).getNode();
6725 SDNode *Op2 = User->getOperand(2).getNode();
6726 // If we have a degenerate select with two equal operands, swapping will
6727 // not do anything, and we may run into an infinite loop.
6728 if (Op1 == Op2)
6729 return false;
6730
6731 if (!Op2->isMachineOpcode())
6732 return false;
6733
6734 if (Op2->getMachineOpcode() != PPC::LI &&
6735 Op2->getMachineOpcode() != PPC::LI8)
6736 return false;
6737
6738 if (!isNullConstant(Op2->getOperand(0)))
6739 return false;
6740 }
6741
6742 return true;
6743}
6744
6745void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6746 SmallVector<SDNode *, 4> ToReplace;
6747 for (SDNode *User : N->users()) {
6748 assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6749 User->getMachineOpcode() == PPC::SELECT_I8) &&
6750 "Must have all select users");
6751 ToReplace.push_back(User);
6752 }
6753
6754 for (SDNode *User : ToReplace) {
6755 SDNode *ResNode =
6756 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
6757 User->getValueType(0), User->getOperand(0),
6758 User->getOperand(2),
6759 User->getOperand(1));
6760
6761 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6762 LLVM_DEBUG(User->dump(CurDAG));
6763 LLVM_DEBUG(dbgs() << "\nNew: ");
6764 LLVM_DEBUG(ResNode->dump(CurDAG));
6765 LLVM_DEBUG(dbgs() << "\n");
6766
6767 ReplaceUses(User, ResNode);
6768 }
6769}
6770
6771void PPCDAGToDAGISel::PeepholeCROps() {
6772 bool IsModified;
6773 do {
6774 IsModified = false;
6775 for (SDNode &Node : CurDAG->allnodes()) {
6776 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
6777 if (!MachineNode || MachineNode->use_empty())
6778 continue;
6779 SDNode *ResNode = MachineNode;
6780
6781 bool Op1Set = false, Op1Unset = false,
6782 Op1Not = false,
6783 Op2Set = false, Op2Unset = false,
6784 Op2Not = false;
6785
6786 unsigned Opcode = MachineNode->getMachineOpcode();
6787 switch (Opcode) {
6788 default: break;
6789 case PPC::CRAND:
6790 case PPC::CRNAND:
6791 case PPC::CROR:
6792 case PPC::CRXOR:
6793 case PPC::CRNOR:
6794 case PPC::CREQV:
6795 case PPC::CRANDC:
6796 case PPC::CRORC: {
6797 SDValue Op = MachineNode->getOperand(1);
6798 if (Op.isMachineOpcode()) {
6799 if (Op.getMachineOpcode() == PPC::CRSET)
6800 Op2Set = true;
6801 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6802 Op2Unset = true;
6803 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6804 Op.getOperand(0) == Op.getOperand(1)) ||
6805 Op.getMachineOpcode() == PPC::CRNOT)
6806 Op2Not = true;
6807 }
6808 [[fallthrough]];
6809 }
6810 case PPC::BC:
6811 case PPC::BCn:
6812 case PPC::SELECT_I4:
6813 case PPC::SELECT_I8:
6814 case PPC::SELECT_F4:
6815 case PPC::SELECT_F8:
6816 case PPC::SELECT_SPE:
6817 case PPC::SELECT_SPE4:
6818 case PPC::SELECT_VRRC:
6819 case PPC::SELECT_VSFRC:
6820 case PPC::SELECT_VSSRC:
6821 case PPC::SELECT_VSRC: {
6822 SDValue Op = MachineNode->getOperand(0);
6823 if (Op.isMachineOpcode()) {
6824 if (Op.getMachineOpcode() == PPC::CRSET)
6825 Op1Set = true;
6826 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6827 Op1Unset = true;
6828 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6829 Op.getOperand(0) == Op.getOperand(1)) ||
6830 Op.getMachineOpcode() == PPC::CRNOT)
6831 Op1Not = true;
6832 }
6833 }
6834 break;
6835 }
6836
6837 bool SelectSwap = false;
6838 switch (Opcode) {
6839 default: break;
6840 case PPC::CRAND:
6841 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6842 // x & x = x
6843 ResNode = MachineNode->getOperand(0).getNode();
6844 else if (Op1Set)
6845 // 1 & y = y
6846 ResNode = MachineNode->getOperand(1).getNode();
6847 else if (Op2Set)
6848 // x & 1 = x
6849 ResNode = MachineNode->getOperand(0).getNode();
6850 else if (Op1Unset || Op2Unset)
6851 // x & 0 = 0 & y = 0
6852 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6853 MVT::i1);
6854 else if (Op1Not)
6855 // ~x & y = andc(y, x)
6856 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6857 MVT::i1, MachineNode->getOperand(1),
6858 MachineNode->getOperand(0).
6859 getOperand(0));
6860 else if (Op2Not)
6861 // x & ~y = andc(x, y)
6862 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6863 MVT::i1, MachineNode->getOperand(0),
6864 MachineNode->getOperand(1).
6865 getOperand(0));
6866 else if (AllUsersSelectZero(MachineNode)) {
6867 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6868 MVT::i1, MachineNode->getOperand(0),
6869 MachineNode->getOperand(1));
6870 SelectSwap = true;
6871 }
6872 break;
6873 case PPC::CRNAND:
6874 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6875 // nand(x, x) -> nor(x, x)
6876 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6877 MVT::i1, MachineNode->getOperand(0),
6878 MachineNode->getOperand(0));
6879 else if (Op1Set)
6880 // nand(1, y) -> nor(y, y)
6881 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6882 MVT::i1, MachineNode->getOperand(1),
6883 MachineNode->getOperand(1));
6884 else if (Op2Set)
6885 // nand(x, 1) -> nor(x, x)
6886 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6887 MVT::i1, MachineNode->getOperand(0),
6888 MachineNode->getOperand(0));
6889 else if (Op1Unset || Op2Unset)
6890 // nand(x, 0) = nand(0, y) = 1
6891 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6892 MVT::i1);
6893 else if (Op1Not)
6894 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6895 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6896 MVT::i1, MachineNode->getOperand(0).
6897 getOperand(0),
6898 MachineNode->getOperand(1));
6899 else if (Op2Not)
6900 // nand(x, ~y) = ~x | y = orc(y, x)
6901 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6902 MVT::i1, MachineNode->getOperand(1).
6903 getOperand(0),
6904 MachineNode->getOperand(0));
6905 else if (AllUsersSelectZero(MachineNode)) {
6906 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6907 MVT::i1, MachineNode->getOperand(0),
6908 MachineNode->getOperand(1));
6909 SelectSwap = true;
6910 }
6911 break;
6912 case PPC::CROR:
6913 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6914 // x | x = x
6915 ResNode = MachineNode->getOperand(0).getNode();
6916 else if (Op1Set || Op2Set)
6917 // x | 1 = 1 | y = 1
6918 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6919 MVT::i1);
6920 else if (Op1Unset)
6921 // 0 | y = y
6922 ResNode = MachineNode->getOperand(1).getNode();
6923 else if (Op2Unset)
6924 // x | 0 = x
6925 ResNode = MachineNode->getOperand(0).getNode();
6926 else if (Op1Not)
6927 // ~x | y = orc(y, x)
6928 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6929 MVT::i1, MachineNode->getOperand(1),
6930 MachineNode->getOperand(0).
6931 getOperand(0));
6932 else if (Op2Not)
6933 // x | ~y = orc(x, y)
6934 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6935 MVT::i1, MachineNode->getOperand(0),
6936 MachineNode->getOperand(1).
6937 getOperand(0));
6938 else if (AllUsersSelectZero(MachineNode)) {
6939 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6940 MVT::i1, MachineNode->getOperand(0),
6941 MachineNode->getOperand(1));
6942 SelectSwap = true;
6943 }
6944 break;
6945 case PPC::CRXOR:
6946 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6947 // xor(x, x) = 0
6948 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6949 MVT::i1);
6950 else if (Op1Set)
6951 // xor(1, y) -> nor(y, y)
6952 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6953 MVT::i1, MachineNode->getOperand(1),
6954 MachineNode->getOperand(1));
6955 else if (Op2Set)
6956 // xor(x, 1) -> nor(x, x)
6957 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6958 MVT::i1, MachineNode->getOperand(0),
6959 MachineNode->getOperand(0));
6960 else if (Op1Unset)
6961 // xor(0, y) = y
6962 ResNode = MachineNode->getOperand(1).getNode();
6963 else if (Op2Unset)
6964 // xor(x, 0) = x
6965 ResNode = MachineNode->getOperand(0).getNode();
6966 else if (Op1Not)
6967 // xor(~x, y) = eqv(x, y)
6968 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6969 MVT::i1, MachineNode->getOperand(0).
6970 getOperand(0),
6971 MachineNode->getOperand(1));
6972 else if (Op2Not)
6973 // xor(x, ~y) = eqv(x, y)
6974 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6975 MVT::i1, MachineNode->getOperand(0),
6976 MachineNode->getOperand(1).
6977 getOperand(0));
6978 else if (AllUsersSelectZero(MachineNode)) {
6979 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6980 MVT::i1, MachineNode->getOperand(0),
6981 MachineNode->getOperand(1));
6982 SelectSwap = true;
6983 }
6984 break;
6985 case PPC::CRNOR:
6986 if (Op1Set || Op2Set)
6987 // nor(1, y) -> 0
6988 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6989 MVT::i1);
6990 else if (Op1Unset)
6991 // nor(0, y) = ~y -> nor(y, y)
6992 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6993 MVT::i1, MachineNode->getOperand(1),
6994 MachineNode->getOperand(1));
6995 else if (Op2Unset)
6996 // nor(x, 0) = ~x
6997 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6998 MVT::i1, MachineNode->getOperand(0),
6999 MachineNode->getOperand(0));
7000 else if (Op1Not)
7001 // nor(~x, y) = andc(x, y)
7002 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7003 MVT::i1, MachineNode->getOperand(0).
7004 getOperand(0),
7005 MachineNode->getOperand(1));
7006 else if (Op2Not)
7007 // nor(x, ~y) = andc(y, x)
7008 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7009 MVT::i1, MachineNode->getOperand(1).
7010 getOperand(0),
7011 MachineNode->getOperand(0));
7012 else if (AllUsersSelectZero(MachineNode)) {
7013 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7014 MVT::i1, MachineNode->getOperand(0),
7015 MachineNode->getOperand(1));
7016 SelectSwap = true;
7017 }
7018 break;
7019 case PPC::CREQV:
7020 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7021 // eqv(x, x) = 1
7022 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7023 MVT::i1);
7024 else if (Op1Set)
7025 // eqv(1, y) = y
7026 ResNode = MachineNode->getOperand(1).getNode();
7027 else if (Op2Set)
7028 // eqv(x, 1) = x
7029 ResNode = MachineNode->getOperand(0).getNode();
7030 else if (Op1Unset)
7031 // eqv(0, y) = ~y -> nor(y, y)
7032 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7033 MVT::i1, MachineNode->getOperand(1),
7034 MachineNode->getOperand(1));
7035 else if (Op2Unset)
7036 // eqv(x, 0) = ~x
7037 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7038 MVT::i1, MachineNode->getOperand(0),
7039 MachineNode->getOperand(0));
7040 else if (Op1Not)
7041 // eqv(~x, y) = xor(x, y)
7042 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7043 MVT::i1, MachineNode->getOperand(0).
7044 getOperand(0),
7045 MachineNode->getOperand(1));
7046 else if (Op2Not)
7047 // eqv(x, ~y) = xor(x, y)
7048 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7049 MVT::i1, MachineNode->getOperand(0),
7050 MachineNode->getOperand(1).
7051 getOperand(0));
7052 else if (AllUsersSelectZero(MachineNode)) {
7053 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7054 MVT::i1, MachineNode->getOperand(0),
7055 MachineNode->getOperand(1));
7056 SelectSwap = true;
7057 }
7058 break;
7059 case PPC::CRANDC:
7060 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7061 // andc(x, x) = 0
7062 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7063 MVT::i1);
7064 else if (Op1Set)
7065 // andc(1, y) = ~y
7066 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7067 MVT::i1, MachineNode->getOperand(1),
7068 MachineNode->getOperand(1));
7069 else if (Op1Unset || Op2Set)
7070 // andc(0, y) = andc(x, 1) = 0
7071 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7072 MVT::i1);
7073 else if (Op2Unset)
7074 // andc(x, 0) = x
7075 ResNode = MachineNode->getOperand(0).getNode();
7076 else if (Op1Not)
7077 // andc(~x, y) = ~(x | y) = nor(x, y)
7078 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7079 MVT::i1, MachineNode->getOperand(0).
7080 getOperand(0),
7081 MachineNode->getOperand(1));
7082 else if (Op2Not)
7083 // andc(x, ~y) = x & y
7084 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7085 MVT::i1, MachineNode->getOperand(0),
7086 MachineNode->getOperand(1).
7087 getOperand(0));
7088 else if (AllUsersSelectZero(MachineNode)) {
7089 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7090 MVT::i1, MachineNode->getOperand(1),
7091 MachineNode->getOperand(0));
7092 SelectSwap = true;
7093 }
7094 break;
7095 case PPC::CRORC:
7096 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7097 // orc(x, x) = 1
7098 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7099 MVT::i1);
7100 else if (Op1Set || Op2Unset)
7101 // orc(1, y) = orc(x, 0) = 1
7102 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7103 MVT::i1);
7104 else if (Op2Set)
7105 // orc(x, 1) = x
7106 ResNode = MachineNode->getOperand(0).getNode();
7107 else if (Op1Unset)
7108 // orc(0, y) = ~y
7109 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7110 MVT::i1, MachineNode->getOperand(1),
7111 MachineNode->getOperand(1));
7112 else if (Op1Not)
7113 // orc(~x, y) = ~(x & y) = nand(x, y)
7114 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7115 MVT::i1, MachineNode->getOperand(0).
7116 getOperand(0),
7117 MachineNode->getOperand(1));
7118 else if (Op2Not)
7119 // orc(x, ~y) = x | y
7120 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7121 MVT::i1, MachineNode->getOperand(0),
7122 MachineNode->getOperand(1).
7123 getOperand(0));
7124 else if (AllUsersSelectZero(MachineNode)) {
7125 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7126 MVT::i1, MachineNode->getOperand(1),
7127 MachineNode->getOperand(0));
7128 SelectSwap = true;
7129 }
7130 break;
7131 case PPC::SELECT_I4:
7132 case PPC::SELECT_I8:
7133 case PPC::SELECT_F4:
7134 case PPC::SELECT_F8:
7135 case PPC::SELECT_SPE:
7136 case PPC::SELECT_SPE4:
7137 case PPC::SELECT_VRRC:
7138 case PPC::SELECT_VSFRC:
7139 case PPC::SELECT_VSSRC:
7140 case PPC::SELECT_VSRC:
7141 if (Op1Set)
7142 ResNode = MachineNode->getOperand(1).getNode();
7143 else if (Op1Unset)
7144 ResNode = MachineNode->getOperand(2).getNode();
7145 else if (Op1Not)
7146 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
7147 SDLoc(MachineNode),
7148 MachineNode->getValueType(0),
7149 MachineNode->getOperand(0).
7150 getOperand(0),
7151 MachineNode->getOperand(2),
7152 MachineNode->getOperand(1));
7153 break;
7154 case PPC::BC:
7155 case PPC::BCn:
7156 if (Op1Not)
7157 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7158 PPC::BC,
7159 SDLoc(MachineNode),
7160 MVT::Other,
7161 MachineNode->getOperand(0).
7162 getOperand(0),
7163 MachineNode->getOperand(1),
7164 MachineNode->getOperand(2));
7165 // FIXME: Handle Op1Set, Op1Unset here too.
7166 break;
7167 }
7168
7169 // If we're inverting this node because it is used only by selects that
7170 // we'd like to swap, then swap the selects before the node replacement.
7171 if (SelectSwap)
7172 SwapAllSelectUsers(MachineNode);
7173
7174 if (ResNode != MachineNode) {
7175 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7176 LLVM_DEBUG(MachineNode->dump(CurDAG));
7177 LLVM_DEBUG(dbgs() << "\nNew: ");
7178 LLVM_DEBUG(ResNode->dump(CurDAG));
7179 LLVM_DEBUG(dbgs() << "\n");
7180
7181 ReplaceUses(MachineNode, ResNode);
7182 IsModified = true;
7183 }
7184 }
7185 if (IsModified)
7186 CurDAG->RemoveDeadNodes();
7187 } while (IsModified);
7188}
7189
7190// Gather the set of 32-bit operations that are known to have their
7191// higher-order 32 bits zero, where ToPromote contains all such operations.
7193 SmallPtrSetImpl<SDNode *> &ToPromote) {
7194 if (!Op32.isMachineOpcode())
7195 return false;
7196
7197 // First, check for the "frontier" instructions (those that will clear the
7198 // higher-order 32 bits.
7199
7200 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7201 // around. If it does not, then these instructions will clear the
7202 // higher-order bits.
7203 if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7204 Op32.getMachineOpcode() == PPC::RLWNM) &&
7205 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
7206 ToPromote.insert(Op32.getNode());
7207 return true;
7208 }
7209
7210 // SLW and SRW always clear the higher-order bits.
7211 if (Op32.getMachineOpcode() == PPC::SLW ||
7212 Op32.getMachineOpcode() == PPC::SRW) {
7213 ToPromote.insert(Op32.getNode());
7214 return true;
7215 }
7216
7217 // For LI and LIS, we need the immediate to be positive (so that it is not
7218 // sign extended).
7219 if (Op32.getMachineOpcode() == PPC::LI ||
7220 Op32.getMachineOpcode() == PPC::LIS) {
7221 if (!isUInt<15>(Op32.getConstantOperandVal(0)))
7222 return false;
7223
7224 ToPromote.insert(Op32.getNode());
7225 return true;
7226 }
7227
7228 // LHBRX and LWBRX always clear the higher-order bits.
7229 if (Op32.getMachineOpcode() == PPC::LHBRX ||
7230 Op32.getMachineOpcode() == PPC::LWBRX) {
7231 ToPromote.insert(Op32.getNode());
7232 return true;
7233 }
7234
7235 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7236 if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7237 Op32.getMachineOpcode() == PPC::CNTTZW) {
7238 ToPromote.insert(Op32.getNode());
7239 return true;
7240 }
7241
7242 // Next, check for those instructions we can look through.
7243
7244 // Assuming the mask does not wrap around, then the higher-order bits are
7245 // taken directly from the first operand.
7246 if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7247 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
7248 SmallPtrSet<SDNode *, 16> ToPromote1;
7249 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7250 return false;
7251
7252 ToPromote.insert(Op32.getNode());
7253 ToPromote.insert_range(ToPromote1);
7254 return true;
7255 }
7256
7257 // For OR, the higher-order bits are zero if that is true for both operands.
7258 // For SELECT_I4, the same is true (but the relevant operand numbers are
7259 // shifted by 1).
7260 if (Op32.getMachineOpcode() == PPC::OR ||
7261 Op32.getMachineOpcode() == PPC::SELECT_I4) {
7262 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7263 SmallPtrSet<SDNode *, 16> ToPromote1;
7264 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
7265 return false;
7266 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
7267 return false;
7268
7269 ToPromote.insert(Op32.getNode());
7270 ToPromote.insert_range(ToPromote1);
7271 return true;
7272 }
7273
7274 // For ORI and ORIS, we need the higher-order bits of the first operand to be
7275 // zero, and also for the constant to be positive (so that it is not sign
7276 // extended).
7277 if (Op32.getMachineOpcode() == PPC::ORI ||
7278 Op32.getMachineOpcode() == PPC::ORIS) {
7279 SmallPtrSet<SDNode *, 16> ToPromote1;
7280 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7281 return false;
7282 if (!isUInt<15>(Op32.getConstantOperandVal(1)))
7283 return false;
7284
7285 ToPromote.insert(Op32.getNode());
7286 ToPromote.insert_range(ToPromote1);
7287 return true;
7288 }
7289
7290 // The higher-order bits of AND are zero if that is true for at least one of
7291 // the operands.
7292 if (Op32.getMachineOpcode() == PPC::AND) {
7293 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7294 bool Op0OK =
7295 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7296 bool Op1OK =
7297 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
7298 if (!Op0OK && !Op1OK)
7299 return false;
7300
7301 ToPromote.insert(Op32.getNode());
7302
7303 if (Op0OK)
7304 ToPromote.insert_range(ToPromote1);
7305
7306 if (Op1OK)
7307 ToPromote.insert_range(ToPromote2);
7308
7309 return true;
7310 }
7311
7312 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
7313 // of the first operand, or if the second operand is positive (so that it is
7314 // not sign extended).
7315 if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7316 Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7317 SmallPtrSet<SDNode *, 16> ToPromote1;
7318 bool Op0OK =
7319 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7320 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
7321 if (!Op0OK && !Op1OK)
7322 return false;
7323
7324 ToPromote.insert(Op32.getNode());
7325
7326 if (Op0OK)
7327 ToPromote.insert_range(ToPromote1);
7328
7329 return true;
7330 }
7331
7332 return false;
7333}
7334
7335void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7336 if (!Subtarget->isPPC64())
7337 return;
7338
7339 // When we zero-extend from i32 to i64, we use a pattern like this:
7340 // def : Pat<(i64 (zext i32:$in)),
7341 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7342 // 0, 32)>;
7343 // There are several 32-bit shift/rotate instructions, however, that will
7344 // clear the higher-order bits of their output, rendering the RLDICL
7345 // unnecessary. When that happens, we remove it here, and redefine the
7346 // relevant 32-bit operation to be a 64-bit operation.
7347
7348 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7349
7350 bool MadeChange = false;
7351 while (Position != CurDAG->allnodes_begin()) {
7352 SDNode *N = &*--Position;
7353 // Skip dead nodes and any non-machine opcodes.
7354 if (N->use_empty() || !N->isMachineOpcode())
7355 continue;
7356
7357 if (N->getMachineOpcode() != PPC::RLDICL)
7358 continue;
7359
7360 if (N->getConstantOperandVal(1) != 0 ||
7361 N->getConstantOperandVal(2) != 32)
7362 continue;
7363
7364 SDValue ISR = N->getOperand(0);
7365 if (!ISR.isMachineOpcode() ||
7366 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7367 continue;
7368
7369 if (!ISR.hasOneUse())
7370 continue;
7371
7372 if (ISR.getConstantOperandVal(2) != PPC::sub_32)
7373 continue;
7374
7375 SDValue IDef = ISR.getOperand(0);
7376 if (!IDef.isMachineOpcode() ||
7377 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7378 continue;
7379
7380 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
7381 // can get rid of it.
7382
7383 SDValue Op32 = ISR->getOperand(1);
7384 if (!Op32.isMachineOpcode())
7385 continue;
7386
7387 // There are some 32-bit instructions that always clear the high-order 32
7388 // bits, there are also some instructions (like AND) that we can look
7389 // through.
7390 SmallPtrSet<SDNode *, 16> ToPromote;
7391 if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7392 continue;
7393
7394 // If the ToPromote set contains nodes that have uses outside of the set
7395 // (except for the original INSERT_SUBREG), then abort the transformation.
7396 bool OutsideUse = false;
7397 for (SDNode *PN : ToPromote) {
7398 for (SDNode *UN : PN->users()) {
7399 if (!ToPromote.count(UN) && UN != ISR.getNode()) {
7400 OutsideUse = true;
7401 break;
7402 }
7403 }
7404
7405 if (OutsideUse)
7406 break;
7407 }
7408 if (OutsideUse)
7409 continue;
7410
7411 MadeChange = true;
7412
7413 // We now know that this zero extension can be removed by promoting to
7414 // nodes in ToPromote to 64-bit operations, where for operations in the
7415 // frontier of the set, we need to insert INSERT_SUBREGs for their
7416 // operands.
7417 for (SDNode *PN : ToPromote) {
7418 unsigned NewOpcode;
7419 switch (PN->getMachineOpcode()) {
7420 default:
7421 llvm_unreachable("Don't know the 64-bit variant of this instruction");
7422 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7423 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7424 case PPC::SLW: NewOpcode = PPC::SLW8; break;
7425 case PPC::SRW: NewOpcode = PPC::SRW8; break;
7426 case PPC::LI: NewOpcode = PPC::LI8; break;
7427 case PPC::LIS: NewOpcode = PPC::LIS8; break;
7428 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7429 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7430 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7431 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7432 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7433 case PPC::OR: NewOpcode = PPC::OR8; break;
7434 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7435 case PPC::ORI: NewOpcode = PPC::ORI8; break;
7436 case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7437 case PPC::AND: NewOpcode = PPC::AND8; break;
7438 case PPC::ANDI_rec:
7439 NewOpcode = PPC::ANDI8_rec;
7440 break;
7441 case PPC::ANDIS_rec:
7442 NewOpcode = PPC::ANDIS8_rec;
7443 break;
7444 }
7445
7446 // Note: During the replacement process, the nodes will be in an
7447 // inconsistent state (some instructions will have operands with values
7448 // of the wrong type). Once done, however, everything should be right
7449 // again.
7450
7452 for (const SDValue &V : PN->ops()) {
7453 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7454 !isa<ConstantSDNode>(V)) {
7455 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
7456 SDNode *ReplOp =
7457 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
7458 ISR.getNode()->getVTList(), ReplOpOps);
7459 Ops.push_back(SDValue(ReplOp, 0));
7460 } else {
7461 Ops.push_back(V);
7462 }
7463 }
7464
7465 // Because all to-be-promoted nodes only have users that are other
7466 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
7467 // the i32 result value type with i64.
7468
7469 SmallVector<EVT, 2> NewVTs;
7470 SDVTList VTs = PN->getVTList();
7471 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7472 if (VTs.VTs[i] == MVT::i32)
7473 NewVTs.push_back(MVT::i64);
7474 else
7475 NewVTs.push_back(VTs.VTs[i]);
7476
7477 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7478 LLVM_DEBUG(PN->dump(CurDAG));
7479
7480 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
7481
7482 LLVM_DEBUG(dbgs() << "\nNew: ");
7483 LLVM_DEBUG(PN->dump(CurDAG));
7484 LLVM_DEBUG(dbgs() << "\n");
7485 }
7486
7487 // Now we replace the original zero extend and its associated INSERT_SUBREG
7488 // with the value feeding the INSERT_SUBREG (which has now been promoted to
7489 // return an i64).
7490
7491 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7492 LLVM_DEBUG(N->dump(CurDAG));
7493 LLVM_DEBUG(dbgs() << "\nNew: ");
7494 LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7495 LLVM_DEBUG(dbgs() << "\n");
7496
7497 ReplaceUses(N, Op32.getNode());
7498 }
7499
7500 if (MadeChange)
7501 CurDAG->RemoveDeadNodes();
7502}
7503
7504static bool isVSXSwap(SDValue N) {
7505 if (!N->isMachineOpcode())
7506 return false;
7507 unsigned Opc = N->getMachineOpcode();
7508
7509 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7510 // operand is 2.
7511 if (Opc == PPC::XXPERMDIs) {
7512 return isa<ConstantSDNode>(N->getOperand(1)) &&
7513 N->getConstantOperandVal(1) == 2;
7514 } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7515 return N->getOperand(0) == N->getOperand(1) &&
7516 isa<ConstantSDNode>(N->getOperand(2)) &&
7517 N->getConstantOperandVal(2) == 2;
7518 }
7519
7520 return false;
7521}
7522
7523// TODO: Make this complete and replace with a table-gen bit.
7525 if (!N->isMachineOpcode())
7526 return false;
7527 unsigned Opc = N->getMachineOpcode();
7528
7529 switch (Opc) {
7530 default:
7531 return false;
7532 case PPC::VAVGSB:
7533 case PPC::VAVGUB:
7534 case PPC::VAVGSH:
7535 case PPC::VAVGUH:
7536 case PPC::VAVGSW:
7537 case PPC::VAVGUW:
7538 case PPC::VMAXFP:
7539 case PPC::VMAXSB:
7540 case PPC::VMAXUB:
7541 case PPC::VMAXSH:
7542 case PPC::VMAXUH:
7543 case PPC::VMAXSW:
7544 case PPC::VMAXUW:
7545 case PPC::VMINFP:
7546 case PPC::VMINSB:
7547 case PPC::VMINUB:
7548 case PPC::VMINSH:
7549 case PPC::VMINUH:
7550 case PPC::VMINSW:
7551 case PPC::VMINUW:
7552 case PPC::VADDFP:
7553 case PPC::VADDUBM:
7554 case PPC::VADDUHM:
7555 case PPC::VADDUWM:
7556 case PPC::VSUBFP:
7557 case PPC::VSUBUBM:
7558 case PPC::VSUBUHM:
7559 case PPC::VSUBUWM:
7560 case PPC::VAND:
7561 case PPC::VANDC:
7562 case PPC::VOR:
7563 case PPC::VORC:
7564 case PPC::VXOR:
7565 case PPC::VNOR:
7566 case PPC::VMULUWM:
7567 return true;
7568 }
7569}
7570
7571// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7572// lane-insensitive.
7573static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7574 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7575 // TODO: Can we put this a common method for DAG?
7576 auto SkipRCCopy = [](SDValue V) {
7577 while (V->isMachineOpcode() &&
7578 V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7579 // All values in the chain should have single use.
7580 if (V->use_empty() || !V->user_begin()->isOnlyUserOf(V.getNode()))
7581 return SDValue();
7582 V = V->getOperand(0);
7583 }
7584 return V.hasOneUse() ? V : SDValue();
7585 };
7586
7587 SDValue VecOp = SkipRCCopy(N->getOperand(0));
7588 if (!VecOp || !isLaneInsensitive(VecOp))
7589 return;
7590
7591 SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
7592 RHS = SkipRCCopy(VecOp.getOperand(1));
7593 if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
7594 return;
7595
7596 // These swaps may still have chain-uses here, count on dead code elimination
7597 // in following passes to remove them.
7598 DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
7599 DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
7600 DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
7601}
7602
7603// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7604static bool hasAIXSmallTLSAttr(SDValue Val) {
7605 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val))
7606 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
7607 if (GV->hasAttribute("aix-small-tls"))
7608 return true;
7609
7610 return false;
7611}
7612
7613// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
7614// accesses?
7616 SDValue ADDIToFold) {
7617 // Check if ADDIToFold (the ADDI that we want to fold into local-exec
7618 // accesses), is truly an ADDI.
7619 if (!ADDIToFold.isMachineOpcode() ||
7620 (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7621 return false;
7622
7623 // Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
7624 // attribute or when the 'aix-small-tls' global variable attribute is present.
7625 const PPCSubtarget &Subtarget =
7627 SDValue TLSVarNode = ADDIToFold.getOperand(1);
7628 if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
7629 Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
7630 return false;
7631
7632 // The second operand of the ADDIToFold should be the global TLS address
7633 // (the local-exec TLS variable). We only perform the folding if the TLS
7634 // variable is the second operand.
7635 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7636 if (!GA)
7637 return false;
7638
7639 if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
7640 // The first operand of the ADDIToFold should be the thread pointer.
7641 // This transformation is only performed if the first operand of the
7642 // addi is the thread pointer.
7643 SDValue TPRegNode = ADDIToFold.getOperand(0);
7644 RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7645 if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7646 return false;
7647 }
7648
7649 // The local-[exec|dynamic] TLS variable should only have the
7650 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
7651 // performed otherwise if the flag is not set.
7652 unsigned TargetFlags = GA->getTargetFlags();
7653 if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
7654 TargetFlags == PPCII::MO_TLSLD_FLAG))
7655 return false;
7656
7657 // If all conditions are satisfied, the ADDI is valid for folding.
7658 return true;
7659}
7660
7661// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
7662// another addi, fold this sequence into a single addi if possible. Before this
7663// optimization, the sequence appears as:
7664// addi rN, r13, sym@[le|ld]
7665// addi rM, rN, imm
7666// After this optimization, we can fold the two addi into a single one:
7667// addi rM, r13, sym@[le|ld] + imm
7669 if (N->getMachineOpcode() != PPC::ADDI8)
7670 return;
7671
7672 // InitialADDI is the addi feeding into N (also an addi), and the addi that
7673 // we want optimized out.
7674 SDValue InitialADDI = N->getOperand(0);
7675
7676 if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
7677 return;
7678
7679 // The second operand of the InitialADDI should be the global TLS address
7680 // (the local-[exec|dynamic] TLS variable), with the
7681 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
7682 // isEligibleToFoldADDIForFasterLocalAccesses().
7683 SDValue TLSVarNode = InitialADDI.getOperand(1);
7684 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7685 assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7686 "local-[exec|dynamic] accesses!");
7687 unsigned TargetFlags = GA->getTargetFlags();
7688
7689 // The second operand of the addi that we want to preserve will be an
7690 // immediate. We add this immediate, together with the address of the TLS
7691 // variable found in InitialADDI, in order to preserve the correct TLS address
7692 // information during assembly printing. The offset is likely to be non-zero
7693 // when we end up in this case.
7694 int Offset = N->getConstantOperandVal(1);
7695 TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7696 Offset, TargetFlags);
7697
7698 (void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
7699 if (InitialADDI.getNode()->use_empty())
7700 DAG->RemoveDeadNode(InitialADDI.getNode());
7701}
7702
7703void PPCDAGToDAGISel::PeepholePPC64() {
7704 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7705
7706 while (Position != CurDAG->allnodes_begin()) {
7707 SDNode *N = &*--Position;
7708 // Skip dead nodes and any non-machine opcodes.
7709 if (N->use_empty() || !N->isMachineOpcode())
7710 continue;
7711
7712 if (isVSXSwap(SDValue(N, 0)))
7713 reduceVSXSwap(N, CurDAG);
7714
7715 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7716 // accesses.
7718
7719 unsigned FirstOp;
7720 unsigned StorageOpcode = N->getMachineOpcode();
7721 bool RequiresMod4Offset = false;
7722
7723 switch (StorageOpcode) {
7724 default: continue;
7725
7726 case PPC::LWA:
7727 case PPC::LD:
7728 case PPC::DFLOADf64:
7729 case PPC::DFLOADf32:
7730 RequiresMod4Offset = true;
7731 [[fallthrough]];
7732 case PPC::LBZ:
7733 case PPC::LBZ8:
7734 case PPC::LFD:
7735 case PPC::LFS:
7736 case PPC::LHA:
7737 case PPC::LHA8:
7738 case PPC::LHZ:
7739 case PPC::LHZ8:
7740 case PPC::LWZ:
7741 case PPC::LWZ8:
7742 FirstOp = 0;
7743 break;
7744
7745 case PPC::STD:
7746 case PPC::DFSTOREf64:
7747 case PPC::DFSTOREf32:
7748 RequiresMod4Offset = true;
7749 [[fallthrough]];
7750 case PPC::STB:
7751 case PPC::STB8:
7752 case PPC::STFD:
7753 case PPC::STFS:
7754 case PPC::STH:
7755 case PPC::STH8:
7756 case PPC::STW:
7757 case PPC::STW8:
7758 FirstOp = 1;
7759 break;
7760 }
7761
7762 // If this is a load or store with a zero offset, or within the alignment,
7763 // we may be able to fold an add-immediate into the memory operation.
7764 // The check against alignment is below, as it can't occur until we check
7765 // the arguments to N
7766 if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
7767 continue;
7768
7769 SDValue Base = N->getOperand(FirstOp + 1);
7770 if (!Base.isMachineOpcode())
7771 continue;
7772
7773 unsigned Flags = 0;
7774 bool ReplaceFlags = true;
7775
7776 // When the feeding operation is an add-immediate of some sort,
7777 // determine whether we need to add relocation information to the
7778 // target flags on the immediate operand when we fold it into the
7779 // load instruction.
7780 //
7781 // For something like ADDItocL8, the relocation information is
7782 // inferred from the opcode; when we process it in the AsmPrinter,
7783 // we add the necessary relocation there. A load, though, can receive
7784 // relocation from various flavors of ADDIxxx, so we need to carry
7785 // the relocation information in the target flags.
7786 switch (Base.getMachineOpcode()) {
7787 default: continue;
7788
7789 case PPC::ADDI8:
7790 case PPC::ADDI:
7791 // In some cases (such as TLS) the relocation information
7792 // is already in place on the operand, so copying the operand
7793 // is sufficient.
7794 ReplaceFlags = false;
7795 break;
7796 case PPC::ADDIdtprelL:
7798 break;
7799 case PPC::ADDItlsldL:
7801 break;
7802 case PPC::ADDItocL8:
7803 // Skip the following peephole optimizations for ADDItocL8 on AIX which
7804 // is used for toc-data access.
7805 if (Subtarget->isAIXABI())
7806 continue;
7808 break;
7809 }
7810
7811 SDValue ImmOpnd = Base.getOperand(1);
7812
7813 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7814 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7815 // we might have needed different @ha relocation values for the offset
7816 // pointers).
7817 int MaxDisplacement = 7;
7818 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7819 const GlobalValue *GV = GA->getGlobal();
7820 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7821 MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
7822 }
7823
7824 bool UpdateHBase = false;
7825 SDValue HBase = Base.getOperand(0);
7826
7827 int Offset = N->getConstantOperandVal(FirstOp);
7828 if (ReplaceFlags) {
7829 if (Offset < 0 || Offset > MaxDisplacement) {
7830 // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
7831 // one use, then we can do this for any offset; we just need to also
7832 // update the offset (i.e. the symbol addend) on the addis.
7833 if (Base.getMachineOpcode() != PPC::ADDItocL8)
7834 continue;
7835
7836 if (!HBase.isMachineOpcode() ||
7837 HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7838 continue;
7839
7840 if (!Base.hasOneUse() || !HBase.hasOneUse())
7841 continue;
7842
7843 SDValue HImmOpnd = HBase.getOperand(1);
7844 if (HImmOpnd != ImmOpnd)
7845 continue;
7846
7847 UpdateHBase = true;
7848 }
7849 } else {
7850 // Global addresses can be folded, but only if they are sufficiently
7851 // aligned.
7852 if (RequiresMod4Offset) {
7853 if (GlobalAddressSDNode *GA =
7854 dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7855 const GlobalValue *GV = GA->getGlobal();
7856 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7857 if (Alignment < 4)
7858 continue;
7859 }
7860 }
7861
7862 // If we're directly folding the addend from an addi instruction, then:
7863 // 1. In general, the offset on the memory access must be zero.
7864 // 2. If the addend is a constant, then it can be combined with a
7865 // non-zero offset, but only if the result meets the encoding
7866 // requirements.
7867 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
7868 Offset += C->getSExtValue();
7869
7870 if (RequiresMod4Offset && (Offset % 4) != 0)
7871 continue;
7872
7873 if (!isInt<16>(Offset))
7874 continue;
7875
7876 ImmOpnd = CurDAG->getSignedTargetConstant(Offset, SDLoc(ImmOpnd),
7877 ImmOpnd.getValueType());
7878 } else if (Offset != 0) {
7879 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7880 // accesses.
7882 // Add the non-zero offset information into the load or store
7883 // instruction to be used for non-TOC-based local-[exec|dynamic]
7884 // accesses.
7885 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
7886 assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7887 "addi into local-[exec|dynamic] accesses!");
7888 ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7889 MVT::i64, Offset,
7890 GA->getTargetFlags());
7891 } else
7892 continue;
7893 }
7894 }
7895
7896 // We found an opportunity. Reverse the operands from the add
7897 // immediate and substitute them into the load or store. If
7898 // needed, update the target flags for the immediate operand to
7899 // reflect the necessary relocation information.
7900 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7901 LLVM_DEBUG(Base->dump(CurDAG));
7902 LLVM_DEBUG(dbgs() << "\nN: ");
7903 LLVM_DEBUG(N->dump(CurDAG));
7904 LLVM_DEBUG(dbgs() << "\n");
7905
7906 // If the relocation information isn't already present on the
7907 // immediate operand, add it now.
7908 if (ReplaceFlags) {
7909 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7910 SDLoc dl(GA);
7911 const GlobalValue *GV = GA->getGlobal();
7912 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7913 // We can't perform this optimization for data whose alignment
7914 // is insufficient for the instruction encoding.
7915 if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
7916 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7917 continue;
7918 }
7919 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
7920 } else if (ConstantPoolSDNode *CP =
7921 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
7922 const Constant *C = CP->getConstVal();
7923 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
7924 Offset, Flags);
7925 }
7926 }
7927
7928 if (FirstOp == 1) // Store
7929 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7930 Base.getOperand(0), N->getOperand(3));
7931 else // Load
7932 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7933 N->getOperand(2));
7934
7935 if (UpdateHBase)
7936 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
7937 ImmOpnd);
7938
7939 // The add-immediate may now be dead, in which case remove it.
7940 if (Base.getNode()->use_empty())
7941 CurDAG->RemoveDeadNode(Base.getNode());
7942 }
7943}
7944
7945/// createPPCISelDag - This pass converts a legalized DAG into a
7946/// PowerPC-specific DAG, ready for instruction scheduling.
7947///
7949 CodeGenOptLevel OptLevel) {
7950 return new PPCDAGToDAGISelLegacy(TM, OptLevel);
7951}
unsigned SubReg
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:442
Module.h This file contains the declarations for the Module class.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG, SDValue ADDIToFold)
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base)
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static bool hasTocDataAttr(SDValue Val)
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG)
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG)
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
static bool isLaneInsensitive(SDValue N)
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N)
static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget, const TargetMachine &TM, const SDNode *Node)
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, const PPCSubtarget *Subtarget)
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num)
static unsigned getBranchHint(unsigned PCC, const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB)
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp)
static cl::opt< bool > EnableTLSOpt("ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden)
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
static cl::opt< ICmpInGPRType > CmpInGPR("ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result.")))
static SDNode * selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
#define PASS_NAME
#define DEBUG_TYPE
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static bool hasAIXSmallTLSAttr(SDValue Val)
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
static bool isSWTestOp(SDValue N)
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt=nullptr)
ICmpInGPRType
@ ICGPR_ZextI32
@ ICGPR_I64
@ ICGPR_All
@ ICGPR_None
@ ICGPR_NonExtIn
@ ICGPR_Sext
@ ICGPR_I32
@ ICGPR_SextI64
@ ICGPR_ZextI64
@ ICGPR_SextI32
@ ICGPR_Zext
static bool isVSXSwap(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:205
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
LLVM_ABI APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition: APInt.cpp:1154
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
MachineBasicBlock * getBasicBlock() const
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
BranchProbabilityInfo * BPI
MachineBasicBlock * MBB
MBB - The current block.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists.
static StringRef getMemConstraintName(ConstraintCode C)
Definition: InlineAsm.h:470
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Metadata node.
Definition: Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1445
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1451
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setROPProtectionHashSaveIndex(int Idx)
static int getRecordFormOpcode(unsigned Opcode)
bool is32BitELFABI() const
Definition: PPCSubtarget.h:224
MVT getScalarIntVT() const
Definition: PPCSubtarget.h:254
bool isAIXABI() const
Definition: PPCSubtarget.h:219
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:151
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
MCRegister getThreadPointerRegister() const
Definition: PPCSubtarget.h:291
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetELF() const
Definition: PPCSubtarget.h:215
CodeModel::Model getCodeModel(const TargetMachine &TM, const GlobalValue *GV) const
Calculates the effective code model for argument GV.
bool isELFv2ABI() const
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:157
Common code between 32-bit and 64-bit PowerPC targets.
Register getReg() const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
LLVM_ABI void dump() const
Dump this node, for debugging.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual void PostprocessISelDAG()
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:813
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:558
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:559
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:719
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:570
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:777
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
ilist< SDNode >::iterator allnodes_iterator
Definition: SelectionDAG.h:561
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
void insert_range(Range &&R)
Definition: SmallPtrSet.h:490
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:35
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:953
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5465
An efficient, type-erasing, non-owning reference to a callable.
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ FrameIndex
Definition: ISDOpcodes.h:90
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1162
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:225
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:180
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ AssertZext
Definition: ISDOpcodes.h:63
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1572
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1718
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1685
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1724
@ MO_TLSLD_LO
Definition: PPC.h:184
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_DTPREL_LO
These values identify relocations on immediates folded into memory operations.
Definition: PPC.h:183
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TOC_LO
Definition: PPC.h:185
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ SRL
These nodes represent PPC shifts.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ CALL
CALL - A direct function call.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instruction such as LXVDSX,...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ FTSQRT
Test instruction for software square root.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ BR_NONTAKEN_HINT
Definition: PPCPredicates.h:64
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:260
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least, stopping at the first 1.
Definition: bit.h:203
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:159
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:270
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:82
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:164
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:577
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned int NumVTs