LLVM 22.0.0git
PPCFastISel.cpp
Go to the documentation of this file.
1//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PowerPC-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// PPCGenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCISelLowering.h"
20#include "PPCSubtarget.h"
29#include "llvm/IR/CallingConv.h"
32#include "llvm/IR/Operator.h"
34
35//===----------------------------------------------------------------------===//
36//
37// TBD:
38// fastLowerArguments: Handle simple cases.
39// PPCMaterializeGV: Handle TLS.
40// SelectCall: Handle function pointers.
41// SelectCall: Handle multi-register return values.
42// SelectCall: Optimize away nops for local calls.
43// processCallArgs: Handle bit-converted arguments.
44// finishCall: Handle multi-register return values.
45// PPCComputeAddress: Handle parameter references as FrameIndex's.
46// PPCEmitCmp: Handle immediate as operand 1.
47// SelectCall: Handle small byval arguments.
48// SelectIntrinsicCall: Implement.
49// SelectSelect: Implement.
50// Consider factoring isTypeLegal into the base class.
51// Implement switches and jump tables.
52//
53//===----------------------------------------------------------------------===//
54using namespace llvm;
55
56#define DEBUG_TYPE "ppcfastisel"
57
58namespace {
59
// A simple fast-isel address: either a base register or a frame index,
// plus a byte offset.
struct Address {
  enum {
    RegBase,
    FrameIndexBase
  } BaseType = RegBase;

  union {
    unsigned Reg;
    int FI;
  } Base;

  int64_t Offset = 0;

  // Innocuous defaults for our address: register base zero, offset zero.
  Address() { Base.Reg = 0; }
};
79
// FastISel subclass for PowerPC.  Selects simple IR instructions directly
// into machine instructions; anything it cannot handle falls back to the
// SelectionDAG-based selector.
class PPCFastISel final : public FastISel {

  const TargetMachine &TM;
  const PPCSubtarget *Subtarget;
  PPCFunctionInfo *PPCFuncInfo;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  // NOTE(review): the declaration of the `Context` member initialized by
  // the constructor below appears to be missing from this excerpt --
  // verify against the upstream file.

  public:
    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                         const TargetLibraryInfo *LibInfo)
        : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
          Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
          PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
          TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
          Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
    // Entry point: attempt to select a single IR instruction.
    bool fastSelectInstruction(const Instruction *I) override;
    Register fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;
    Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    Register fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             uint64_t Imm);
    Register fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, Register Op0);
    Register fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             Register Op1);

    bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectIToFP(const Instruction *I, bool IsSigned);
    bool SelectFPToI(const Instruction *I, bool IsSigned);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool isValueAvailable(const Value *V) const;
    bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSFRCRegClassID;
    }
    bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSSRCRegClassID;
    }
    // Emit a COPY of SrcReg into a fresh register of class ToRC, returning
    // the new register.
    Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
                               unsigned Flag = 0, unsigned SubReg = 0) {
      Register TmpReg = createResultReg(ToRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
      return TmpReg;
    }
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt,
                    Register DestReg, const PPC::Predicate Pred);
    bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     const TargetRegisterClass *RC, bool IsZExt = true,
                     unsigned FP64LoadOpc = PPC::LFD);
    bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
    bool PPCComputeAddress(const Value *Obj, Address &Addr);
    void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
    bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
                       bool IsZExt);
    Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                               bool UseSExt = true);
    Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
                             bool IsSigned);
    Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);

  // Call handling routines.
  private:
    // NOTE(review): some intermediate parameter declarations of
    // processCallArgs appear to be missing from this excerpt -- verify
    // against the upstream file.
    bool processCallArgs(SmallVectorImpl<Value *> &Args,
                         SmallVectorImpl<MVT> &ArgVTs,
                         unsigned &NumBytes, bool IsVarArg);
    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);

  private:
  #include "PPCGenFastISel.inc"

};
185
186} // end anonymous namespace
187
// Map an IR compare predicate onto the single PPC branch predicate that
// implements it, or std::nullopt when no single predicate suffices.
// NOTE(review): the FCMP_* case labels that originally accompanied the
// bare `return PPC::PRED_*` statements below appear to have been lost in
// this excerpt -- verify the exact predicate mapping against the upstream
// file before relying on it.
static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // These are not representable with any single compare.
    // Major concern about the following 6 cases is NaN result. The comparison
    // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
    // only one of which will be set. The result is generated by fcmpu
    // instruction. However, bc instruction only inspects one of the first 3
    // bits, so when un is set, bc instruction may jump to an undesired
    // place.
    //
    // More specifically, if we expect an unordered comparison and un is set, we
    // expect to always go to true branch; in such case UEQ, UGT and ULT still
    // give false, which are undesired; but UNE, UGE, ULE happen to give true,
    // since they are tested by inspecting !eq, !lt, !gt, respectively.
    //
    // Similarly, for ordered comparison, when un is set, we always expect the
    // result to be false. In such case OGT, OLT and OEQ is good, since they are
    // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
    // and ONE are tested through !lt, !gt and !eq, and these are true.
    default:
      return std::nullopt;

    case CmpInst::ICMP_EQ:
      return PPC::PRED_EQ;

      return PPC::PRED_GT;

      return PPC::PRED_GE;

      return PPC::PRED_LT;

      return PPC::PRED_LE;

    case CmpInst::ICMP_NE:
      return PPC::PRED_NE;

      return PPC::PRED_NU;

      return PPC::PRED_UN;
  }
}
253
254// Determine whether the type Ty is simple enough to be handled by
255// fast-isel, and return its equivalent machine type in VT.
256// FIXME: Copied directly from ARM -- factor into base class?
257bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
258 EVT Evt = TLI.getValueType(DL, Ty, true);
259
260 // Only handle simple types.
261 if (Evt == MVT::Other || !Evt.isSimple()) return false;
262 VT = Evt.getSimpleVT();
263
264 // Handle all legal types, i.e. a register that will directly hold this
265 // value.
266 return TLI.isTypeLegal(VT);
267}
268
269// Determine whether the type Ty is simple enough to be handled by
270// fast-isel as a load target, and return its equivalent machine type in VT.
271bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
272 if (isTypeLegal(Ty, VT)) return true;
273
274 // If this is a type than can be sign or zero-extended to a basic operation
275 // go ahead and accept it now.
276 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
277 return true;
278 }
279
280 return false;
281}
282
283bool PPCFastISel::isValueAvailable(const Value *V) const {
284 if (!isa<Instruction>(V))
285 return true;
286
287 const auto *I = cast<Instruction>(V);
288 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
289}
290
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return PPCComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
          TLI.getPointerTy(DL))
        return PPCComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
        return PPCComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      // Save state so it can be restored if the base cannot be computed.
      Address SavedAddr = Addr;
      int64_t TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      // NOTE(review): the declarations of the generic type iterator `GTI`
      // and, below, of the element size `S` appear to be missing from this
      // excerpt -- verify against the upstream file.
      for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
           II != IE; ++II, ++GTI) {
        const Value *Op = *II;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          // Struct field index: add the field's layout offset.
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (PPCComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      // Static allocas are referenced directly through their frame index.
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      // NOTE(review): the declaration of the StaticAllocaMap iterator `SI`
      // appears to be missing from this excerpt -- verify against the
      // upstream file.
      FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // FIXME: References to parameters fall through to the behavior
  // below. They should be able to reference a frame index since
  // they are stored to the stack, so we can get "ld rx, offset(r1)"
  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
  // just contain the parameter. Try to handle this with a FI.

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0)
    Addr.Base.Reg = getRegForValue(Obj);

  // Prevent assignment of base register to X0, which is inappropriate
  // for loads and stores alike.
  if (Addr.Base.Reg != 0)
    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);

  return Addr.Base.Reg != 0;
}
403
404// Fix up some addresses that can't be used directly. For example, if
405// an offset won't fit in an instruction field, we may need to move it
406// into an index register.
407void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
408 Register &IndexReg) {
409
410 // Check whether the offset fits in the instruction field.
411 if (!isInt<16>(Addr.Offset))
412 UseOffset = false;
413
414 // If this is a stack pointer and the offset needs to be simplified then
415 // put the alloca address into a register, set the base type back to
416 // register and continue. This should almost never happen.
417 if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
418 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
419 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
420 ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
421 Addr.Base.Reg = ResultReg;
422 Addr.BaseType = Address::RegBase;
423 }
424
425 if (!UseOffset) {
426 IntegerType *OffsetTy = Type::getInt64Ty(*Context);
427 const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, Addr.Offset);
428 IndexReg = PPCMaterializeInt(Offset, MVT::i64);
429 assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
430 }
431}
432
433// Emit a load instruction if possible, returning true if we succeeded,
434// otherwise false. See commentary below for how the register class of
435// the load is determined.
436bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
437 const TargetRegisterClass *RC,
438 bool IsZExt, unsigned FP64LoadOpc) {
439 unsigned Opc;
440 bool UseOffset = true;
441 bool HasSPE = Subtarget->hasSPE();
442
443 // If ResultReg is given, it determines the register class of the load.
444 // Otherwise, RC is the register class to use. If the result of the
445 // load isn't anticipated in this block, both may be zero, in which
446 // case we must make a conservative guess. In particular, don't assign
447 // R0 or X0 to the result register, as the result may be used in a load,
448 // store, add-immediate, or isel that won't permit this. (Though
449 // perhaps the spill and reload of live-exit values would handle this?)
450 const TargetRegisterClass *UseRC =
451 (ResultReg ? MRI.getRegClass(ResultReg) :
452 (RC ? RC :
453 (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
454 (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
455 (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
456 &PPC::GPRC_and_GPRC_NOR0RegClass)))));
457
458 bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
459
460 switch (VT.SimpleTy) {
461 default: // e.g., vector types not handled
462 return false;
463 case MVT::i8:
464 Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
465 break;
466 case MVT::i16:
467 Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
468 : (Is32BitInt ? PPC::LHA : PPC::LHA8));
469 break;
470 case MVT::i32:
471 Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
472 : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
473 if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
474 UseOffset = false;
475 break;
476 case MVT::i64:
477 Opc = PPC::LD;
478 assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
479 "64-bit load with 32-bit target??");
480 UseOffset = ((Addr.Offset & 3) == 0);
481 break;
482 case MVT::f32:
483 Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
484 break;
485 case MVT::f64:
486 Opc = FP64LoadOpc;
487 break;
488 }
489
490 // If necessary, materialize the offset into a register and use
491 // the indexed form. Also handle stack pointers with special needs.
492 Register IndexReg;
493 PPCSimplifyAddress(Addr, UseOffset, IndexReg);
494
495 // If this is a potential VSX load with an offset of 0, a VSX indexed load can
496 // be used.
497 bool IsVSSRC = isVSSRCRegClass(UseRC);
498 bool IsVSFRC = isVSFRCRegClass(UseRC);
499 bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
500 bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
501 if ((Is32VSXLoad || Is64VSXLoad) &&
502 (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
503 (Addr.Offset == 0)) {
504 UseOffset = false;
505 }
506
507 if (!ResultReg)
508 ResultReg = createResultReg(UseRC);
509
510 // Note: If we still have a frame index here, we know the offset is
511 // in range, as otherwise PPCSimplifyAddress would have converted it
512 // into a RegBase.
513 if (Addr.BaseType == Address::FrameIndexBase) {
514 // VSX only provides an indexed load.
515 if (Is32VSXLoad || Is64VSXLoad) return false;
516
517 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
518 MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
519 Addr.Offset),
520 MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
521 MFI.getObjectAlign(Addr.Base.FI));
522
523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
524 .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
525
526 // Base reg with offset in range.
527 } else if (UseOffset) {
528 // VSX only provides an indexed load.
529 if (Is32VSXLoad || Is64VSXLoad) return false;
530
531 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
532 .addImm(Addr.Offset).addReg(Addr.Base.Reg);
533
534 // Indexed form.
535 } else {
536 // Get the RR opcode corresponding to the RI one. FIXME: It would be
537 // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
538 // is hard to get at.
539 switch (Opc) {
540 default: llvm_unreachable("Unexpected opcode!");
541 case PPC::LBZ: Opc = PPC::LBZX; break;
542 case PPC::LBZ8: Opc = PPC::LBZX8; break;
543 case PPC::LHZ: Opc = PPC::LHZX; break;
544 case PPC::LHZ8: Opc = PPC::LHZX8; break;
545 case PPC::LHA: Opc = PPC::LHAX; break;
546 case PPC::LHA8: Opc = PPC::LHAX8; break;
547 case PPC::LWZ: Opc = PPC::LWZX; break;
548 case PPC::LWZ8: Opc = PPC::LWZX8; break;
549 case PPC::LWA: Opc = PPC::LWAX; break;
550 case PPC::LWA_32: Opc = PPC::LWAX_32; break;
551 case PPC::LD: Opc = PPC::LDX; break;
552 case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
553 case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
554 case PPC::EVLDD: Opc = PPC::EVLDDX; break;
555 case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
556 }
557
558 auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
559 ResultReg);
560
561 // If we have an index register defined we use it in the store inst,
562 // otherwise we use X0 as base as it makes the vector instructions to
563 // use zero in the computation of the effective address regardless the
564 // content of the register.
565 if (IndexReg)
566 MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
567 else
568 MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
569 }
570
571 return true;
572}
573
// Attempt to fast-select a load instruction.
bool PPCFastISel::SelectLoad(const Instruction *I) {
  // FIXME: No atomic loads are supported.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  // NOTE(review): the declaration of the `Addr` local (an Address) appears
  // to be missing from this excerpt -- verify against the upstream file.
  if (!PPCComputeAddress(I->getOperand(0), Addr))
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. This is necessary
  // to constrain RA from using R0/X0 when this is not legal.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  // Emit the load; f64 uses EVLDD when the subtarget has SPE.
  Register ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
                   Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}
604
// Emit a store instruction to store SrcReg at Addr.  Returns true on
// success; Addr may be rewritten by PPCSimplifyAddress.
bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class distinguishes 32-bit from 64-bit integer
  // values when picking the store opcode.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD is DS-form: its displacement must be word-aligned.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
      break;
    case MVT::f64:
      Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form.  Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB: Opc = PPC::STBX; break;
      case PPC::STH : Opc = PPC::STHX; break;
      case PPC::STW : Opc = PPC::STWX; break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD: Opc = PPC::STDX; break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
719
// Attempt to fast-select a store instruction.
bool PPCFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  Register SrcReg;

  // FIXME: No atomic stores are supported.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(Op0->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (!SrcReg)
    return false;

  // See if we can handle this address.
  // NOTE(review): the declaration of the `Addr` local (an Address) appears
  // to be missing from this excerpt -- verify against the upstream file.
  if (!PPCComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!PPCEmitStore(VT, SrcReg, Addr))
    return false;

  return true;
}
749
750// Attempt to fast-select a branch instruction.
751bool PPCFastISel::SelectBranch(const Instruction *I) {
752 const BranchInst *BI = cast<BranchInst>(I);
753 MachineBasicBlock *BrBB = FuncInfo.MBB;
754 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
755 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
756
757 // For now, just try the simplest case where it's fed by a compare.
758 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
759 if (isValueAvailable(CI)) {
760 std::optional<PPC::Predicate> OptPPCPred =
761 getComparePred(CI->getPredicate());
762 if (!OptPPCPred)
763 return false;
764
765 PPC::Predicate PPCPred = *OptPPCPred;
766
767 // Take advantage of fall-through opportunities.
768 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
769 std::swap(TBB, FBB);
770 PPCPred = PPC::InvertPredicate(PPCPred);
771 }
772
773 Register CondReg = createResultReg(&PPC::CRRCRegClass);
774
775 if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
776 CondReg, PPCPred))
777 return false;
778
779 BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
780 .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
781 .addReg(CondReg)
782 .addMBB(TBB);
783 finishCondBranch(BI->getParent(), TBB, FBB);
784 return true;
785 }
786 } else if (const ConstantInt *CI =
787 dyn_cast<ConstantInt>(BI->getCondition())) {
788 uint64_t Imm = CI->getZExtValue();
789 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
790 fastEmitBranch(Target, MIMD.getDL());
791 return true;
792 }
793
794 // FIXME: ARM looks for a case where the block containing the compare
795 // has been split from the block containing the branch. If this happens,
796 // there is a vreg available containing the result of the compare. I'm
797 // not sure we can do much, as we've lost the predicate information with
798 // the compare instruction -- we have a 4-bit CR but don't know which bit
799 // to test here.
800 return false;
801}
802
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
// The result of the compare is placed in CR register DestReg.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, Register DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // Give up on i1 when the subtarget keeps booleans in CR bits.
  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  int64_t Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      // Unsigned compares take a 16-bit unsigned immediate; signed ones a
      // 16-bit signed immediate.
      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
        (int64_t)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  Register SrcReg1 = getRegForValue(SrcValue1);
  if (!SrcReg1)
    return false;

  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (!SrcReg2)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  auto RC1 = MRI.getRegClass(SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (HasSPE) {
        // SPE has dedicated compare opcodes for EQ/LT/GT only; any other
        // predicate cannot be selected here.
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        CmpOpc = PPC::FCMPUS;
        // Operands held in VSX single-precision classes are first copied
        // into F4RC for FCMPUS.
        if (isVSSRCRegClass(RC1))
          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
        if (RC2 && isVSSRCRegClass(RC2))
          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Narrow integer operands are widened to 32 bits below before the
      // compare is emitted.
      NeedsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  if (NeedsExt) {
    Register ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      Register ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}
941
942// Attempt to fast-select a floating-point extend instruction.
943bool PPCFastISel::SelectFPExt(const Instruction *I) {
944 Value *Src = I->getOperand(0);
945 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
946 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
947
948 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
949 return false;
950
951 Register SrcReg = getRegForValue(Src);
952 if (!SrcReg)
953 return false;
954
955 // No code is generated for a FP extend.
956 updateValueMap(I, SrcReg);
957 return true;
958}
959
960// Attempt to fast-select a floating-point truncate instruction.
961bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
962 Value *Src = I->getOperand(0);
963 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
964 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
965
966 if (SrcVT != MVT::f64 || DestVT != MVT::f32)
967 return false;
968
969 Register SrcReg = getRegForValue(Src);
970 if (!SrcReg)
971 return false;
972
973 // Round the result to single precision.
974 Register DestReg;
975 auto RC = MRI.getRegClass(SrcReg);
976 if (Subtarget->hasSPE()) {
977 DestReg = createResultReg(&PPC::GPRCRegClass);
978 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
979 DestReg)
980 .addReg(SrcReg);
981 } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
982 DestReg = createResultReg(&PPC::VSSRCRegClass);
983 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
984 DestReg)
985 .addReg(SrcReg);
986 } else {
987 SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
988 DestReg = createResultReg(&PPC::F4RCRegClass);
989 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
990 TII.get(PPC::FRSP), DestReg)
991 .addReg(SrcReg);
992 }
993
994 updateValueMap(I, DestReg);
995 return true;
996}
997
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
// Returns the f64 result register, or an invalid Register on failure.
Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.
  if (SrcVT == MVT::i32) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return Register();
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return Register();

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      // The word load reads half of the 8-byte slot; on big-endian the
      // word of interest lives at offset 4.
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
    // Otherwise keep LFD: the slot already holds the sign-extended
    // 64-bit value stored above.
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return Register();

  return ResultReg;
}
1046
// Attempt to fast-select an integer-to-floating-point conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only scalar f32/f64 destinations are handled.
  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  // Only the common scalar integer source widths are handled.
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    Register DestReg = createResultReg(&PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
      .addReg(SrcReg);
    updateValueMap(I, DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary. Sub-word values are widened to i64 so
  // the GPR-to-FPR move below stores a full doubleword.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR (via a stack slot; see PPCMoveToFPReg).
  Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (!FPReg)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
1134
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      Register SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return Register();

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. A null class lets
  // PPCEmitLoad pick a default.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  // NOTE(review): the fifth PPCEmitLoad argument appears to select the
  // zero-extending load form — confirm against its declaration.
  Register ResultReg;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return Register();

  return ResultReg;
}
1171
// Attempt to fast-select a floating-point-to-integer conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only i32/i64 integer destinations are handled.
  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  Register DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(SrcReg);

  if (Subtarget->hasSPE()) {
    // SPE converts directly into a GPR; no stack-slot move is needed below.
    DestReg = createResultReg(&PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    DestReg = createResultReg(&PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    DestReg = createResultReg(&PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        // Without FCTIWUZ (pre-FPCVT), fall back to the 64-bit FCTIDZ;
        // the low word of the result supplies the u32 value.
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  Register IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);

  if (!IntReg)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
1253
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the 32- or 64-bit flavor of the opcode to match the register class.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  Register SrcReg1 = getRegForValue(I->getOperand(0));
  if (!SrcReg1)
    return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    // Only signed 16-bit immediates fit the D-form instructions.
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          // ADDI reads R0 as the constant zero, so keep the source out of R0.
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          // Likewise, ADDI8 reads X0 as zero.
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // a - imm is rewritten as a + (-imm); -32768 cannot be negated
          // within the 16-bit immediate range, so use the reg-reg form.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  Register SrcReg2 = getRegForValue(I->getOperand(1));
  if (!SrcReg2)
    return false;

  // Reverse operands for subtract-from (SUBF computes RB - RA).
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1360
1361// Handle arguments to a call that we're attempting to fast-select.
1362// Return false if the arguments are too complex for us at the moment.
1363bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
1365 SmallVectorImpl<MVT> &ArgVTs,
1368 CallingConv::ID CC, unsigned &NumBytes,
1369 bool IsVarArg) {
1371 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1372
1373 // Reserve space for the linkage area on the stack.
1374 unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1375 CCInfo.AllocateStack(LinkageSize, Align(8));
1376
1378 for (Value *Arg : Args)
1379 ArgTys.push_back(Arg->getType());
1380 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, ArgTys, CC_PPC64_ELF_FIS);
1381
1382 // Bail out if we can't handle any of the arguments.
1383 for (const CCValAssign &VA : ArgLocs) {
1384 MVT ArgVT = ArgVTs[VA.getValNo()];
1385
1386 // Skip vector arguments for now, as well as long double and
1387 // uint128_t, and anything that isn't passed in a register.
1388 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1389 !VA.isRegLoc() || VA.needsCustom())
1390 return false;
1391
1392 // Skip bit-converted arguments for now.
1393 if (VA.getLocInfo() == CCValAssign::BCvt)
1394 return false;
1395 }
1396
1397 // Get a count of how many bytes are to be pushed onto the stack.
1398 NumBytes = CCInfo.getStackSize();
1399
1400 // The prolog code of the callee may store up to 8 GPR argument registers to
1401 // the stack, allowing va_start to index over them in memory if its varargs.
1402 // Because we cannot tell if this is needed on the caller side, we have to
1403 // conservatively assume that it is needed. As such, make sure we have at
1404 // least enough stack space for the caller to store the 8 GPRs.
1405 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1406 NumBytes = std::max(NumBytes, LinkageSize + 64);
1407
1408 // Issue CALLSEQ_START.
1409 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1410 TII.get(TII.getCallFrameSetupOpcode()))
1411 .addImm(NumBytes).addImm(0);
1412
1413 // Prepare to assign register arguments. Every argument uses up a
1414 // GPR protocol register even if it's passed in a floating-point
1415 // register (unless we're using the fast calling convention).
1416 unsigned NextGPR = PPC::X3;
1417 unsigned NextFPR = PPC::F1;
1418
1419 // Process arguments.
1420 for (const CCValAssign &VA : ArgLocs) {
1421 Register Arg = ArgRegs[VA.getValNo()];
1422 MVT ArgVT = ArgVTs[VA.getValNo()];
1423
1424 // Handle argument promotion and bitcasts.
1425 switch (VA.getLocInfo()) {
1426 default:
1427 llvm_unreachable("Unknown loc info!");
1428 case CCValAssign::Full:
1429 break;
1430 case CCValAssign::SExt: {
1431 MVT DestVT = VA.getLocVT();
1432 const TargetRegisterClass *RC =
1433 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1434 Register TmpReg = createResultReg(RC);
1435 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1436 llvm_unreachable("Failed to emit a sext!");
1437 ArgVT = DestVT;
1438 Arg = TmpReg;
1439 break;
1440 }
1441 case CCValAssign::AExt:
1442 case CCValAssign::ZExt: {
1443 MVT DestVT = VA.getLocVT();
1444 const TargetRegisterClass *RC =
1445 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1446 Register TmpReg = createResultReg(RC);
1447 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1448 llvm_unreachable("Failed to emit a zext!");
1449 ArgVT = DestVT;
1450 Arg = TmpReg;
1451 break;
1452 }
1453 case CCValAssign::BCvt: {
1454 // FIXME: Not yet handled.
1455 llvm_unreachable("Should have bailed before getting here!");
1456 break;
1457 }
1458 }
1459
1460 // Copy this argument to the appropriate register.
1461 unsigned ArgReg;
1462 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1463 ArgReg = NextFPR++;
1464 if (CC != CallingConv::Fast)
1465 ++NextGPR;
1466 } else
1467 ArgReg = NextGPR++;
1468
1469 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1470 TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1471 RegArgs.push_back(ArgReg);
1472 }
1473
1474 return true;
1475}
1476
1477// For a call that we've determined we can fast-select, finish the
1478// call sequence and generate a copy to obtain the return value (if any).
1479bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1480 CallingConv::ID CC = CLI.CallConv;
1481
1482 // Issue CallSEQ_END.
1483 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1484 TII.get(TII.getCallFrameDestroyOpcode()))
1485 .addImm(NumBytes).addImm(0);
1486
1487 // Next, generate a copy to obtain the return value.
1488 // FIXME: No multi-register return values yet, though I don't foresee
1489 // any real difficulties there.
1490 if (RetVT != MVT::isVoid) {
1492 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1493 CCInfo.AnalyzeCallResult(RetVT, CLI.RetTy, RetCC_PPC64_ELF_FIS);
1494 CCValAssign &VA = RVLocs[0];
1495 assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1496 assert(VA.isRegLoc() && "Can only return in registers!");
1497
1498 MVT DestVT = VA.getValVT();
1499 MVT CopyVT = DestVT;
1500
1501 // Ints smaller than a register still arrive in a full 64-bit
1502 // register, so make sure we recognize this.
1503 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1504 CopyVT = MVT::i64;
1505
1506 Register SourcePhysReg = VA.getLocReg();
1507 Register ResultReg;
1508
1509 if (RetVT == CopyVT) {
1510 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1511 ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
1512
1513 // If necessary, round the floating result to single precision.
1514 } else if (CopyVT == MVT::f64) {
1515 ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1516 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
1517 ResultReg).addReg(SourcePhysReg);
1518
1519 // If only the low half of a general register is needed, generate
1520 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1521 // used along the fast-isel path (not lowered), and downstream logic
1522 // also doesn't like a direct subreg copy on a physical reg.)
1523 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1524 // Convert physical register from G8RC to GPRC.
1525 SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1526 ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
1527 }
1528
1529 assert(ResultReg && "ResultReg unset!");
1530 CLI.InRegs.push_back(SourcePhysReg);
1531 CLI.ResultReg = ResultReg;
1532 CLI.NumResultRegs = 1;
1533 }
1534
1535 return true;
1536}
1537
1538bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1539 CallingConv::ID CC = CLI.CallConv;
1540 bool IsTailCall = CLI.IsTailCall;
1541 bool IsVarArg = CLI.IsVarArg;
1542 const Value *Callee = CLI.Callee;
1543 const MCSymbol *Symbol = CLI.Symbol;
1544
1545 if (!Callee && !Symbol)
1546 return false;
1547
1548 // Allow SelectionDAG isel to handle tail calls and long calls.
1549 if (IsTailCall || Subtarget->useLongCalls())
1550 return false;
1551
1552 // Let SDISel handle vararg functions.
1553 if (IsVarArg)
1554 return false;
1555
1556 // If this is a PC-Rel function, let SDISel handle the call.
1557 if (Subtarget->isUsingPCRelativeCalls())
1558 return false;
1559
1560 // Handle simple calls for now, with legal return types and
1561 // those that can be extended.
1562 Type *RetTy = CLI.RetTy;
1563 MVT RetVT;
1564 if (RetTy->isVoidTy())
1565 RetVT = MVT::isVoid;
1566 else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1567 RetVT != MVT::i8)
1568 return false;
1569 else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1570 // We can't handle boolean returns when CR bits are in use.
1571 return false;
1572
1573 // FIXME: No multi-register return values yet.
1574 if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1575 RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1576 RetVT != MVT::f64) {
1578 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1579 CCInfo.AnalyzeCallResult(RetVT, RetTy, RetCC_PPC64_ELF_FIS);
1580 if (RVLocs.size() > 1)
1581 return false;
1582 }
1583
1584 // Bail early if more than 8 arguments, as we only currently
1585 // handle arguments passed in registers.
1586 unsigned NumArgs = CLI.OutVals.size();
1587 if (NumArgs > 8)
1588 return false;
1589
1590 // Set up the argument vectors.
1593 SmallVector<MVT, 8> ArgVTs;
1595
1596 Args.reserve(NumArgs);
1597 ArgRegs.reserve(NumArgs);
1598 ArgVTs.reserve(NumArgs);
1599 ArgFlags.reserve(NumArgs);
1600
1601 for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1602 // Only handle easy calls for now. It would be reasonably easy
1603 // to handle <= 8-byte structures passed ByVal in registers, but we
1604 // have to ensure they are right-justified in the register.
1605 ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1606 if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1607 return false;
1608
1609 Value *ArgValue = CLI.OutVals[i];
1610 Type *ArgTy = ArgValue->getType();
1611 MVT ArgVT;
1612 if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1613 return false;
1614
1615 // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1616 // types, which is passed through vector register. Skip these types and
1617 // fallback to default SelectionDAG based selection.
1618 if (ArgVT.isVector() || ArgVT == MVT::f128)
1619 return false;
1620
1621 Register Arg = getRegForValue(ArgValue);
1622 if (!Arg)
1623 return false;
1624
1625 Args.push_back(ArgValue);
1626 ArgRegs.push_back(Arg);
1627 ArgVTs.push_back(ArgVT);
1628 ArgFlags.push_back(Flags);
1629 }
1630
1631 // Process the arguments.
1633 unsigned NumBytes;
1634
1635 if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1636 RegArgs, CC, NumBytes, IsVarArg))
1637 return false;
1638
1640 // FIXME: No handling for function pointers yet. This requires
1641 // implementing the function descriptor (OPD) setup.
1642 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1643 if (!GV) {
1644 // patchpoints are a special case; they always dispatch to a pointer value.
1645 // However, we don't actually want to generate the indirect call sequence
1646 // here (that will be generated, as necessary, during asm printing), and
1647 // the call we generate here will be erased by FastISel::selectPatchpoint,
1648 // so don't try very hard...
1649 if (CLI.IsPatchPoint)
1650 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
1651 else
1652 return false;
1653 } else {
1654 // Build direct call with NOP for TOC restore.
1655 // FIXME: We can and should optimize away the NOP for local calls.
1656 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1657 TII.get(PPC::BL8_NOP));
1658 // Add callee.
1659 MIB.addGlobalAddress(GV);
1660 }
1661
1662 // Add implicit physical register uses to the call.
1663 for (unsigned Reg : RegArgs)
1664 MIB.addReg(Reg, RegState::Implicit);
1665
1666 // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1667 // into the call.
1668 PPCFuncInfo->setUsesTOCBasePtr();
1669 MIB.addReg(PPC::X2, RegState::Implicit);
1670
1671 // Add a register mask with the call-preserved registers. Proper
1672 // defs for return values will be added by setPhysRegsDeadExcept().
1673 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1674
1675 CLI.Call = MIB;
1676
1677 // Finish off the call including any return values.
1678 return finishCall(RetVT, CLI, NumBytes);
1679}
1680
1681// Attempt to fast-select a return instruction.
1682bool PPCFastISel::SelectRet(const Instruction *I) {
1683
1684 if (!FuncInfo.CanLowerReturn)
1685 return false;
1686
1687 const ReturnInst *Ret = cast<ReturnInst>(I);
1688 const Function &F = *I->getParent()->getParent();
1689
1690 // Build a list of return value registers.
1692 CallingConv::ID CC = F.getCallingConv();
1693
1694 if (Ret->getNumOperands() > 0) {
1696 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1697
1698 // Analyze operands of the call, assigning locations to each operand.
1700 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1701 CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1702 const Value *RV = Ret->getOperand(0);
1703
1704 // FIXME: Only one output register for now.
1705 if (ValLocs.size() > 1)
1706 return false;
1707
1708 // Special case for returning a constant integer of any size - materialize
1709 // the constant as an i64 and copy it to the return register.
1710 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
1711 CCValAssign &VA = ValLocs[0];
1712
1713 Register RetReg = VA.getLocReg();
1714 // We still need to worry about properly extending the sign. For example,
1715 // we could have only a single bit or a constant that needs zero
1716 // extension rather than sign extension. Make sure we pass the return
1717 // value extension property to integer materialization.
1718 Register SrcReg =
1719 PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1720
1721 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1722 TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1723
1724 RetRegs.push_back(RetReg);
1725
1726 } else {
1727 Register Reg = getRegForValue(RV);
1728
1729 if (!Reg)
1730 return false;
1731
1732 // Copy the result values into the output registers.
1733 for (unsigned i = 0; i < ValLocs.size(); ++i) {
1734
1735 CCValAssign &VA = ValLocs[i];
1736 assert(VA.isRegLoc() && "Can only return in registers!");
1737 RetRegs.push_back(VA.getLocReg());
1738 Register SrcReg = Reg + VA.getValNo();
1739
1740 EVT RVEVT = TLI.getValueType(DL, RV->getType());
1741 if (!RVEVT.isSimple())
1742 return false;
1743 MVT RVVT = RVEVT.getSimpleVT();
1744 MVT DestVT = VA.getLocVT();
1745
1746 if (RVVT != DestVT && RVVT != MVT::i8 &&
1747 RVVT != MVT::i16 && RVVT != MVT::i32)
1748 return false;
1749
1750 if (RVVT != DestVT) {
1751 switch (VA.getLocInfo()) {
1752 default:
1753 llvm_unreachable("Unknown loc info!");
1754 case CCValAssign::Full:
1755 llvm_unreachable("Full value assign but types don't match?");
1756 case CCValAssign::AExt:
1757 case CCValAssign::ZExt: {
1758 const TargetRegisterClass *RC =
1759 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1760 Register TmpReg = createResultReg(RC);
1761 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1762 return false;
1763 SrcReg = TmpReg;
1764 break;
1765 }
1766 case CCValAssign::SExt: {
1767 const TargetRegisterClass *RC =
1768 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1769 Register TmpReg = createResultReg(RC);
1770 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1771 return false;
1772 SrcReg = TmpReg;
1773 break;
1774 }
1775 }
1776 }
1777
1778 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1779 TII.get(TargetOpcode::COPY), RetRegs[i])
1780 .addReg(SrcReg);
1781 }
1782 }
1783 }
1784
1785 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1786 TII.get(PPC::BLR8));
1787
1788 for (Register Reg : RetRegs)
1789 MIB.addReg(Reg, RegState::Implicit);
1790
1791 return true;
1792}
1793
1794// Attempt to emit an integer extend of SrcReg into DestReg. Both
1795// signed and zero extensions are supported. Return false if we
1796// can't handle it.
1797bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
1798 Register DestReg, bool IsZExt) {
1799 if (DestVT != MVT::i32 && DestVT != MVT::i64)
1800 return false;
1801 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1802 return false;
1803
1804 // Signed extensions use EXTSB, EXTSH, EXTSW.
1805 if (!IsZExt) {
1806 unsigned Opc;
1807 if (SrcVT == MVT::i8)
1808 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1809 else if (SrcVT == MVT::i16)
1810 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1811 else {
1812 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1813 Opc = PPC::EXTSW_32_64;
1814 }
1815 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1816 .addReg(SrcReg);
1817
1818 // Unsigned 32-bit extensions use RLWINM.
1819 } else if (DestVT == MVT::i32) {
1820 unsigned MB;
1821 if (SrcVT == MVT::i8)
1822 MB = 24;
1823 else {
1824 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1825 MB = 16;
1826 }
1827 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
1828 DestReg)
1829 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1830
1831 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1832 } else {
1833 unsigned MB;
1834 if (SrcVT == MVT::i8)
1835 MB = 56;
1836 else if (SrcVT == MVT::i16)
1837 MB = 48;
1838 else
1839 MB = 32;
1840 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1841 TII.get(PPC::RLDICL_32_64), DestReg)
1842 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1843 }
1844
1845 return true;
1846}
1847
1848// Attempt to fast-select an indirect branch instruction.
1849bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1850 Register AddrReg = getRegForValue(I->getOperand(0));
1851 if (!AddrReg)
1852 return false;
1853
1854 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
1855 .addReg(AddrReg);
1856 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));
1857
1858 const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1859 for (const BasicBlock *SuccBB : IB->successors())
1860 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));
1861
1862 return true;
1863}
1864
1865// Attempt to fast-select an integer truncate instruction.
1866bool PPCFastISel::SelectTrunc(const Instruction *I) {
1867 Value *Src = I->getOperand(0);
1868 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1869 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1870
1871 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1872 return false;
1873
1874 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1875 return false;
1876
1877 Register SrcReg = getRegForValue(Src);
1878 if (!SrcReg)
1879 return false;
1880
1881 // The only interesting case is when we need to switch register classes.
1882 if (SrcVT == MVT::i64)
1883 SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
1884
1885 updateValueMap(I, SrcReg);
1886 return true;
1887}
1888
1889// Attempt to fast-select an integer extend instruction.
1890bool PPCFastISel::SelectIntExt(const Instruction *I) {
1891 Type *DestTy = I->getType();
1892 Value *Src = I->getOperand(0);
1893 Type *SrcTy = Src->getType();
1894
1895 bool IsZExt = isa<ZExtInst>(I);
1896 Register SrcReg = getRegForValue(Src);
1897 if (!SrcReg) return false;
1898
1899 EVT SrcEVT, DestEVT;
1900 SrcEVT = TLI.getValueType(DL, SrcTy, true);
1901 DestEVT = TLI.getValueType(DL, DestTy, true);
1902 if (!SrcEVT.isSimple())
1903 return false;
1904 if (!DestEVT.isSimple())
1905 return false;
1906
1907 MVT SrcVT = SrcEVT.getSimpleVT();
1908 MVT DestVT = DestEVT.getSimpleVT();
1909
1910 // If we know the register class needed for the result of this
1911 // instruction, use it. Otherwise pick the register class of the
1912 // correct size that does not contain X0/R0, since we don't know
1913 // whether downstream uses permit that assignment.
1914 Register AssignedReg = FuncInfo.ValueMap[I];
1915 const TargetRegisterClass *RC =
1916 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1917 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1918 &PPC::GPRC_and_GPRC_NOR0RegClass));
1919 Register ResultReg = createResultReg(RC);
1920
1921 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1922 return false;
1923
1924 updateValueMap(I, ResultReg);
1925 return true;
1926}
1927
1928// Attempt to fast-select an instruction that wasn't handled by
1929// the table-generated machinery.
1930bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1931
1932 switch (I->getOpcode()) {
1933 case Instruction::Load:
1934 return SelectLoad(I);
1935 case Instruction::Store:
1936 return SelectStore(I);
1937 case Instruction::Br:
1938 return SelectBranch(I);
1939 case Instruction::IndirectBr:
1940 return SelectIndirectBr(I);
1941 case Instruction::FPExt:
1942 return SelectFPExt(I);
1943 case Instruction::FPTrunc:
1944 return SelectFPTrunc(I);
1945 case Instruction::SIToFP:
1946 return SelectIToFP(I, /*IsSigned*/ true);
1947 case Instruction::UIToFP:
1948 return SelectIToFP(I, /*IsSigned*/ false);
1949 case Instruction::FPToSI:
1950 return SelectFPToI(I, /*IsSigned*/ true);
1951 case Instruction::FPToUI:
1952 return SelectFPToI(I, /*IsSigned*/ false);
1953 case Instruction::Add:
1954 return SelectBinaryIntOp(I, ISD::ADD);
1955 case Instruction::Or:
1956 return SelectBinaryIntOp(I, ISD::OR);
1957 case Instruction::Sub:
1958 return SelectBinaryIntOp(I, ISD::SUB);
1959 case Instruction::Ret:
1960 return SelectRet(I);
1961 case Instruction::Trunc:
1962 return SelectTrunc(I);
1963 case Instruction::ZExt:
1964 case Instruction::SExt:
1965 return SelectIntExt(I);
1966 // Here add other flavors of Instruction::XXX that automated
1967 // cases don't catch. For example, switches are terminators
1968 // that aren't yet handled.
1969 default:
1970 break;
1971 }
1972 return false;
1973}
1974
// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle constant pool.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  // No plans to handle long double here.
  if (VT != MVT::f32 && VT != MVT::f64)
    return Register();

  // All FP constants are loaded from the constant pool.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  const bool HasSPE = Subtarget->hasSPE();
  const TargetRegisterClass *RC;
  // SPE keeps FP values in GPR/SPE registers; classic FP uses F4RC/F8RC.
  if (HasSPE)
    RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
  else
    RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);

  Register DestReg = createResultReg(RC);
  CodeModel::Model CModel = TM.getCodeModel();

  // Memory operand describing the 4- or 8-byte constant-pool load.
  // NOTE(review): upstream passes MachinePointerInfo::getConstantPool(
  // *FuncInfo.MF) as the first argument here; that line appears to be
  // missing from this copy — confirm against the original source.
  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
      MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);

  unsigned Opc;

  // Pick the load opcode matching the register class chosen above.
  if (HasSPE)
    Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
  else
    Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);

  Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);

  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
  if (CModel == CodeModel::Small) {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
            TmpReg)
      .addConstantPoolIndex(Idx).addReg(PPC::X2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
      .addImm(0).addReg(TmpReg).addMemOperand(MMO);
  } else {
    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
            TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
    // But for large code model, we must generate a LDtocL followed
    // by the LF[SD].
    if (CModel == CodeModel::Large) {
      Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
              TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
          .addImm(0)
          .addReg(TmpReg2);
    } else
      // NOTE(review): upstream adds .addConstantPoolIndex(Idx, 0,
      // PPC::MO_TOC_LO) before the register operand here; that line
      // appears to be missing from this copy — confirm against the
      // original source.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
          .addReg(TmpReg)
          .addMemOperand(MMO);
  }

  return DestReg;
}
2042
2043// Materialize the address of a global value into a register, and return
2044// the register number (or zero if we failed to handle it).
2045Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
2046 // If this is a PC-Rel function, let SDISel handle GV materialization.
2047 if (Subtarget->isUsingPCRelativeCalls())
2048 return Register();
2049
2050 assert(VT == MVT::i64 && "Non-address!");
2051 const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
2052 Register DestReg = createResultReg(RC);
2053
2054 // Global values may be plain old object addresses, TLS object
2055 // addresses, constant pool entries, or jump tables. How we generate
2056 // code for these may depend on small, medium, or large code model.
2057 CodeModel::Model CModel = TM.getCodeModel();
2058
2059 // FIXME: Jump tables are not yet required because fast-isel doesn't
2060 // handle switches; if that changes, we need them as well. For now,
2061 // what follows assumes everything's a generic (or TLS) global address.
2062
2063 // FIXME: We don't yet handle the complexity of TLS.
2064 if (GV->isThreadLocal())
2065 return Register();
2066
2067 PPCFuncInfo->setUsesTOCBasePtr();
2068 bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
2069 isa<GlobalVariable>(GV) &&
2070 cast<GlobalVariable>(GV)->hasAttribute("toc-data");
2071
2072 // For small code model, generate a simple TOC load.
2073 if (CModel == CodeModel::Small) {
2074 auto MIB = BuildMI(
2075 *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2076 IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
2077 if (IsAIXTocData)
2078 MIB.addReg(PPC::X2).addGlobalAddress(GV);
2079 else
2080 MIB.addGlobalAddress(GV).addReg(PPC::X2);
2081 } else {
2082 // If the address is an externally defined symbol, a symbol with common
2083 // or externally available linkage, a non-local function address, or a
2084 // jump table address (not yet needed), or if we are generating code
2085 // for large code model, we generate:
2086 // LDtocL(GV, ADDIStocHA8(%x2, GV))
2087 // Otherwise we generate:
2088 // ADDItocL8(ADDIStocHA8(%x2, GV), GV)
2089 // Either way, start with the ADDIStocHA8:
2090 Register HighPartReg = createResultReg(RC);
2091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2092 HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
2093
2094 if (Subtarget->isGVIndirectSymbol(GV)) {
2095 assert(!IsAIXTocData && "TOC data should always be direct.");
2096 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2097 DestReg).addGlobalAddress(GV).addReg(HighPartReg);
2098 } else {
2099 // Otherwise generate the ADDItocL8.
2100 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
2101 DestReg)
2102 .addReg(HighPartReg)
2103 .addGlobalAddress(GV);
2104 }
2105 }
2106
2107 return DestReg;
2108}
2109
2110// Materialize a 32-bit integer constant into a register, and return
2111// the register number (or zero if we failed to handle it).
2112Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2113 const TargetRegisterClass *RC) {
2114 unsigned Lo = Imm & 0xFFFF;
2115 unsigned Hi = (Imm >> 16) & 0xFFFF;
2116
2117 Register ResultReg = createResultReg(RC);
2118 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2119
2120 if (isInt<16>(Imm))
2121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2122 TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2123 .addImm(Imm);
2124 else if (Lo) {
2125 // Both Lo and Hi have nonzero bits.
2126 Register TmpReg = createResultReg(RC);
2127 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2128 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2129 .addImm(Hi);
2130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2131 TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2132 .addReg(TmpReg).addImm(Lo);
2133 } else
2134 // Just Hi bits.
2135 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2136 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2137 .addImm(Hi);
2138
2139 return ResultReg;
2140}
2141
2142// Materialize a 64-bit integer constant into a register, and return
2143// the register number (or zero if we failed to handle it).
2144Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
2145 const TargetRegisterClass *RC) {
2146 unsigned Remainder = 0;
2147 unsigned Shift = 0;
2148
2149 // If the value doesn't fit in 32 bits, see if we can shift it
2150 // so that it fits in 32 bits.
2151 if (!isInt<32>(Imm)) {
2152 Shift = llvm::countr_zero<uint64_t>(Imm);
2153 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
2154
2155 if (isInt<32>(ImmSh))
2156 Imm = ImmSh;
2157 else {
2158 Remainder = Imm;
2159 Shift = 32;
2160 Imm >>= 32;
2161 }
2162 }
2163
2164 // Handle the high-order 32 bits (if shifted) or the whole 32 bits
2165 // (if not shifted).
2166 Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
2167 if (!Shift)
2168 return TmpReg1;
2169
2170 // If upper 32 bits were not zero, we've built them and need to shift
2171 // them into place.
2172 Register TmpReg2;
2173 if (Imm) {
2174 TmpReg2 = createResultReg(RC);
2175 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
2176 TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
2177 } else
2178 TmpReg2 = TmpReg1;
2179
2180 Register TmpReg3;
2181 unsigned Hi, Lo;
2182 if ((Hi = (Remainder >> 16) & 0xFFFF)) {
2183 TmpReg3 = createResultReg(RC);
2184 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
2185 TmpReg3).addReg(TmpReg2).addImm(Hi);
2186 } else
2187 TmpReg3 = TmpReg2;
2188
2189 if ((Lo = Remainder & 0xFFFF)) {
2190 Register ResultReg = createResultReg(RC);
2191 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
2192 ResultReg).addReg(TmpReg3).addImm(Lo);
2193 return ResultReg;
2194 }
2195
2196 return TmpReg3;
2197}
2198
2199// Materialize an integer constant into a register, and return
2200// the register number (or zero if we failed to handle it).
2201Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2202 bool UseSExt) {
2203 // If we're using CR bit registers for i1 values, handle that as a special
2204 // case first.
2205 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2206 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2207 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2208 TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2209 return ImmReg;
2210 }
2211
2212 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2213 VT != MVT::i1)
2214 return Register();
2215
2216 const TargetRegisterClass *RC =
2217 ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2218 int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2219
2220 // If the constant is in range, use a load-immediate.
2221 // Since LI will sign extend the constant we need to make sure that for
2222 // our zeroext constants that the sign extended constant fits into 16-bits -
2223 // a range of 0..0x7fff.
2224 if (isInt<16>(Imm)) {
2225 unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2226 Register ImmReg = createResultReg(RC);
2227 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ImmReg)
2228 .addImm(Imm);
2229 return ImmReg;
2230 }
2231
2232 // Construct the constant piecewise.
2233 if (VT == MVT::i64)
2234 return PPCMaterialize64BitInt(Imm, RC);
2235 else if (VT == MVT::i32)
2236 return PPCMaterialize32BitInt(Imm, RC);
2237
2238 return Register();
2239}
2240
2241// Materialize a constant into a register, and return the register
2242// number (or zero if we failed to handle it).
2243Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
2244 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2245
2246 // Only handle simple types.
2247 if (!CEVT.isSimple())
2248 return Register();
2249 MVT VT = CEVT.getSimpleVT();
2250
2251 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2252 return PPCMaterializeFP(CFP, VT);
2253 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2254 return PPCMaterializeGV(GV, VT);
2255 else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2256 // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2257 // assumes that constant PHI operands will be zero extended, and failure to
2258 // match that assumption will cause problems if we sign extend here but
2259 // some user of a PHI is in a block for which we fall back to full SDAG
2260 // instruction selection.
2261 return PPCMaterializeInt(CI, VT, false);
2262
2263 return Register();
2264}
2265
2266// Materialize the address created by an alloca into a register, and
2267// return the register number (or zero if we failed to handle it).
2268Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2270 FuncInfo.StaticAllocaMap.find(AI);
2271
2272 // Don't handle dynamic allocas.
2273 if (SI == FuncInfo.StaticAllocaMap.end())
2274 return Register();
2275
2276 MVT VT;
2277 if (!isLoadTypeLegal(AI->getType(), VT))
2278 return Register();
2279
2280 if (SI != FuncInfo.StaticAllocaMap.end()) {
2281 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2282 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
2283 ResultReg).addFrameIndex(SI->second).addImm(0);
2284 return ResultReg;
2285 }
2286
2287 return Register();
2288}
2289
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
  default:
    return false;

  case PPC::RLDICL:
  case PPC::RLDICL_32_64: {
    // 64-bit clear-left rotate used as zero-extension; operand 3 (MB)
    // gives the number of cleared high-order bits, which must cover at
    // least the bits above the loaded width.
    IsZExt = true;
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 56) ||
        (VT == MVT::i16 && MB <= 48) ||
        (VT == MVT::i32 && MB <= 32))
      break;
    return false;
  }

  case PPC::RLWINM:
  case PPC::RLWINM8: {
    // 32-bit rotate-and-mask used as zero-extension; same MB check for
    // the narrower widths.
    IsZExt = true;
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 24) ||
        (VT == MVT::i16 && MB <= 16))
      break;
    return false;
  }

  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
    /* There is no sign-extending load-byte instruction. */
    return false;

  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64: {
    // Sign-extend-halfword folds only over i8/i16 loads.
    if (VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }

  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64: {
    // Sign-extend-word folds over any sub-word or word load.
    if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  // Re-emit the load directly into the extend's destination register,
  // making the separate extend instruction dead.
  Register ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
                   Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;

  // NOTE(review): upstream declares `MachineBasicBlock::iterator I(MI);`
  // immediately before this call; that line appears to be missing from
  // this copy — confirm against the original source.
  removeDeadCode(I, std::next(I));
  return true;
}
2370
2371// Attempt to lower call arguments in a faster way than done by
2372// the selection DAG code.
2373bool PPCFastISel::fastLowerArguments() {
2374 // Defer to normal argument lowering for now. It's reasonably
2375 // efficient. Consider doing something like ARM to handle the
2376 // case where all args fit in registers, no varargs, no float
2377 // or vector args.
2378 return false;
2379}
2380
2381// Handle materializing integer constants into a register. This is not
2382// automatically generated for PowerPC, so must be explicitly created here.
2383Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2384
2385 if (Opc != ISD::Constant)
2386 return Register();
2387
2388 // If we're using CR bit registers for i1 values, handle that as a special
2389 // case first.
2390 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2391 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2392 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2393 TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2394 return ImmReg;
2395 }
2396
2397 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2398 VT != MVT::i1)
2399 return Register();
2400
2401 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2402 &PPC::GPRCRegClass);
2403 if (VT == MVT::i64)
2404 return PPCMaterialize64BitInt(Imm, RC);
2405 else
2406 return PPCMaterialize32BitInt(Imm, RC);
2407}
2408
2409// Override for ADDI and ADDI8 to set the correct register class
2410// on RHS operand 0. The automatic infrastructure naively assumes
2411// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2412// for these cases. At the moment, none of the other automatically
2413// generated RI instructions require special treatment. However, once
2414// SelectSelect is implemented, "isel" requires similar handling.
2415//
2416// Also be conservative about the output register class. Avoid
2417// assigning R0 or X0 to the output register for GPRC and G8RC
2418// register classes, as any such result could be used in ADDI, etc.,
2419// where those regs have another meaning.
2420Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2421 const TargetRegisterClass *RC,
2422 Register Op0, uint64_t Imm) {
2423 if (MachineInstOpcode == PPC::ADDI)
2424 MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2425 else if (MachineInstOpcode == PPC::ADDI8)
2426 MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2427
2428 const TargetRegisterClass *UseRC =
2429 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2430 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2431
2432 return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Imm);
2433}
2434
2435// Override for instructions with one register operand to avoid use of
2436// R0/X0. The automatic infrastructure isn't aware of the context so
2437// we must be conservative.
2438Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2439 const TargetRegisterClass *RC,
2440 Register Op0) {
2441 const TargetRegisterClass *UseRC =
2442 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2443 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2444
2445 return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0);
2446}
2447
2448// Override for instructions with two register operands to avoid use
2449// of R0/X0. The automatic infrastructure isn't aware of the context
2450// so we must be conservative.
2451Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2452 const TargetRegisterClass *RC,
2453 Register Op0, Register Op1) {
2454 const TargetRegisterClass *UseRC =
2455 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2456 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2457
2458 return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op1);
2459}
2460
namespace llvm {
  // Create the fast instruction selector for PowerPC64 ELF.
  // NOTE(review): the function signature line (`FastISel *PPC::createFastISel(
  // FunctionLoweringInfo &FuncInfo,`) appears to have been lost from this
  // copy — confirm against the original source.
                                const TargetLibraryInfo *LibInfo) {
    // Only available on 64-bit for now.
    const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
    if (Subtarget.isPPC64())
      return new PPCFastISel(FuncInfo, LibInfo);
    return nullptr;
  }
}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
static std::optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
static constexpr MCPhysReg FPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
This file describes how to lower LLVM code to machine code.
support::ulittle16_t & Lo
Definition: aarch32.cpp:205
support::ulittle16_t & Hi
Definition: aarch32.cpp:204
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
an instruction to allocate memory on the stack
Definition: Instructions.h:64
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:101
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:681
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:695
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:684
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:693
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:692
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:690
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:687
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:694
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:704
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:691
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:680
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:688
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1120
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:214
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:131
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:169
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Register fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Definition: FastISel.cpp:2062
Register fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, Register Op1)
Emit a MachineInstr with two register operands and a result register in the given register class.
Definition: FastISel.cpp:2011
virtual Register fastMaterializeConstant(const Constant *C)
Emit a constant in a register using target-specific logic, such as constant pool loads.
Definition: FastISel.h:473
virtual Register fastEmit_i(MVT VT, MVT RetVT, unsigned Opcode, uint64_t Imm)
This method is called by target-independent code to request that an instruction with the given type,...
Definition: FastISel.cpp:1907
virtual bool tryToFoldLoadIntoMI(MachineInstr *, unsigned, const LoadInst *)
The specified machine instr operand is a vreg, and that vreg is being provided by the specified load ...
Definition: FastISel.h:300
virtual bool fastLowerCall(CallLoweringInfo &CLI)
This method is called by target-independent code to do target- specific call lowering.
Definition: FastISel.cpp:1890
virtual Register fastMaterializeAlloca(const AllocaInst *C)
Emit an alloca address in a register using target-specific logic.
Definition: FastISel.h:478
Register createResultReg(const TargetRegisterClass *RC)
Definition: FastISel.cpp:1960
virtual bool fastLowerArguments()
This method is called by target-independent code to do target- specific argument lowering.
Definition: FastISel.cpp:1888
const TargetInstrInfo & TII
Definition: FastISel.h:211
Register fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0)
Emit a MachineInstr with one register operand and a result register in the given register class.
Definition: FastISel.cpp:1990
virtual bool fastSelectInstruction(const Instruction *I)=0
This method is called by target-independent code when the normal FastISel process fails to select an ...
const TargetLowering & TLI
Definition: FastISel.h:212
const TargetMachine & TM
Definition: FastISel.h:209
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
MachineBasicBlock::iterator InsertPt
MBB - The current insert position inside the current block.
MachineBasicBlock * MBB
MBB - The current block.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:265
Indirect Branch Instruction.
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
An instruction for reading from memory.
Definition: Instructions.h:180
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
Machine Value Type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:72
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:151
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Return a value (possibly void), from a function.
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void reserve(size_type N)
Definition: SmallVector.h:664
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:626
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:657
Class to represent struct types.
Definition: DerivedTypes.h:218
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
unsigned getID() const
Return the register class ID number.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
Value * getOperand(unsigned i) const
Definition: User.h:232
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition: ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ MO_TOC_LO
Definition: PPC.h:185
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
gep_type_iterator gep_type_begin(const User *GEP)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.