1//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the AArch64-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// AArch64GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
82 class Address {
83 public:
84 using BaseKind = enum {
85 RegBase,
86 FrameIndexBase
87 };
88
89 private:
90    BaseKind Kind = RegBase;
91    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
92    union {
93 unsigned Reg;
94 int FI;
95 } Base;
96 Register OffsetReg;
97 unsigned Shift = 0;
98 int64_t Offset = 0;
99 const GlobalValue *GV = nullptr;
100
101 public:
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
108 bool isRegBase() const { return Kind == RegBase; }
109 bool isFIBase() const { return Kind == FrameIndexBase; }
110
111 void setReg(Register Reg) {
112 assert(isRegBase() && "Invalid base register access!");
113 Base.Reg = Reg.id();
114 }
115
116 Register getReg() const {
117 assert(isRegBase() && "Invalid base register access!");
118 return Base.Reg;
119 }
120
121 void setOffsetReg(Register Reg) { OffsetReg = Reg; }
122
123 Register getOffsetReg() const { return OffsetReg; }
124
125 void setFI(unsigned FI) {
126 assert(isFIBase() && "Invalid base frame index access!");
127 Base.FI = FI;
128 }
129
130 unsigned getFI() const {
131 assert(isFIBase() && "Invalid base frame index access!");
132 return Base.FI;
133 }
134
135 void setOffset(int64_t O) { Offset = O; }
136 int64_t getOffset() { return Offset; }
137 void setShift(unsigned S) { Shift = S; }
138 unsigned getShift() { return Shift; }
139
140 void setGlobalValue(const GlobalValue *G) { GV = G; }
141 const GlobalValue *getGlobalValue() { return GV; }
142 };
143
144 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
145 /// make the right decision when generating code for different targets.
146 const AArch64Subtarget *Subtarget;
147 LLVMContext *Context;
148
149 bool fastLowerArguments() override;
150 bool fastLowerCall(CallLoweringInfo &CLI) override;
151 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
152
153private:
154 // Selection routines.
155 bool selectAddSub(const Instruction *I);
156 bool selectLogicalOp(const Instruction *I);
157 bool selectLoad(const Instruction *I);
158 bool selectStore(const Instruction *I);
159 bool selectBranch(const Instruction *I);
160 bool selectIndirectBr(const Instruction *I);
161 bool selectCmp(const Instruction *I);
162 bool selectSelect(const Instruction *I);
163 bool selectFPExt(const Instruction *I);
164 bool selectFPTrunc(const Instruction *I);
165 bool selectFPToInt(const Instruction *I, bool Signed);
166 bool selectIntToFP(const Instruction *I, bool Signed);
167 bool selectRem(const Instruction *I, unsigned ISDOpcode);
168 bool selectRet(const Instruction *I);
169 bool selectTrunc(const Instruction *I);
170 bool selectIntExt(const Instruction *I);
171 bool selectMul(const Instruction *I);
172 bool selectShift(const Instruction *I);
173 bool selectBitCast(const Instruction *I);
174 bool selectFRem(const Instruction *I);
175 bool selectSDiv(const Instruction *I);
176 bool selectGetElementPtr(const Instruction *I);
177 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
178
179 // Utility helper routines.
180 bool isTypeLegal(Type *Ty, MVT &VT);
181 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
182 bool isValueAvailable(const Value *V) const;
183 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
184 bool computeCallAddress(const Value *V, Address &Addr);
185 bool simplifyAddress(Address &Addr, MVT VT);
186  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
187                            MachineMemOperand::Flags Flags,
188                            unsigned ScaleFactor, MachineMemOperand *MMO);
189 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
190 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
191 MaybeAlign Alignment);
192 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
193 const Value *Cond);
194 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
195 bool optimizeSelect(const SelectInst *SI);
196 Register getRegForGEPIndex(const Value *Idx);
197
198 // Emit helper routines.
199 Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
200 const Value *RHS, bool SetFlags = false,
201 bool WantResult = true, bool IsZExt = false);
202 Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
203 Register RHSReg, bool SetFlags = false,
204 bool WantResult = true);
205 Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
206 bool SetFlags = false, bool WantResult = true);
207 Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
208 Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
209 uint64_t ShiftImm, bool SetFlags = false,
210 bool WantResult = true);
211  Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
212                         Register RHSReg, AArch64_AM::ShiftExtendType ExtType,
213                         uint64_t ShiftImm, bool SetFlags = false,
214 bool WantResult = true);
215
216 // Emit functions.
217 bool emitCompareAndBranch(const BranchInst *BI);
218 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
219 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
220 bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
221 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
222 Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
223 MachineMemOperand *MMO = nullptr);
224 bool emitStore(MVT VT, Register SrcReg, Address Addr,
225 MachineMemOperand *MMO = nullptr);
226 bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
227 MachineMemOperand *MMO = nullptr);
228 Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
229 Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
230 Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
231 bool SetFlags = false, bool WantResult = true,
232 bool IsZExt = false);
233 Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
234 Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
235 bool SetFlags = false, bool WantResult = true,
236 bool IsZExt = false);
237 Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
238 bool WantResult = true);
239 Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
240 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
241 bool WantResult = true);
242 Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
243 const Value *RHS);
244 Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
245 uint64_t Imm);
246 Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
247 Register RHSReg, uint64_t ShiftImm);
248 Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
249 Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
250 Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
251 Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
252 Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
253 Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
254 bool IsZExt = true);
255 Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
256 Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
257 bool IsZExt = true);
258 Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
259 Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
260 bool IsZExt = false);
261
262 Register materializeInt(const ConstantInt *CI, MVT VT);
263 Register materializeFP(const ConstantFP *CFP, MVT VT);
264 Register materializeGV(const GlobalValue *GV);
265
266 // Call handling routines.
267private:
268 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
269 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
270 SmallVectorImpl<Type *> &OrigTys, unsigned &NumBytes);
271 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
272
273public:
274 // Backend specific FastISel code.
275 Register fastMaterializeAlloca(const AllocaInst *AI) override;
276 Register fastMaterializeConstant(const Constant *C) override;
277 Register fastMaterializeFloatZero(const ConstantFP *CF) override;
278
279 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
280 const TargetLibraryInfo *LibInfo)
281 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
282 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
283 Context = &FuncInfo.Fn->getContext();
284 }
285
286 bool fastSelectInstruction(const Instruction *I) override;
287
288#include "AArch64GenFastISel.inc"
289};
290
291} // end anonymous namespace
292
293/// Check if the sign-/zero-extend will be a noop.
294static bool isIntExtFree(const Instruction *I) {
295  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
296         "Unexpected integer extend instruction.");
297 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
298 "Unexpected value type.");
299 bool IsZExt = isa<ZExtInst>(I);
300
301 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
302 if (LI->hasOneUse())
303 return true;
304
305 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
306 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
307 return true;
308
309 return false;
310}
311
312/// Determine the implicit scale factor that is applied by a memory
313/// operation for a given value type.
314static unsigned getImplicitScaleFactor(MVT VT) {
315 switch (VT.SimpleTy) {
316 default:
317 return 0; // invalid
318 case MVT::i1: // fall-through
319 case MVT::i8:
320 return 1;
321 case MVT::i16:
322 return 2;
323 case MVT::i32: // fall-through
324 case MVT::f32:
325 return 4;
326 case MVT::i64: // fall-through
327 case MVT::f64:
328 return 8;
329 }
330}
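// For illustration (a rough example, not exhaustive): this scale factor is
// what makes the scaled, unsigned 12-bit offset forms reachable. An i32 load
// at byte offset 4092 can use LDRWui with imm 4092 / 4 = 1023, while a
// negative or misaligned offset has to fall back to the unscaled LDUR* forms
// and their signed 9-bit byte offset (see simplifyAddress and emitLoad below).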
331
332CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
333 if (CC == CallingConv::GHC)
334 return CC_AArch64_GHC;
335  if (CC == CallingConv::CFGuard_Check)
336    return CC_AArch64_Win64_CFGuard_Check;
337  if (Subtarget->isTargetDarwin())
338    return CC_AArch64_DarwinPCS;
339  if (Subtarget->isTargetWindows())
340 return CC_AArch64_Win64PCS;
341 return CC_AArch64_AAPCS;
342}
343
344Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
345 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
346 "Alloca should always return a pointer.");
347
348 // Don't handle dynamic allocas.
349 auto SI = FuncInfo.StaticAllocaMap.find(AI);
350 if (SI == FuncInfo.StaticAllocaMap.end())
351 return Register();
352
353 if (SI != FuncInfo.StaticAllocaMap.end()) {
354 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
355 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
356 ResultReg)
357 .addFrameIndex(SI->second)
358 .addImm(0)
359 .addImm(0);
360 return ResultReg;
361 }
362
363 return Register();
364}
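// For a static alloca this emits "ADDXri <result>, <frame-index>, 0, 0"; after
// frame-index elimination that becomes roughly "add xN, sp, #<slot offset>"
// (a sketch; the exact base register and offset depend on frame lowering).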
365
366Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
367 if (VT > MVT::i64)
368 return Register();
369
370 if (!CI->isZero())
371 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
372
373 // Create a copy from the zero register to materialize a "0" value.
374 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
375 : &AArch64::GPR32RegClass;
376 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
377 Register ResultReg = createResultReg(RC);
378 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
379 ResultReg).addReg(ZeroReg, getKillRegState(true));
380 return ResultReg;
381}
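// Example: materializing i64 0 becomes a plain COPY from XZR (WZR for i32).
// Non-zero constants go through the tablegen-generated fastEmit_i patterns,
// which typically expand to MOVZ/MOVN(/MOVK) sequences; that expansion is the
// usual case, not a guarantee for every value.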
382
383Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
384 // Positive zero (+0.0) has to be materialized with a fmov from the zero
385 // register, because the immediate version of fmov cannot encode zero.
386 if (CFP->isNullValue())
387 return fastMaterializeFloatZero(CFP);
388
389 if (VT != MVT::f32 && VT != MVT::f64)
390 return Register();
391
392 const APFloat Val = CFP->getValueAPF();
393 bool Is64Bit = (VT == MVT::f64);
394 // This checks to see if we can use FMOV instructions to materialize
395 // a constant, otherwise we have to materialize via the constant pool.
396 int Imm =
397 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
398 if (Imm != -1) {
399 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
400 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
401 }
402
403 // For the large code model materialize the FP constant in code.
404 if (TM.getCodeModel() == CodeModel::Large) {
405 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
406 const TargetRegisterClass *RC = Is64Bit ?
407 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
408
409 Register TmpReg = createResultReg(RC);
410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
411 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
412
413 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
414 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
415 TII.get(TargetOpcode::COPY), ResultReg)
416 .addReg(TmpReg, getKillRegState(true));
417
418 return ResultReg;
419 }
420
421 // Materialize via constant pool. MachineConstantPool wants an explicit
422 // alignment.
423 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
424
425 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
426 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
427  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
428          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
429
430 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
431 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
432 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
433      .addReg(ADRPReg)
434      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
435  return ResultReg;
436}
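// Example: +2.0 fits the 8-bit FMOV immediate encoding and becomes a single
// FMOVDi/FMOVSi, whereas a value such as 0.1 does not, so outside the large
// code model it is loaded from the constant pool via ADRP + LDRDui/LDRSui
// using a page/:lo12: pair (rough sketch of the emitted sequence).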
437
438Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
439 // We can't handle thread-local variables quickly yet.
440 if (GV->isThreadLocal())
441 return Register();
442
443 // MachO still uses GOT for large code-model accesses, but ELF requires
444 // movz/movk sequences, which FastISel doesn't handle yet.
445 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
446 return Register();
447
448 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
449 return Register();
450
451 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
452
453 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
454 if (!DestEVT.isSimple())
455 return Register();
456
457 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
458 Register ResultReg;
459
460 if (OpFlags & AArch64II::MO_GOT) {
461 // ADRP + LDRX
462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
463 ADRPReg)
464 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
465
466 unsigned LdrOpc;
467 if (Subtarget->isTargetILP32()) {
468 ResultReg = createResultReg(&AArch64::GPR32RegClass);
469 LdrOpc = AArch64::LDRWui;
470 } else {
471 ResultReg = createResultReg(&AArch64::GPR64RegClass);
472 LdrOpc = AArch64::LDRXui;
473 }
474 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
475 ResultReg)
476        .addReg(ADRPReg)
477        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
478                                     AArch64II::MO_NC | OpFlags);
479 if (!Subtarget->isTargetILP32())
480 return ResultReg;
481
482 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
483 // so we must extend the result on ILP32.
484 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
486 TII.get(TargetOpcode::SUBREG_TO_REG))
487 .addDef(Result64)
488 .addImm(0)
489 .addReg(ResultReg, RegState::Kill)
490 .addImm(AArch64::sub_32);
491 return Result64;
492 } else {
493 // ADRP + ADDX
494 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
495 ADRPReg)
496 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
497
498 if (OpFlags & AArch64II::MO_TAGGED) {
499 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
500 // We do so by creating a MOVK that sets bits 48-63 of the register to
501 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
502 // the small code model so we can assume a binary size of <= 4GB, which
503 // makes the untagged PC relative offset positive. The binary must also be
504 // loaded into address range [0, 2^48). Both of these properties need to
505 // be ensured at runtime when using tagged addresses.
506 //
507 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
508 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
509 // are not exactly 1:1 with FastISel so we cannot easily abstract this
510 // out. At some point, it would be nice to find a way to not have this
511 // duplicate code.
512 Register DstReg = createResultReg(&AArch64::GPR64commonRegClass);
513 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
514 DstReg)
515 .addReg(ADRPReg)
516          .addGlobalAddress(GV, /*Offset=*/0x100000000,
517                            AArch64II::MO_PREL | AArch64II::MO_G3)
518          .addImm(48);
519 ADRPReg = DstReg;
520 }
521
522 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
524 ResultReg)
525 .addReg(ADRPReg)
526        .addGlobalAddress(GV, 0,
527                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
528        .addImm(0);
529 }
530 return ResultReg;
531}
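// Rough shape of the two sequences produced here (assuming the small code
// model):
//   GOT access:    adrp xN, :got:sym      ;  ldr xN, [xN, :got_lo12:sym]
//   direct access: adrp xN, sym           ;  add xN, xN, :lo12:sym
// For MO_TAGGED globals an extra MOVK sets the tag bits before the ADD, as
// described above.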
532
533Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
534 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
535
536 // Only handle simple types.
537 if (!CEVT.isSimple())
538 return Register();
539 MVT VT = CEVT.getSimpleVT();
540 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
541  // 'null' pointers need somewhat special treatment.
542  if (isa<ConstantPointerNull>(C)) {
543    assert(VT == MVT::i64 && "Expected 64-bit pointers");
544 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
545 }
546
547 if (const auto *CI = dyn_cast<ConstantInt>(C))
548 return materializeInt(CI, VT);
549 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
550 return materializeFP(CFP, VT);
551 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
552 return materializeGV(GV);
553
554 return Register();
555}
556
557Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
558 assert(CFP->isNullValue() &&
559 "Floating-point constant is not a positive zero.");
560 MVT VT;
561 if (!isTypeLegal(CFP->getType(), VT))
562 return Register();
563
564 if (VT != MVT::f32 && VT != MVT::f64)
565 return Register();
566
567 bool Is64Bit = (VT == MVT::f64);
568 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
569 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
570 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
571}
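// That is, +0.0 is materialized as "fmov d0, xzr" (f64) or "fmov s0, wzr"
// (f32) instead of going through the constant pool.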
572
573/// Check if the multiply is by a power-of-2 constant.
574static bool isMulPowOf2(const Value *I) {
575 if (const auto *MI = dyn_cast<MulOperator>(I)) {
576 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
577 if (C->getValue().isPowerOf2())
578 return true;
579 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
580 if (C->getValue().isPowerOf2())
581 return true;
582 }
583 return false;
584}
585
586// Computes the address to get to an object.
587bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
588{
589 const User *U = nullptr;
590 unsigned Opcode = Instruction::UserOp1;
591 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
592 // Don't walk into other basic blocks unless the object is an alloca from
593 // another block, otherwise it may not have a virtual register assigned.
594 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
595 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
596 Opcode = I->getOpcode();
597 U = I;
598 }
599 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
600 Opcode = C->getOpcode();
601 U = C;
602 }
603
604 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
605 if (Ty->getAddressSpace() > 255)
606 // Fast instruction selection doesn't support the special
607 // address spaces.
608 return false;
609
610 switch (Opcode) {
611 default:
612 break;
613 case Instruction::BitCast:
614 // Look through bitcasts.
615 return computeAddress(U->getOperand(0), Addr, Ty);
616
617 case Instruction::IntToPtr:
618 // Look past no-op inttoptrs.
619 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
620 TLI.getPointerTy(DL))
621 return computeAddress(U->getOperand(0), Addr, Ty);
622 break;
623
624 case Instruction::PtrToInt:
625 // Look past no-op ptrtoints.
626 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
627 return computeAddress(U->getOperand(0), Addr, Ty);
628 break;
629
630 case Instruction::GetElementPtr: {
631 Address SavedAddr = Addr;
632 uint64_t TmpOffset = Addr.getOffset();
633
634 // Iterate through the GEP folding the constants into offsets where
635    // we can.
636    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
637         GTI != E; ++GTI) {
638 const Value *Op = GTI.getOperand();
639 if (StructType *STy = GTI.getStructTypeOrNull()) {
640 const StructLayout *SL = DL.getStructLayout(STy);
641 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
642 TmpOffset += SL->getElementOffset(Idx);
643 } else {
644 uint64_t S = GTI.getSequentialElementStride(DL);
645 while (true) {
646 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
647 // Constant-offset addressing.
648 TmpOffset += CI->getSExtValue() * S;
649 break;
650 }
651 if (canFoldAddIntoGEP(U, Op)) {
652 // A compatible add with a constant operand. Fold the constant.
653 ConstantInt *CI =
654 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
655 TmpOffset += CI->getSExtValue() * S;
656 // Iterate on the other operand.
657 Op = cast<AddOperator>(Op)->getOperand(0);
658 continue;
659 }
660 // Unsupported
661 goto unsupported_gep;
662 }
663 }
664 }
665
666 // Try to grab the base operand now.
667 Addr.setOffset(TmpOffset);
668 if (computeAddress(U->getOperand(0), Addr, Ty))
669 return true;
670
671 // We failed, restore everything and try the other options.
672 Addr = SavedAddr;
673
674 unsupported_gep:
675 break;
676 }
677 case Instruction::Alloca: {
678 const AllocaInst *AI = cast<AllocaInst>(Obj);
679 DenseMap<const AllocaInst *, int>::iterator SI =
680 FuncInfo.StaticAllocaMap.find(AI);
681 if (SI != FuncInfo.StaticAllocaMap.end()) {
682 Addr.setKind(Address::FrameIndexBase);
683 Addr.setFI(SI->second);
684 return true;
685 }
686 break;
687 }
688 case Instruction::Add: {
689 // Adds of constants are common and easy enough.
690 const Value *LHS = U->getOperand(0);
691 const Value *RHS = U->getOperand(1);
692
693    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
694      std::swap(LHS, RHS);
695
696 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
697 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
698 return computeAddress(LHS, Addr, Ty);
699 }
700
701 Address Backup = Addr;
702 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
703 return true;
704 Addr = Backup;
705
706 break;
707 }
708 case Instruction::Sub: {
709 // Subs of constants are common and easy enough.
710 const Value *LHS = U->getOperand(0);
711 const Value *RHS = U->getOperand(1);
712
713 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
714 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
715 return computeAddress(LHS, Addr, Ty);
716 }
717 break;
718 }
719 case Instruction::Shl: {
720 if (Addr.getOffsetReg())
721 break;
722
723 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
724 if (!CI)
725 break;
726
727 unsigned Val = CI->getZExtValue();
728 if (Val < 1 || Val > 3)
729 break;
730
731 uint64_t NumBytes = 0;
732 if (Ty && Ty->isSized()) {
733 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
734 NumBytes = NumBits / 8;
735 if (!isPowerOf2_64(NumBits))
736 NumBytes = 0;
737 }
738
739 if (NumBytes != (1ULL << Val))
740 break;
741
742 Addr.setShift(Val);
743 Addr.setExtendType(AArch64_AM::LSL);
744
745 const Value *Src = U->getOperand(0);
746 if (const auto *I = dyn_cast<Instruction>(Src)) {
747 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
748 // Fold the zext or sext when it won't become a noop.
749 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
750 if (!isIntExtFree(ZE) &&
751 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
752 Addr.setExtendType(AArch64_AM::UXTW);
753 Src = ZE->getOperand(0);
754 }
755 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
756 if (!isIntExtFree(SE) &&
757 SE->getOperand(0)->getType()->isIntegerTy(32)) {
758 Addr.setExtendType(AArch64_AM::SXTW);
759 Src = SE->getOperand(0);
760 }
761 }
762 }
763 }
764
765 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
766 if (AI->getOpcode() == Instruction::And) {
767 const Value *LHS = AI->getOperand(0);
768 const Value *RHS = AI->getOperand(1);
769
770 if (const auto *C = dyn_cast<ConstantInt>(LHS))
771 if (C->getValue() == 0xffffffff)
772 std::swap(LHS, RHS);
773
774 if (const auto *C = dyn_cast<ConstantInt>(RHS))
775 if (C->getValue() == 0xffffffff) {
776 Addr.setExtendType(AArch64_AM::UXTW);
777 Register Reg = getRegForValue(LHS);
778 if (!Reg)
779 return false;
780 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
781 Addr.setOffsetReg(Reg);
782 return true;
783 }
784 }
785
786 Register Reg = getRegForValue(Src);
787 if (!Reg)
788 return false;
789 Addr.setOffsetReg(Reg);
790 return true;
791 }
792 case Instruction::Mul: {
793 if (Addr.getOffsetReg())
794 break;
795
796 if (!isMulPowOf2(U))
797 break;
798
799 const Value *LHS = U->getOperand(0);
800 const Value *RHS = U->getOperand(1);
801
802 // Canonicalize power-of-2 value to the RHS.
803 if (const auto *C = dyn_cast<ConstantInt>(LHS))
804 if (C->getValue().isPowerOf2())
805 std::swap(LHS, RHS);
806
807    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
808 const auto *C = cast<ConstantInt>(RHS);
809 unsigned Val = C->getValue().logBase2();
810 if (Val < 1 || Val > 3)
811 break;
812
813 uint64_t NumBytes = 0;
814 if (Ty && Ty->isSized()) {
815 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
816 NumBytes = NumBits / 8;
817 if (!isPowerOf2_64(NumBits))
818 NumBytes = 0;
819 }
820
821 if (NumBytes != (1ULL << Val))
822 break;
823
824 Addr.setShift(Val);
825 Addr.setExtendType(AArch64_AM::LSL);
826
827 const Value *Src = LHS;
828 if (const auto *I = dyn_cast<Instruction>(Src)) {
829 if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
830 // Fold the zext or sext when it won't become a noop.
831 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
832 if (!isIntExtFree(ZE) &&
833 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
834 Addr.setExtendType(AArch64_AM::UXTW);
835 Src = ZE->getOperand(0);
836 }
837 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
838 if (!isIntExtFree(SE) &&
839 SE->getOperand(0)->getType()->isIntegerTy(32)) {
840 Addr.setExtendType(AArch64_AM::SXTW);
841 Src = SE->getOperand(0);
842 }
843 }
844 }
845 }
846
847 Register Reg = getRegForValue(Src);
848 if (!Reg)
849 return false;
850 Addr.setOffsetReg(Reg);
851 return true;
852 }
853 case Instruction::And: {
854 if (Addr.getOffsetReg())
855 break;
856
857 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
858 break;
859
860 const Value *LHS = U->getOperand(0);
861 const Value *RHS = U->getOperand(1);
862
863 if (const auto *C = dyn_cast<ConstantInt>(LHS))
864 if (C->getValue() == 0xffffffff)
865 std::swap(LHS, RHS);
866
867 if (const auto *C = dyn_cast<ConstantInt>(RHS))
868 if (C->getValue() == 0xffffffff) {
869 Addr.setShift(0);
870 Addr.setExtendType(AArch64_AM::LSL);
871 Addr.setExtendType(AArch64_AM::UXTW);
872
873 Register Reg = getRegForValue(LHS);
874 if (!Reg)
875 return false;
876 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
877 Addr.setOffsetReg(Reg);
878 return true;
879 }
880 break;
881 }
882 case Instruction::SExt:
883 case Instruction::ZExt: {
884 if (!Addr.getReg() || Addr.getOffsetReg())
885 break;
886
887 const Value *Src = nullptr;
888 // Fold the zext or sext when it won't become a noop.
889 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
890 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
891 Addr.setExtendType(AArch64_AM::UXTW);
892 Src = ZE->getOperand(0);
893 }
894 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
895 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
896 Addr.setExtendType(AArch64_AM::SXTW);
897 Src = SE->getOperand(0);
898 }
899 }
900
901 if (!Src)
902 break;
903
904 Addr.setShift(0);
905 Register Reg = getRegForValue(Src);
906 if (!Reg)
907 return false;
908 Addr.setOffsetReg(Reg);
909 return true;
910 }
911 } // end switch
912
913 if (Addr.isRegBase() && !Addr.getReg()) {
914 Register Reg = getRegForValue(Obj);
915 if (!Reg)
916 return false;
917 Addr.setReg(Reg);
918 return true;
919 }
920
921 if (!Addr.getOffsetReg()) {
922 Register Reg = getRegForValue(Obj);
923 if (!Reg)
924 return false;
925 Addr.setOffsetReg(Reg);
926 return true;
927 }
928
929 return false;
930}
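// In summary, the address forms recognized above are (sketch):
//   [fi + imm]                 frame-index base with a folded constant offset
//   [reg + imm]                register base with a folded constant offset
//   [reg + reg, lsl #s]        shl/mul-by-power-of-2 folded, s in 1..3
//   [reg + wreg, (u|s)xtw #s]  a free i32->i64 zero/sign extend folded into
//                              the offset register
// For example, a pointer computed as "add i64 %base, (shl i64 %idx, 2)" for a
// 4-byte access can be selected as [%base, %idx, lsl #2].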
931
932bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
933 const User *U = nullptr;
934 unsigned Opcode = Instruction::UserOp1;
935 bool InMBB = true;
936
937 if (const auto *I = dyn_cast<Instruction>(V)) {
938 Opcode = I->getOpcode();
939 U = I;
940 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
941 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
942 Opcode = C->getOpcode();
943 U = C;
944 }
945
946 switch (Opcode) {
947 default: break;
948 case Instruction::BitCast:
949 // Look past bitcasts if its operand is in the same BB.
950 if (InMBB)
951 return computeCallAddress(U->getOperand(0), Addr);
952 break;
953 case Instruction::IntToPtr:
954 // Look past no-op inttoptrs if its operand is in the same BB.
955 if (InMBB &&
956 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
957 TLI.getPointerTy(DL))
958 return computeCallAddress(U->getOperand(0), Addr);
959 break;
960 case Instruction::PtrToInt:
961 // Look past no-op ptrtoints if its operand is in the same BB.
962 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
963 return computeCallAddress(U->getOperand(0), Addr);
964 break;
965 }
966
967 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
968 Addr.setGlobalValue(GV);
969 return true;
970 }
971
972 // If all else fails, try to materialize the value in a register.
973 if (!Addr.getGlobalValue()) {
974 Addr.setReg(getRegForValue(V));
975 return Addr.getReg().isValid();
976 }
977
978 return false;
979}
980
981bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
982 EVT evt = TLI.getValueType(DL, Ty, true);
983
984 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
985 return false;
986
987 // Only handle simple types.
988 if (evt == MVT::Other || !evt.isSimple())
989 return false;
990 VT = evt.getSimpleVT();
991
992 // This is a legal type, but it's not something we handle in fast-isel.
993 if (VT == MVT::f128)
994 return false;
995
996 // Handle all other legal types, i.e. a register that will directly hold this
997 // value.
998 return TLI.isTypeLegal(VT);
999}
1000
1001/// Determine if the value type is supported by FastISel.
1002///
1003/// FastISel for AArch64 can handle more value types than are legal. This adds
1004/// simple value types such as i1, i8, and i16.
1005bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1006 if (Ty->isVectorTy() && !IsVectorAllowed)
1007 return false;
1008
1009 if (isTypeLegal(Ty, VT))
1010 return true;
1011
1012  // If this is a type that can be sign- or zero-extended to a basic operation,
1013  // go ahead and accept it now.
1014 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1015 return true;
1016
1017 return false;
1018}
1019
1020bool AArch64FastISel::isValueAvailable(const Value *V) const {
1021 if (!isa<Instruction>(V))
1022 return true;
1023
1024 const auto *I = cast<Instruction>(V);
1025 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
1026}
1027
1028bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1029 if (Subtarget->isTargetILP32())
1030 return false;
1031
1032 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1033 if (!ScaleFactor)
1034 return false;
1035
1036 bool ImmediateOffsetNeedsLowering = false;
1037 bool RegisterOffsetNeedsLowering = false;
1038 int64_t Offset = Addr.getOffset();
1039 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1040 ImmediateOffsetNeedsLowering = true;
1041 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1042 !isUInt<12>(Offset / ScaleFactor))
1043 ImmediateOffsetNeedsLowering = true;
1044
1045 // Cannot encode an offset register and an immediate offset in the same
1046 // instruction. Fold the immediate offset into the load/store instruction and
1047 // emit an additional add to take care of the offset register.
1048 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1049 RegisterOffsetNeedsLowering = true;
1050
1051 // Cannot encode zero register as base.
1052 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1053 RegisterOffsetNeedsLowering = true;
1054
1055 // If this is a stack pointer and the offset needs to be simplified then put
1056 // the alloca address into a register, set the base type back to register and
1057 // continue. This should almost never happen.
1058 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1059 {
1060 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1061 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1062 ResultReg)
1063 .addFrameIndex(Addr.getFI())
1064 .addImm(0)
1065 .addImm(0);
1066 Addr.setKind(Address::RegBase);
1067 Addr.setReg(ResultReg);
1068 }
1069
1070 if (RegisterOffsetNeedsLowering) {
1071 Register ResultReg;
1072 if (Addr.getReg()) {
1073 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1074 Addr.getExtendType() == AArch64_AM::UXTW )
1075 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1076 Addr.getOffsetReg(), Addr.getExtendType(),
1077 Addr.getShift());
1078 else
1079 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1080 Addr.getOffsetReg(), AArch64_AM::LSL,
1081 Addr.getShift());
1082 } else {
1083 if (Addr.getExtendType() == AArch64_AM::UXTW)
1084 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1085 Addr.getShift(), /*IsZExt=*/true);
1086 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1087 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1088 Addr.getShift(), /*IsZExt=*/false);
1089 else
1090 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1091 Addr.getShift());
1092 }
1093 if (!ResultReg)
1094 return false;
1095
1096 Addr.setReg(ResultReg);
1097 Addr.setOffsetReg(0);
1098 Addr.setShift(0);
1099 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1100 }
1101
1102 // Since the offset is too large for the load/store instruction get the
1103 // reg+offset into a register.
1104 if (ImmediateOffsetNeedsLowering) {
1105 Register ResultReg;
1106 if (Addr.getReg())
1107 // Try to fold the immediate into the add instruction.
1108 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1109 else
1110 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1111
1112 if (!ResultReg)
1113 return false;
1114 Addr.setReg(ResultReg);
1115 Addr.setOffset(0);
1116 }
1117 return true;
1118}
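// Example of the immediate-offset handling above (a sketch): for an i32 load,
// offset -8 already fits the signed 9-bit unscaled form, so nothing changes,
// but offset 65536 (65536 / 4 >= 4096) is not encodable, so the reg+offset sum
// is first materialized with an add (emitAdd_ri_) and the load then uses a
// zero offset.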
1119
1120void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1121                                           const MachineInstrBuilder &MIB,
1122                                           MachineMemOperand::Flags Flags,
1123                                           unsigned ScaleFactor,
1124 MachineMemOperand *MMO) {
1125 int64_t Offset = Addr.getOffset() / ScaleFactor;
1126 // Frame base works a bit differently. Handle it separately.
1127 if (Addr.isFIBase()) {
1128 int FI = Addr.getFI();
1129 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1130 // and alignment should be based on the VT.
1131 MMO = FuncInfo.MF->getMachineMemOperand(
1132 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1133 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1134 // Now add the rest of the operands.
1135 MIB.addFrameIndex(FI).addImm(Offset);
1136 } else {
1137 assert(Addr.isRegBase() && "Unexpected address kind.");
1138 const MCInstrDesc &II = MIB->getDesc();
1139 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1140 Addr.setReg(
1141 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1142 Addr.setOffsetReg(
1143 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1144 if (Addr.getOffsetReg()) {
1145 assert(Addr.getOffset() == 0 && "Unexpected offset");
1146 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1147 Addr.getExtendType() == AArch64_AM::SXTX;
1148 MIB.addReg(Addr.getReg());
1149 MIB.addReg(Addr.getOffsetReg());
1150 MIB.addImm(IsSigned);
1151 MIB.addImm(Addr.getShift() != 0);
1152 } else
1153 MIB.addReg(Addr.getReg()).addImm(Offset);
1154 }
1155
1156 if (MMO)
1157 MIB.addMemOperand(MMO);
1158}
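// Note: for the register-offset forms the operands added here are, in order,
// the base register, the offset register, a sign-extend flag (1 for
// SXTW/SXTX) and a "scale by the access size" flag (1 when Addr.getShift() is
// non-zero), which is what the LDR*/STR* ro[WX] variants expect.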
1159
1160Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1161 const Value *RHS, bool SetFlags,
1162                                     bool WantResult, bool IsZExt) {
1163  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1164  bool NeedExtend = false;
1165 switch (RetVT.SimpleTy) {
1166 default:
1167 return Register();
1168 case MVT::i1:
1169 NeedExtend = true;
1170 break;
1171 case MVT::i8:
1172 NeedExtend = true;
1173 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1174 break;
1175 case MVT::i16:
1176 NeedExtend = true;
1177 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1178 break;
1179 case MVT::i32: // fall-through
1180 case MVT::i64:
1181 break;
1182 }
1183 MVT SrcVT = RetVT;
1184 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1185
1186 // Canonicalize immediates to the RHS first.
1187 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1188 std::swap(LHS, RHS);
1189
1190 // Canonicalize mul by power of 2 to the RHS.
1191 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1192 if (isMulPowOf2(LHS))
1193 std::swap(LHS, RHS);
1194
1195 // Canonicalize shift immediate to the RHS.
1196 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1197 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1198 if (isa<ConstantInt>(SI->getOperand(1)))
1199 if (SI->getOpcode() == Instruction::Shl ||
1200 SI->getOpcode() == Instruction::LShr ||
1201 SI->getOpcode() == Instruction::AShr )
1202 std::swap(LHS, RHS);
1203
1204 Register LHSReg = getRegForValue(LHS);
1205 if (!LHSReg)
1206 return Register();
1207
1208 if (NeedExtend)
1209 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1210
1211 Register ResultReg;
1212 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1213 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1214 if (C->isNegative())
1215 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1216 WantResult);
1217 else
1218 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1219 WantResult);
1220 } else if (const auto *C = dyn_cast<Constant>(RHS))
1221 if (C->isNullValue())
1222 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1223
1224 if (ResultReg)
1225 return ResultReg;
1226
1227 // Only extend the RHS within the instruction if there is a valid extend type.
1228 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1229 isValueAvailable(RHS)) {
1230 Register RHSReg = getRegForValue(RHS);
1231 if (!RHSReg)
1232 return Register();
1233 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1234 SetFlags, WantResult);
1235 }
1236
1237 // Check if the mul can be folded into the instruction.
1238 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1239 if (isMulPowOf2(RHS)) {
1240 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1241 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1242
1243 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1244 if (C->getValue().isPowerOf2())
1245 std::swap(MulLHS, MulRHS);
1246
1247 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1248 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1249 Register RHSReg = getRegForValue(MulLHS);
1250 if (!RHSReg)
1251 return Register();
1252 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1253 ShiftVal, SetFlags, WantResult);
1254 if (ResultReg)
1255 return ResultReg;
1256 }
1257 }
1258
1259 // Check if the shift can be folded into the instruction.
1260 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1261 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1262      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1263        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1264        switch (SI->getOpcode()) {
1265 default: break;
1266 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1267 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1268 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1269 }
1270 uint64_t ShiftVal = C->getZExtValue();
1271 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1272 Register RHSReg = getRegForValue(SI->getOperand(0));
1273 if (!RHSReg)
1274 return Register();
1275 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1276 ShiftVal, SetFlags, WantResult);
1277 if (ResultReg)
1278 return ResultReg;
1279 }
1280 }
1281 }
1282 }
1283
1284 Register RHSReg = getRegForValue(RHS);
1285 if (!RHSReg)
1286 return Register();
1287
1288 if (NeedExtend)
1289 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1290
1291 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1292}
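// Folding examples for the helper above (rough sketches):
//   a + (b << 2)  -> ADD{W|X}rs with LSL #2  (shift folded via emitAddSub_rs)
//   a + b * 8     -> ADD{W|X}rs with LSL #3  (mul-by-power-of-2 folded)
//   a + 12        -> ADD{W|X}ri with imm 12  (constant folded)
// An i8/i16 RHS is instead extended within the instruction via the ...rx form
// (UXTB/SXTB, UXTH/SXTH). Folding requires the folded operand to have a single
// use and to be available in this block.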
1293
1294Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
1295 Register RHSReg, bool SetFlags,
1296 bool WantResult) {
1297 assert(LHSReg && RHSReg && "Invalid register number.");
1298
1299 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1300 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1301 return Register();
1302
1303 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1304 return Register();
1305
1306 static const unsigned OpcTable[2][2][2] = {
1307 { { AArch64::SUBWrr, AArch64::SUBXrr },
1308 { AArch64::ADDWrr, AArch64::ADDXrr } },
1309 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1310 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1311 };
1312 bool Is64Bit = RetVT == MVT::i64;
1313 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1314 const TargetRegisterClass *RC =
1315 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1316 Register ResultReg;
1317 if (WantResult)
1318 ResultReg = createResultReg(RC);
1319 else
1320 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1321
1322 const MCInstrDesc &II = TII.get(Opc);
1323 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1324 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1325 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1326 .addReg(LHSReg)
1327 .addReg(RHSReg);
1328 return ResultReg;
1329}
1330
1331Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
1332 uint64_t Imm, bool SetFlags,
1333 bool WantResult) {
1334 assert(LHSReg && "Invalid register number.");
1335
1336 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1337 return Register();
1338
1339 unsigned ShiftImm;
1340 if (isUInt<12>(Imm))
1341 ShiftImm = 0;
1342 else if ((Imm & 0xfff000) == Imm) {
1343 ShiftImm = 12;
1344 Imm >>= 12;
1345 } else
1346 return Register();
1347
1348 static const unsigned OpcTable[2][2][2] = {
1349 { { AArch64::SUBWri, AArch64::SUBXri },
1350 { AArch64::ADDWri, AArch64::ADDXri } },
1351 { { AArch64::SUBSWri, AArch64::SUBSXri },
1352 { AArch64::ADDSWri, AArch64::ADDSXri } }
1353 };
1354 bool Is64Bit = RetVT == MVT::i64;
1355 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1356 const TargetRegisterClass *RC;
1357 if (SetFlags)
1358 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1359 else
1360 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1361 Register ResultReg;
1362 if (WantResult)
1363 ResultReg = createResultReg(RC);
1364 else
1365 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1366
1367 const MCInstrDesc &II = TII.get(Opc);
1368 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1369 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1370 .addReg(LHSReg)
1371 .addImm(Imm)
1372 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1373 return ResultReg;
1374}
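// Immediate encoding handled above (sketch): 0..4095 is used directly, and a
// value whose low 12 bits are clear and which fits in the next 12 bits is
// encoded with "LSL #12", e.g. 0x5000 -> imm 5, shift 12. Anything else is
// rejected here, and callers such as emitAdd_ri_ fall back to materializing
// the constant into a register.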
1375
1376Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
1377                                        Register RHSReg,
1378                                        AArch64_AM::ShiftExtendType ShiftType,
1379                                        uint64_t ShiftImm, bool SetFlags,
1380 bool WantResult) {
1381 assert(LHSReg && RHSReg && "Invalid register number.");
1382 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1383 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1384
1385 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1386 return Register();
1387
1388 // Don't deal with undefined shifts.
1389 if (ShiftImm >= RetVT.getSizeInBits())
1390 return Register();
1391
1392 static const unsigned OpcTable[2][2][2] = {
1393 { { AArch64::SUBWrs, AArch64::SUBXrs },
1394 { AArch64::ADDWrs, AArch64::ADDXrs } },
1395 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1396 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1397 };
1398 bool Is64Bit = RetVT == MVT::i64;
1399 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1400 const TargetRegisterClass *RC =
1401 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1402 Register ResultReg;
1403 if (WantResult)
1404 ResultReg = createResultReg(RC);
1405 else
1406 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1407
1408 const MCInstrDesc &II = TII.get(Opc);
1409 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1410 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1411 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1412 .addReg(LHSReg)
1413 .addReg(RHSReg)
1414 .addImm(getShifterImm(ShiftType, ShiftImm));
1415 return ResultReg;
1416}
1417
1418Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
1419                                        Register RHSReg,
1420                                        AArch64_AM::ShiftExtendType ExtType,
1421                                        uint64_t ShiftImm, bool SetFlags,
1422 bool WantResult) {
1423 assert(LHSReg && RHSReg && "Invalid register number.");
1424 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1425 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1426
1427 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1428 return Register();
1429
1430 if (ShiftImm >= 4)
1431 return Register();
1432
1433 static const unsigned OpcTable[2][2][2] = {
1434 { { AArch64::SUBWrx, AArch64::SUBXrx },
1435 { AArch64::ADDWrx, AArch64::ADDXrx } },
1436 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1437 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1438 };
1439 bool Is64Bit = RetVT == MVT::i64;
1440 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1441 const TargetRegisterClass *RC = nullptr;
1442 if (SetFlags)
1443 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1444 else
1445 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1446 Register ResultReg;
1447 if (WantResult)
1448 ResultReg = createResultReg(RC);
1449 else
1450 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1451
1452 const MCInstrDesc &II = TII.get(Opc);
1453 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1454 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1455 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1456 .addReg(LHSReg)
1457 .addReg(RHSReg)
1458 .addImm(getArithExtendImm(ExtType, ShiftImm));
1459 return ResultReg;
1460}
1461
1462bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1463 Type *Ty = LHS->getType();
1464 EVT EVT = TLI.getValueType(DL, Ty, true);
1465 if (!EVT.isSimple())
1466 return false;
1467 MVT VT = EVT.getSimpleVT();
1468
1469 switch (VT.SimpleTy) {
1470 default:
1471 return false;
1472 case MVT::i1:
1473 case MVT::i8:
1474 case MVT::i16:
1475 case MVT::i32:
1476 case MVT::i64:
1477 return emitICmp(VT, LHS, RHS, IsZExt);
1478 case MVT::f32:
1479 case MVT::f64:
1480 return emitFCmp(VT, LHS, RHS);
1481 }
1482}
1483
1484bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1485 bool IsZExt) {
1486 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1487 IsZExt)
1488 .isValid();
1489}
1490
1491bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1492 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1493 /*SetFlags=*/true, /*WantResult=*/false)
1494 .isValid();
1495}
1496
1497bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1498 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1499 return false;
1500
1501 // Check to see if the 2nd operand is a constant that we can encode directly
1502 // in the compare.
1503 bool UseImm = false;
1504 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1505 if (CFP->isZero() && !CFP->isNegative())
1506 UseImm = true;
1507
1508 Register LHSReg = getRegForValue(LHS);
1509 if (!LHSReg)
1510 return false;
1511
1512 if (UseImm) {
1513 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1514 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1515 .addReg(LHSReg);
1516 return true;
1517 }
1518
1519 Register RHSReg = getRegForValue(RHS);
1520 if (!RHSReg)
1521 return false;
1522
1523 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1524 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1525 .addReg(LHSReg)
1526 .addReg(RHSReg);
1527 return true;
1528}
1529
1530Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1531 bool SetFlags, bool WantResult, bool IsZExt) {
1532 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1533 IsZExt);
1534}
1535
1536/// This method is a wrapper to simplify add emission.
1537///
1538/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1539/// that fails, then try to materialize the immediate into a register and use
1540/// emitAddSub_rr instead.
1541Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
1542 Register ResultReg;
1543 if (Imm < 0)
1544 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1545 else
1546 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1547
1548 if (ResultReg)
1549 return ResultReg;
1550
1551 Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1552 if (!CReg)
1553 return Register();
1554
1555 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1556 return ResultReg;
1557}
1558
1559Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1560 bool SetFlags, bool WantResult, bool IsZExt) {
1561 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1562 IsZExt);
1563}
1564
1565Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
1566 Register RHSReg, bool WantResult) {
1567 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1568 /*SetFlags=*/true, WantResult);
1569}
1570
1571Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
1572                                      Register RHSReg,
1573                                      AArch64_AM::ShiftExtendType ShiftType,
1574                                      uint64_t ShiftImm, bool WantResult) {
1575 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1576 ShiftImm, /*SetFlags=*/true, WantResult);
1577}
1578
1579Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1580 const Value *LHS, const Value *RHS) {
1581  // Canonicalize immediates to the RHS first.
1582  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1583    std::swap(LHS, RHS);
1584
1585 // Canonicalize mul by power-of-2 to the RHS.
1586 if (LHS->hasOneUse() && isValueAvailable(LHS))
1587 if (isMulPowOf2(LHS))
1588 std::swap(LHS, RHS);
1589
1590 // Canonicalize shift immediate to the RHS.
1591 if (LHS->hasOneUse() && isValueAvailable(LHS))
1592 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1593 if (isa<ConstantInt>(SI->getOperand(1)))
1594 std::swap(LHS, RHS);
1595
1596 Register LHSReg = getRegForValue(LHS);
1597 if (!LHSReg)
1598 return Register();
1599
1600 Register ResultReg;
1601 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1602 uint64_t Imm = C->getZExtValue();
1603 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1604 }
1605 if (ResultReg)
1606 return ResultReg;
1607
1608 // Check if the mul can be folded into the instruction.
1609 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1610 if (isMulPowOf2(RHS)) {
1611 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1612 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1613
1614 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1615 if (C->getValue().isPowerOf2())
1616 std::swap(MulLHS, MulRHS);
1617
1618 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1619 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1620
1621 Register RHSReg = getRegForValue(MulLHS);
1622 if (!RHSReg)
1623 return Register();
1624 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1625 if (ResultReg)
1626 return ResultReg;
1627 }
1628 }
1629
1630 // Check if the shift can be folded into the instruction.
1631 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1632 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1633 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1634 uint64_t ShiftVal = C->getZExtValue();
1635 Register RHSReg = getRegForValue(SI->getOperand(0));
1636 if (!RHSReg)
1637 return Register();
1638 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1639 if (ResultReg)
1640 return ResultReg;
1641 }
1642 }
1643
1644 Register RHSReg = getRegForValue(RHS);
1645 if (!RHSReg)
1646 return Register();
1647
1648 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1649 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1650 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1651 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1652 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1653 }
1654 return ResultReg;
1655}
1656
1657Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1658 Register LHSReg, uint64_t Imm) {
1659 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1660 "ISD nodes are not consecutive!");
1661 static const unsigned OpcTable[3][2] = {
1662 { AArch64::ANDWri, AArch64::ANDXri },
1663 { AArch64::ORRWri, AArch64::ORRXri },
1664 { AArch64::EORWri, AArch64::EORXri }
1665 };
1666 const TargetRegisterClass *RC;
1667 unsigned Opc;
1668 unsigned RegSize;
1669 switch (RetVT.SimpleTy) {
1670 default:
1671 return Register();
1672 case MVT::i1:
1673 case MVT::i8:
1674 case MVT::i16:
1675 case MVT::i32: {
1676 unsigned Idx = ISDOpc - ISD::AND;
1677 Opc = OpcTable[Idx][0];
1678 RC = &AArch64::GPR32spRegClass;
1679 RegSize = 32;
1680 break;
1681 }
1682 case MVT::i64:
1683 Opc = OpcTable[ISDOpc - ISD::AND][1];
1684 RC = &AArch64::GPR64spRegClass;
1685 RegSize = 64;
1686 break;
1687 }
1688
1689  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1690    return Register();
1691
1692  Register ResultReg =
1693      fastEmitInst_ri(Opc, RC, LHSReg,
1694                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1695 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1696 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1697 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1698 }
1699 return ResultReg;
1700}
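// Example: "and w0, w0, #0xff" uses a valid logical immediate (a repeating
// run-of-ones pattern), so it is emitted directly, whereas a mask like
// 0x12345678 is not encodable and emitLogicalOp falls back to the
// register-register form. (Sketch; the exact encodable set is defined by
// AArch64_AM::isLogicalImmediate.)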
1701
1702Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1703 Register LHSReg, Register RHSReg,
1704 uint64_t ShiftImm) {
1705 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1706 "ISD nodes are not consecutive!");
1707 static const unsigned OpcTable[3][2] = {
1708 { AArch64::ANDWrs, AArch64::ANDXrs },
1709 { AArch64::ORRWrs, AArch64::ORRXrs },
1710 { AArch64::EORWrs, AArch64::EORXrs }
1711 };
1712
1713 // Don't deal with undefined shifts.
1714 if (ShiftImm >= RetVT.getSizeInBits())
1715 return Register();
1716
1717 const TargetRegisterClass *RC;
1718 unsigned Opc;
1719 switch (RetVT.SimpleTy) {
1720 default:
1721 return Register();
1722 case MVT::i1:
1723 case MVT::i8:
1724 case MVT::i16:
1725 case MVT::i32:
1726 Opc = OpcTable[ISDOpc - ISD::AND][0];
1727 RC = &AArch64::GPR32RegClass;
1728 break;
1729 case MVT::i64:
1730 Opc = OpcTable[ISDOpc - ISD::AND][1];
1731 RC = &AArch64::GPR64RegClass;
1732 break;
1733 }
1734 Register ResultReg =
1735 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1736 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1737 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1738 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1739 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1740 }
1741 return ResultReg;
1742}
1743
1744Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1745 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1746}
1747
1748Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1749 bool WantZExt, MachineMemOperand *MMO) {
1750 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1751 return Register();
1752
1753 // Simplify this down to something we can handle.
1754 if (!simplifyAddress(Addr, VT))
1755 return Register();
1756
1757 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1758 if (!ScaleFactor)
1759 llvm_unreachable("Unexpected value type.");
1760
1761 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1762 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1763 bool UseScaled = true;
1764 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1765 UseScaled = false;
1766 ScaleFactor = 1;
1767 }
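// Note: an i32 load at offset -4 must use the unscaled LDUR form, while a
// byte offset of 8 fits the scaled LDR form as immediate #2 (8 / 4).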
1768
1769 static const unsigned GPOpcTable[2][8][4] = {
1770 // Sign-extend.
1771 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1772 AArch64::LDURXi },
1773 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1774 AArch64::LDURXi },
1775 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1776 AArch64::LDRXui },
1777 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1778 AArch64::LDRXui },
1779 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1780 AArch64::LDRXroX },
1781 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1782 AArch64::LDRXroX },
1783 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1784 AArch64::LDRXroW },
1785 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1786 AArch64::LDRXroW }
1787 },
1788 // Zero-extend.
1789 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1790 AArch64::LDURXi },
1791 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1792 AArch64::LDURXi },
1793 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1794 AArch64::LDRXui },
1795 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1796 AArch64::LDRXui },
1797 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1798 AArch64::LDRXroX },
1799 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1800 AArch64::LDRXroX },
1801 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1802 AArch64::LDRXroW },
1803 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1804 AArch64::LDRXroW }
1805 }
1806 };
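// Note: the table is indexed below as GPOpcTable[WantZExt][2 * Idx + IsRet64Bit],
// where Idx selects the addressing form (0 = unscaled imm, 1 = scaled imm,
// 2 = register offset, 3 = extended register offset) and the last index is
// the access size.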
1807
1808 static const unsigned FPOpcTable[4][2] = {
1809 { AArch64::LDURSi, AArch64::LDURDi },
1810 { AArch64::LDRSui, AArch64::LDRDui },
1811 { AArch64::LDRSroX, AArch64::LDRDroX },
1812 { AArch64::LDRSroW, AArch64::LDRDroW }
1813 };
1814
1815 unsigned Opc;
1816 const TargetRegisterClass *RC;
1817 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1818 Addr.getOffsetReg();
1819 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1820 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821 Addr.getExtendType() == AArch64_AM::SXTW)
1822 Idx++;
1823
1824 bool IsRet64Bit = RetVT == MVT::i64;
1825 switch (VT.SimpleTy) {
1826 default:
1827 llvm_unreachable("Unexpected value type.");
1828 case MVT::i1: // Intentional fall-through.
1829 case MVT::i8:
1830 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1831 RC = (IsRet64Bit && !WantZExt) ?
1832 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1833 break;
1834 case MVT::i16:
1835 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1836 RC = (IsRet64Bit && !WantZExt) ?
1837 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1838 break;
1839 case MVT::i32:
1840 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1841 RC = (IsRet64Bit && !WantZExt) ?
1842 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1843 break;
1844 case MVT::i64:
1845 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1846 RC = &AArch64::GPR64RegClass;
1847 break;
1848 case MVT::f32:
1849 Opc = FPOpcTable[Idx][0];
1850 RC = &AArch64::FPR32RegClass;
1851 break;
1852 case MVT::f64:
1853 Opc = FPOpcTable[Idx][1];
1854 RC = &AArch64::FPR64RegClass;
1855 break;
1856 }
1857
1858 // Create the base instruction, then add the operands.
1859 Register ResultReg = createResultReg(RC);
1860 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1861 TII.get(Opc), ResultReg);
1862 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1863
1864 // Loading an i1 requires special handling.
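// The i1 case reuses the i8 byte-load opcodes, so only bit 0 of the loaded
// value is meaningful; mask it to get a canonical 0/1 result.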
1865 if (VT == MVT::i1) {
1866 Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1867 assert(ANDReg && "Unexpected AND instruction emission failure.");
1868 ResultReg = ANDReg;
1869 }
1870
1871 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1872 // the 32bit reg to a 64bit reg.
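// A 32-bit load already clears bits [63:32] of the X register, so
// SUBREG_TO_REG just models that implicit zero-extension without emitting
// an extra instruction.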
1873 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1874 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1875 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1876 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1877 .addImm(0)
1878 .addReg(ResultReg, getKillRegState(true))
1879 .addImm(AArch64::sub_32);
1880 ResultReg = Reg64;
1881 }
1882 return ResultReg;
1883}
1884
1885bool AArch64FastISel::selectAddSub(const Instruction *I) {
1886 MVT VT;
1887 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1888 return false;
1889
1890 if (VT.isVector())
1891 return selectOperator(I, I->getOpcode());
1892
1893 Register ResultReg;
1894 switch (I->getOpcode()) {
1895 default:
1896 llvm_unreachable("Unexpected instruction.");
1897 case Instruction::Add:
1898 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1899 break;
1900 case Instruction::Sub:
1901 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1902 break;
1903 }
1904 if (!ResultReg)
1905 return false;
1906
1907 updateValueMap(I, ResultReg);
1908 return true;
1909}
1910
1911bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1912 MVT VT;
1913 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1914 return false;
1915
1916 if (VT.isVector())
1917 return selectOperator(I, I->getOpcode());
1918
1919 Register ResultReg;
1920 switch (I->getOpcode()) {
1921 default:
1922 llvm_unreachable("Unexpected instruction.");
1923 case Instruction::And:
1924 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1925 break;
1926 case Instruction::Or:
1927 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1928 break;
1929 case Instruction::Xor:
1930 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1931 break;
1932 }
1933 if (!ResultReg)
1934 return false;
1935
1936 updateValueMap(I, ResultReg);
1937 return true;
1938}
1939
1940bool AArch64FastISel::selectLoad(const Instruction *I) {
1941 MVT VT;
1942 // Verify we have a legal type before going any further. Currently, we handle
1943 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1944 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1945 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1946 cast<LoadInst>(I)->isAtomic())
1947 return false;
1948
1949 const Value *SV = I->getOperand(0);
1950 if (TLI.supportSwiftError()) {
1951 // Swifterror values can come from either a function parameter with
1952 // swifterror attribute or an alloca with swifterror attribute.
1953 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1954 if (Arg->hasSwiftErrorAttr())
1955 return false;
1956 }
1957
1958 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1959 if (Alloca->isSwiftError())
1960 return false;
1961 }
1962 }
1963
1964 // See if we can handle this address.
1965 Address Addr;
1966 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1967 return false;
1968
1969 // Fold the following sign-/zero-extend into the load instruction.
1970 bool WantZExt = true;
1971 MVT RetVT = VT;
1972 const Value *IntExtVal = nullptr;
1973 if (I->hasOneUse()) {
1974 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1975 if (isTypeSupported(ZE->getType(), RetVT))
1976 IntExtVal = ZE;
1977 else
1978 RetVT = VT;
1979 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1980 if (isTypeSupported(SE->getType(), RetVT))
1981 IntExtVal = SE;
1982 else
1983 RetVT = VT;
1984 WantZExt = false;
1985 }
1986 }
1987
1988 Register ResultReg =
1989 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1990 if (!ResultReg)
1991 return false;
1992
1993 // There are a few different cases we have to handle, because the load or the
1994 // sign-/zero-extend might not be selected by FastISel if we fall back to
1995 // SelectionDAG. There is also an ordering issue when both instructions are in
1996 // different basic blocks.
1997 // 1.) The load instruction is selected by FastISel, but the integer extend
1998 // not. This usually happens when the integer extend is in a different
1999 // basic block and SelectionDAG took over for that basic block.
2000 // 2.) The load instruction is selected before the integer extend. This only
2001 // happens when the integer extend is in a different basic block.
2002 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2003 // by FastISel. This happens if there are instructions between the load
2004 // and the integer extend that couldn't be selected by FastISel.
2005 if (IntExtVal) {
2006 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2007 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2008 // it when it selects the integer extend.
2009 Register Reg = lookUpRegForValue(IntExtVal);
2010 auto *MI = MRI.getUniqueVRegDef(Reg);
2011 if (!MI) {
2012 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2013 if (WantZExt) {
2014 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2015 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2016 ResultReg = std::prev(I)->getOperand(0).getReg();
2017 removeDeadCode(I, std::next(I));
2018 } else
2019 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2020 AArch64::sub_32);
2021 }
2022 updateValueMap(I, ResultReg);
2023 return true;
2024 }
2025
2026 // The integer extend has already been emitted - delete all the instructions
2027 // that have been emitted by the integer extend lowering code and use the
2028 // result from the load instruction directly.
2029 while (MI) {
2030 Reg = 0;
2031 for (auto &Opnd : MI->uses()) {
2032 if (Opnd.isReg()) {
2033 Reg = Opnd.getReg();
2034 break;
2035 }
2036 }
2037 MachineBasicBlock::iterator I(MI);
2038 removeDeadCode(I, std::next(I));
2039 MI = nullptr;
2040 if (Reg)
2041 MI = MRI.getUniqueVRegDef(Reg);
2042 }
2043 updateValueMap(IntExtVal, ResultReg);
2044 return true;
2045 }
2046
2047 updateValueMap(I, ResultReg);
2048 return true;
2049}
2050
2051bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
2052 Register AddrReg,
2053 MachineMemOperand *MMO) {
2054 unsigned Opc;
2055 switch (VT.SimpleTy) {
2056 default: return false;
2057 case MVT::i8: Opc = AArch64::STLRB; break;
2058 case MVT::i16: Opc = AArch64::STLRH; break;
2059 case MVT::i32: Opc = AArch64::STLRW; break;
2060 case MVT::i64: Opc = AArch64::STLRX; break;
2061 }
2062
2063 const MCInstrDesc &II = TII.get(Opc);
2064 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2065 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2066 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2067 .addReg(SrcReg)
2068 .addReg(AddrReg)
2069 .addMemOperand(MMO);
2070 return true;
2071}
2072
2073bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
2074 MachineMemOperand *MMO) {
2075 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2076 return false;
2077
2078 // Simplify this down to something we can handle.
2079 if (!simplifyAddress(Addr, VT))
2080 return false;
2081
2082 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2083 if (!ScaleFactor)
2084 llvm_unreachable("Unexpected value type.");
2085
2086 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2087 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2088 bool UseScaled = true;
2089 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2090 UseScaled = false;
2091 ScaleFactor = 1;
2092 }
2093
2094 static const unsigned OpcTable[4][6] = {
2095 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2096 AArch64::STURSi, AArch64::STURDi },
2097 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2098 AArch64::STRSui, AArch64::STRDui },
2099 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2100 AArch64::STRSroX, AArch64::STRDroX },
2101 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2102 AArch64::STRSroW, AArch64::STRDroW }
2103 };
2104
2105 unsigned Opc;
2106 bool VTIsi1 = false;
2107 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2108 Addr.getOffsetReg();
2109 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2110 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2111 Addr.getExtendType() == AArch64_AM::SXTW)
2112 Idx++;
2113
2114 switch (VT.SimpleTy) {
2115 default: llvm_unreachable("Unexpected value type.");
2116 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2117 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2118 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2119 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2120 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2121 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2122 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2123 }
2124
2125 // Storing an i1 requires special handling.
2126 if (VTIsi1 && SrcReg != AArch64::WZR) {
2127 Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2128 assert(ANDReg && "Unexpected AND instruction emission failure.");
2129 SrcReg = ANDReg;
2130 }
2131 // Create the base instruction, then add the operands.
2132 const MCInstrDesc &II = TII.get(Opc);
2133 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2134 MachineInstrBuilder MIB =
2135 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2136 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2137
2138 return true;
2139}
2140
2141bool AArch64FastISel::selectStore(const Instruction *I) {
2142 MVT VT;
2143 const Value *Op0 = I->getOperand(0);
2144 // Verify we have a legal type before going any further. Currently, we handle
2145 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2146 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2147 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2148 return false;
2149
2150 const Value *PtrV = I->getOperand(1);
2151 if (TLI.supportSwiftError()) {
2152 // Swifterror values can come from either a function parameter with
2153 // swifterror attribute or an alloca with swifterror attribute.
2154 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2155 if (Arg->hasSwiftErrorAttr())
2156 return false;
2157 }
2158
2159 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2160 if (Alloca->isSwiftError())
2161 return false;
2162 }
2163 }
2164
2165 // Get the value to be stored into a register. Use the zero register directly
2166 // when possible to avoid an unnecessary copy and a wasted register.
2167 Register SrcReg;
2168 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2169 if (CI->isZero())
2170 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2171 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2172 if (CF->isZero() && !CF->isNegative()) {
2173 VT = MVT::getIntegerVT(VT.getSizeInBits());
2174 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2175 }
2176 }
2177
2178 if (!SrcReg)
2179 SrcReg = getRegForValue(Op0);
2180
2181 if (!SrcReg)
2182 return false;
2183
2184 auto *SI = cast<StoreInst>(I);
2185
2186 // Try to emit a STLR for seq_cst/release.
2187 if (SI->isAtomic()) {
2188 AtomicOrdering Ord = SI->getOrdering();
2189 // The non-atomic instructions are sufficient for relaxed stores.
2190 if (isReleaseOrStronger(Ord)) {
2191 // The STLR addressing mode only supports a base reg; pass that directly.
2192 Register AddrReg = getRegForValue(PtrV);
2193 if (!AddrReg)
2194 return false;
2195 return emitStoreRelease(VT, SrcReg, AddrReg,
2196 createMachineMemOperandFor(I));
2197 }
2198 }
2199
2200 // See if we can handle this address.
2201 Address Addr;
2202 if (!computeAddress(PtrV, Addr, Op0->getType()))
2203 return false;
2204
2205 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2206 return false;
2207 return true;
2208}
2209
2211 switch (Pred) {
2212 case CmpInst::FCMP_ONE:
2213 case CmpInst::FCMP_UEQ:
2214 default:
2215 // AL is our "false" for now. The other two need more compares.
2216 return AArch64CC::AL;
2217 case CmpInst::ICMP_EQ:
2218 case CmpInst::FCMP_OEQ:
2219 return AArch64CC::EQ;
2220 case CmpInst::ICMP_SGT:
2221 case CmpInst::FCMP_OGT:
2222 return AArch64CC::GT;
2223 case CmpInst::ICMP_SGE:
2224 case CmpInst::FCMP_OGE:
2225 return AArch64CC::GE;
2226 case CmpInst::ICMP_UGT:
2227 case CmpInst::FCMP_UGT:
2228 return AArch64CC::HI;
2229 case CmpInst::FCMP_OLT:
2230 return AArch64CC::MI;
2231 case CmpInst::ICMP_ULE:
2232 case CmpInst::FCMP_OLE:
2233 return AArch64CC::LS;
2234 case CmpInst::FCMP_ORD:
2235 return AArch64CC::VC;
2236 case CmpInst::FCMP_UNO:
2237 return AArch64CC::VS;
2238 case CmpInst::FCMP_UGE:
2239 return AArch64CC::PL;
2240 case CmpInst::ICMP_SLT:
2241 case CmpInst::FCMP_ULT:
2242 return AArch64CC::LT;
2243 case CmpInst::ICMP_SLE:
2244 case CmpInst::FCMP_ULE:
2245 return AArch64CC::LE;
2246 case CmpInst::FCMP_UNE:
2247 case CmpInst::ICMP_NE:
2248 return AArch64CC::NE;
2249 case CmpInst::ICMP_UGE:
2250 return AArch64CC::HS;
2251 case CmpInst::ICMP_ULT:
2252 return AArch64CC::LO;
2253 }
2254}
2255
2256/// Try to emit a combined compare-and-branch instruction.
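/// A compare against zero becomes CB(N)Z, a single-bit test (x & 2^n)
/// becomes TB(N)Z, and sign checks such as 'x < 0' test the sign bit with
/// TB(N)Z as well.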
2257bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2258 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2259 // will not be produced, as they are conditional branch instructions that do
2260 // not set flags.
2261 if (FuncInfo.MF->getFunction().hasFnAttribute(
2262 Attribute::SpeculativeLoadHardening))
2263 return false;
2264
2265 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2266 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2267 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2268
2269 const Value *LHS = CI->getOperand(0);
2270 const Value *RHS = CI->getOperand(1);
2271
2272 MVT VT;
2273 if (!isTypeSupported(LHS->getType(), VT))
2274 return false;
2275
2276 unsigned BW = VT.getSizeInBits();
2277 if (BW > 64)
2278 return false;
2279
2280 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2281 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2282
2283 // Try to take advantage of fallthrough opportunities.
2284 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2285 std::swap(TBB, FBB);
2286 Predicate = CmpInst::getInversePredicate(Predicate);
2287 }
2288
2289 int TestBit = -1;
2290 bool IsCmpNE;
2291 switch (Predicate) {
2292 default:
2293 return false;
2294 case CmpInst::ICMP_EQ:
2295 case CmpInst::ICMP_NE:
2296 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2297 std::swap(LHS, RHS);
2298
2299 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2300 return false;
2301
2302 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2303 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2304 const Value *AndLHS = AI->getOperand(0);
2305 const Value *AndRHS = AI->getOperand(1);
2306
2307 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2308 if (C->getValue().isPowerOf2())
2309 std::swap(AndLHS, AndRHS);
2310
2311 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2312 if (C->getValue().isPowerOf2()) {
2313 TestBit = C->getValue().logBase2();
2314 LHS = AndLHS;
2315 }
2316 }
2317
2318 if (VT == MVT::i1)
2319 TestBit = 0;
2320
2321 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2322 break;
2323 case CmpInst::ICMP_SLT:
2324 case CmpInst::ICMP_SGE:
2325 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2326 return false;
2327
2328 TestBit = BW - 1;
2329 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2330 break;
2331 case CmpInst::ICMP_SGT:
2332 case CmpInst::ICMP_SLE:
2333 if (!isa<ConstantInt>(RHS))
2334 return false;
2335
2336 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2337 return false;
2338
2339 TestBit = BW - 1;
2340 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2341 break;
2342 } // end switch
2343
2344 static const unsigned OpcTable[2][2][2] = {
2345 { {AArch64::CBZW, AArch64::CBZX },
2346 {AArch64::CBNZW, AArch64::CBNZX} },
2347 { {AArch64::TBZW, AArch64::TBZX },
2348 {AArch64::TBNZW, AArch64::TBNZX} }
2349 };
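// Indexed as OpcTable[IsBitTest][IsCmpNE][Is64Bit].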
2350
2351 bool IsBitTest = TestBit != -1;
2352 bool Is64Bit = BW == 64;
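// TB(N)Z can test bits 0-31 through the W register even when the value is
// 64 bits wide, so prefer the 32-bit form whenever the tested bit fits.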
2353 if (TestBit < 32 && TestBit >= 0)
2354 Is64Bit = false;
2355
2356 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2357 const MCInstrDesc &II = TII.get(Opc);
2358
2359 Register SrcReg = getRegForValue(LHS);
2360 if (!SrcReg)
2361 return false;
2362
2363 if (BW == 64 && !Is64Bit)
2364 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2365
2366 if ((BW < 32) && !IsBitTest)
2367 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2368
2369 // Emit the combined compare and branch instruction.
2370 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2371 MachineInstrBuilder MIB =
2372 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2373 .addReg(SrcReg);
2374 if (IsBitTest)
2375 MIB.addImm(TestBit);
2376 MIB.addMBB(TBB);
2377
2378 finishCondBranch(BI->getParent(), TBB, FBB);
2379 return true;
2380}
2381
2382bool AArch64FastISel::selectBranch(const Instruction *I) {
2383 const BranchInst *BI = cast<BranchInst>(I);
2384 if (BI->isUnconditional()) {
2385 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
2386 fastEmitBranch(MSucc, BI->getDebugLoc());
2387 return true;
2388 }
2389
2390 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
2391 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
2392
2393 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2394 if (CI->hasOneUse() && isValueAvailable(CI)) {
2395 // Try to optimize or fold the cmp.
2396 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2397 switch (Predicate) {
2398 default:
2399 break;
2400 case CmpInst::FCMP_FALSE:
2401 fastEmitBranch(FBB, MIMD.getDL());
2402 return true;
2403 case CmpInst::FCMP_TRUE:
2404 fastEmitBranch(TBB, MIMD.getDL());
2405 return true;
2406 }
2407
2408 // Try to emit a combined compare-and-branch first.
2409 if (emitCompareAndBranch(BI))
2410 return true;
2411
2412 // Try to take advantage of fallthrough opportunities.
2413 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2414 std::swap(TBB, FBB);
2415 Predicate = CmpInst::getInversePredicate(Predicate);
2416 }
2417
2418 // Emit the cmp.
2419 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2420 return false;
2421
2422 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2423 // instruction.
2424 AArch64CC::CondCode CC = getCompareCC(Predicate);
2425 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2426 switch (Predicate) {
2427 default:
2428 break;
2429 case CmpInst::FCMP_UEQ:
2430 ExtraCC = AArch64CC::EQ;
2431 CC = AArch64CC::VS;
2432 break;
2433 case CmpInst::FCMP_ONE:
2434 ExtraCC = AArch64CC::MI;
2435 CC = AArch64CC::GT;
2436 break;
2437 }
2438 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2439
2440 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2441 if (ExtraCC != AArch64CC::AL) {
2442 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2443 .addImm(ExtraCC)
2444 .addMBB(TBB);
2445 }
2446
2447 // Emit the branch.
2448 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2449 .addImm(CC)
2450 .addMBB(TBB);
2451
2452 finishCondBranch(BI->getParent(), TBB, FBB);
2453 return true;
2454 }
2455 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2456 uint64_t Imm = CI->getZExtValue();
2457 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2458 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2459 .addMBB(Target);
2460
2461 // Obtain the branch probability and add the target to the successor list.
2462 if (FuncInfo.BPI) {
2463 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2464 BI->getParent(), Target->getBasicBlock());
2465 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2466 } else
2467 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2468 return true;
2469 } else {
2470 AArch64CC::CondCode CC = AArch64CC::AL;
2471 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2472 // Fake request the condition, otherwise the intrinsic might be completely
2473 // optimized away.
2474 Register CondReg = getRegForValue(BI->getCondition());
2475 if (!CondReg)
2476 return false;
2477
2478 // Emit the branch.
2479 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2480 .addImm(CC)
2481 .addMBB(TBB);
2482
2483 finishCondBranch(BI->getParent(), TBB, FBB);
2484 return true;
2485 }
2486 }
2487
2488 Register CondReg = getRegForValue(BI->getCondition());
2489 if (!CondReg)
2490 return false;
2491
2492 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2493 unsigned Opcode = AArch64::TBNZW;
2494 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2495 std::swap(TBB, FBB);
2496 Opcode = AArch64::TBZW;
2497 }
2498
2499 const MCInstrDesc &II = TII.get(Opcode);
2500 Register ConstrainedCondReg
2501 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2502 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2503 .addReg(ConstrainedCondReg)
2504 .addImm(0)
2505 .addMBB(TBB);
2506
2507 finishCondBranch(BI->getParent(), TBB, FBB);
2508 return true;
2509}
2510
2511bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2512 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2513 Register AddrReg = getRegForValue(BI->getOperand(0));
2514 if (!AddrReg)
2515 return false;
2516
2517 // Authenticated indirectbr is not implemented yet.
2518 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2519 return false;
2520
2521 // Emit the indirect branch.
2522 const MCInstrDesc &II = TII.get(AArch64::BR);
2523 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2524 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2525
2526 // Make sure the CFG is up-to-date.
2527 for (const auto *Succ : BI->successors())
2528 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
2529
2530 return true;
2531}
2532
2533bool AArch64FastISel::selectCmp(const Instruction *I) {
2534 const CmpInst *CI = cast<CmpInst>(I);
2535
2536 // Vectors of i1 are weird: bail out.
2537 if (CI->getType()->isVectorTy())
2538 return false;
2539
2540 // Try to optimize or fold the cmp.
2541 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2542 Register ResultReg;
2543 switch (Predicate) {
2544 default:
2545 break;
2546 case CmpInst::FCMP_FALSE:
2547 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2548 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2549 TII.get(TargetOpcode::COPY), ResultReg)
2550 .addReg(AArch64::WZR, getKillRegState(true));
2551 break;
2552 case CmpInst::FCMP_TRUE:
2553 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2554 break;
2555 }
2556
2557 if (ResultReg) {
2558 updateValueMap(I, ResultReg);
2559 return true;
2560 }
2561
2562 // Emit the cmp.
2563 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2564 return false;
2565
2566 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2567
2568 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2569 // condition codes are inverted, because they are used by CSINC.
2570 static unsigned CondCodeTable[2][2] = {
2571 { AArch64CC::NE, AArch64CC::VC },
2572 { AArch64CC::PL, AArch64CC::LE }
2573 };
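// The two CSINC instructions below OR the conditions together: the first
// materializes the inverse of CondCodes[0] (e.g. EQ for FCMP_UEQ), and the
// second forces the result to 1 when the inverse of CondCodes[1] (e.g. VS)
// holds.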
2574 unsigned *CondCodes = nullptr;
2575 switch (Predicate) {
2576 default:
2577 break;
2578 case CmpInst::FCMP_UEQ:
2579 CondCodes = &CondCodeTable[0][0];
2580 break;
2581 case CmpInst::FCMP_ONE:
2582 CondCodes = &CondCodeTable[1][0];
2583 break;
2584 }
2585
2586 if (CondCodes) {
2587 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2588 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2589 TmpReg1)
2590 .addReg(AArch64::WZR, getKillRegState(true))
2591 .addReg(AArch64::WZR, getKillRegState(true))
2592 .addImm(CondCodes[0]);
2593 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2594 ResultReg)
2595 .addReg(TmpReg1, getKillRegState(true))
2596 .addReg(AArch64::WZR, getKillRegState(true))
2597 .addImm(CondCodes[1]);
2598
2599 updateValueMap(I, ResultReg);
2600 return true;
2601 }
2602
2603 // Now set a register based on the comparison.
2604 AArch64CC::CondCode CC = getCompareCC(Predicate);
2605 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2606 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2607 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2608 ResultReg)
2609 .addReg(AArch64::WZR, getKillRegState(true))
2610 .addReg(AArch64::WZR, getKillRegState(true))
2611 .addImm(invertedCC);
2612
2613 updateValueMap(I, ResultReg);
2614 return true;
2615}
2616
2617/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2618/// value.
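/// The identities used below are: select(c, 1, b) == c | b,
/// select(c, 0, b) == ~c & b (BIC), select(c, a, 1) == ~c | a (ORR with the
/// condition inverted), and select(c, a, 0) == c & a.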
2619bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2620 if (!SI->getType()->isIntegerTy(1))
2621 return false;
2622
2623 const Value *Src1Val, *Src2Val;
2624 unsigned Opc = 0;
2625 bool NeedExtraOp = false;
2626 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2627 if (CI->isOne()) {
2628 Src1Val = SI->getCondition();
2629 Src2Val = SI->getFalseValue();
2630 Opc = AArch64::ORRWrr;
2631 } else {
2632 assert(CI->isZero());
2633 Src1Val = SI->getFalseValue();
2634 Src2Val = SI->getCondition();
2635 Opc = AArch64::BICWrr;
2636 }
2637 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2638 if (CI->isOne()) {
2639 Src1Val = SI->getCondition();
2640 Src2Val = SI->getTrueValue();
2641 Opc = AArch64::ORRWrr;
2642 NeedExtraOp = true;
2643 } else {
2644 assert(CI->isZero());
2645 Src1Val = SI->getCondition();
2646 Src2Val = SI->getTrueValue();
2647 Opc = AArch64::ANDWrr;
2648 }
2649 }
2650
2651 if (!Opc)
2652 return false;
2653
2654 Register Src1Reg = getRegForValue(Src1Val);
2655 if (!Src1Reg)
2656 return false;
2657
2658 Register Src2Reg = getRegForValue(Src2Val);
2659 if (!Src2Reg)
2660 return false;
2661
2662 if (NeedExtraOp)
2663 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2664
2665 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2666 Src2Reg);
2667 updateValueMap(SI, ResultReg);
2668 return true;
2669}
2670
2671bool AArch64FastISel::selectSelect(const Instruction *I) {
2672 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2673 MVT VT;
2674 if (!isTypeSupported(I->getType(), VT))
2675 return false;
2676
2677 unsigned Opc;
2678 const TargetRegisterClass *RC;
2679 switch (VT.SimpleTy) {
2680 default:
2681 return false;
2682 case MVT::i1:
2683 case MVT::i8:
2684 case MVT::i16:
2685 case MVT::i32:
2686 Opc = AArch64::CSELWr;
2687 RC = &AArch64::GPR32RegClass;
2688 break;
2689 case MVT::i64:
2690 Opc = AArch64::CSELXr;
2691 RC = &AArch64::GPR64RegClass;
2692 break;
2693 case MVT::f32:
2694 Opc = AArch64::FCSELSrrr;
2695 RC = &AArch64::FPR32RegClass;
2696 break;
2697 case MVT::f64:
2698 Opc = AArch64::FCSELDrrr;
2699 RC = &AArch64::FPR64RegClass;
2700 break;
2701 }
2702
2703 const SelectInst *SI = cast<SelectInst>(I);
2704 const Value *Cond = SI->getCondition();
2705 AArch64CC::CondCode CC = AArch64CC::AL;
2706 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2707
2708 if (optimizeSelect(SI))
2709 return true;
2710
2711 // Try to pickup the flags, so we don't have to emit another compare.
2712 if (foldXALUIntrinsic(CC, I, Cond)) {
2713 // Fake request the condition to force emission of the XALU intrinsic.
2714 Register CondReg = getRegForValue(Cond);
2715 if (!CondReg)
2716 return false;
2717 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2718 isValueAvailable(Cond)) {
2719 const auto *Cmp = cast<CmpInst>(Cond);
2720 // Try to optimize or fold the cmp.
2721 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2722 const Value *FoldSelect = nullptr;
2723 switch (Predicate) {
2724 default:
2725 break;
2726 case CmpInst::FCMP_FALSE:
2727 FoldSelect = SI->getFalseValue();
2728 break;
2729 case CmpInst::FCMP_TRUE:
2730 FoldSelect = SI->getTrueValue();
2731 break;
2732 }
2733
2734 if (FoldSelect) {
2735 Register SrcReg = getRegForValue(FoldSelect);
2736 if (!SrcReg)
2737 return false;
2738
2739 updateValueMap(I, SrcReg);
2740 return true;
2741 }
2742
2743 // Emit the cmp.
2744 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2745 return false;
2746
2747 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2748 CC = getCompareCC(Predicate);
2749 switch (Predicate) {
2750 default:
2751 break;
2752 case CmpInst::FCMP_UEQ:
2753 ExtraCC = AArch64CC::EQ;
2754 CC = AArch64CC::VS;
2755 break;
2756 case CmpInst::FCMP_ONE:
2757 ExtraCC = AArch64CC::MI;
2758 CC = AArch64CC::GT;
2759 break;
2760 }
2761 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2762 } else {
2763 Register CondReg = getRegForValue(Cond);
2764 if (!CondReg)
2765 return false;
2766
2767 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2768 CondReg = constrainOperandRegClass(II, CondReg, 1);
2769
2770 // Emit a TST instruction (ANDS wzr, reg, #imm).
2771 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2772 AArch64::WZR)
2773 .addReg(CondReg)
2774 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2775 }
2776
2777 Register Src1Reg = getRegForValue(SI->getTrueValue());
2778 Register Src2Reg = getRegForValue(SI->getFalseValue());
2779
2780 if (!Src1Reg || !Src2Reg)
2781 return false;
2782
2783 if (ExtraCC != AArch64CC::AL)
2784 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2785
2786 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2787 updateValueMap(I, ResultReg);
2788 return true;
2789}
2790
2791bool AArch64FastISel::selectFPExt(const Instruction *I) {
2792 Value *V = I->getOperand(0);
2793 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2794 return false;
2795
2796 Register Op = getRegForValue(V);
2797 if (Op == 0)
2798 return false;
2799
2800 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2801 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2802 ResultReg).addReg(Op);
2803 updateValueMap(I, ResultReg);
2804 return true;
2805}
2806
2807bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2808 Value *V = I->getOperand(0);
2809 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2810 return false;
2811
2812 Register Op = getRegForValue(V);
2813 if (Op == 0)
2814 return false;
2815
2816 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2817 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2818 ResultReg).addReg(Op);
2819 updateValueMap(I, ResultReg);
2820 return true;
2821}
2822
2823// FPToUI and FPToSI
2824bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2825 MVT DestVT;
2826 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2827 return false;
2828
2829 Register SrcReg = getRegForValue(I->getOperand(0));
2830 if (!SrcReg)
2831 return false;
2832
2833 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2834 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2835 return false;
2836
2837 unsigned Opc;
2838 if (SrcVT == MVT::f64) {
2839 if (Signed)
2840 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2841 else
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2843 } else {
2844 if (Signed)
2845 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2846 else
2847 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2848 }
2849 Register ResultReg = createResultReg(
2850 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2851 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2852 .addReg(SrcReg);
2853 updateValueMap(I, ResultReg);
2854 return true;
2855}
2856
2857bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2858 MVT DestVT;
2859 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2860 return false;
2861 // Let regular ISEL handle FP16
2862 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2863 return false;
2864
2865 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2866 "Unexpected value type.");
2867
2868 Register SrcReg = getRegForValue(I->getOperand(0));
2869 if (!SrcReg)
2870 return false;
2871
2872 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2873
2874 // Handle sign-extension.
2875 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2876 SrcReg =
2877 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2878 if (!SrcReg)
2879 return false;
2880 }
2881
2882 unsigned Opc;
2883 if (SrcVT == MVT::i64) {
2884 if (Signed)
2885 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2886 else
2887 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2888 } else {
2889 if (Signed)
2890 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2891 else
2892 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2893 }
2894
2895 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2896 updateValueMap(I, ResultReg);
2897 return true;
2898}
2899
2900bool AArch64FastISel::fastLowerArguments() {
2901 if (!FuncInfo.CanLowerReturn)
2902 return false;
2903
2904 const Function *F = FuncInfo.Fn;
2905 if (F->isVarArg())
2906 return false;
2907
2908 CallingConv::ID CC = F->getCallingConv();
2909 if (CC != CallingConv::C && CC != CallingConv::Swift)
2910 return false;
2911
2912 if (Subtarget->hasCustomCallingConv())
2913 return false;
2914
2915 // Only handle simple cases of up to 8 GPR and FPR each.
2916 unsigned GPRCnt = 0;
2917 unsigned FPRCnt = 0;
2918 for (auto const &Arg : F->args()) {
2919 if (Arg.hasAttribute(Attribute::ByVal) ||
2920 Arg.hasAttribute(Attribute::InReg) ||
2921 Arg.hasAttribute(Attribute::StructRet) ||
2922 Arg.hasAttribute(Attribute::SwiftSelf) ||
2923 Arg.hasAttribute(Attribute::SwiftAsync) ||
2924 Arg.hasAttribute(Attribute::SwiftError) ||
2925 Arg.hasAttribute(Attribute::Nest))
2926 return false;
2927
2928 Type *ArgTy = Arg.getType();
2929 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2930 return false;
2931
2932 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2933 if (!ArgVT.isSimple())
2934 return false;
2935
2936 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2937 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2938 return false;
2939
2940 if (VT.isVector() &&
2941 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2942 return false;
2943
2944 if (VT >= MVT::i1 && VT <= MVT::i64)
2945 ++GPRCnt;
2946 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2947 VT.is128BitVector())
2948 ++FPRCnt;
2949 else
2950 return false;
2951
2952 if (GPRCnt > 8 || FPRCnt > 8)
2953 return false;
2954 }
2955
2956 static const MCPhysReg Registers[6][8] = {
2957 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2958 AArch64::W5, AArch64::W6, AArch64::W7 },
2959 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2960 AArch64::X5, AArch64::X6, AArch64::X7 },
2961 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2962 AArch64::H5, AArch64::H6, AArch64::H7 },
2963 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2964 AArch64::S5, AArch64::S6, AArch64::S7 },
2965 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2966 AArch64::D5, AArch64::D6, AArch64::D7 },
2967 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2968 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2969 };
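// Rows: W (i1-i32), X (i64), H (f16/bf16), S (f32), D (f64 and 64-bit
// vectors), Q (128-bit vectors).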
2970
2971 unsigned GPRIdx = 0;
2972 unsigned FPRIdx = 0;
2973 for (auto const &Arg : F->args()) {
2974 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2975 unsigned SrcReg;
2976 const TargetRegisterClass *RC;
2977 if (VT >= MVT::i1 && VT <= MVT::i32) {
2978 SrcReg = Registers[0][GPRIdx++];
2979 RC = &AArch64::GPR32RegClass;
2980 VT = MVT::i32;
2981 } else if (VT == MVT::i64) {
2982 SrcReg = Registers[1][GPRIdx++];
2983 RC = &AArch64::GPR64RegClass;
2984 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2985 SrcReg = Registers[2][FPRIdx++];
2986 RC = &AArch64::FPR16RegClass;
2987 } else if (VT == MVT::f32) {
2988 SrcReg = Registers[3][FPRIdx++];
2989 RC = &AArch64::FPR32RegClass;
2990 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2991 SrcReg = Registers[4][FPRIdx++];
2992 RC = &AArch64::FPR64RegClass;
2993 } else if (VT.is128BitVector()) {
2994 SrcReg = Registers[5][FPRIdx++];
2995 RC = &AArch64::FPR128RegClass;
2996 } else
2997 llvm_unreachable("Unexpected value type.");
2998
2999 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3000 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3001 // Without this, EmitLiveInCopies may eliminate the livein if its only
3002 // use is a bitcast (which isn't turned into an instruction).
3003 Register ResultReg = createResultReg(RC);
3004 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3005 TII.get(TargetOpcode::COPY), ResultReg)
3006 .addReg(DstReg, getKillRegState(true));
3007 updateValueMap(&Arg, ResultReg);
3008 }
3009 return true;
3010}
3011
3012bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3013 SmallVectorImpl<MVT> &OutVTs,
3014 SmallVectorImpl<Type *> &OrigTys,
3015 unsigned &NumBytes) {
3016 CallingConv::ID CC = CLI.CallConv;
3017 SmallVector<CCValAssign, 16> ArgLocs;
3018 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3019 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, OrigTys,
3020 CCAssignFnForCall(CC));
3021
3022 // Get a count of how many bytes are to be pushed on the stack.
3023 NumBytes = CCInfo.getStackSize();
3024
3025 // Issue CALLSEQ_START
3026 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3028 .addImm(NumBytes).addImm(0);
3029
3030 // Process the args.
3031 for (CCValAssign &VA : ArgLocs) {
3032 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3033 MVT ArgVT = OutVTs[VA.getValNo()];
3034
3035 Register ArgReg = getRegForValue(ArgVal);
3036 if (!ArgReg)
3037 return false;
3038
3039 // Handle arg promotion: SExt, ZExt, AExt.
3040 switch (VA.getLocInfo()) {
3041 case CCValAssign::Full:
3042 break;
3043 case CCValAssign::SExt: {
3044 MVT DestVT = VA.getLocVT();
3045 MVT SrcVT = ArgVT;
3046 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3047 if (!ArgReg)
3048 return false;
3049 break;
3050 }
3051 case CCValAssign::AExt:
3052 // Intentional fall-through.
3053 case CCValAssign::ZExt: {
3054 MVT DestVT = VA.getLocVT();
3055 MVT SrcVT = ArgVT;
3056 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3057 if (!ArgReg)
3058 return false;
3059 break;
3060 }
3061 default:
3062 llvm_unreachable("Unknown arg promotion!");
3063 }
3064
3065 // Now copy/store arg to correct locations.
3066 if (VA.isRegLoc() && !VA.needsCustom()) {
3067 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3068 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3069 CLI.OutRegs.push_back(VA.getLocReg());
3070 } else if (VA.needsCustom()) {
3071 // FIXME: Handle custom args.
3072 return false;
3073 } else {
3074 assert(VA.isMemLoc() && "Assuming store on stack.");
3075
3076 // Don't emit stores for undef values.
3077 if (isa<UndefValue>(ArgVal))
3078 continue;
3079
3080 // Need to store on the stack.
3081 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3082
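// On big-endian targets a sub-8-byte stack argument is expected in the
// higher-addressed bytes of its 8-byte slot, so bump the store offset by
// the difference.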
3083 unsigned BEAlign = 0;
3084 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3085 BEAlign = 8 - ArgSize;
3086
3087 Address Addr;
3088 Addr.setKind(Address::RegBase);
3089 Addr.setReg(AArch64::SP);
3090 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3091
3092 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3093 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3094 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3095 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3096
3097 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3098 return false;
3099 }
3100 }
3101 return true;
3102}
3103
3104bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3105 CallingConv::ID CC = CLI.CallConv;
3106
3107 // Issue CALLSEQ_END
3108 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3109 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3110 .addImm(NumBytes).addImm(0);
3111
3112 // Now the return values.
3113 SmallVector<CCValAssign, 16> RVLocs;
3114 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3115 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3116
3117 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3118 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3119 CCValAssign &VA = RVLocs[i];
3120 MVT CopyVT = VA.getValVT();
3121 Register CopyReg = ResultReg + i;
3122
3123 // TODO: Handle big-endian results
3124 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3125 return false;
3126
3127 // Copy result out of their specified physreg.
3128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3129 CopyReg)
3130 .addReg(VA.getLocReg());
3131 CLI.InRegs.push_back(VA.getLocReg());
3132 }
3133
3134 CLI.ResultReg = ResultReg;
3135 CLI.NumResultRegs = RVLocs.size();
3136
3137 return true;
3138}
3139
3140bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3141 CallingConv::ID CC = CLI.CallConv;
3142 bool IsTailCall = CLI.IsTailCall;
3143 bool IsVarArg = CLI.IsVarArg;
3144 const Value *Callee = CLI.Callee;
3145 MCSymbol *Symbol = CLI.Symbol;
3146
3147 if (!Callee && !Symbol)
3148 return false;
3149
3150 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3151 // a bti instruction following the call.
3152 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3153 !Subtarget->noBTIAtReturnTwice() &&
3154 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3158 if (CLI.CB && CLI.CB->isIndirectCall() &&
3159 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3160 return false;
3161
3162 // Allow SelectionDAG isel to handle tail calls.
3163 if (IsTailCall)
3164 return false;
3165
3166 // FIXME: we could and should support this, but for now correctness at -O0 is
3167 // more important.
3168 if (Subtarget->isTargetILP32())
3169 return false;
3170
3171 CodeModel::Model CM = TM.getCodeModel();
3172 // Only support the small-addressing and large code models.
3173 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3174 return false;
3175
3176 // FIXME: Add large code model support for ELF.
3177 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3178 return false;
3179
3180 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3181 // attribute. Check "RtLibUseGOT" instead.
3182 if (MF->getFunction().getParent()->getRtLibUseGOT())
3183 return false;
3184
3185 // Let SDISel handle vararg functions.
3186 if (IsVarArg)
3187 return false;
3188
3189 if (Subtarget->isWindowsArm64EC())
3190 return false;
3191
3192 for (auto Flag : CLI.OutFlags)
3193 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3194 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3195 return false;
3196
3197 // Set up the argument vectors.
3198 SmallVector<MVT, 16> OutVTs;
3199 SmallVector<Type *, 16> OrigTys;
3200 OutVTs.reserve(CLI.OutVals.size());
3201
3202 for (auto *Val : CLI.OutVals) {
3203 MVT VT;
3204 if (!isTypeLegal(Val->getType(), VT) &&
3205 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3206 return false;
3207
3208 // We don't handle vector parameters yet.
3209 if (VT.isVector() || VT.getSizeInBits() > 64)
3210 return false;
3211
3212 OutVTs.push_back(VT);
3213 OrigTys.push_back(Val->getType());
3214 }
3215
3216 Address Addr;
3217 if (Callee && !computeCallAddress(Callee, Addr))
3218 return false;
3219
3220 // The weak function target may be zero; in that case we must use indirect
3221 // addressing via a stub on Windows as it may be out of range for a
3222 // PC-relative jump.
3223 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3224 Addr.getGlobalValue()->hasExternalWeakLinkage())
3225 return false;
3226
3227 // Handle the arguments now that we've gotten them.
3228 unsigned NumBytes;
3229 if (!processCallArgs(CLI, OutVTs, OrigTys, NumBytes))
3230 return false;
3231
3232 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3233 if (RegInfo->isAnyArgRegReserved(*MF))
3234 RegInfo->emitReservedArgRegCallError(*MF);
3235
3236 // Issue the call.
3237 MachineInstrBuilder MIB;
3238 if (Subtarget->useSmallAddressing()) {
3239 const MCInstrDesc &II =
3240 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3241 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3242 if (Symbol)
3243 MIB.addSym(Symbol, 0);
3244 else if (Addr.getGlobalValue())
3245 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3246 else if (Addr.getReg()) {
3247 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3248 MIB.addReg(Reg);
3249 } else
3250 return false;
3251 } else {
3252 Register CallReg;
3253 if (Symbol) {
3254 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3255 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3256 ADRPReg)
3257 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3258
3259 CallReg = createResultReg(&AArch64::GPR64RegClass);
3260 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3261 TII.get(AArch64::LDRXui), CallReg)
3262 .addReg(ADRPReg)
3263 .addSym(Symbol,
3264 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3265 } else if (Addr.getGlobalValue())
3266 CallReg = materializeGV(Addr.getGlobalValue());
3267 else if (Addr.getReg())
3268 CallReg = Addr.getReg();
3269
3270 if (!CallReg)
3271 return false;
3272
3273 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3274 CallReg = constrainOperandRegClass(II, CallReg, 0);
3275 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3276 }
3277
3278 // Add implicit physical register uses to the call.
3279 for (auto Reg : CLI.OutRegs)
3280 MIB.addReg(Reg, RegState::Implicit);
3281
3282 // Add a register mask with the call-preserved registers.
3283 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3284 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3285
3286 CLI.Call = MIB;
3287
3288 // Finish off the call including any return values.
3289 return finishCall(CLI, NumBytes);
3290}
3291
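// A copy is "small" if it needs at most four accesses of the alignment width
// (e.g. 32 bytes at 8-byte alignment) or, with unknown alignment, is under
// 32 bytes.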
3292bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3293 if (Alignment)
3294 return Len / Alignment->value() <= 4;
3295 else
3296 return Len < 32;
3297}
3298
3299bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3300 uint64_t Len, MaybeAlign Alignment) {
3301 // Make sure we don't bloat code by inlining very large memcpy's.
3302 if (!isMemCpySmall(Len, Alignment))
3303 return false;
3304
3305 int64_t UnscaledOffset = 0;
3306 Address OrigDest = Dest;
3307 Address OrigSrc = Src;
3308
3309 while (Len) {
3310 MVT VT;
3311 if (!Alignment || *Alignment >= 8) {
3312 if (Len >= 8)
3313 VT = MVT::i64;
3314 else if (Len >= 4)
3315 VT = MVT::i32;
3316 else if (Len >= 2)
3317 VT = MVT::i16;
3318 else {
3319 VT = MVT::i8;
3320 }
3321 } else {
3322 assert(Alignment && "Alignment is set in this branch");
3323 // Bound based on alignment.
3324 if (Len >= 4 && *Alignment == 4)
3325 VT = MVT::i32;
3326 else if (Len >= 2 && *Alignment == 2)
3327 VT = MVT::i16;
3328 else {
3329 VT = MVT::i8;
3330 }
3331 }
3332
3333 Register ResultReg = emitLoad(VT, VT, Src);
3334 if (!ResultReg)
3335 return false;
3336
3337 if (!emitStore(VT, ResultReg, Dest))
3338 return false;
3339
3340 int64_t Size = VT.getSizeInBits() / 8;
3341 Len -= Size;
3342 UnscaledOffset += Size;
3343
3344 // We need to recompute the unscaled offset for each iteration.
3345 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3346 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3347 }
3348
3349 return true;
3350}
3351
3352/// Check if it is possible to fold the condition from the XALU intrinsic
3353/// into the user. The condition code will only be updated on success.
3354bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3355 const Instruction *I,
3356 const Value *Cond) {
3357 if (!isa<ExtractValueInst>(Cond))
3358 return false;
3359
3360 const auto *EV = cast<ExtractValueInst>(Cond);
3361 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3362 return false;
3363
3364 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3365 MVT RetVT;
3366 const Function *Callee = II->getCalledFunction();
3367 Type *RetTy =
3368 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3369 if (!isTypeLegal(RetTy, RetVT))
3370 return false;
3371
3372 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3373 return false;
3374
3375 const Value *LHS = II->getArgOperand(0);
3376 const Value *RHS = II->getArgOperand(1);
3377
3378 // Canonicalize immediate to the RHS.
3379 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3380 std::swap(LHS, RHS);
3381
3382 // Simplify multiplies.
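// x * 2 overflows exactly when x + x does, so the multiply-with-overflow
// checks can reuse the add-with-overflow condition codes.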
3383 Intrinsic::ID IID = II->getIntrinsicID();
3384 switch (IID) {
3385 default:
3386 break;
3387 case Intrinsic::smul_with_overflow:
3388 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3389 if (C->getValue() == 2)
3390 IID = Intrinsic::sadd_with_overflow;
3391 break;
3392 case Intrinsic::umul_with_overflow:
3393 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3394 if (C->getValue() == 2)
3395 IID = Intrinsic::uadd_with_overflow;
3396 break;
3397 }
3398
3399 AArch64CC::CondCode TmpCC;
3400 switch (IID) {
3401 default:
3402 return false;
3403 case Intrinsic::sadd_with_overflow:
3404 case Intrinsic::ssub_with_overflow:
3405 TmpCC = AArch64CC::VS;
3406 break;
3407 case Intrinsic::uadd_with_overflow:
3408 TmpCC = AArch64CC::HS;
3409 break;
3410 case Intrinsic::usub_with_overflow:
3411 TmpCC = AArch64CC::LO;
3412 break;
3413 case Intrinsic::smul_with_overflow:
3414 case Intrinsic::umul_with_overflow:
3415 TmpCC = AArch64CC::NE;
3416 break;
3417 }
3418
3419 // Check if both instructions are in the same basic block.
3420 if (!isValueAvailable(II))
3421 return false;
3422
3423 // Make sure nothing is in the way
3424 BasicBlock::const_iterator Start(I);
3425 BasicBlock::const_iterator End(II);
3426 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3427 // We only expect extractvalue instructions between the intrinsic and the
3428 // instruction to be selected.
3429 if (!isa<ExtractValueInst>(Itr))
3430 return false;
3431
3432 // Check that the extractvalue operand comes from the intrinsic.
3433 const auto *EVI = cast<ExtractValueInst>(Itr);
3434 if (EVI->getAggregateOperand() != II)
3435 return false;
3436 }
3437
3438 CC = TmpCC;
3439 return true;
3440}
3441
3442bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3443 // FIXME: Handle more intrinsics.
3444 switch (II->getIntrinsicID()) {
3445 default: return false;
3446 case Intrinsic::frameaddress: {
3447 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3448 MFI.setFrameAddressIsTaken(true);
3449
3450 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3451 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3452 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3453 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3454 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3455 // Recursively load frame address
3456 // ldr x0, [fp]
3457 // ldr x0, [x0]
3458 // ldr x0, [x0]
3459 // ...
3460 Register DestReg;
3461 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3462 while (Depth--) {
3463 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3464 SrcReg, 0);
3465 assert(DestReg && "Unexpected LDR instruction emission failure.");
3466 SrcReg = DestReg;
3467 }
3468
3469 updateValueMap(II, SrcReg);
3470 return true;
3471 }
3472 case Intrinsic::sponentry: {
3473 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3474
3475 // SP = FP + Fixed Object + 16
3476 int FI = MFI.CreateFixedObject(4, 0, false);
3477 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3479 TII.get(AArch64::ADDXri), ResultReg)
3480 .addFrameIndex(FI)
3481 .addImm(0)
3482 .addImm(0);
3483
3484 updateValueMap(II, ResultReg);
3485 return true;
3486 }
3487 case Intrinsic::memcpy:
3488 case Intrinsic::memmove: {
3489 const auto *MTI = cast<MemTransferInst>(II);
3490 // Don't handle volatile.
3491 if (MTI->isVolatile())
3492 return false;
3493
3494 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3495 // we would emit dead code because we don't currently handle memmoves.
3496 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3497 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3498 // Small memcpy's are common enough that we want to do them without a call
3499 // if possible.
3500 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3501 MaybeAlign Alignment;
3502 if (MTI->getDestAlign() || MTI->getSourceAlign())
3503 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3504 MTI->getSourceAlign().valueOrOne());
3505 if (isMemCpySmall(Len, Alignment)) {
3506 Address Dest, Src;
3507 if (!computeAddress(MTI->getRawDest(), Dest) ||
3508 !computeAddress(MTI->getRawSource(), Src))
3509 return false;
3510 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3511 return true;
3512 }
3513 }
3514
3515 if (!MTI->getLength()->getType()->isIntegerTy(64))
3516 return false;
3517
3518 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3519 // Fast instruction selection doesn't support the special
3520 // address spaces.
3521 return false;
3522
3523 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3524 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3525 }
3526 case Intrinsic::memset: {
3527 const MemSetInst *MSI = cast<MemSetInst>(II);
3528 // Don't handle volatile.
3529 if (MSI->isVolatile())
3530 return false;
3531
3532 if (!MSI->getLength()->getType()->isIntegerTy(64))
3533 return false;
3534
3535 if (MSI->getDestAddressSpace() > 255)
3536 // Fast instruction selection doesn't support the special
3537 // address spaces.
3538 return false;
3539
3540 return lowerCallTo(II, "memset", II->arg_size() - 1);
3541 }
3542 case Intrinsic::sin:
3543 case Intrinsic::cos:
3544 case Intrinsic::tan:
3545 case Intrinsic::pow: {
3546 MVT RetVT;
3547 if (!isTypeLegal(II->getType(), RetVT))
3548 return false;
3549
3550 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3551 return false;
3552
3553 static const RTLIB::Libcall LibCallTable[4][2] = {
3554 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3555 {RTLIB::COS_F32, RTLIB::COS_F64},
3556 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3557 {RTLIB::POW_F32, RTLIB::POW_F64}};
3558 RTLIB::Libcall LC;
3559 bool Is64Bit = RetVT == MVT::f64;
3560 switch (II->getIntrinsicID()) {
3561 default:
3562 llvm_unreachable("Unexpected intrinsic.");
3563 case Intrinsic::sin:
3564 LC = LibCallTable[0][Is64Bit];
3565 break;
3566 case Intrinsic::cos:
3567 LC = LibCallTable[1][Is64Bit];
3568 break;
3569 case Intrinsic::tan:
3570 LC = LibCallTable[2][Is64Bit];
3571 break;
3572 case Intrinsic::pow:
3573 LC = LibCallTable[3][Is64Bit];
3574 break;
3575 }
3576
3577 ArgListTy Args;
3578 Args.reserve(II->arg_size());
3579
3580 // Populate the argument list.
3581 for (auto &Arg : II->args())
3582 Args.emplace_back(Arg);
3583
3584 CallLoweringInfo CLI;
3585 MCContext &Ctx = MF->getContext();
3586 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3587 TLI.getLibcallName(LC), std::move(Args));
3588 if (!lowerCallTo(CLI))
3589 return false;
3590 updateValueMap(II, CLI.ResultReg);
3591 return true;
3592 }
3593 case Intrinsic::fabs: {
3594 MVT VT;
3595 if (!isTypeLegal(II->getType(), VT))
3596 return false;
3597
3598 unsigned Opc;
3599 switch (VT.SimpleTy) {
3600 default:
3601 return false;
3602 case MVT::f32:
3603 Opc = AArch64::FABSSr;
3604 break;
3605 case MVT::f64:
3606 Opc = AArch64::FABSDr;
3607 break;
3608 }
3609 Register SrcReg = getRegForValue(II->getOperand(0));
3610 if (!SrcReg)
3611 return false;
3612 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3613 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3614 .addReg(SrcReg);
3615 updateValueMap(II, ResultReg);
3616 return true;
3617 }
3618 case Intrinsic::trap:
3619 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3620 .addImm(1);
3621 return true;
3622 case Intrinsic::debugtrap:
3623 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3624 .addImm(0xF000);
3625 return true;
3626
3627 case Intrinsic::sqrt: {
3628 Type *RetTy = II->getCalledFunction()->getReturnType();
3629
3630 MVT VT;
3631 if (!isTypeLegal(RetTy, VT))
3632 return false;
3633
3634 Register Op0Reg = getRegForValue(II->getOperand(0));
3635 if (!Op0Reg)
3636 return false;
3637
3638 Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3639 if (!ResultReg)
3640 return false;
3641
3642 updateValueMap(II, ResultReg);
3643 return true;
3644 }
3645 case Intrinsic::sadd_with_overflow:
3646 case Intrinsic::uadd_with_overflow:
3647 case Intrinsic::ssub_with_overflow:
3648 case Intrinsic::usub_with_overflow:
3649 case Intrinsic::smul_with_overflow:
3650 case Intrinsic::umul_with_overflow: {
3651 // This implements the basic lowering of the xalu with overflow intrinsics.
3652 const Function *Callee = II->getCalledFunction();
3653 auto *Ty = cast<StructType>(Callee->getReturnType());
3654 Type *RetTy = Ty->getTypeAtIndex(0U);
3655
3656 MVT VT;
3657 if (!isTypeLegal(RetTy, VT))
3658 return false;
3659
3660 if (VT != MVT::i32 && VT != MVT::i64)
3661 return false;
3662
3663 const Value *LHS = II->getArgOperand(0);
3664 const Value *RHS = II->getArgOperand(1);
3665 // Canonicalize immediate to the RHS.
3666 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3667 std::swap(LHS, RHS);
3668
3669 // Simplify multiplies.
3670 Intrinsic::ID IID = II->getIntrinsicID();
3671 switch (IID) {
3672 default:
3673 break;
3674 case Intrinsic::smul_with_overflow:
3675 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3676 if (C->getValue() == 2) {
3677 IID = Intrinsic::sadd_with_overflow;
3678 RHS = LHS;
3679 }
3680 break;
3681 case Intrinsic::umul_with_overflow:
3682 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3683 if (C->getValue() == 2) {
3684 IID = Intrinsic::uadd_with_overflow;
3685 RHS = LHS;
3686 }
3687 break;
3688 }
3689
3690 Register ResultReg1, ResultReg2, MulReg;
3691    AArch64CC::CondCode CC = AArch64CC::AL;
3692 switch (IID) {
3693 default: llvm_unreachable("Unexpected intrinsic!");
3694 case Intrinsic::sadd_with_overflow:
3695 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3696 CC = AArch64CC::VS;
3697 break;
3698 case Intrinsic::uadd_with_overflow:
3699 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3700 CC = AArch64CC::HS;
3701 break;
3702 case Intrinsic::ssub_with_overflow:
3703 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3704 CC = AArch64CC::VS;
3705 break;
3706 case Intrinsic::usub_with_overflow:
3707 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3708 CC = AArch64CC::LO;
3709 break;
3710 case Intrinsic::smul_with_overflow: {
3711 CC = AArch64CC::NE;
3712 Register LHSReg = getRegForValue(LHS);
3713 if (!LHSReg)
3714 return false;
3715
3716 Register RHSReg = getRegForValue(RHS);
3717 if (!RHSReg)
3718 return false;
3719
3720 if (VT == MVT::i32) {
3721 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3722 Register MulSubReg =
3723 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3724 // cmp xreg, wreg, sxtw
3725 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3726 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3727 /*WantResult=*/false);
3728 MulReg = MulSubReg;
3729 } else {
3730 assert(VT == MVT::i64 && "Unexpected value type.");
3731 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3732 // reused in the next instruction.
3733 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3734 Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3735 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3736 /*WantResult=*/false);
3737 }
3738 break;
3739 }
3740 case Intrinsic::umul_with_overflow: {
3741 CC = AArch64CC::NE;
3742 Register LHSReg = getRegForValue(LHS);
3743 if (!LHSReg)
3744 return false;
3745
3746 Register RHSReg = getRegForValue(RHS);
3747 if (!RHSReg)
3748 return false;
3749
3750 if (VT == MVT::i32) {
3751 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3752 // tst xreg, #0xffffffff00000000
3753 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3754 TII.get(AArch64::ANDSXri), AArch64::XZR)
3755 .addReg(MulReg)
3756 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3757 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3758 } else {
3759 assert(VT == MVT::i64 && "Unexpected value type.");
3760 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3761 // reused in the next instruction.
3762 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3763 Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3764 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3765 }
3766 break;
3767 }
3768 }
3769
3770 if (MulReg) {
3771 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3772 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3773 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3774 }
3775
3776 if (!ResultReg1)
3777 return false;
3778
3779 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3780 AArch64::WZR, AArch64::WZR,
3781 getInvertedCondCode(CC));
3782 (void)ResultReg2;
3783 assert((ResultReg1 + 1) == ResultReg2 &&
3784 "Nonconsecutive result registers.");
3785 updateValueMap(II, ResultReg1, 2);
3786 return true;
3787 }
3788 case Intrinsic::aarch64_crc32b:
3789 case Intrinsic::aarch64_crc32h:
3790 case Intrinsic::aarch64_crc32w:
3791 case Intrinsic::aarch64_crc32x:
3792 case Intrinsic::aarch64_crc32cb:
3793 case Intrinsic::aarch64_crc32ch:
3794 case Intrinsic::aarch64_crc32cw:
3795 case Intrinsic::aarch64_crc32cx: {
3796 if (!Subtarget->hasCRC())
3797 return false;
3798
3799 unsigned Opc;
3800 switch (II->getIntrinsicID()) {
3801 default:
3802 llvm_unreachable("Unexpected intrinsic!");
3803 case Intrinsic::aarch64_crc32b:
3804 Opc = AArch64::CRC32Brr;
3805 break;
3806 case Intrinsic::aarch64_crc32h:
3807 Opc = AArch64::CRC32Hrr;
3808 break;
3809 case Intrinsic::aarch64_crc32w:
3810 Opc = AArch64::CRC32Wrr;
3811 break;
3812 case Intrinsic::aarch64_crc32x:
3813 Opc = AArch64::CRC32Xrr;
3814 break;
3815 case Intrinsic::aarch64_crc32cb:
3816 Opc = AArch64::CRC32CBrr;
3817 break;
3818 case Intrinsic::aarch64_crc32ch:
3819 Opc = AArch64::CRC32CHrr;
3820 break;
3821 case Intrinsic::aarch64_crc32cw:
3822 Opc = AArch64::CRC32CWrr;
3823 break;
3824 case Intrinsic::aarch64_crc32cx:
3825 Opc = AArch64::CRC32CXrr;
3826 break;
3827 }
3828
3829 Register LHSReg = getRegForValue(II->getArgOperand(0));
3830 Register RHSReg = getRegForValue(II->getArgOperand(1));
3831 if (!LHSReg || !RHSReg)
3832 return false;
3833
3834 Register ResultReg =
3835 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3836 updateValueMap(II, ResultReg);
3837 return true;
3838 }
3839 }
3840 return false;
3841}
3842
3843bool AArch64FastISel::selectRet(const Instruction *I) {
3844 const ReturnInst *Ret = cast<ReturnInst>(I);
3845 const Function &F = *I->getParent()->getParent();
3846
3847 if (!FuncInfo.CanLowerReturn)
3848 return false;
3849
3850 if (F.isVarArg())
3851 return false;
3852
3853 if (TLI.supportSwiftError() &&
3854 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3855 return false;
3856
3857 if (TLI.supportSplitCSR(FuncInfo.MF))
3858 return false;
3859
3860 // Build a list of return value registers.
3861  SmallVector<Register, 4> RetRegs;
3862
3863 if (Ret->getNumOperands() > 0) {
3864 CallingConv::ID CC = F.getCallingConv();
3865    SmallVector<ISD::OutputArg, 4> Outs;
3866 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3867
3868 // Analyze operands of the call, assigning locations to each operand.
3869    SmallVector<CCValAssign, 16> ValLocs;
3870 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3871 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3872
3873 // Only handle a single return value for now.
3874 if (ValLocs.size() != 1)
3875 return false;
3876
3877 CCValAssign &VA = ValLocs[0];
3878 const Value *RV = Ret->getOperand(0);
3879
3880 // Don't bother handling odd stuff for now.
3881 if ((VA.getLocInfo() != CCValAssign::Full) &&
3882 (VA.getLocInfo() != CCValAssign::BCvt))
3883 return false;
3884
3885 // Only handle register returns for now.
3886 if (!VA.isRegLoc())
3887 return false;
3888
3889 Register Reg = getRegForValue(RV);
3890 if (!Reg)
3891 return false;
3892
3893 Register SrcReg = Reg + VA.getValNo();
3894 Register DestReg = VA.getLocReg();
3895 // Avoid a cross-class copy. This is very unlikely.
3896 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3897 return false;
3898
3899 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3900 if (!RVEVT.isSimple())
3901 return false;
3902
3903 // Vectors (of > 1 lane) in big endian need tricky handling.
3904 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3905 !Subtarget->isLittleEndian())
3906 return false;
3907
3908 MVT RVVT = RVEVT.getSimpleVT();
3909 if (RVVT == MVT::f128)
3910 return false;
3911
3912 MVT DestVT = VA.getValVT();
3913 // Special handling for extended integers.
3914 if (RVVT != DestVT) {
3915 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3916 return false;
3917
3918 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3919 return false;
3920
3921 bool IsZExt = Outs[0].Flags.isZExt();
3922 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3923 if (!SrcReg)
3924 return false;
3925 }
3926
3927 // "Callee" (i.e. value producer) zero extends pointers at function
3928 // boundary.
3929 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3930 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3931
3932 // Make the copy.
3933 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3934 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3935
3936 // Add register to return instruction.
3937 RetRegs.push_back(VA.getLocReg());
3938 }
3939
3940 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3941 TII.get(AArch64::RET_ReallyLR));
3942 for (Register RetReg : RetRegs)
3943 MIB.addReg(RetReg, RegState::Implicit);
3944 return true;
3945}
3946
3947bool AArch64FastISel::selectTrunc(const Instruction *I) {
3948 Type *DestTy = I->getType();
3949 Value *Op = I->getOperand(0);
3950 Type *SrcTy = Op->getType();
3951
3952 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3953 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3954 if (!SrcEVT.isSimple())
3955 return false;
3956 if (!DestEVT.isSimple())
3957 return false;
3958
3959 MVT SrcVT = SrcEVT.getSimpleVT();
3960 MVT DestVT = DestEVT.getSimpleVT();
3961
3962 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3963 SrcVT != MVT::i8)
3964 return false;
3965 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3966 DestVT != MVT::i1)
3967 return false;
3968
3969 Register SrcReg = getRegForValue(Op);
3970 if (!SrcReg)
3971 return false;
3972
3973  // If we're truncating from i64 to a smaller non-legal type then generate an
3974  // AND. Otherwise, we know the high bits are undefined and a truncate only
3975  // generates a COPY. We cannot also mark the source register as the result
3976  // register, because this can incorrectly transfer the kill flag onto the
3977  // source register.
3978 Register ResultReg;
3979 if (SrcVT == MVT::i64) {
3980 uint64_t Mask = 0;
3981 switch (DestVT.SimpleTy) {
3982 default:
3983 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3984 return false;
3985 case MVT::i1:
3986 Mask = 0x1;
3987 break;
3988 case MVT::i8:
3989 Mask = 0xff;
3990 break;
3991 case MVT::i16:
3992 Mask = 0xffff;
3993 break;
3994 }
3995 // Issue an extract_subreg to get the lower 32-bits.
3996 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3997 AArch64::sub_32);
3998 // Create the AND instruction which performs the actual truncation.
3999 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4000 assert(ResultReg && "Unexpected AND instruction emission failure.");
4001 } else {
4002 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4003 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4004 TII.get(TargetOpcode::COPY), ResultReg)
4005 .addReg(SrcReg);
4006 }
4007
4008 updateValueMap(I, ResultReg);
4009 return true;
4010}
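// A rough sketch of the i64 path above for `trunc i64 %x to i8` (virtual
// register names are illustrative only): the low 32 bits are taken through the
// sub_32 subregister and then masked to the destination width, e.g.
//   %w:gpr32 = COPY %x.sub_32
//   %r = ANDWri %w, <encoded 0xff>
// For i32 and smaller sources only the plain COPY is emitted, since the high
// bits are already undefined.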
4011
4012Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
4013 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4014 DestVT == MVT::i64) &&
4015 "Unexpected value type.");
4016 // Handle i8 and i16 as i32.
4017 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4018 DestVT = MVT::i32;
4019
4020 if (IsZExt) {
4021 Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4022 assert(ResultReg && "Unexpected AND instruction emission failure.");
4023 if (DestVT == MVT::i64) {
4024 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4025 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4026 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4028 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4029 .addImm(0)
4030 .addReg(ResultReg)
4031 .addImm(AArch64::sub_32);
4032 ResultReg = Reg64;
4033 }
4034 return ResultReg;
4035 } else {
4036 if (DestVT == MVT::i64) {
4037 // FIXME: We're SExt i1 to i64.
4038 return Register();
4039 }
4040 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4041 0, 0);
4042 }
4043}
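// A minimal sketch of the two i1 extension forms emitted above (register names
// are illustrative): zero-extension masks the value with 1, sign-extension
// replicates bit 0 via a signed bitfield move.
//   and  w0, w1, #0x1          ; zext i1 -> i32 (plus SUBREG_TO_REG for i64)
//   sbfm w0, w1, #0, #0        ; sext i1 -> i32 (alias: sbfx w0, w1, #0, #1)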
4044
4045Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
4046 unsigned Opc;
4047 Register ZReg;
4048 switch (RetVT.SimpleTy) {
4049 default:
4050 return Register();
4051 case MVT::i8:
4052 case MVT::i16:
4053 case MVT::i32:
4054 RetVT = MVT::i32;
4055 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4056 case MVT::i64:
4057 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4058 }
4059
4060 const TargetRegisterClass *RC =
4061 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4062 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4063}
4064
4065Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4066 if (RetVT != MVT::i64)
4067 return Register();
4068
4069 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4070 Op0, Op1, AArch64::XZR);
4071}
4072
4073Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4074 if (RetVT != MVT::i64)
4075 return Register();
4076
4077 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4078 Op0, Op1, AArch64::XZR);
4079}
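// Both helpers above rely on the fact that SMADDL/UMADDL with XZR as the
// addend are the smull/umull aliases, i.e. a single instruction producing the
// full 64-bit product of two 32-bit operands.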
4080
4081Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
4082 Register Op1Reg) {
4083 unsigned Opc = 0;
4084 bool NeedTrunc = false;
4085 uint64_t Mask = 0;
4086 switch (RetVT.SimpleTy) {
4087 default:
4088 return Register();
4089 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4090 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4091 case MVT::i32: Opc = AArch64::LSLVWr; break;
4092 case MVT::i64: Opc = AArch64::LSLVXr; break;
4093 }
4094
4095 const TargetRegisterClass *RC =
4096 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4097 if (NeedTrunc)
4098 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4099
4100 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4101 if (NeedTrunc)
4102 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4103 return ResultReg;
4104}
4105
4106Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
4107 uint64_t Shift, bool IsZExt) {
4108 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4109 "Unexpected source/return type pair.");
4110 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4111 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4112 "Unexpected source value type.");
4113 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4114 RetVT == MVT::i64) && "Unexpected return value type.");
4115
4116 bool Is64Bit = (RetVT == MVT::i64);
4117 unsigned RegSize = Is64Bit ? 64 : 32;
4118 unsigned DstBits = RetVT.getSizeInBits();
4119 unsigned SrcBits = SrcVT.getSizeInBits();
4120 const TargetRegisterClass *RC =
4121 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4122
4123 // Just emit a copy for "zero" shifts.
4124 if (Shift == 0) {
4125 if (RetVT == SrcVT) {
4126 Register ResultReg = createResultReg(RC);
4127 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4128 TII.get(TargetOpcode::COPY), ResultReg)
4129 .addReg(Op0);
4130 return ResultReg;
4131 } else
4132 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4133 }
4134
4135 // Don't deal with undefined shifts.
4136 if (Shift >= DstBits)
4137 return Register();
4138
4139 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4140 // {S|U}BFM Wd, Wn, #r, #s
4141 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4142
4143 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4144 // %2 = shl i16 %1, 4
4145 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4146 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4147 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4148 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4149
4150 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4151 // %2 = shl i16 %1, 8
4152 // Wd<32+7-24,32-24> = Wn<7:0>
4153 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4154 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4155 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4156
4157 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4158 // %2 = shl i16 %1, 12
4159 // Wd<32+3-20,32-20> = Wn<3:0>
4160 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4161 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4162 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4163
4164 unsigned ImmR = RegSize - Shift;
4165 // Limit the width to the length of the source type.
4166 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4167 static const unsigned OpcTable[2][2] = {
4168 {AArch64::SBFMWri, AArch64::SBFMXri},
4169 {AArch64::UBFMWri, AArch64::UBFMXri}
4170 };
4171 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4172 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4173 Register TmpReg = MRI.createVirtualRegister(RC);
4174 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4175 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4176 .addImm(0)
4177 .addReg(Op0)
4178 .addImm(AArch64::sub_32);
4179 Op0 = TmpReg;
4180 }
4181 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4182}
4183
4184Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
4185 Register Op1Reg) {
4186 unsigned Opc = 0;
4187 bool NeedTrunc = false;
4188 uint64_t Mask = 0;
4189 switch (RetVT.SimpleTy) {
4190 default:
4191 return Register();
4192 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4193 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4194 case MVT::i32: Opc = AArch64::LSRVWr; break;
4195 case MVT::i64: Opc = AArch64::LSRVXr; break;
4196 }
4197
4198 const TargetRegisterClass *RC =
4199 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4200 if (NeedTrunc) {
4201 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4202 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4203 }
4204 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4205 if (NeedTrunc)
4206 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4207 return ResultReg;
4208}
4209
4210Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4211 uint64_t Shift, bool IsZExt) {
4212 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4213 "Unexpected source/return type pair.");
4214 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4215 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4216 "Unexpected source value type.");
4217 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4218 RetVT == MVT::i64) && "Unexpected return value type.");
4219
4220 bool Is64Bit = (RetVT == MVT::i64);
4221 unsigned RegSize = Is64Bit ? 64 : 32;
4222 unsigned DstBits = RetVT.getSizeInBits();
4223 unsigned SrcBits = SrcVT.getSizeInBits();
4224 const TargetRegisterClass *RC =
4225 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4226
4227 // Just emit a copy for "zero" shifts.
4228 if (Shift == 0) {
4229 if (RetVT == SrcVT) {
4230 Register ResultReg = createResultReg(RC);
4231 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4232 TII.get(TargetOpcode::COPY), ResultReg)
4233 .addReg(Op0);
4234 return ResultReg;
4235 } else
4236 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4237 }
4238
4239 // Don't deal with undefined shifts.
4240 if (Shift >= DstBits)
4241 return Register();
4242
4243 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4244 // {S|U}BFM Wd, Wn, #r, #s
4245 // Wd<s-r:0> = Wn<s:r> when r <= s
4246
4247 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4248 // %2 = lshr i16 %1, 4
4249 // Wd<7-4:0> = Wn<7:4>
4250 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4251 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4252 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4253
4254 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4255 // %2 = lshr i16 %1, 8
4256 // Wd<7-7,0> = Wn<7:7>
4257 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4258 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4259 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4260
4261 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4262 // %2 = lshr i16 %1, 12
4263 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4264 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4265 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4266 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4267
4268 if (Shift >= SrcBits && IsZExt)
4269 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4270
4271 // It is not possible to fold a sign-extend into the LShr instruction. In this
4272 // case emit a sign-extend.
4273 if (!IsZExt) {
4274 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4275 if (!Op0)
4276 return Register();
4277 SrcVT = RetVT;
4278 SrcBits = SrcVT.getSizeInBits();
4279 IsZExt = true;
4280 }
4281
4282 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4283 unsigned ImmS = SrcBits - 1;
4284 static const unsigned OpcTable[2][2] = {
4285 {AArch64::SBFMWri, AArch64::SBFMXri},
4286 {AArch64::UBFMWri, AArch64::UBFMXri}
4287 };
4288 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4289 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4290 Register TmpReg = MRI.createVirtualRegister(RC);
4291 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4292 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4293 .addImm(0)
4294 .addReg(Op0)
4295 .addImm(AArch64::sub_32);
4296 Op0 = TmpReg;
4297 }
4298 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4299}
4300
4301Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
4302 Register Op1Reg) {
4303 unsigned Opc = 0;
4304 bool NeedTrunc = false;
4305 uint64_t Mask = 0;
4306 switch (RetVT.SimpleTy) {
4307 default:
4308 return Register();
4309 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4310 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4311 case MVT::i32: Opc = AArch64::ASRVWr; break;
4312 case MVT::i64: Opc = AArch64::ASRVXr; break;
4313 }
4314
4315 const TargetRegisterClass *RC =
4316 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4317 if (NeedTrunc) {
4318 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4319 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4320 }
4321 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4322 if (NeedTrunc)
4323 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4324 return ResultReg;
4325}
4326
4327Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4328 uint64_t Shift, bool IsZExt) {
4329 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4330 "Unexpected source/return type pair.");
4331 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4332 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4333 "Unexpected source value type.");
4334 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4335 RetVT == MVT::i64) && "Unexpected return value type.");
4336
4337 bool Is64Bit = (RetVT == MVT::i64);
4338 unsigned RegSize = Is64Bit ? 64 : 32;
4339 unsigned DstBits = RetVT.getSizeInBits();
4340 unsigned SrcBits = SrcVT.getSizeInBits();
4341 const TargetRegisterClass *RC =
4342 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4343
4344 // Just emit a copy for "zero" shifts.
4345 if (Shift == 0) {
4346 if (RetVT == SrcVT) {
4347 Register ResultReg = createResultReg(RC);
4348 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4349 TII.get(TargetOpcode::COPY), ResultReg)
4350 .addReg(Op0);
4351 return ResultReg;
4352 } else
4353 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4354 }
4355
4356 // Don't deal with undefined shifts.
4357 if (Shift >= DstBits)
4358 return Register();
4359
4360 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4361 // {S|U}BFM Wd, Wn, #r, #s
4362 // Wd<s-r:0> = Wn<s:r> when r <= s
4363
4364 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4365 // %2 = ashr i16 %1, 4
4366 // Wd<7-4:0> = Wn<7:4>
4367 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4368 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4369 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4370
4371 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4372 // %2 = ashr i16 %1, 8
4373 // Wd<7-7,0> = Wn<7:7>
4374 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4375 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4376 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4377
4378 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4379 // %2 = ashr i16 %1, 12
4380 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4381 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4382 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4383 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4384
4385 if (Shift >= SrcBits && IsZExt)
4386 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4387
4388 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4389 unsigned ImmS = SrcBits - 1;
4390 static const unsigned OpcTable[2][2] = {
4391 {AArch64::SBFMWri, AArch64::SBFMXri},
4392 {AArch64::UBFMWri, AArch64::UBFMXri}
4393 };
4394 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4395 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4396 Register TmpReg = MRI.createVirtualRegister(RC);
4397 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4398 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4399 .addImm(0)
4400 .addReg(Op0)
4401 .addImm(AArch64::sub_32);
4402 Op0 = TmpReg;
4403 }
4404 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4405}
4406
4407Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
4408 bool IsZExt) {
4409 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4410
4411 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4412 // DestVT are odd things, so test to make sure that they are both types we can
4413 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4414 // bail out to SelectionDAG.
4415 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4416 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4417 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4418 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4419 return Register();
4420
4421 unsigned Opc;
4422 unsigned Imm = 0;
4423
4424 switch (SrcVT.SimpleTy) {
4425 default:
4426 return Register();
4427 case MVT::i1:
4428 return emiti1Ext(SrcReg, DestVT, IsZExt);
4429 case MVT::i8:
4430 if (DestVT == MVT::i64)
4431 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4432 else
4433 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4434 Imm = 7;
4435 break;
4436 case MVT::i16:
4437 if (DestVT == MVT::i64)
4438 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4439 else
4440 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4441 Imm = 15;
4442 break;
4443 case MVT::i32:
4444 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4445 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4446 Imm = 31;
4447 break;
4448 }
4449
4450 // Handle i8 and i16 as i32.
4451 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4452 DestVT = MVT::i32;
4453 else if (DestVT == MVT::i64) {
4454 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4455 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4456 TII.get(AArch64::SUBREG_TO_REG), Src64)
4457 .addImm(0)
4458 .addReg(SrcReg)
4459 .addImm(AArch64::sub_32);
4460 SrcReg = Src64;
4461 }
4462
4463 const TargetRegisterClass *RC =
4464 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4465 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4466}
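// For reference, the bitfield-move forms selected above correspond to the
// usual extension aliases (a sketch, not an exhaustive list):
//   UBFMWri Wd, Wn, #0, #7    ; uxtb
//   UBFMWri Wd, Wn, #0, #15   ; uxth
//   SBFMWri Wd, Wn, #0, #7    ; sxtb
//   SBFMXri Xd, Xn, #0, #31   ; sxtw (after SUBREG_TO_REG widens the source)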
4467
4468static bool isZExtLoad(const MachineInstr *LI) {
4469 switch (LI->getOpcode()) {
4470 default:
4471 return false;
4472 case AArch64::LDURBBi:
4473 case AArch64::LDURHHi:
4474 case AArch64::LDURWi:
4475 case AArch64::LDRBBui:
4476 case AArch64::LDRHHui:
4477 case AArch64::LDRWui:
4478 case AArch64::LDRBBroX:
4479 case AArch64::LDRHHroX:
4480 case AArch64::LDRWroX:
4481 case AArch64::LDRBBroW:
4482 case AArch64::LDRHHroW:
4483 case AArch64::LDRWroW:
4484 return true;
4485 }
4486}
4487
4488static bool isSExtLoad(const MachineInstr *LI) {
4489 switch (LI->getOpcode()) {
4490 default:
4491 return false;
4492 case AArch64::LDURSBWi:
4493 case AArch64::LDURSHWi:
4494 case AArch64::LDURSBXi:
4495 case AArch64::LDURSHXi:
4496 case AArch64::LDURSWi:
4497 case AArch64::LDRSBWui:
4498 case AArch64::LDRSHWui:
4499 case AArch64::LDRSBXui:
4500 case AArch64::LDRSHXui:
4501 case AArch64::LDRSWui:
4502 case AArch64::LDRSBWroX:
4503 case AArch64::LDRSHWroX:
4504 case AArch64::LDRSBXroX:
4505 case AArch64::LDRSHXroX:
4506 case AArch64::LDRSWroX:
4507 case AArch64::LDRSBWroW:
4508 case AArch64::LDRSHWroW:
4509 case AArch64::LDRSBXroW:
4510 case AArch64::LDRSHXroW:
4511 case AArch64::LDRSWroW:
4512 return true;
4513 }
4514}
4515
4516bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4517 MVT SrcVT) {
4518 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4519 if (!LI || !LI->hasOneUse())
4520 return false;
4521
4522 // Check if the load instruction has already been selected.
4523 Register Reg = lookUpRegForValue(LI);
4524 if (!Reg)
4525 return false;
4526
4527 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4528 if (!MI)
4529 return false;
4530
4531 // Check if the correct load instruction has been emitted - SelectionDAG might
4532 // have emitted a zero-extending load, but we need a sign-extending load.
4533 bool IsZExt = isa<ZExtInst>(I);
4534 const auto *LoadMI = MI;
4535 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4536 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4537 Register LoadReg = MI->getOperand(1).getReg();
4538 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4539 assert(LoadMI && "Expected valid instruction");
4540 }
4541 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4542 return false;
4543
4544 // Nothing to be done.
4545 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4546 updateValueMap(I, Reg);
4547 return true;
4548 }
4549
4550 if (IsZExt) {
4551 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4552 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4553 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4554 .addImm(0)
4555 .addReg(Reg, getKillRegState(true))
4556 .addImm(AArch64::sub_32);
4557 Reg = Reg64;
4558 } else {
4559 assert((MI->getOpcode() == TargetOpcode::COPY &&
4560 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4561 "Expected copy instruction");
4562 Reg = MI->getOperand(1).getReg();
4563    MachineBasicBlock::iterator I(MI);
4564 removeDeadCode(I, std::next(I));
4565 }
4566 updateValueMap(I, Reg);
4567 return true;
4568}
4569
4570bool AArch64FastISel::selectIntExt(const Instruction *I) {
4571  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4572 "Unexpected integer extend instruction.");
4573 MVT RetVT;
4574 MVT SrcVT;
4575 if (!isTypeSupported(I->getType(), RetVT))
4576 return false;
4577
4578 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4579 return false;
4580
4581 // Try to optimize already sign-/zero-extended values from load instructions.
4582 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4583 return true;
4584
4585 Register SrcReg = getRegForValue(I->getOperand(0));
4586 if (!SrcReg)
4587 return false;
4588
4589 // Try to optimize already sign-/zero-extended values from function arguments.
4590 bool IsZExt = isa<ZExtInst>(I);
4591 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4592 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4593 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4594 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4595 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4596 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4597 .addImm(0)
4598 .addReg(SrcReg)
4599 .addImm(AArch64::sub_32);
4600 SrcReg = ResultReg;
4601 }
4602
4603 updateValueMap(I, SrcReg);
4604 return true;
4605 }
4606 }
4607
4608 Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4609 if (!ResultReg)
4610 return false;
4611
4612 updateValueMap(I, ResultReg);
4613 return true;
4614}
4615
4616bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4617 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4618 if (!DestEVT.isSimple())
4619 return false;
4620
4621 MVT DestVT = DestEVT.getSimpleVT();
4622 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4623 return false;
4624
4625 unsigned DivOpc;
4626 bool Is64bit = (DestVT == MVT::i64);
4627 switch (ISDOpcode) {
4628 default:
4629 return false;
4630 case ISD::SREM:
4631 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4632 break;
4633 case ISD::UREM:
4634 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4635 break;
4636 }
4637 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4638 Register Src0Reg = getRegForValue(I->getOperand(0));
4639 if (!Src0Reg)
4640 return false;
4641
4642 Register Src1Reg = getRegForValue(I->getOperand(1));
4643 if (!Src1Reg)
4644 return false;
4645
4646 const TargetRegisterClass *RC =
4647 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4648 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4649 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4650 // The remainder is computed as numerator - (quotient * denominator) using the
4651 // MSUB instruction.
4652 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4653 updateValueMap(I, ResultReg);
4654 return true;
4655}
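// A minimal sketch of the resulting sequence for `srem i32 %a, %b` (register
// names are illustrative only):
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0      ; w0 = w0 - (w8 * w1)
// The unsigned case is identical with udiv in place of sdiv.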
4656
4657bool AArch64FastISel::selectMul(const Instruction *I) {
4658 MVT VT;
4659 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4660 return false;
4661
4662 if (VT.isVector())
4663 return selectBinaryOp(I, ISD::MUL);
4664
4665 const Value *Src0 = I->getOperand(0);
4666 const Value *Src1 = I->getOperand(1);
4667 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4668 if (C->getValue().isPowerOf2())
4669 std::swap(Src0, Src1);
4670
4671 // Try to simplify to a shift instruction.
4672 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4673 if (C->getValue().isPowerOf2()) {
4674 uint64_t ShiftVal = C->getValue().logBase2();
4675 MVT SrcVT = VT;
4676 bool IsZExt = true;
4677 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4678 if (!isIntExtFree(ZExt)) {
4679 MVT VT;
4680 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4681 SrcVT = VT;
4682 IsZExt = true;
4683 Src0 = ZExt->getOperand(0);
4684 }
4685 }
4686 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4687 if (!isIntExtFree(SExt)) {
4688 MVT VT;
4689 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4690 SrcVT = VT;
4691 IsZExt = false;
4692 Src0 = SExt->getOperand(0);
4693 }
4694 }
4695 }
4696
4697 Register Src0Reg = getRegForValue(Src0);
4698 if (!Src0Reg)
4699 return false;
4700
4701 Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4702
4703 if (ResultReg) {
4704 updateValueMap(I, ResultReg);
4705 return true;
4706 }
4707 }
4708
4709 Register Src0Reg = getRegForValue(I->getOperand(0));
4710 if (!Src0Reg)
4711 return false;
4712
4713 Register Src1Reg = getRegForValue(I->getOperand(1));
4714 if (!Src1Reg)
4715 return false;
4716
4717 Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4718
4719 if (!ResultReg)
4720 return false;
4721
4722 updateValueMap(I, ResultReg);
4723 return true;
4724}
4725
4726bool AArch64FastISel::selectShift(const Instruction *I) {
4727 MVT RetVT;
4728 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4729 return false;
4730
4731 if (RetVT.isVector())
4732 return selectOperator(I, I->getOpcode());
4733
4734 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4735 Register ResultReg;
4736 uint64_t ShiftVal = C->getZExtValue();
4737 MVT SrcVT = RetVT;
4738 bool IsZExt = I->getOpcode() != Instruction::AShr;
4739 const Value *Op0 = I->getOperand(0);
4740 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4741 if (!isIntExtFree(ZExt)) {
4742 MVT TmpVT;
4743 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4744 SrcVT = TmpVT;
4745 IsZExt = true;
4746 Op0 = ZExt->getOperand(0);
4747 }
4748 }
4749 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4750 if (!isIntExtFree(SExt)) {
4751 MVT TmpVT;
4752 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4753 SrcVT = TmpVT;
4754 IsZExt = false;
4755 Op0 = SExt->getOperand(0);
4756 }
4757 }
4758 }
4759
4760 Register Op0Reg = getRegForValue(Op0);
4761 if (!Op0Reg)
4762 return false;
4763
4764 switch (I->getOpcode()) {
4765 default: llvm_unreachable("Unexpected instruction.");
4766 case Instruction::Shl:
4767 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4768 break;
4769 case Instruction::AShr:
4770 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4771 break;
4772 case Instruction::LShr:
4773 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4774 break;
4775 }
4776 if (!ResultReg)
4777 return false;
4778
4779 updateValueMap(I, ResultReg);
4780 return true;
4781 }
4782
4783 Register Op0Reg = getRegForValue(I->getOperand(0));
4784 if (!Op0Reg)
4785 return false;
4786
4787 Register Op1Reg = getRegForValue(I->getOperand(1));
4788 if (!Op1Reg)
4789 return false;
4790
4791 Register ResultReg;
4792 switch (I->getOpcode()) {
4793 default: llvm_unreachable("Unexpected instruction.");
4794 case Instruction::Shl:
4795 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4796 break;
4797 case Instruction::AShr:
4798 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4799 break;
4800 case Instruction::LShr:
4801 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4802 break;
4803 }
4804
4805 if (!ResultReg)
4806 return false;
4807
4808 updateValueMap(I, ResultReg);
4809 return true;
4810}
4811
4812bool AArch64FastISel::selectBitCast(const Instruction *I) {
4813 MVT RetVT, SrcVT;
4814
4815 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4816 return false;
4817 if (!isTypeLegal(I->getType(), RetVT))
4818 return false;
4819
4820 unsigned Opc;
4821 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4822 Opc = AArch64::FMOVWSr;
4823 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4824 Opc = AArch64::FMOVXDr;
4825 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4826 Opc = AArch64::FMOVSWr;
4827 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4828 Opc = AArch64::FMOVDXr;
4829 else
4830 return false;
4831
4832 const TargetRegisterClass *RC = nullptr;
4833 switch (RetVT.SimpleTy) {
4834 default: llvm_unreachable("Unexpected value type.");
4835 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4836 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4837 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4838 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4839 }
4840 Register Op0Reg = getRegForValue(I->getOperand(0));
4841 if (!Op0Reg)
4842 return false;
4843
4844 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4845 if (!ResultReg)
4846 return false;
4847
4848 updateValueMap(I, ResultReg);
4849 return true;
4850}
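// The bitcast cases above are pure register-file moves: FMOVWSr/FMOVXDr copy a
// GPR into an FPR and FMOVSWr/FMOVDXr copy the other way, reinterpreting the
// bits without any conversion. E.g. `bitcast i32 %x to float` becomes a single
// `fmov s0, w0` (register names are illustrative only).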
4851
4852bool AArch64FastISel::selectFRem(const Instruction *I) {
4853 MVT RetVT;
4854 if (!isTypeLegal(I->getType(), RetVT))
4855 return false;
4856
4857 RTLIB::Libcall LC;
4858 switch (RetVT.SimpleTy) {
4859 default:
4860 return false;
4861 case MVT::f32:
4862 LC = RTLIB::REM_F32;
4863 break;
4864 case MVT::f64:
4865 LC = RTLIB::REM_F64;
4866 break;
4867 }
4868
4869 ArgListTy Args;
4870 Args.reserve(I->getNumOperands());
4871
4872 // Populate the argument list.
4873 for (auto &Arg : I->operands())
4874 Args.emplace_back(Arg);
4875
4876 CallLoweringInfo CLI;
4877 MCContext &Ctx = MF->getContext();
4878 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4879 TLI.getLibcallName(LC), std::move(Args));
4880 if (!lowerCallTo(CLI))
4881 return false;
4882 updateValueMap(I, CLI.ResultReg);
4883 return true;
4884}
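// AArch64 has no FP remainder instruction, so `frem` is lowered above as a
// libcall; with the standard runtime, RTLIB::REM_F32/REM_F64 resolve to fmodf
// and fmod respectively (a note assuming the default libcall names).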
4885
4886bool AArch64FastISel::selectSDiv(const Instruction *I) {
4887 MVT VT;
4888 if (!isTypeLegal(I->getType(), VT))
4889 return false;
4890
4891 if (!isa<ConstantInt>(I->getOperand(1)))
4892 return selectBinaryOp(I, ISD::SDIV);
4893
4894 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4895 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4896 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4897 return selectBinaryOp(I, ISD::SDIV);
4898
4899 unsigned Lg2 = C.countr_zero();
4900 Register Src0Reg = getRegForValue(I->getOperand(0));
4901 if (!Src0Reg)
4902 return false;
4903
4904 if (cast<BinaryOperator>(I)->isExact()) {
4905 Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4906 if (!ResultReg)
4907 return false;
4908 updateValueMap(I, ResultReg);
4909 return true;
4910 }
4911
4912 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4913 Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4914 if (!AddReg)
4915 return false;
4916
4917 // (Src0 < 0) ? Pow2 - 1 : 0;
4918 if (!emitICmp_ri(VT, Src0Reg, 0))
4919 return false;
4920
4921 unsigned SelectOpc;
4922 const TargetRegisterClass *RC;
4923 if (VT == MVT::i64) {
4924 SelectOpc = AArch64::CSELXr;
4925 RC = &AArch64::GPR64RegClass;
4926 } else {
4927 SelectOpc = AArch64::CSELWr;
4928 RC = &AArch64::GPR32RegClass;
4929 }
4930 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4931                                        AArch64CC::LT);
4932 if (!SelectReg)
4933 return false;
4934
4935 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4936 // negate the result.
4937 Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4938 Register ResultReg;
4939 if (C.isNegative())
4940 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4941 AArch64_AM::ASR, Lg2);
4942 else
4943 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4944
4945 if (!ResultReg)
4946 return false;
4947
4948 updateValueMap(I, ResultReg);
4949 return true;
4950}
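// A rough sketch of the non-exact power-of-two path above, for
// `sdiv i32 %a, 8` (register names are illustrative only):
//   add  w8, w0, #7          ; Src0 + (Pow2 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt      ; use the biased value only when Src0 is negative
//   asr  w0, w8, #3          ; or `neg w0, w8, asr #3` for a negative divisor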
4951
4952/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4953/// have to duplicate it for AArch64, because otherwise we would fail during the
4954/// sign-extend emission.
4955Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4956 Register IdxN = getRegForValue(Idx);
4957 if (!IdxN)
4958 // Unhandled operand. Halt "fast" selection and bail.
4959 return Register();
4960
4961 // If the index is smaller or larger than intptr_t, truncate or extend it.
4962 MVT PtrVT = TLI.getPointerTy(DL);
4963 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4964 if (IdxVT.bitsLT(PtrVT)) {
4965 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4966 } else if (IdxVT.bitsGT(PtrVT))
4967 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4968 return IdxN;
4969}
4970
4971/// This is mostly a copy of the existing FastISel GEP code, but we have to
4972/// duplicate it for AArch64, because otherwise we would bail out even for
4973/// simple cases. This is because the standard fastEmit functions don't cover
4974/// MUL at all and ADD is lowered very inefficiently.
4975bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4976 if (Subtarget->isTargetILP32())
4977 return false;
4978
4979 Register N = getRegForValue(I->getOperand(0));
4980 if (!N)
4981 return false;
4982
4983 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4984 // into a single N = N + TotalOffset.
4985 uint64_t TotalOffs = 0;
4986 MVT VT = TLI.getPointerTy(DL);
4987  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4988 GTI != E; ++GTI) {
4989 const Value *Idx = GTI.getOperand();
4990 if (auto *StTy = GTI.getStructTypeOrNull()) {
4991 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4992 // N = N + Offset
4993 if (Field)
4994 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4995 } else {
4996 // If this is a constant subscript, handle it quickly.
4997 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4998 if (CI->isZero())
4999 continue;
5000 // N = N + Offset
5001 TotalOffs += GTI.getSequentialElementStride(DL) *
5002 cast<ConstantInt>(CI)->getSExtValue();
5003 continue;
5004 }
5005 if (TotalOffs) {
5006 N = emitAdd_ri_(VT, N, TotalOffs);
5007 if (!N)
5008 return false;
5009 TotalOffs = 0;
5010 }
5011
5012 // N = N + Idx * ElementSize;
5013 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5014 Register IdxN = getRegForGEPIndex(Idx);
5015 if (!IdxN)
5016 return false;
5017
5018 if (ElementSize != 1) {
5019 Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5020 if (!C)
5021 return false;
5022 IdxN = emitMul_rr(VT, IdxN, C);
5023 if (!IdxN)
5024 return false;
5025 }
5026 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5027 if (!N)
5028 return false;
5029 }
5030 }
5031 if (TotalOffs) {
5032 N = emitAdd_ri_(VT, N, TotalOffs);
5033 if (!N)
5034 return false;
5035 }
5036 updateValueMap(I, N);
5037 return true;
5038}
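// A minimal sketch of what the loop above emits for a variable index, e.g.
// `getelementptr i32, ptr %p, i64 %i` (register names are illustrative only):
//   mov x9, #4               ; element size
//   mul x9, x8, x9           ; %i * 4
//   add x0, x0, x9
// Constant indices are instead accumulated into TotalOffs and added to the
// running pointer as an immediate at the end.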
5039
5040bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5041 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5042 "cmpxchg survived AtomicExpand at optlevel > -O0");
5043
5044 auto *RetPairTy = cast<StructType>(I->getType());
5045 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5046 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5047 "cmpxchg has a non-i1 status result");
5048
5049 MVT VT;
5050 if (!isTypeLegal(RetTy, VT))
5051 return false;
5052
5053 const TargetRegisterClass *ResRC;
5054 unsigned Opc, CmpOpc;
5055 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5056 // extractvalue selection doesn't support that.
5057 if (VT == MVT::i32) {
5058 Opc = AArch64::CMP_SWAP_32;
5059 CmpOpc = AArch64::SUBSWrs;
5060 ResRC = &AArch64::GPR32RegClass;
5061 } else if (VT == MVT::i64) {
5062 Opc = AArch64::CMP_SWAP_64;
5063 CmpOpc = AArch64::SUBSXrs;
5064 ResRC = &AArch64::GPR64RegClass;
5065 } else {
5066 return false;
5067 }
5068
5069 const MCInstrDesc &II = TII.get(Opc);
5070
5071 Register AddrReg = getRegForValue(I->getPointerOperand());
5072 Register DesiredReg = getRegForValue(I->getCompareOperand());
5073 Register NewReg = getRegForValue(I->getNewValOperand());
5074
5075 if (!AddrReg || !DesiredReg || !NewReg)
5076 return false;
5077
5078 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
5079 DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
5080 NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
5081
5082 const Register ResultReg1 = createResultReg(ResRC);
5083 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5084 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5085
5086 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5087 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5088 .addDef(ResultReg1)
5089 .addDef(ScratchReg)
5090 .addUse(AddrReg)
5091 .addUse(DesiredReg)
5092 .addUse(NewReg);
5093
5094 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5095 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5096 .addUse(ResultReg1)
5097 .addUse(DesiredReg)
5098 .addImm(0);
5099
5100 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5101 .addDef(ResultReg2)
5102 .addUse(AArch64::WZR)
5103 .addUse(AArch64::WZR)
5104      .addImm(AArch64CC::NE);
5105
5106 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5107 updateValueMap(I, ResultReg1, 2);
5108 return true;
5109}
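// A brief sketch of the shape produced above: the CMP_SWAP_* pseudo is
// expanded later (to an exclusive load/store loop or a CAS, depending on the
// subtarget), the SUBS compares the loaded value with the expected one, and
// the CSINC with the inverted (NE) condition materializes the i1 success flag
// as 0 or 1.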
5110
5111bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5112 if (TLI.fallBackToDAGISel(*I))
5113 return false;
5114 switch (I->getOpcode()) {
5115 default:
5116 break;
5117 case Instruction::Add:
5118 case Instruction::Sub:
5119 return selectAddSub(I);
5120 case Instruction::Mul:
5121 return selectMul(I);
5122 case Instruction::SDiv:
5123 return selectSDiv(I);
5124 case Instruction::SRem:
5125 if (!selectBinaryOp(I, ISD::SREM))
5126 return selectRem(I, ISD::SREM);
5127 return true;
5128 case Instruction::URem:
5129 if (!selectBinaryOp(I, ISD::UREM))
5130 return selectRem(I, ISD::UREM);
5131 return true;
5132 case Instruction::Shl:
5133 case Instruction::LShr:
5134 case Instruction::AShr:
5135 return selectShift(I);
5136 case Instruction::And:
5137 case Instruction::Or:
5138 case Instruction::Xor:
5139 return selectLogicalOp(I);
5140 case Instruction::Br:
5141 return selectBranch(I);
5142 case Instruction::IndirectBr:
5143 return selectIndirectBr(I);
5144 case Instruction::BitCast:
5145    if (!selectCast(I, ISD::BITCAST))
5146 return selectBitCast(I);
5147 return true;
5148 case Instruction::FPToSI:
5149 if (!selectCast(I, ISD::FP_TO_SINT))
5150 return selectFPToInt(I, /*Signed=*/true);
5151 return true;
5152 case Instruction::FPToUI:
5153 return selectFPToInt(I, /*Signed=*/false);
5154 case Instruction::ZExt:
5155 case Instruction::SExt:
5156 return selectIntExt(I);
5157 case Instruction::Trunc:
5158 if (!selectCast(I, ISD::TRUNCATE))
5159 return selectTrunc(I);
5160 return true;
5161 case Instruction::FPExt:
5162 return selectFPExt(I);
5163 case Instruction::FPTrunc:
5164 return selectFPTrunc(I);
5165 case Instruction::SIToFP:
5166 if (!selectCast(I, ISD::SINT_TO_FP))
5167 return selectIntToFP(I, /*Signed=*/true);
5168 return true;
5169 case Instruction::UIToFP:
5170 return selectIntToFP(I, /*Signed=*/false);
5171 case Instruction::Load:
5172 return selectLoad(I);
5173 case Instruction::Store:
5174 return selectStore(I);
5175 case Instruction::FCmp:
5176 case Instruction::ICmp:
5177 return selectCmp(I);
5178 case Instruction::Select:
5179 return selectSelect(I);
5180 case Instruction::Ret:
5181 return selectRet(I);
5182 case Instruction::FRem:
5183 return selectFRem(I);
5184 case Instruction::GetElementPtr:
5185 return selectGetElementPtr(I);
5186 case Instruction::AtomicCmpXchg:
5187 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5188 }
5189
5190  // Fall back to target-independent instruction selection.
5191 return selectOperator(I, I->getOpcode());
5192}
5193
5194FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5195 const TargetLibraryInfo *LibInfo) {
5196
5197 SMEAttrs CallerAttrs =
5198 FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
5199 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5200 CallerAttrs.hasStreamingInterfaceOrBody() ||
5201 CallerAttrs.hasStreamingCompatibleInterface() ||
5202 CallerAttrs.hasAgnosticZAInterface())
5203 return nullptr;
5204 return new AArch64FastISel(FuncInfo, LibInfo);
5205}
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:687
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:704
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:680
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
bool isUnsigned() const
Definition InstrTypes.h:938
const APFloat & getValueAPF() const
Definition Constants.h:320
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:327
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:324
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
bool selectBitCast(const User *I)
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
PointerType * getType() const
Global values are always pointers.
iterator_range< succ_op_iterator > successors()
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Value * getLength() const
unsigned getDestAddressSpace() const
bool isVolatile() const
constexpr unsigned id() const
Definition Register.h:95
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:652
Provides information about what library functions are available for the current target.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition Type.h:264
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
gep_type_iterator gep_type_end(const User *GEP)
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
gep_type_iterator gep_type_begin(const User *GEP)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.