AArch64InstructionSelector.cpp
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
117 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351 MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353 CmpInst::Predicate CC,
354 AArch64CC::CondCode Predicate,
355 AArch64CC::CondCode OutCC,
356 MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// None when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
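For reference, the branches above work out to mappings like these (a sketch; the type values are hypothetical inputs):

  // GPR bank: s32 -> GPR32RegClass, s64 -> GPR64RegClass, 128 bits -> XSeqPairsClassRegClass.
  // FPR bank: s16 -> FPR16RegClass, s64 -> FPR64RegClass, v4s32 (128 bits) -> FPR128RegClass.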
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607 getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
619 if (SizeInBits <= 32)
620 return GetAllRegSet ? &AArch64::GPR32allRegClass
621 : &AArch64::GPR32RegClass;
622 if (SizeInBits == 64)
623 return GetAllRegSet ? &AArch64::GPR64allRegClass
624 : &AArch64::GPR64RegClass;
625 if (SizeInBits == 128)
626 return &AArch64::XSeqPairsClassRegClass;
627 }
628
629 if (RegBankID == AArch64::FPRRegBankID) {
630 if (SizeInBits.isScalable()) {
631 assert(SizeInBits == TypeSize::getScalable(128) &&
632 "Unexpected scalable register size");
633 return &AArch64::ZPRRegClass;
634 }
635
636 switch (SizeInBits) {
637 default:
638 return nullptr;
639 case 8:
640 return &AArch64::FPR8RegClass;
641 case 16:
642 return &AArch64::FPR16RegClass;
643 case 32:
644 return &AArch64::FPR32RegClass;
645 case 64:
646 return &AArch64::FPR64RegClass;
647 case 128:
648 return &AArch64::FPR128RegClass;
649 }
650 }
651
652 return nullptr;
653}
654
655/// Returns the correct subregister to use for a given register class.
656static bool getSubRegForClass(const TargetRegisterClass *RC,
657 const TargetRegisterInfo &TRI, unsigned &SubReg) {
658 switch (TRI.getRegSizeInBits(*RC)) {
659 case 8:
660 SubReg = AArch64::bsub;
661 break;
662 case 16:
663 SubReg = AArch64::hsub;
664 break;
665 case 32:
666 if (RC != &AArch64::FPR32RegClass)
667 SubReg = AArch64::sub_32;
668 else
669 SubReg = AArch64::ssub;
670 break;
671 case 64:
672 SubReg = AArch64::dsub;
673 break;
674 default:
675 LLVM_DEBUG(
676 dbgs() << "Couldn't find appropriate subregister for register class.");
677 return false;
678 }
679
680 return true;
681}
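A few concrete results of the switch above (illustrative only):

  // FPR16 -> hsub, FPR32 -> ssub, FPR64 -> dsub, and a 32-bit GPR class -> sub_32;
  // any size not listed (e.g. a 128-bit class) takes the debug + "return false" path.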
682
683/// Returns the minimum size the given register bank can hold.
684static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
685 switch (RB.getID()) {
686 case AArch64::GPRRegBankID:
687 return 32;
688 case AArch64::FPRRegBankID:
689 return 8;
690 default:
691 llvm_unreachable("Tried to get minimum size for unknown register bank.");
692 }
693}
694
695/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
696/// Helper function for functions like createDTuple and createQTuple.
697///
698/// \p RegClassIDs - The list of register class IDs available for some tuple of
699/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
700/// expected to contain between 2 and 4 tuple classes.
701///
702/// \p SubRegs - The list of subregister classes associated with each register
703/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
704/// subregister class. The index of each subregister class is expected to
705/// correspond with the index of each register class.
706///
707/// \returns Either the destination register of REG_SEQUENCE instruction that
708/// was created, or the 0th element of \p Regs if \p Regs contains a single
709/// element.
710static Register createTuple(ArrayRef<Register> Regs,
711 const unsigned RegClassIDs[],
712 const unsigned SubRegs[], MachineIRBuilder &MIB) {
713 unsigned NumRegs = Regs.size();
714 if (NumRegs == 1)
715 return Regs[0];
716 assert(NumRegs >= 2 && NumRegs <= 4 &&
717 "Only support between two and 4 registers in a tuple!");
718 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
719 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
720 auto RegSequence =
721 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
722 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
723 RegSequence.addUse(Regs[I]);
724 RegSequence.addImm(SubRegs[I]);
725 }
726 return RegSequence.getReg(0);
727}
728
729/// Create a tuple of D-registers using the registers in \p Regs.
730static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
731 static const unsigned RegClassIDs[] = {
732 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
733 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
734 AArch64::dsub2, AArch64::dsub3};
735 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
736}
737
738/// Create a tuple of Q-registers using the registers in \p Regs.
739static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
740 static const unsigned RegClassIDs[] = {
741 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
742 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
743 AArch64::qsub2, AArch64::qsub3};
744 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
745}
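For illustration (the vreg names are hypothetical): handing two Q registers to createQTuple yields a single REG_SEQUENCE constrained to the QQ class,

  //   %tuple:qq = REG_SEQUENCE %v0, qsub0, %v1, qsub1

while a one-element list is returned unchanged without emitting anything.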
746
747static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
748 auto &MI = *Root.getParent();
749 auto &MBB = *MI.getParent();
750 auto &MF = *MBB.getParent();
751 auto &MRI = MF.getRegInfo();
752 uint64_t Immed;
753 if (Root.isImm())
754 Immed = Root.getImm();
755 else if (Root.isCImm())
756 Immed = Root.getCImm()->getZExtValue();
757 else if (Root.isReg()) {
758 auto ValAndVReg =
759 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
760 if (!ValAndVReg)
761 return std::nullopt;
762 Immed = ValAndVReg->Value.getSExtValue();
763 } else
764 return std::nullopt;
765 return Immed;
766}
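A quick illustration of the helper above (hypothetical MIR):

  //   %c:gpr(s64) = G_CONSTANT i64 42
  // A root operand that is an immediate 42, a ConstantInt 42, or a vreg defined
  // (possibly through copies) by the G_CONSTANT above all yield 42; any other
  // register definition yields std::nullopt.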
767
768/// Check whether \p I is a currently unsupported binary operation:
769/// - it has an unsized type
770/// - an operand is not a vreg
771/// - all operands are not in the same bank
772/// These are checks that should someday live in the verifier, but right now,
773/// these are mostly limitations of the aarch64 selector.
774static bool unsupportedBinOp(const MachineInstr &I,
775 const AArch64RegisterBankInfo &RBI,
776 const MachineRegisterInfo &MRI,
777 const AArch64RegisterInfo &TRI) {
778 LLT Ty = MRI.getType(I.getOperand(0).getReg());
779 if (!Ty.isValid()) {
780 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
781 return true;
782 }
783
784 const RegisterBank *PrevOpBank = nullptr;
785 for (auto &MO : I.operands()) {
786 // FIXME: Support non-register operands.
787 if (!MO.isReg()) {
788 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
789 return true;
790 }
791
792 // FIXME: Can generic operations have physical registers operands? If
793 // so, this will need to be taught about that, and we'll need to get the
794 // bank out of the minimal class for the register.
795 // Either way, this needs to be documented (and possibly verified).
796 if (!MO.getReg().isVirtual()) {
797 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
798 return true;
799 }
800
801 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
802 if (!OpBank) {
803 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
804 return true;
805 }
806
807 if (PrevOpBank && OpBank != PrevOpBank) {
808 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
809 return true;
810 }
811 PrevOpBank = OpBank;
812 }
813 return false;
814}
815
816/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
817/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
818/// and of size \p OpSize.
819/// \returns \p GenericOpc if the combination is unsupported.
820static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
821 unsigned OpSize) {
822 switch (RegBankID) {
823 case AArch64::GPRRegBankID:
824 if (OpSize == 32) {
825 switch (GenericOpc) {
826 case TargetOpcode::G_SHL:
827 return AArch64::LSLVWr;
828 case TargetOpcode::G_LSHR:
829 return AArch64::LSRVWr;
830 case TargetOpcode::G_ASHR:
831 return AArch64::ASRVWr;
832 default:
833 return GenericOpc;
834 }
835 } else if (OpSize == 64) {
836 switch (GenericOpc) {
837 case TargetOpcode::G_PTR_ADD:
838 return AArch64::ADDXrr;
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVXr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVXr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVXr;
845 default:
846 return GenericOpc;
847 }
848 }
849 break;
850 case AArch64::FPRRegBankID:
851 switch (OpSize) {
852 case 32:
853 switch (GenericOpc) {
854 case TargetOpcode::G_FADD:
855 return AArch64::FADDSrr;
856 case TargetOpcode::G_FSUB:
857 return AArch64::FSUBSrr;
858 case TargetOpcode::G_FMUL:
859 return AArch64::FMULSrr;
860 case TargetOpcode::G_FDIV:
861 return AArch64::FDIVSrr;
862 default:
863 return GenericOpc;
864 }
865 case 64:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDDrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBDrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULDrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVDrr;
875 case TargetOpcode::G_OR:
876 return AArch64::ORRv8i8;
877 default:
878 return GenericOpc;
879 }
880 }
881 break;
882 }
883 return GenericOpc;
884}
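A minimal usage sketch (hypothetical call site; the results follow the switch above):

  // 32-bit variable shift on the GPR bank selects the W-register LSLV form.
  unsigned Opc = selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 32);
  // Opc == AArch64::LSLVWr; any combination not listed simply returns GenericOpc.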
885
886/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
887/// appropriate for the (value) register bank \p RegBankID and of memory access
888/// size \p OpSize. This returns the variant with the base+unsigned-immediate
889/// addressing mode (e.g., LDRXui).
890/// \returns \p GenericOpc if the combination is unsupported.
891static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
892 unsigned OpSize) {
893 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
894 switch (RegBankID) {
895 case AArch64::GPRRegBankID:
896 switch (OpSize) {
897 case 8:
898 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
899 case 16:
900 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
901 case 32:
902 return isStore ? AArch64::STRWui : AArch64::LDRWui;
903 case 64:
904 return isStore ? AArch64::STRXui : AArch64::LDRXui;
905 }
906 break;
907 case AArch64::FPRRegBankID:
908 switch (OpSize) {
909 case 8:
910 return isStore ? AArch64::STRBui : AArch64::LDRBui;
911 case 16:
912 return isStore ? AArch64::STRHui : AArch64::LDRHui;
913 case 32:
914 return isStore ? AArch64::STRSui : AArch64::LDRSui;
915 case 64:
916 return isStore ? AArch64::STRDui : AArch64::LDRDui;
917 case 128:
918 return isStore ? AArch64::STRQui : AArch64::LDRQui;
919 }
920 break;
921 }
922 return GenericOpc;
923}
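Likewise for the load/store helper (hypothetical call site):

  // 64-bit GPR load with the base + unsigned-immediate addressing mode.
  unsigned LdOpc =
      selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::GPRRegBankID, 64);
  // LdOpc == AArch64::LDRXui; a 128-bit FPR store would map to AArch64::STRQui.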
924
925/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
926/// to \p *To.
927///
928/// E.g "To = COPY SrcReg:SubReg"
929static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
930 const RegisterBankInfo &RBI, Register SrcReg,
931 const TargetRegisterClass *To, unsigned SubReg) {
932 assert(SrcReg.isValid() && "Expected a valid source register?");
933 assert(To && "Destination register class cannot be null");
934 assert(SubReg && "Expected a valid subregister");
935
936 MachineIRBuilder MIB(I);
937 auto SubRegCopy =
938 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
939 MachineOperand &RegOp = I.getOperand(1);
940 RegOp.setReg(SubRegCopy.getReg(0));
941
942 // It's possible that the destination register won't be constrained. Make
943 // sure that happens.
944 if (!I.getOperand(0).getReg().isPhysical())
945 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
946
947 return true;
948}
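Sketch of the rewrite performed above, for a hypothetical copy that narrows a 64-bit GPR into a 32-bit destination class:

  //   %dst:gpr32 = COPY %src:gpr64
  // becomes
  //   %tmp:gpr32 = COPY %src.sub_32
  //   %dst:gpr32 = COPY %tmp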
949
950/// Helper function to get the source and destination register classes for a
951/// copy. Returns a std::pair containing the source register class for the
952/// copy, and the destination register class for the copy. If a register class
953/// cannot be determined, then it will be nullptr.
954static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
955 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
956 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
957 const RegisterBankInfo &RBI) {
958 Register DstReg = I.getOperand(0).getReg();
959 Register SrcReg = I.getOperand(1).getReg();
960 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
961 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
962
963 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
964 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
965
966 // Special casing for cross-bank copies of s1s. We can technically represent
967 // a 1-bit value with any size of register. The minimum size for a GPR is 32
968 // bits. So, we need to put the FPR on 32 bits as well.
969 //
970 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
971 // then we can pull it into the helpers that get the appropriate class for a
972 // register bank. Or make a new helper that carries along some constraint
973 // information.
974 if (SrcRegBank != DstRegBank &&
975 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
976 SrcSize = DstSize = TypeSize::getFixed(32);
977
978 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
979 getMinClassForRegBank(DstRegBank, DstSize, true)};
980}
981
982// FIXME: We need some sort of API in RBI/TRI to allow generic code to
983// constrain operands of simple instructions given a TargetRegisterClass
984// and LLT
985static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
986 const RegisterBankInfo &RBI) {
987 for (MachineOperand &MO : I.operands()) {
988 if (!MO.isReg())
989 continue;
990 Register Reg = MO.getReg();
991 if (!Reg)
992 continue;
993 if (Reg.isPhysical())
994 continue;
995 LLT Ty = MRI.getType(Reg);
996 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
997 const TargetRegisterClass *RC =
998 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
999 if (!RC) {
1000 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1001 RC = getRegClassForTypeOnBank(Ty, RB);
1002 if (!RC) {
1003 LLVM_DEBUG(
1004 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1005 break;
1006 }
1007 }
1008 RBI.constrainGenericRegister(Reg, *RC, MRI);
1009 }
1010
1011 return true;
1012}
1013
1014static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1015 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1016 const RegisterBankInfo &RBI) {
1017 Register DstReg = I.getOperand(0).getReg();
1018 Register SrcReg = I.getOperand(1).getReg();
1019 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1020 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1021
1022 // Find the correct register classes for the source and destination registers.
1023 const TargetRegisterClass *SrcRC;
1024 const TargetRegisterClass *DstRC;
1025 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1026
1027 if (!DstRC) {
1028 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1029 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1030 return false;
1031 }
1032
1033 // Is this a copy? If so, then we may need to insert a subregister copy.
1034 if (I.isCopy()) {
1035 // Yes. Check if there's anything to fix up.
1036 if (!SrcRC) {
1037 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1038 return false;
1039 }
1040
1041 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1042 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1043 unsigned SubReg;
1044
1045 // If the source bank doesn't support a subregister copy small enough,
1046 // then we first need to copy to the destination bank.
1047 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1048 const TargetRegisterClass *DstTempRC =
1049 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1050 getSubRegForClass(DstRC, TRI, SubReg);
1051
1052 MachineIRBuilder MIB(I);
1053 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1054 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1055 } else if (SrcSize > DstSize) {
1056 // If the source register is bigger than the destination we need to
1057 // perform a subregister copy.
1058 const TargetRegisterClass *SubRegRC =
1059 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1060 getSubRegForClass(SubRegRC, TRI, SubReg);
1061 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1062 } else if (DstSize > SrcSize) {
1063 // If the destination register is bigger than the source we need to do
1064 // a promotion using SUBREG_TO_REG.
1065 const TargetRegisterClass *PromotionRC =
1066 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1067 getSubRegForClass(SrcRC, TRI, SubReg);
1068
1069 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1070 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1071 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1072 .addImm(0)
1073 .addUse(SrcReg)
1074 .addImm(SubReg);
1075 MachineOperand &RegOp = I.getOperand(1);
1076 RegOp.setReg(PromoteReg);
1077 }
1078
1079 // If the destination is a physical register, then there's nothing to
1080 // change, so we're done.
1081 if (DstReg.isPhysical())
1082 return true;
1083 }
1084
1085 // No need to constrain SrcReg. It will get constrained when we hit another
1086 // of its use or its defs. Copies do not have constraints.
1087 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1088 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1089 << " operand\n");
1090 return false;
1091 }
1092
1093 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1094 // The sizes will be mismatched with the source < 32b but that's ok.
1095 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1096 I.setDesc(TII.get(AArch64::COPY));
1097 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1098 return selectCopy(I, TII, MRI, TRI, RBI);
1099 }
1100
1101 I.setDesc(TII.get(AArch64::COPY));
1102 return true;
1103}
1104
1105MachineInstr *
1106AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1107 Register False, AArch64CC::CondCode CC,
1108 MachineIRBuilder &MIB) const {
1109 MachineRegisterInfo &MRI = *MIB.getMRI();
1110 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1111 RBI.getRegBank(True, MRI, TRI)->getID() &&
1112 "Expected both select operands to have the same regbank?");
1113 LLT Ty = MRI.getType(True);
1114 if (Ty.isVector())
1115 return nullptr;
1116 const unsigned Size = Ty.getSizeInBits();
1117 assert((Size == 32 || Size == 64) &&
1118 "Expected 32 bit or 64 bit select only?");
1119 const bool Is32Bit = Size == 32;
1120 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1121 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1122 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1123 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1124 return &*FCSel;
1125 }
1126
1127 // By default, we'll try and emit a CSEL.
1128 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1129 bool Optimized = false;
1130 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1131 &Optimized](Register &Reg, Register &OtherReg,
1132 bool Invert) {
1133 if (Optimized)
1134 return false;
1135
1136 // Attempt to fold:
1137 //
1138 // %sub = G_SUB 0, %x
1139 // %select = G_SELECT cc, %reg, %sub
1140 //
1141 // Into:
1142 // %select = CSNEG %reg, %x, cc
1143 Register MatchReg;
1144 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1145 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1146 Reg = MatchReg;
1147 if (Invert) {
1148 CC = AArch64CC::getInvertedCondCode(CC);
1149 std::swap(Reg, OtherReg);
1150 }
1151 return true;
1152 }
1153
1154 // Attempt to fold:
1155 //
1156 // %xor = G_XOR %x, -1
1157 // %select = G_SELECT cc, %reg, %xor
1158 //
1159 // Into:
1160 // %select = CSINV %reg, %x, cc
1161 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1162 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1163 Reg = MatchReg;
1164 if (Invert) {
1165 CC = AArch64CC::getInvertedCondCode(CC);
1166 std::swap(Reg, OtherReg);
1167 }
1168 return true;
1169 }
1170
1171 // Attempt to fold:
1172 //
1173 // %add = G_ADD %x, 1
1174 // %select = G_SELECT cc, %reg, %add
1175 //
1176 // Into:
1177 // %select = CSINC %reg, %x, cc
1178 if (mi_match(Reg, MRI,
1179 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1180 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1181 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1182 Reg = MatchReg;
1183 if (Invert) {
1184 CC = AArch64CC::getInvertedCondCode(CC);
1185 std::swap(Reg, OtherReg);
1186 }
1187 return true;
1188 }
1189
1190 return false;
1191 };
1192
1193 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1194 // true/false values are constants.
1195 // FIXME: All of these patterns already exist in tablegen. We should be
1196 // able to import these.
1197 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1198 &Optimized]() {
1199 if (Optimized)
1200 return false;
1201 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1202 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1203 if (!TrueCst && !FalseCst)
1204 return false;
1205
1206 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1207 if (TrueCst && FalseCst) {
1208 int64_t T = TrueCst->Value.getSExtValue();
1209 int64_t F = FalseCst->Value.getSExtValue();
1210
1211 if (T == 0 && F == 1) {
1212 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1213 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1214 True = ZReg;
1215 False = ZReg;
1216 return true;
1217 }
1218
1219 if (T == 0 && F == -1) {
1220 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1221 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1222 True = ZReg;
1223 False = ZReg;
1224 return true;
1225 }
1226 }
1227
1228 if (TrueCst) {
1229 int64_t T = TrueCst->Value.getSExtValue();
1230 if (T == 1) {
1231 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1232 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1233 True = False;
1234 False = ZReg;
1235 CC = AArch64CC::getInvertedCondCode(CC);
1236 return true;
1237 }
1238
1239 if (T == -1) {
1240 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1241 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1242 True = False;
1243 False = ZReg;
1244 CC = AArch64CC::getInvertedCondCode(CC);
1245 return true;
1246 }
1247 }
1248
1249 if (FalseCst) {
1250 int64_t F = FalseCst->Value.getSExtValue();
1251 if (F == 1) {
1252 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1253 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1254 False = ZReg;
1255 return true;
1256 }
1257
1258 if (F == -1) {
1259 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1260 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1261 False = ZReg;
1262 return true;
1263 }
1264 }
1265 return false;
1266 };
1267
1268 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1269 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1270 Optimized |= TryOptSelectCst();
1271 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1272 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1273 return &*SelectInst;
1274}
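Summarizing the folds above with hypothetical MIR (cc is the incoming condition code):

  //   %f = G_SUB 0, %x;  %d = G_SELECT cc, %t, %f   -->  CSNEG %d, %t, %x, cc
  //   %d = G_SELECT cc, 0, 1                        -->  CSINC %d, zr, zr, cc
  // Anything that does not match a fold becomes a plain CSEL, or FCSEL on the FPR bank.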
1275
1276static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P,
1277 Register RHS = Register(),
1278 MachineRegisterInfo *MRI = nullptr) {
1279 switch (P) {
1280 default:
1281 llvm_unreachable("Unknown condition code!");
1282 case CmpInst::ICMP_NE:
1283 return AArch64CC::NE;
1284 case CmpInst::ICMP_EQ:
1285 return AArch64CC::EQ;
1286 case CmpInst::ICMP_SGT:
1287 return AArch64CC::GT;
1288 case CmpInst::ICMP_SGE:
1289 if (RHS && MRI) {
1290 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1291 if (ValAndVReg && ValAndVReg->Value == 0)
1292 return AArch64CC::PL;
1293 }
1294 return AArch64CC::GE;
1295 case CmpInst::ICMP_SLT:
1296 if (RHS && MRI) {
1297 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1298 if (ValAndVReg && ValAndVReg->Value == 0)
1299 return AArch64CC::MI;
1300 }
1301 return AArch64CC::LT;
1302 case CmpInst::ICMP_SLE:
1303 return AArch64CC::LE;
1304 case CmpInst::ICMP_UGT:
1305 return AArch64CC::HI;
1306 case CmpInst::ICMP_UGE:
1307 return AArch64CC::HS;
1308 case CmpInst::ICMP_ULT:
1309 return AArch64CC::LO;
1310 case CmpInst::ICMP_ULE:
1311 return AArch64CC::LS;
1312 }
1313}
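A few sample mappings produced by the function above:

  //   ICMP_SGT -> GT          ICMP_ULT -> LO
  //   ICMP_SGE x, 0 -> PL     ICMP_SLT x, 0 -> MI    (plain sign-bit checks)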
1314
1315/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1316static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1317 AArch64CC::CondCode &CondCode,
1318 AArch64CC::CondCode &CondCode2) {
1319 CondCode2 = AArch64CC::AL;
1320 switch (CC) {
1321 default:
1322 llvm_unreachable("Unknown FP condition!");
1323 case CmpInst::FCMP_OEQ:
1324 CondCode = AArch64CC::EQ;
1325 break;
1326 case CmpInst::FCMP_OGT:
1327 CondCode = AArch64CC::GT;
1328 break;
1329 case CmpInst::FCMP_OGE:
1330 CondCode = AArch64CC::GE;
1331 break;
1332 case CmpInst::FCMP_OLT:
1333 CondCode = AArch64CC::MI;
1334 break;
1335 case CmpInst::FCMP_OLE:
1336 CondCode = AArch64CC::LS;
1337 break;
1338 case CmpInst::FCMP_ONE:
1339 CondCode = AArch64CC::MI;
1340 CondCode2 = AArch64CC::GT;
1341 break;
1342 case CmpInst::FCMP_ORD:
1343 CondCode = AArch64CC::VC;
1344 break;
1345 case CmpInst::FCMP_UNO:
1346 CondCode = AArch64CC::VS;
1347 break;
1348 case CmpInst::FCMP_UEQ:
1349 CondCode = AArch64CC::EQ;
1350 CondCode2 = AArch64CC::VS;
1351 break;
1352 case CmpInst::FCMP_UGT:
1353 CondCode = AArch64CC::HI;
1354 break;
1355 case CmpInst::FCMP_UGE:
1356 CondCode = AArch64CC::PL;
1357 break;
1358 case CmpInst::FCMP_ULT:
1359 CondCode = AArch64CC::LT;
1360 break;
1361 case CmpInst::FCMP_ULE:
1362 CondCode = AArch64CC::LE;
1363 break;
1364 case CmpInst::FCMP_UNE:
1365 CondCode = AArch64CC::NE;
1366 break;
1367 }
1368}
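For example, FCMP_OGT needs only GT and FCMP_UNO only VS, whereas FCMP_ONE is the OR of two conditions:

  //   FCMP_ONE -> MI (a olt b) || GT (a ogt b)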
1369
1370/// Convert an IR fp condition code to an AArch64 CC.
1371/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1372/// should be AND'ed instead of OR'ed.
1373static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1374 AArch64CC::CondCode &CondCode,
1375 AArch64CC::CondCode &CondCode2) {
1376 CondCode2 = AArch64CC::AL;
1377 switch (CC) {
1378 default:
1379 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1380 assert(CondCode2 == AArch64CC::AL);
1381 break;
1382 case CmpInst::FCMP_ONE:
1383 // (a one b)
1384 // == ((a olt b) || (a ogt b))
1385 // == ((a ord b) && (a une b))
1386 CondCode = AArch64CC::VC;
1387 CondCode2 = AArch64CC::NE;
1388 break;
1389 case CmpInst::FCMP_UEQ:
1390 // (a ueq b)
1391 // == ((a uno b) || (a oeq b))
1392 // == ((a ule b) && (a uge b))
1393 CondCode = AArch64CC::PL;
1394 CondCode2 = AArch64CC::LE;
1395 break;
1396 }
1397}
1398
1399/// Return a register which can be used as a bit to test in a TB(N)Z.
1400static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1401 MachineRegisterInfo &MRI) {
1402 assert(Reg.isValid() && "Expected valid register!");
1403 bool HasZext = false;
1404 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1405 unsigned Opc = MI->getOpcode();
1406
1407 if (!MI->getOperand(0).isReg() ||
1408 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1409 break;
1410
1411 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1412 //
1413 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1414 // on the truncated x is the same as the bit number on x.
1415 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1416 Opc == TargetOpcode::G_TRUNC) {
1417 if (Opc == TargetOpcode::G_ZEXT)
1418 HasZext = true;
1419
1420 Register NextReg = MI->getOperand(1).getReg();
1421 // Did we find something worth folding?
1422 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1423 break;
1424
1425 // NextReg is worth folding. Keep looking.
1426 Reg = NextReg;
1427 continue;
1428 }
1429
1430 // Attempt to find a suitable operation with a constant on one side.
1431 std::optional<uint64_t> C;
1432 Register TestReg;
1433 switch (Opc) {
1434 default:
1435 break;
1436 case TargetOpcode::G_AND:
1437 case TargetOpcode::G_XOR: {
1438 TestReg = MI->getOperand(1).getReg();
1439 Register ConstantReg = MI->getOperand(2).getReg();
1440 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1441 if (!VRegAndVal) {
1442 // AND commutes, check the other side for a constant.
1443 // FIXME: Can we canonicalize the constant so that it's always on the
1444 // same side at some point earlier?
1445 std::swap(ConstantReg, TestReg);
1446 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1447 }
1448 if (VRegAndVal) {
1449 if (HasZext)
1450 C = VRegAndVal->Value.getZExtValue();
1451 else
1452 C = VRegAndVal->Value.getSExtValue();
1453 }
1454 break;
1455 }
1456 case TargetOpcode::G_ASHR:
1457 case TargetOpcode::G_LSHR:
1458 case TargetOpcode::G_SHL: {
1459 TestReg = MI->getOperand(1).getReg();
1460 auto VRegAndVal =
1461 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1462 if (VRegAndVal)
1463 C = VRegAndVal->Value.getSExtValue();
1464 break;
1465 }
1466 }
1467
1468 // Didn't find a constant or viable register. Bail out of the loop.
1469 if (!C || !TestReg.isValid())
1470 break;
1471
1472 // We found a suitable instruction with a constant. Check to see if we can
1473 // walk through the instruction.
1474 Register NextReg;
1475 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1476 switch (Opc) {
1477 default:
1478 break;
1479 case TargetOpcode::G_AND:
1480 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1481 if ((*C >> Bit) & 1)
1482 NextReg = TestReg;
1483 break;
1484 case TargetOpcode::G_SHL:
1485 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1486 // the type of the register.
1487 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1488 NextReg = TestReg;
1489 Bit = Bit - *C;
1490 }
1491 break;
1492 case TargetOpcode::G_ASHR:
1493 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1494 // in x
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 if (Bit >= TestRegSize)
1498 Bit = TestRegSize - 1;
1499 break;
1500 case TargetOpcode::G_LSHR:
1501 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1502 if ((Bit + *C) < TestRegSize) {
1503 NextReg = TestReg;
1504 Bit = Bit + *C;
1505 }
1506 break;
1507 case TargetOpcode::G_XOR:
1508 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1509 // appropriate.
1510 //
1511 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1512 //
1513 // tbz x', b -> tbnz x, b
1514 //
1515 // Because x' only has the b-th bit set if x does not.
1516 if ((*C >> Bit) & 1)
1517 Invert = !Invert;
1518 NextReg = TestReg;
1519 break;
1520 }
1521
1522 // Check if we found anything worth folding.
1523 if (!NextReg.isValid())
1524 return Reg;
1525 Reg = NextReg;
1526 }
1527
1528 return Reg;
1529}
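A short walk-through of the folding loop above, with hypothetical vregs:

  //   %y = G_SHL %x, 2
  //   tbz %y, #3   -->  tbz %x, #1     (bit index shifted down by the shift amount)
  //   %z = G_XOR %x, 8
  //   tbz %z, #3   -->  tbnz %x, #3    (bit 3 is set in the mask, so Invert flips)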
1530
1531MachineInstr *AArch64InstructionSelector::emitTestBit(
1532 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1533 MachineIRBuilder &MIB) const {
1534 assert(TestReg.isValid());
1535 assert(ProduceNonFlagSettingCondBr &&
1536 "Cannot emit TB(N)Z with speculation tracking!");
1537 MachineRegisterInfo &MRI = *MIB.getMRI();
1538
1539 // Attempt to optimize the test bit by walking over instructions.
1540 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1541 LLT Ty = MRI.getType(TestReg);
1542 unsigned Size = Ty.getSizeInBits();
1543 assert(!Ty.isVector() && "Expected a scalar!");
1544 assert(Bit < 64 && "Bit is too large!");
1545
1546 // When the test register is a 64-bit register, we have to narrow to make
1547 // TBNZW work.
1548 bool UseWReg = Bit < 32;
1549 unsigned NecessarySize = UseWReg ? 32 : 64;
1550 if (Size != NecessarySize)
1551 TestReg = moveScalarRegClass(
1552 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1553 MIB);
1554
1555 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1556 {AArch64::TBZW, AArch64::TBNZW}};
1557 unsigned Opc = OpcTable[UseWReg][IsNegative];
1558 auto TestBitMI =
1559 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1560 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1561 return &*TestBitMI;
1562}
1563
1564bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1565 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1566 MachineIRBuilder &MIB) const {
1567 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1568 // Given something like this:
1569 //
1570 // %x = ...Something...
1571 // %one = G_CONSTANT i64 1
1572 // %zero = G_CONSTANT i64 0
1573 // %and = G_AND %x, %one
1574 // %cmp = G_ICMP intpred(ne), %and, %zero
1575 // %cmp_trunc = G_TRUNC %cmp
1576 // G_BRCOND %cmp_trunc, %bb.3
1577 //
1578 // We want to try and fold the AND into the G_BRCOND and produce either a
1579 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1580 //
1581 // In this case, we'd get
1582 //
1583 // TBNZ %x %bb.3
1584 //
1585
1586 // Check if the AND has a constant on its RHS which we can use as a mask.
1587 // If it's a power of 2, then it's the same as checking a specific bit.
1588 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1589 auto MaybeBit = getIConstantVRegValWithLookThrough(
1590 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1591 if (!MaybeBit)
1592 return false;
1593
1594 int32_t Bit = MaybeBit->Value.exactLogBase2();
1595 if (Bit < 0)
1596 return false;
1597
1598 Register TestReg = AndInst.getOperand(1).getReg();
1599
1600 // Emit a TB(N)Z.
1601 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1602 return true;
1603}
1604
1605MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1606 bool IsNegative,
1607 MachineBasicBlock *DestMBB,
1608 MachineIRBuilder &MIB) const {
1609 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1610 MachineRegisterInfo &MRI = *MIB.getMRI();
1611 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1612 AArch64::GPRRegBankID &&
1613 "Expected GPRs only?");
1614 auto Ty = MRI.getType(CompareReg);
1615 unsigned Width = Ty.getSizeInBits();
1616 assert(!Ty.isVector() && "Expected scalar only?");
1617 assert(Width <= 64 && "Expected width to be at most 64?");
1618 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1619 {AArch64::CBNZW, AArch64::CBNZX}};
1620 unsigned Opc = OpcTable[IsNegative][Width == 64];
1621 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1622 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1623 return &*BranchMI;
1624}
1625
1626bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1627 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1628 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1629 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1630 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1631 // totally clean. Some of them require two branches to implement.
1632 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1633 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1634 Pred);
1635 AArch64CC::CondCode CC1, CC2;
1636 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1637 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1638 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1639 if (CC2 != AArch64CC::AL)
1640 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1641 I.eraseFromParent();
1642 return true;
1643}
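Schematically, for a hypothetical destination block %bb:

  //   G_BRCOND (G_FCMP oeq %a, %b), %bb   -->  FCMP %a, %b; Bcc EQ, %bb
  // Predicates whose mapping sets CC2 to something other than AL get a second
  // Bcc to the same destination block.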
1644
1645bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1646 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1647 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1648 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1649 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1650 //
1651 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1652 // instructions will not be produced, as they are conditional branch
1653 // instructions that do not set flags.
1654 if (!ProduceNonFlagSettingCondBr)
1655 return false;
1656
1657 MachineRegisterInfo &MRI = *MIB.getMRI();
1658 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1659 auto Pred =
1660 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1661 Register LHS = ICmp.getOperand(2).getReg();
1662 Register RHS = ICmp.getOperand(3).getReg();
1663
1664 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1665 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1666 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1667
1668 // When we can emit a TB(N)Z, prefer that.
1669 //
1670 // Handle non-commutative condition codes first.
1671 // Note that we don't want to do this when we have a G_AND because it can
1672 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1673 if (VRegAndVal && !AndInst) {
1674 int64_t C = VRegAndVal->Value.getSExtValue();
1675
1676 // When we have a greater-than comparison, we can just test if the msb is
1677 // zero.
1678 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1679 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1680 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1681 I.eraseFromParent();
1682 return true;
1683 }
1684
1685 // When we have a less than comparison, we can just test if the msb is not
1686 // zero.
1687 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1688 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1689 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1690 I.eraseFromParent();
1691 return true;
1692 }
1693
1694 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1695 // we can test if the msb is zero.
1696 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1697 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1698 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1699 I.eraseFromParent();
1700 return true;
1701 }
1702 }
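// Illustrative examples for the three folds above (for a 64-bit %x):
// "G_ICMP intpred(sgt), %x, -1" branches iff %x >= 0, i.e. iff bit 63 is
// clear, so it becomes "TBZ %x, 63"; "intpred(slt), %x, 0" branches iff the
// sign bit is set and becomes "TBNZ %x, 63"; "intpred(sge), %x, 0" is again
// a sign-bit-clear test and becomes "TBZ %x, 63".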
1703
1704 // Attempt to handle commutative condition codes. Right now, that's only
1705 // eq/ne.
1706 if (ICmpInst::isEquality(Pred)) {
1707 if (!VRegAndVal) {
1708 std::swap(RHS, LHS);
1709 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1710 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1711 }
1712
1713 if (VRegAndVal && VRegAndVal->Value == 0) {
1714 // If there's a G_AND feeding into this branch, try to fold it away by
1715 // emitting a TB(N)Z instead.
1716 //
1717 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1718 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1719 // would be redundant.
1720 if (AndInst &&
1721 tryOptAndIntoCompareBranch(
1722 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1723 I.eraseFromParent();
1724 return true;
1725 }
1726
1727 // Otherwise, try to emit a CB(N)Z instead.
1728 auto LHSTy = MRI.getType(LHS);
1729 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1730 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1731 I.eraseFromParent();
1732 return true;
1733 }
1734 }
1735 }
1736
1737 return false;
1738}
1739
1740bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1741 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1742 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1743 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1744 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1745 return true;
1746
1747 // Couldn't optimize. Emit a compare + a Bcc.
1748 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1749 auto &PredOp = ICmp.getOperand(1);
1750 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1751 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1752 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1753 ICmp.getOperand(3).getReg(), MIB.getMRI());
1754 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1755 I.eraseFromParent();
1756 return true;
1757}
1758
1759bool AArch64InstructionSelector::selectCompareBranch(
1760 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1761 Register CondReg = I.getOperand(0).getReg();
1762 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1763 // Try to select the G_BRCOND using whatever is feeding the condition if
1764 // possible.
1765 unsigned CCMIOpc = CCMI->getOpcode();
1766 if (CCMIOpc == TargetOpcode::G_FCMP)
1767 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1768 if (CCMIOpc == TargetOpcode::G_ICMP)
1769 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1770
1771 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1772 // instructions will not be produced, as they are conditional branch
1773 // instructions that do not set flags.
1774 if (ProduceNonFlagSettingCondBr) {
1775 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1776 I.getOperand(1).getMBB(), MIB);
1777 I.eraseFromParent();
1778 return true;
1779 }
1780
1781 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1782 auto TstMI =
1783 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1784 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1785 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1786 .addImm(AArch64CC::NE)
1787 .addMBB(I.getOperand(1).getMBB());
1788 I.eraseFromParent();
1789 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1790}
1791
1792/// Returns the element immediate value of a vector shift operand if found.
1793/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1794static std::optional<int64_t> getVectorShiftImm(Register Reg,
1795 MachineRegisterInfo &MRI) {
1796 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1797 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1798 return getAArch64VectorSplatScalar(*OpMI, MRI);
1799}
1800
1801/// Matches and returns the shift immediate value for a SHL instruction given
1802/// a shift operand.
1803static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1804 MachineRegisterInfo &MRI) {
1805 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1806 if (!ShiftImm)
1807 return std::nullopt;
1808 // Check the immediate is in range for a SHL.
1809 int64_t Imm = *ShiftImm;
1810 if (Imm < 0)
1811 return std::nullopt;
1812 switch (SrcTy.getElementType().getSizeInBits()) {
1813 default:
1814 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1815 return std::nullopt;
1816 case 8:
1817 if (Imm > 7)
1818 return std::nullopt;
1819 break;
1820 case 16:
1821 if (Imm > 15)
1822 return std::nullopt;
1823 break;
1824 case 32:
1825 if (Imm > 31)
1826 return std::nullopt;
1827 break;
1828 case 64:
1829 if (Imm > 63)
1830 return std::nullopt;
1831 break;
1832 }
1833 return Imm;
1834}
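// A hedged example of the range check above: a <4 x s32> shift whose RHS is a
// G_BUILD_VECTOR splat of 5 yields 5, while a splat of 32 (or any negative
// value) is out of range for a 32-bit element SHL immediate and yields
// std::nullopt, so the caller falls back to the register form.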
1835
1836bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1837 MachineRegisterInfo &MRI) {
1838 assert(I.getOpcode() == TargetOpcode::G_SHL);
1839 Register DstReg = I.getOperand(0).getReg();
1840 const LLT Ty = MRI.getType(DstReg);
1841 Register Src1Reg = I.getOperand(1).getReg();
1842 Register Src2Reg = I.getOperand(2).getReg();
1843
1844 if (!Ty.isVector())
1845 return false;
1846
1847 // Check if we have a vector of constants on RHS that we can select as the
1848 // immediate form.
1849 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1850
1851 unsigned Opc = 0;
1852 if (Ty == LLT::fixed_vector(2, 64)) {
1853 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1854 } else if (Ty == LLT::fixed_vector(4, 32)) {
1855 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1856 } else if (Ty == LLT::fixed_vector(2, 32)) {
1857 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1858 } else if (Ty == LLT::fixed_vector(4, 16)) {
1859 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1860 } else if (Ty == LLT::fixed_vector(8, 16)) {
1861 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1862 } else if (Ty == LLT::fixed_vector(16, 8)) {
1863 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1864 } else if (Ty == LLT::fixed_vector(8, 8)) {
1865 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1866 } else {
1867 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1868 return false;
1869 }
1870
1871 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1872 if (ImmVal)
1873 Shl.addImm(*ImmVal);
1874 else
1875 Shl.addUse(Src2Reg);
1876 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1877 I.eraseFromParent();
1878 return true;
1879}
1880
1881bool AArch64InstructionSelector::selectVectorAshrLshr(
1882 MachineInstr &I, MachineRegisterInfo &MRI) {
1883 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1884 I.getOpcode() == TargetOpcode::G_LSHR);
1885 Register DstReg = I.getOperand(0).getReg();
1886 const LLT Ty = MRI.getType(DstReg);
1887 Register Src1Reg = I.getOperand(1).getReg();
1888 Register Src2Reg = I.getOperand(2).getReg();
1889
1890 if (!Ty.isVector())
1891 return false;
1892
1893 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1894
1895 // We expect the immediate case to be lowered in the PostLegalCombiner to
1896 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1897
1898 // There is no shift-right-by-register instruction, but the shift-left-by-
1899 // register instruction takes a signed shift amount, where negative values
1900 // specify a right shift.
1901
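// For example (a sketch, not taken from any particular test): a <4 x s32>
// G_ASHR whose shift amount is still a register at this point is emitted as
// NEGv4i32 of the shift amount followed by SSHLv4i32, while a G_LSHR of the
// same type uses USHLv4i32 instead.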
1902 unsigned Opc = 0;
1903 unsigned NegOpc = 0;
1904 const TargetRegisterClass *RC =
1905 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1906 if (Ty == LLT::fixed_vector(2, 64)) {
1907 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1908 NegOpc = AArch64::NEGv2i64;
1909 } else if (Ty == LLT::fixed_vector(4, 32)) {
1910 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1911 NegOpc = AArch64::NEGv4i32;
1912 } else if (Ty == LLT::fixed_vector(2, 32)) {
1913 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1914 NegOpc = AArch64::NEGv2i32;
1915 } else if (Ty == LLT::fixed_vector(4, 16)) {
1916 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1917 NegOpc = AArch64::NEGv4i16;
1918 } else if (Ty == LLT::fixed_vector(8, 16)) {
1919 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1920 NegOpc = AArch64::NEGv8i16;
1921 } else if (Ty == LLT::fixed_vector(16, 8)) {
1922 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1923 NegOpc = AArch64::NEGv16i8;
1924 } else if (Ty == LLT::fixed_vector(8, 8)) {
1925 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1926 NegOpc = AArch64::NEGv8i8;
1927 } else {
1928 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1929 return false;
1930 }
1931
1932 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1933 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1934 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1935 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1936 I.eraseFromParent();
1937 return true;
1938}
1939
1940bool AArch64InstructionSelector::selectVaStartAAPCS(
1941 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1942
1943 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
1944 MF.getFunction().isVarArg()))
1945 return false;
1946
1947 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1948 // Standard, section 10.1.5.
1949
1950 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1951 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1952 const auto *PtrRegClass =
1953 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1954
1955 const MCInstrDesc &MCIDAddAddr =
1956 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1957 const MCInstrDesc &MCIDStoreAddr =
1958 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1959
1960 /*
1961 * typedef struct va_list {
1962 * void * stack; // next stack param
1963 * void * gr_top; // end of GP arg reg save area
1964 * void * vr_top; // end of FP/SIMD arg reg save area
1965 * int gr_offs; // offset from gr_top to next GP register arg
1966 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1967 * } va_list;
1968 */
1969 const auto VAList = I.getOperand(0).getReg();
1970
1971 // Our current offset in bytes from the va_list struct (VAList).
1972 unsigned OffsetBytes = 0;
1973
1974 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1975 // and increment OffsetBytes by PtrSize.
1976 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1977 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1978 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
1979 .addDef(Top)
1980 .addFrameIndex(FrameIndex)
1981 .addImm(Imm)
1982 .addImm(0);
1983 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1984
1985 const auto *MMO = *I.memoperands_begin();
1986 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
1987 .addUse(Top)
1988 .addUse(VAList)
1989 .addImm(OffsetBytes / PtrSize)
1990 .addMemOperand(MF.getMachineMemOperand(
1991 MMO->getPointerInfo().getWithOffset(OffsetBytes),
1992 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
1993 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1994
1995 OffsetBytes += PtrSize;
1996 };
1997
1998 // void* stack at offset 0
1999 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2000
2001 // void* gr_top at offset 8 (4 on ILP32)
2002 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2003 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2004
2005 // void* vr_top at offset 16 (8 on ILP32)
2006 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2007 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2008
2009 // Helper function to store a 4-byte integer constant to VAList at offset
2010 // OffsetBytes, and increment OffsetBytes by 4.
2011 const auto PushIntConstant = [&](const int32_t Value) {
2012 constexpr int IntSize = 4;
2013 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2014 auto MIB =
2015 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2016 .addDef(Temp)
2017 .addImm(Value);
2018 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2019
2020 const auto *MMO = *I.memoperands_begin();
2021 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2022 .addUse(Temp)
2023 .addUse(VAList)
2024 .addImm(OffsetBytes / IntSize)
2025 .addMemOperand(MF.getMachineMemOperand(
2026 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2027 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2028 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2029 OffsetBytes += IntSize;
2030 };
2031
2032 // int gr_offs at offset 24 (12 on ILP32)
2033 PushIntConstant(-static_cast<int32_t>(GPRSize));
2034
2035 // int vr_offs at offset 28 (16 on ILP32)
2036 PushIntConstant(-static_cast<int32_t>(FPRSize));
2037
2038 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2039
2040 I.eraseFromParent();
2041 return true;
2042}
2043
2044bool AArch64InstructionSelector::selectVaStartDarwin(
2045 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2046 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2047 Register ListReg = I.getOperand(0).getReg();
2048
2049 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2050
2051 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2052 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2053 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2054 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2055 ? FuncInfo->getVarArgsGPRIndex()
2056 : FuncInfo->getVarArgsStackIndex();
2057 }
2058
2059 auto MIB =
2060 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2061 .addDef(ArgsAddrReg)
2062 .addFrameIndex(FrameIdx)
2063 .addImm(0)
2064 .addImm(0);
2065
2066 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2067
2068 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2069 .addUse(ArgsAddrReg)
2070 .addUse(ListReg)
2071 .addImm(0)
2072 .addMemOperand(*I.memoperands_begin());
2073
2074 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2075 I.eraseFromParent();
2076 return true;
2077}
2078
2079void AArch64InstructionSelector::materializeLargeCMVal(
2080 MachineInstr &I, const Value *V, unsigned OpFlags) {
2081 MachineBasicBlock &MBB = *I.getParent();
2082 MachineFunction &MF = *MBB.getParent();
2083 MachineRegisterInfo &MRI = MF.getRegInfo();
2084
2085 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2086 MovZ->addOperand(MF, I.getOperand(1));
2087 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2088 AArch64II::MO_NC);
2089 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2090 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2091
2092 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2093 Register ForceDstReg) {
2094 Register DstReg = ForceDstReg
2095 ? ForceDstReg
2096 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2097 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2098 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2099 MovI->addOperand(MF, MachineOperand::CreateGA(
2100 GV, MovZ->getOperand(1).getOffset(), Flags));
2101 } else {
2102 MovI->addOperand(
2103 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2104 MovZ->getOperand(1).getOffset(), Flags));
2105 }
2106 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2107 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2108 return DstReg;
2109 };
2110 Register DstReg = BuildMovK(MovZ.getReg(0),
2111 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2112 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2113 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2114}
2115
2116bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2117 MachineBasicBlock &MBB = *I.getParent();
2118 MachineFunction &MF = *MBB.getParent();
2119 MachineRegisterInfo &MRI = MF.getRegInfo();
2120
2121 switch (I.getOpcode()) {
2122 case TargetOpcode::G_STORE: {
2123 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2124 MachineOperand &SrcOp = I.getOperand(0);
2125 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2126 // Allow matching with imported patterns for stores of pointers. Unlike
2127 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2128 // and constrain.
2129 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2130 Register NewSrc = Copy.getReg(0);
2131 SrcOp.setReg(NewSrc);
2132 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2133 Changed = true;
2134 }
2135 return Changed;
2136 }
2137 case TargetOpcode::G_PTR_ADD: {
2138 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2139 // arithmetic semantics instead of falling back to regular arithmetic.
2140 const auto &TL = STI.getTargetLowering();
2141 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2142 return false;
2143 return convertPtrAddToAdd(I, MRI);
2144 }
2145 case TargetOpcode::G_LOAD: {
2146 // For scalar loads of pointers, we try to convert the dest type from p0
2147 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2148 // conversion, this should be ok because all users should have been
2149 // selected already, so the type doesn't matter for them.
2150 Register DstReg = I.getOperand(0).getReg();
2151 const LLT DstTy = MRI.getType(DstReg);
2152 if (!DstTy.isPointer())
2153 return false;
2154 MRI.setType(DstReg, LLT::scalar(64));
2155 return true;
2156 }
2157 case AArch64::G_DUP: {
2158 // Convert the type from p0 to s64 to help selection.
2159 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2160 if (!DstTy.isPointerVector())
2161 return false;
2162 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2163 MRI.setType(I.getOperand(0).getReg(),
2164 DstTy.changeElementType(LLT::scalar(64)));
2165 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2166 I.getOperand(1).setReg(NewSrc.getReg(0));
2167 return true;
2168 }
2169 case AArch64::G_INSERT_VECTOR_ELT: {
2170 // Convert the type from p0 to s64 to help selection.
2171 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2172 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2173 if (!SrcVecTy.isPointerVector())
2174 return false;
2175 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2176 MRI.setType(I.getOperand(1).getReg(),
2177 DstTy.changeElementType(LLT::scalar(64)));
2178 MRI.setType(I.getOperand(0).getReg(),
2179 DstTy.changeElementType(LLT::scalar(64)));
2180 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2181 I.getOperand(2).setReg(NewSrc.getReg(0));
2182 return true;
2183 }
2184 case TargetOpcode::G_UITOFP:
2185 case TargetOpcode::G_SITOFP: {
2186 // If both source and destination regbanks are FPR, then convert the opcode
2187 // to G_SITOF so that the importer can select it to an fpr variant.
2188 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2189 // copy.
2190 Register SrcReg = I.getOperand(1).getReg();
2191 LLT SrcTy = MRI.getType(SrcReg);
2192 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2193 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2194 return false;
2195
2196 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2197 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2198 I.setDesc(TII.get(AArch64::G_SITOF));
2199 else
2200 I.setDesc(TII.get(AArch64::G_UITOF));
2201 return true;
2202 }
2203 return false;
2204 }
2205 default:
2206 return false;
2207 }
2208}
2209
2210/// This lowering tries to look for G_PTR_ADD instructions and then converts
2211/// them to a standard G_ADD with a COPY on the source.
2212///
2213/// The motivation behind this is to expose the add semantics to the imported
2214/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2215/// because the selector works bottom up, uses before defs. By the time we
2216/// end up trying to select a G_PTR_ADD, we should have already attempted to
2217/// fold this into addressing modes and were therefore unsuccessful.
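/// A sketch of the rewrite (types and register names are illustrative only):
///
///   %dst:_(p0) = G_PTR_ADD %base(p0), %off(s64)
///
/// becomes
///
///   %cast:_(s64) = G_PTRTOINT %base(p0)
///   %dst:_(s64) = G_ADD %cast, %off
///
/// and, if %off was produced by a "0 - %x" idiom, the G_ADD is further turned
/// into %dst = G_SUB %cast, %x below.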
2218bool AArch64InstructionSelector::convertPtrAddToAdd(
2219 MachineInstr &I, MachineRegisterInfo &MRI) {
2220 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2221 Register DstReg = I.getOperand(0).getReg();
2222 Register AddOp1Reg = I.getOperand(1).getReg();
2223 const LLT PtrTy = MRI.getType(DstReg);
2224 if (PtrTy.getAddressSpace() != 0)
2225 return false;
2226
2227 const LLT CastPtrTy =
2228 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2229 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2230 // Set regbanks on the registers.
2231 if (PtrTy.isVector())
2232 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2233 else
2234 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2235
2236 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2237 // %dst(intty) = G_ADD %intbase, off
2238 I.setDesc(TII.get(TargetOpcode::G_ADD));
2239 MRI.setType(DstReg, CastPtrTy);
2240 I.getOperand(1).setReg(PtrToInt.getReg(0));
2241 if (!select(*PtrToInt)) {
2242 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2243 return false;
2244 }
2245
2246 // Also take the opportunity here to try to do some optimization.
2247 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2248 Register NegatedReg;
2249 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2250 return true;
2251 I.getOperand(2).setReg(NegatedReg);
2252 I.setDesc(TII.get(TargetOpcode::G_SUB));
2253 return true;
2254}
2255
2256bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2257 MachineRegisterInfo &MRI) {
2258 // We try to match the immediate variant of LSL, which is actually an alias
2259 // for a special case of UBFM. Otherwise, we fall back to the imported
2260 // selector which will match the register variant.
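 // As a hedged example of the alias being matched: for a 64-bit "G_SHL %x, 3"
 // the two UBFMXri immediates are (64 - 3) % 64 == 61 and 63 - 3 == 60, the
 // same encoding the assembler uses for "lsl x0, x1, #3".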
2261 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2262 const auto &MO = I.getOperand(2);
2263 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2264 if (!VRegAndVal)
2265 return false;
2266
2267 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2268 if (DstTy.isVector())
2269 return false;
2270 bool Is64Bit = DstTy.getSizeInBits() == 64;
2271 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2272 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2273
2274 if (!Imm1Fn || !Imm2Fn)
2275 return false;
2276
2277 auto NewI =
2278 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2279 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2280
2281 for (auto &RenderFn : *Imm1Fn)
2282 RenderFn(NewI);
2283 for (auto &RenderFn : *Imm2Fn)
2284 RenderFn(NewI);
2285
2286 I.eraseFromParent();
2287 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2288}
2289
2290bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2291 MachineInstr &I, MachineRegisterInfo &MRI) {
2292 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2293 // If we're storing a scalar, it doesn't matter what register bank that
2294 // scalar is on. All that matters is the size.
2295 //
2296 // So, if we see something like this (with a 32-bit scalar as an example):
2297 //
2298 // %x:gpr(s32) = ... something ...
2299 // %y:fpr(s32) = COPY %x:gpr(s32)
2300 // G_STORE %y:fpr(s32)
2301 //
2302 // We can fix this up into something like this:
2303 //
2304 // G_STORE %x:gpr(s32)
2305 //
2306 // And then continue the selection process normally.
2307 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2308 if (!DefDstReg.isValid())
2309 return false;
2310 LLT DefDstTy = MRI.getType(DefDstReg);
2311 Register StoreSrcReg = I.getOperand(0).getReg();
2312 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2313
2314 // If we get something strange like a physical register, then we shouldn't
2315 // go any further.
2316 if (!DefDstTy.isValid())
2317 return false;
2318
2319 // Are the source and dst types the same size?
2320 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2321 return false;
2322
2323 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2324 RBI.getRegBank(DefDstReg, MRI, TRI))
2325 return false;
2326
2327 // We have a cross-bank copy, which is entering a store. Let's fold it.
2328 I.getOperand(0).setReg(DefDstReg);
2329 return true;
2330}
2331
2332bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2333 assert(I.getParent() && "Instruction should be in a basic block!");
2334 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2335
2336 MachineBasicBlock &MBB = *I.getParent();
2337 MachineFunction &MF = *MBB.getParent();
2338 MachineRegisterInfo &MRI = MF.getRegInfo();
2339
2340 switch (I.getOpcode()) {
2341 case AArch64::G_DUP: {
2342 // Before selecting a DUP instruction, check if it is better selected as a
2343 // MOV or load from a constant pool.
2344 Register Src = I.getOperand(1).getReg();
2345 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2346 if (!ValAndVReg)
2347 return false;
2348 LLVMContext &Ctx = MF.getFunction().getContext();
2349 Register Dst = I.getOperand(0).getReg();
2350 Constant *CV = ConstantDataVector::getSplat(
2351 MRI.getType(Dst).getNumElements(),
2352 ConstantInt::get(
2353 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2354 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2355 if (!emitConstantVector(Dst, CV, MIB, MRI))
2356 return false;
2357 I.eraseFromParent();
2358 return true;
2359 }
2360 case TargetOpcode::G_SEXT:
2361 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2362 // over a normal extend.
2363 if (selectUSMovFromExtend(I, MRI))
2364 return true;
2365 return false;
2366 case TargetOpcode::G_BR:
2367 return false;
2368 case TargetOpcode::G_SHL:
2369 return earlySelectSHL(I, MRI);
2370 case TargetOpcode::G_CONSTANT: {
2371 bool IsZero = false;
2372 if (I.getOperand(1).isCImm())
2373 IsZero = I.getOperand(1).getCImm()->isZero();
2374 else if (I.getOperand(1).isImm())
2375 IsZero = I.getOperand(1).getImm() == 0;
2376
2377 if (!IsZero)
2378 return false;
2379
2380 Register DefReg = I.getOperand(0).getReg();
2381 LLT Ty = MRI.getType(DefReg);
2382 if (Ty.getSizeInBits() == 64) {
2383 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2384 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2385 } else if (Ty.getSizeInBits() == 32) {
2386 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2387 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2388 } else
2389 return false;
2390
2391 I.setDesc(TII.get(TargetOpcode::COPY));
2392 return true;
2393 }
2394
2395 case TargetOpcode::G_ADD: {
2396 // Check if this is being fed by a G_ICMP on either side.
2397 //
2398 // (cmp pred, x, y) + z
2399 //
2400 // In the above case, when the cmp is true, we increment z by 1. So, we can
2401 // fold the add into the cset for the cmp by using cinc.
2402 //
2403 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
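 // Roughly, with invented registers: "%d = G_ADD %z, (G_ICMP eq %x, %y)"
 // becomes an integer compare of %x and %y followed by "CSINC %d, %z, %z, ne".
 // CSINC returns its first source when the condition holds and its second
 // source plus one otherwise, so passing the *inverse* predicate (ne) yields
 // %z + 1 exactly when the original compare (eq) is true.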
2404 Register AddDst = I.getOperand(0).getReg();
2405 Register AddLHS = I.getOperand(1).getReg();
2406 Register AddRHS = I.getOperand(2).getReg();
2407 // Only handle scalars.
2408 LLT Ty = MRI.getType(AddLHS);
2409 if (Ty.isVector())
2410 return false;
2411 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2412 // bits.
2413 unsigned Size = Ty.getSizeInBits();
2414 if (Size != 32 && Size != 64)
2415 return false;
2416 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2417 if (!MRI.hasOneNonDBGUse(Reg))
2418 return nullptr;
2419 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2420 // compare.
2421 if (Size == 32)
2422 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2423 // We model scalar compares using 32-bit destinations right now.
2424 // If it's a 64-bit compare, it'll have 64-bit sources.
2425 Register ZExt;
2426 if (!mi_match(Reg, MRI,
2427 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2428 return nullptr;
2429 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2430 if (!Cmp ||
2431 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2432 return nullptr;
2433 return Cmp;
2434 };
2435 // Try to match
2436 // z + (cmp pred, x, y)
2437 MachineInstr *Cmp = MatchCmp(AddRHS);
2438 if (!Cmp) {
2439 // (cmp pred, x, y) + z
2440 std::swap(AddLHS, AddRHS);
2441 Cmp = MatchCmp(AddRHS);
2442 if (!Cmp)
2443 return false;
2444 }
2445 auto &PredOp = Cmp->getOperand(1);
2447 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2448 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2449 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2450 AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2451 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
2452 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2453 I.eraseFromParent();
2454 return true;
2455 }
2456 case TargetOpcode::G_OR: {
2457 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2458 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2459 // shifting and masking that we can replace with a BFI (encoded as a BFM).
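 // A worked example for the matcher below (values invented): with Size == 32,
 // "%dst = G_OR (G_SHL %hi, 8), (G_AND %lo, 0xff)" satisfies
 // (1 << 8) - 1 == 0xff, so Immr == 24, Imms == 23 and we emit
 // "BFMWri %dst, %lo, %hi, 24, 23", which inserts the low 24 bits of %hi above
 // the low 8 bits kept from %lo.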
2460 Register Dst = I.getOperand(0).getReg();
2461 LLT Ty = MRI.getType(Dst);
2462
2463 if (!Ty.isScalar())
2464 return false;
2465
2466 unsigned Size = Ty.getSizeInBits();
2467 if (Size != 32 && Size != 64)
2468 return false;
2469
2470 Register ShiftSrc;
2471 int64_t ShiftImm;
2472 Register MaskSrc;
2473 int64_t MaskImm;
2474 if (!mi_match(
2475 Dst, MRI,
2476 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2477 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2478 return false;
2479
2480 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2481 return false;
2482
2483 int64_t Immr = Size - ShiftImm;
2484 int64_t Imms = Size - ShiftImm - 1;
2485 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2486 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2487 I.eraseFromParent();
2488 return true;
2489 }
2490 case TargetOpcode::G_FENCE: {
2491 if (I.getOperand(1).getImm() == 0)
2492 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2493 else
2494 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2495 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2496 I.eraseFromParent();
2497 return true;
2498 }
2499 default:
2500 return false;
2501 }
2502}
2503
2504bool AArch64InstructionSelector::select(MachineInstr &I) {
2505 assert(I.getParent() && "Instruction should be in a basic block!");
2506 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2507
2508 MachineBasicBlock &MBB = *I.getParent();
2509 MachineFunction &MF = *MBB.getParent();
2510 MachineRegisterInfo &MRI = MF.getRegInfo();
2511
2512 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2513 if (Subtarget->requiresStrictAlign()) {
2514 // We don't support this feature yet.
2515 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2516 return false;
2517 }
2518
2519 MIB.setInstrAndDebugLoc(I);
2520
2521 unsigned Opcode = I.getOpcode();
2522 // G_PHI requires same handling as PHI
2523 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2524 // Certain non-generic instructions also need some special handling.
2525
2526 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2527 return selectImpl(I, *CoverageInfo);
2528
2529 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2530 const Register DefReg = I.getOperand(0).getReg();
2531 const LLT DefTy = MRI.getType(DefReg);
2532
2533 const RegClassOrRegBank &RegClassOrBank =
2534 MRI.getRegClassOrRegBank(DefReg);
2535
2536 const TargetRegisterClass *DefRC =
2537 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
2538 if (!DefRC) {
2539 if (!DefTy.isValid()) {
2540 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2541 return false;
2542 }
2543 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2544 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2545 if (!DefRC) {
2546 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2547 return false;
2548 }
2549 }
2550
2551 I.setDesc(TII.get(TargetOpcode::PHI));
2552
2553 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2554 }
2555
2556 if (I.isCopy())
2557 return selectCopy(I, TII, MRI, TRI, RBI);
2558
2559 if (I.isDebugInstr())
2560 return selectDebugInstr(I, MRI, RBI);
2561
2562 return true;
2563 }
2564
2565
2566 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2567 LLVM_DEBUG(
2568 dbgs() << "Generic instruction has unexpected implicit operands\n");
2569 return false;
2570 }
2571
2572 // Try to do some lowering before we start instruction selecting. These
2573 // lowerings are purely transformations on the input G_MIR and so selection
2574 // must continue after any modification of the instruction.
2575 if (preISelLower(I)) {
2576 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2577 }
2578
2579 // There may be patterns where the importer can't deal with them optimally,
2580 // but does select it to a suboptimal sequence so our custom C++ selection
2581 // code later never has a chance to work on it. Therefore, we have an early
2582 // selection attempt here to give priority to certain selection routines
2583 // over the imported ones.
2584 if (earlySelect(I))
2585 return true;
2586
2587 if (selectImpl(I, *CoverageInfo))
2588 return true;
2589
2590 LLT Ty =
2591 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2592
2593 switch (Opcode) {
2594 case TargetOpcode::G_SBFX:
2595 case TargetOpcode::G_UBFX: {
2596 static const unsigned OpcTable[2][2] = {
2597 {AArch64::UBFMWri, AArch64::UBFMXri},
2598 {AArch64::SBFMWri, AArch64::SBFMXri}};
2599 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2600 unsigned Size = Ty.getSizeInBits();
2601 unsigned Opc = OpcTable[IsSigned][Size == 64];
2602 auto Cst1 =
2603 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2604 assert(Cst1 && "Should have gotten a constant for src 1?");
2605 auto Cst2 =
2606 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2607 assert(Cst2 && "Should have gotten a constant for src 2?");
2608 auto LSB = Cst1->Value.getZExtValue();
2609 auto Width = Cst2->Value.getZExtValue();
2610 auto BitfieldInst =
2611 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2612 .addImm(LSB)
2613 .addImm(LSB + Width - 1);
2614 I.eraseFromParent();
2615 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2616 }
2617 case TargetOpcode::G_BRCOND:
2618 return selectCompareBranch(I, MF, MRI);
2619
2620 case TargetOpcode::G_BRINDIRECT: {
2621 const Function &Fn = MF.getFunction();
2622 if (std::optional<uint16_t> BADisc =
2623 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2624 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2625 MI.addImm(AArch64PACKey::IA);
2626 MI.addImm(*BADisc);
2627 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2628 I.eraseFromParent();
2629 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2630 }
2631 I.setDesc(TII.get(AArch64::BR));
2632 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2633 }
2634
2635 case TargetOpcode::G_BRJT:
2636 return selectBrJT(I, MRI);
2637
2638 case AArch64::G_ADD_LOW: {
2639 // This op may have been separated from its ADRP companion by the localizer
2640 // or some other code motion pass. Given that many CPUs will try to
2641 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2642 // which will later be expanded into an ADRP+ADD pair after scheduling.
2643 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2644 if (BaseMI->getOpcode() != AArch64::ADRP) {
2645 I.setDesc(TII.get(AArch64::ADDXri));
2646 I.addOperand(MachineOperand::CreateImm(0));
2647 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2648 }
2649 assert(TM.getCodeModel() == CodeModel::Small &&
2650 "Expected small code model");
2651 auto Op1 = BaseMI->getOperand(1);
2652 auto Op2 = I.getOperand(2);
2653 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2654 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2655 Op1.getTargetFlags())
2656 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2657 Op2.getTargetFlags());
2658 I.eraseFromParent();
2659 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2660 }
2661
2662 case TargetOpcode::G_FCONSTANT:
2663 case TargetOpcode::G_CONSTANT: {
2664 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2665
2666 const LLT s8 = LLT::scalar(8);
2667 const LLT s16 = LLT::scalar(16);
2668 const LLT s32 = LLT::scalar(32);
2669 const LLT s64 = LLT::scalar(64);
2670 const LLT s128 = LLT::scalar(128);
2671 const LLT p0 = LLT::pointer(0, 64);
2672
2673 const Register DefReg = I.getOperand(0).getReg();
2674 const LLT DefTy = MRI.getType(DefReg);
2675 const unsigned DefSize = DefTy.getSizeInBits();
2676 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2677
2678 // FIXME: Redundant check, but even less readable when factored out.
2679 if (isFP) {
2680 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2681 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2682 << " constant, expected: " << s16 << " or " << s32
2683 << " or " << s64 << " or " << s128 << '\n');
2684 return false;
2685 }
2686
2687 if (RB.getID() != AArch64::FPRRegBankID) {
2688 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2689 << " constant on bank: " << RB
2690 << ", expected: FPR\n");
2691 return false;
2692 }
2693
2694 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2695 // can be sure tablegen works correctly and isn't rescued by this code.
2696 // 0.0 is not covered by tablegen for FP128. So we will handle this
2697 // scenario in the code here.
2698 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2699 return false;
2700 } else {
2701 // s32 and s64 are covered by tablegen.
2702 if (Ty != p0 && Ty != s8 && Ty != s16) {
2703 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2704 << " constant, expected: " << s32 << ", " << s64
2705 << ", or " << p0 << '\n');
2706 return false;
2707 }
2708
2709 if (RB.getID() != AArch64::GPRRegBankID) {
2710 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2711 << " constant on bank: " << RB
2712 << ", expected: GPR\n");
2713 return false;
2714 }
2715 }
2716
2717 if (isFP) {
2718 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2719 // For 16, 64, and 128b values, emit a constant pool load.
2720 switch (DefSize) {
2721 default:
2722 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2723 case 32:
2724 case 64: {
2725 bool OptForSize = shouldOptForSize(&MF);
2726 const auto &TLI = MF.getSubtarget().getTargetLowering();
2727 // If TLI says that this fpimm is illegal, then we'll expand to a
2728 // constant pool load.
2729 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2730 EVT::getFloatingPointVT(DefSize), OptForSize))
2731 break;
2732 [[fallthrough]];
2733 }
2734 case 16:
2735 case 128: {
2736 auto *FPImm = I.getOperand(1).getFPImm();
2737 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2738 if (!LoadMI) {
2739 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2740 return false;
2741 }
2742 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2743 I.eraseFromParent();
2744 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2745 }
2746 }
2747
2748 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2749 // Either emit a FMOV, or emit a copy to emit a normal mov.
2750 const Register DefGPRReg = MRI.createVirtualRegister(
2751 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2752 MachineOperand &RegOp = I.getOperand(0);
2753 RegOp.setReg(DefGPRReg);
2754 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2755 MIB.buildCopy({DefReg}, {DefGPRReg});
2756
2757 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2758 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2759 return false;
2760 }
2761
2762 MachineOperand &ImmOp = I.getOperand(1);
2763 // FIXME: Is going through int64_t always correct?
2764 ImmOp.ChangeToImmediate(
2765 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2766 } else if (I.getOperand(1).isCImm()) {
2767 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2768 I.getOperand(1).ChangeToImmediate(Val);
2769 } else if (I.getOperand(1).isImm()) {
2770 uint64_t Val = I.getOperand(1).getImm();
2771 I.getOperand(1).ChangeToImmediate(Val);
2772 }
2773
2774 const unsigned MovOpc =
2775 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2776 I.setDesc(TII.get(MovOpc));
2777 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2778 return true;
2779 }
2780 case TargetOpcode::G_EXTRACT: {
2781 Register DstReg = I.getOperand(0).getReg();
2782 Register SrcReg = I.getOperand(1).getReg();
2783 LLT SrcTy = MRI.getType(SrcReg);
2784 LLT DstTy = MRI.getType(DstReg);
2785 (void)DstTy;
2786 unsigned SrcSize = SrcTy.getSizeInBits();
2787
2788 if (SrcTy.getSizeInBits() > 64) {
2789 // This should be an extract of an s128, which is like a vector extract.
2790 if (SrcTy.getSizeInBits() != 128)
2791 return false;
2792 // Only support extracting 64 bits from an s128 at the moment.
2793 if (DstTy.getSizeInBits() != 64)
2794 return false;
2795
2796 unsigned Offset = I.getOperand(2).getImm();
2797 if (Offset % 64 != 0)
2798 return false;
2799
2800 // Check we have the right regbank always.
2801 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2802 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2803 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2804
2805 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2806 auto NewI =
2807 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2808 .addUse(SrcReg, 0,
2809 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2810 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2811 AArch64::GPR64RegClass, NewI->getOperand(0));
2812 I.eraseFromParent();
2813 return true;
2814 }
2815
2816 // Emit the same code as a vector extract.
2817 // Offset must be a multiple of 64.
2818 unsigned LaneIdx = Offset / 64;
2819 MachineInstr *Extract = emitExtractVectorElt(
2820 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2821 if (!Extract)
2822 return false;
2823 I.eraseFromParent();
2824 return true;
2825 }
2826
2827 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2828 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2829 Ty.getSizeInBits() - 1);
2830
2831 if (SrcSize < 64) {
2832 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2833 "unexpected G_EXTRACT types");
2835 }
2836
2837 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2838 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2839 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2840 .addReg(DstReg, 0, AArch64::sub_32);
2841 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2842 AArch64::GPR32RegClass, MRI);
2843 I.getOperand(0).setReg(DstReg);
2844
2845 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2846 }
2847
2848 case TargetOpcode::G_INSERT: {
2849 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2850 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2851 unsigned DstSize = DstTy.getSizeInBits();
2852 // Larger inserts are vectors, same-size ones should be something else by
2853 // now (split up or turned into COPYs).
2854 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2855 return false;
2856
2857 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2858 unsigned LSB = I.getOperand(3).getImm();
2859 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2860 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2861 MachineInstrBuilder(MF, I).addImm(Width - 1);
2862
2863 if (DstSize < 64) {
2864 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2865 "unexpected G_INSERT types");
2867 }
2868
2869 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2870 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2871 TII.get(AArch64::SUBREG_TO_REG))
2872 .addDef(SrcReg)
2873 .addImm(0)
2874 .addUse(I.getOperand(2).getReg())
2875 .addImm(AArch64::sub_32);
2876 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2877 AArch64::GPR32RegClass, MRI);
2878 I.getOperand(2).setReg(SrcReg);
2879
2880 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2881 }
2882 case TargetOpcode::G_FRAME_INDEX: {
2883 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2884 if (Ty != LLT::pointer(0, 64)) {
2885 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2886 << ", expected: " << LLT::pointer(0, 64) << '\n');
2887 return false;
2888 }
2889 I.setDesc(TII.get(AArch64::ADDXri));
2890
2891 // MOs for a #0 shifted immediate.
2892 I.addOperand(MachineOperand::CreateImm(0));
2893 I.addOperand(MachineOperand::CreateImm(0));
2894
2895 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2896 }
2897
2898 case TargetOpcode::G_GLOBAL_VALUE: {
2899 const GlobalValue *GV = nullptr;
2900 unsigned OpFlags;
2901 if (I.getOperand(1).isSymbol()) {
2902 OpFlags = I.getOperand(1).getTargetFlags();
2903 // Currently only used by "RtLibUseGOT".
2904 assert(OpFlags == AArch64II::MO_GOT);
2905 } else {
2906 GV = I.getOperand(1).getGlobal();
2907 if (GV->isThreadLocal()) {
2908 // We don't support instructions with emulated TLS variables yet
2909 if (TM.useEmulatedTLS())
2910 return false;
2911 return selectTLSGlobalValue(I, MRI);
2912 }
2913 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2914 }
2915
2916 if (OpFlags & AArch64II::MO_GOT) {
2917 I.setDesc(TII.get(MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
2918 ? AArch64::LOADgotAUTH
2919 : AArch64::LOADgot));
2920 I.getOperand(1).setTargetFlags(OpFlags);
2921 } else if (TM.getCodeModel() == CodeModel::Large &&
2922 !TM.isPositionIndependent()) {
2923 // Materialize the global using movz/movk instructions.
2924 materializeLargeCMVal(I, GV, OpFlags);
2925 I.eraseFromParent();
2926 return true;
2927 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2928 I.setDesc(TII.get(AArch64::ADR));
2929 I.getOperand(1).setTargetFlags(OpFlags);
2930 } else {
2931 I.setDesc(TII.get(AArch64::MOVaddr));
2932 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2933 MachineInstrBuilder MIB(MF, I);
2934 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2935 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2936 }
2937 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2938 }
2939
2940 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2941 return selectPtrAuthGlobalValue(I, MRI);
2942
2943 case TargetOpcode::G_ZEXTLOAD:
2944 case TargetOpcode::G_LOAD:
2945 case TargetOpcode::G_STORE: {
2946 GLoadStore &LdSt = cast<GLoadStore>(I);
2947 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2948 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2949
2950 // Can only handle AddressSpace 0, 64-bit pointers.
2951 if (PtrTy != LLT::pointer(0, 64)) {
2952 return false;
2953 }
2954
2955 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2956 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2957 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2958
2959 // Need special instructions for atomics that affect ordering.
2960 if (Order != AtomicOrdering::NotAtomic &&
2961 Order != AtomicOrdering::Unordered &&
2962 Order != AtomicOrdering::Monotonic) {
2963 assert(!isa<GZExtLoad>(LdSt));
2964 assert(MemSizeInBytes <= 8 &&
2965 "128-bit atomics should already be custom-legalized");
2966
2967 if (isa<GLoad>(LdSt)) {
2968 static constexpr unsigned LDAPROpcodes[] = {
2969 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2970 static constexpr unsigned LDAROpcodes[] = {
2971 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2972 ArrayRef<unsigned> Opcodes =
2973 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2974 ? LDAPROpcodes
2975 : LDAROpcodes;
2976 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2977 } else {
2978 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2979 AArch64::STLRW, AArch64::STLRX};
2980 Register ValReg = LdSt.getReg(0);
2981 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2982 // Emit a subreg copy of 32 bits.
2983 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2984 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2985 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2986 I.getOperand(0).setReg(NewVal);
2987 }
2988 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2989 }
2990 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2991 return true;
2992 }
2993
2994#ifndef NDEBUG
2995 const Register PtrReg = LdSt.getPointerReg();
2996 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2997 // Check that the pointer register is valid.
2998 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2999 "Load/Store pointer operand isn't a GPR");
3000 assert(MRI.getType(PtrReg).isPointer() &&
3001 "Load/Store pointer operand isn't a pointer");
3002#endif
3003
3004 const Register ValReg = LdSt.getReg(0);
3005 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3006 LLT ValTy = MRI.getType(ValReg);
3007
3008 // The code below doesn't support truncating stores, so we need to split it
3009 // again.
3010 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3011 unsigned SubReg;
3012 LLT MemTy = LdSt.getMMO().getMemoryType();
3013 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3014 if (!getSubRegForClass(RC, TRI, SubReg))
3015 return false;
3016
3017 // Generate a subreg copy.
3018 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3019 .addReg(ValReg, 0, SubReg)
3020 .getReg(0);
3021 RBI.constrainGenericRegister(Copy, *RC, MRI);
3022 LdSt.getOperand(0).setReg(Copy);
3023 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3024 // If this is an any-extending load from the FPR bank, split it into a regular
3025 // load + extend.
3026 if (RB.getID() == AArch64::FPRRegBankID) {
3027 unsigned SubReg;
3028 LLT MemTy = LdSt.getMMO().getMemoryType();
3029 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3030 if (!getSubRegForClass(RC, TRI, SubReg))
3031 return false;
3032 Register OldDst = LdSt.getReg(0);
3033 Register NewDst =
3034 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3035 LdSt.getOperand(0).setReg(NewDst);
3036 MRI.setRegBank(NewDst, RB);
3037 // Generate a SUBREG_TO_REG to extend it.
3038 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3039 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3040 .addImm(0)
3041 .addUse(NewDst)
3042 .addImm(SubReg);
3043 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3044 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3045 MIB.setInstr(LdSt);
3046 ValTy = MemTy; // This is no longer an extending load.
3047 }
3048 }
3049
3050 // Helper lambda for partially selecting I. Either returns the original
3051 // instruction with an updated opcode, or a new instruction.
3052 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3053 bool IsStore = isa<GStore>(I);
3054 const unsigned NewOpc =
3055 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3056 if (NewOpc == I.getOpcode())
3057 return nullptr;
3058 // Check if we can fold anything into the addressing mode.
3059 auto AddrModeFns =
3060 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3061 if (!AddrModeFns) {
3062 // Can't fold anything. Use the original instruction.
3063 I.setDesc(TII.get(NewOpc));
3064 I.addOperand(MachineOperand::CreateImm(0));
3065 return &I;
3066 }
3067
3068 // Folded something. Create a new instruction and return it.
3069 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3070 Register CurValReg = I.getOperand(0).getReg();
3071 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3072 NewInst.cloneMemRefs(I);
3073 for (auto &Fn : *AddrModeFns)
3074 Fn(NewInst);
3075 I.eraseFromParent();
3076 return &*NewInst;
3077 };
3078
3079 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3080 if (!LoadStore)
3081 return false;
3082
3083 // If we're storing a 0, use WZR/XZR.
3084 if (Opcode == TargetOpcode::G_STORE) {
3085 auto CVal = getIConstantVRegValWithLookThrough(
3086 LoadStore->getOperand(0).getReg(), MRI);
3087 if (CVal && CVal->Value == 0) {
3088 switch (LoadStore->getOpcode()) {
3089 case AArch64::STRWui:
3090 case AArch64::STRHHui:
3091 case AArch64::STRBBui:
3092 LoadStore->getOperand(0).setReg(AArch64::WZR);
3093 break;
3094 case AArch64::STRXui:
3095 LoadStore->getOperand(0).setReg(AArch64::XZR);
3096 break;
3097 }
3098 }
3099 }
3100
3101 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3102 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3103 // The any/zextload from a smaller type to i32 should be handled by the
3104 // importer.
3105 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3106 return false;
3107 // If we have an extending load then change the load's type to be a
3108 // narrower reg and zero_extend with SUBREG_TO_REG.
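 // Sketch with invented registers: a "G_ZEXTLOAD %p :: (load (s32))" producing
 // a 64-bit value is selected as "LDRWui %ld32, %p, 0" followed by
 // "SUBREG_TO_REG %dst64, 0, %ld32, sub_32", relying on 32-bit loads
 // implicitly zeroing the upper 32 bits of the X register.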
3109 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3110 Register DstReg = LoadStore->getOperand(0).getReg();
3111 LoadStore->getOperand(0).setReg(LdReg);
3112
3113 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3114 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3115 .addImm(0)
3116 .addUse(LdReg)
3117 .addImm(AArch64::sub_32);
3118 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3119 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3120 MRI);
3121 }
3122 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3123 }
3124
3125 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3126 case TargetOpcode::G_INDEXED_SEXTLOAD:
3127 return selectIndexedExtLoad(I, MRI);
3128 case TargetOpcode::G_INDEXED_LOAD:
3129 return selectIndexedLoad(I, MRI);
3130 case TargetOpcode::G_INDEXED_STORE:
3131 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3132
3133 case TargetOpcode::G_LSHR:
3134 case TargetOpcode::G_ASHR:
3135 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3136 return selectVectorAshrLshr(I, MRI);
3137 [[fallthrough]];
3138 case TargetOpcode::G_SHL:
3139 if (Opcode == TargetOpcode::G_SHL &&
3140 MRI.getType(I.getOperand(0).getReg()).isVector())
3141 return selectVectorSHL(I, MRI);
3142
3143 // These shifts were legalized to have 64 bit shift amounts because we
3144 // want to take advantage of the selection patterns that assume the
3145 // immediates are s64s, however, selectBinaryOp will assume both operands
3146 // will have the same bit size.
3147 {
3148 Register SrcReg = I.getOperand(1).getReg();
3149 Register ShiftReg = I.getOperand(2).getReg();
3150 const LLT ShiftTy = MRI.getType(ShiftReg);
3151 const LLT SrcTy = MRI.getType(SrcReg);
3152 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3153 ShiftTy.getSizeInBits() == 64) {
3154 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3155 // Insert a subregister copy to implement a 64->32 trunc
3156 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3157 .addReg(ShiftReg, 0, AArch64::sub_32);
3158 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3159 I.getOperand(2).setReg(Trunc.getReg(0));
3160 }
3161 }
3162 [[fallthrough]];
3163 case TargetOpcode::G_OR: {
3164 // Reject the various things we don't support yet.
3165 if (unsupportedBinOp(I, RBI, MRI, TRI))
3166 return false;
3167
3168 const unsigned OpSize = Ty.getSizeInBits();
3169
3170 const Register DefReg = I.getOperand(0).getReg();
3171 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3172
3173 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3174 if (NewOpc == I.getOpcode())
3175 return false;
3176
3177 I.setDesc(TII.get(NewOpc));
3178 // FIXME: Should the type be always reset in setDesc?
3179
3180 // Now that we selected an opcode, we need to constrain the register
3181 // operands to use appropriate classes.
3182 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3183 }
3184
3185 case TargetOpcode::G_PTR_ADD: {
3186 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3187 I.eraseFromParent();
3188 return true;
3189 }
3190
3191 case TargetOpcode::G_SADDE:
3192 case TargetOpcode::G_UADDE:
3193 case TargetOpcode::G_SSUBE:
3194 case TargetOpcode::G_USUBE:
3195 case TargetOpcode::G_SADDO:
3196 case TargetOpcode::G_UADDO:
3197 case TargetOpcode::G_SSUBO:
3198 case TargetOpcode::G_USUBO:
3199 return selectOverflowOp(I, MRI);
3200
3201 case TargetOpcode::G_PTRMASK: {
3202 Register MaskReg = I.getOperand(2).getReg();
3203 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3204 // TODO: Implement arbitrary cases
3205 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3206 return false;
3207
3208 uint64_t Mask = *MaskVal;
3209 I.setDesc(TII.get(AArch64::ANDXri));
3210 I.getOperand(2).ChangeToImmediate(
3211 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3212
3213 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3214 }
3215 case TargetOpcode::G_PTRTOINT:
3216 case TargetOpcode::G_TRUNC: {
3217 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3218 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3219
3220 const Register DstReg = I.getOperand(0).getReg();
3221 const Register SrcReg = I.getOperand(1).getReg();
3222
3223 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3224 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3225
3226 if (DstRB.getID() != SrcRB.getID()) {
3227 LLVM_DEBUG(
3228 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3229 return false;
3230 }
3231
3232 if (DstRB.getID() == AArch64::GPRRegBankID) {
3233 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3234 if (!DstRC)
3235 return false;
3236
3237 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3238 if (!SrcRC)
3239 return false;
3240
3241 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3242 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3243 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3244 return false;
3245 }
3246
3247 if (DstRC == SrcRC) {
3248 // Nothing to be done
3249 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3250 SrcTy == LLT::scalar(64)) {
3251 llvm_unreachable("TableGen can import this case");
3252 return false;
3253 } else if (DstRC == &AArch64::GPR32RegClass &&
3254 SrcRC == &AArch64::GPR64RegClass) {
3255 I.getOperand(1).setSubReg(AArch64::sub_32);
3256 } else {
3257 LLVM_DEBUG(
3258 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3259 return false;
3260 }
3261
3262 I.setDesc(TII.get(TargetOpcode::COPY));
3263 return true;
3264 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3265 if (DstTy == LLT::fixed_vector(4, 16) &&
3266 SrcTy == LLT::fixed_vector(4, 32)) {
3267 I.setDesc(TII.get(AArch64::XTNv4i16));
3268 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3269 return true;
3270 }
3271
3272 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3273 MachineInstr *Extract = emitExtractVectorElt(
3274 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3275 if (!Extract)
3276 return false;
3277 I.eraseFromParent();
3278 return true;
3279 }
3280
3281 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3282 if (Opcode == TargetOpcode::G_PTRTOINT) {
3283 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3284 I.setDesc(TII.get(TargetOpcode::COPY));
3285 return selectCopy(I, TII, MRI, TRI, RBI);
3286 }
3287 }
3288
3289 return false;
3290 }
3291
3292 case TargetOpcode::G_ANYEXT: {
3293 if (selectUSMovFromExtend(I, MRI))
3294 return true;
3295
3296 const Register DstReg = I.getOperand(0).getReg();
3297 const Register SrcReg = I.getOperand(1).getReg();
3298
3299 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3300 if (RBDst.getID() != AArch64::GPRRegBankID) {
3301 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3302 << ", expected: GPR\n");
3303 return false;
3304 }
3305
3306 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3307 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3308 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3309 << ", expected: GPR\n");
3310 return false;
3311 }
3312
3313 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3314
3315 if (DstSize == 0) {
3316 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3317 return false;
3318 }
3319
3320 if (DstSize != 64 && DstSize > 32) {
3321 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3322 << ", expected: 32 or 64\n");
3323 return false;
3324 }
3325 // At this point G_ANYEXT is just like a plain COPY, but we need
3326 // to explicitly form the 64-bit value when extending to 64 bits.
3327 if (DstSize > 32) {
3328 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3329 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3330 .addDef(ExtSrc)
3331 .addImm(0)
3332 .addUse(SrcReg)
3333 .addImm(AArch64::sub_32);
3334 I.getOperand(1).setReg(ExtSrc);
3335 }
3336 return selectCopy(I, TII, MRI, TRI, RBI);
3337 }
3338
3339 case TargetOpcode::G_ZEXT:
3340 case TargetOpcode::G_SEXT_INREG:
3341 case TargetOpcode::G_SEXT: {
3342 if (selectUSMovFromExtend(I, MRI))
3343 return true;
3344
3345 unsigned Opcode = I.getOpcode();
3346 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3347 const Register DefReg = I.getOperand(0).getReg();
3348 Register SrcReg = I.getOperand(1).getReg();
3349 const LLT DstTy = MRI.getType(DefReg);
3350 const LLT SrcTy = MRI.getType(SrcReg);
3351 unsigned DstSize = DstTy.getSizeInBits();
3352 unsigned SrcSize = SrcTy.getSizeInBits();
3353
3354 // SEXT_INREG has the same src reg size as the dst; the size of the value to
3355 // be extended is encoded in the imm.
3356 if (Opcode == TargetOpcode::G_SEXT_INREG)
3357 SrcSize = I.getOperand(2).getImm();
3358
3359 if (DstTy.isVector())
3360 return false; // Should be handled by imported patterns.
3361
3362 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3363 AArch64::GPRRegBankID &&
3364 "Unexpected ext regbank");
3365
3366 MachineInstr *ExtI;
3367
3368 // First, check whether we're extending the result of a load whose dest type
3369 // is smaller than 32 bits; in that case this zext is redundant. GPR32 is the
3370 // smallest GPR register on AArch64, and all smaller loads automatically
3371 // zero-extend the upper bits. E.g.
3372 // %v(s8) = G_LOAD %p, :: (load 1)
3373 // %v2(s32) = G_ZEXT %v(s8)
3374 if (!IsSigned) {
3375 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3376 bool IsGPR =
3377 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3378 if (LoadMI && IsGPR) {
3379 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3380 unsigned BytesLoaded = MemOp->getSize().getValue();
3381 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3382 return selectCopy(I, TII, MRI, TRI, RBI);
3383 }
3384
3385 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3386 // + SUBREG_TO_REG.
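 // A rough sketch of the selected sequence (hypothetical vregs):
 //   %tmp:gpr32 = ORRWrs $wzr, %src, 0        ; 32-bit mov; a W-reg write zeroes the top half
 //   %dst:gpr64 = SUBREG_TO_REG 0, %tmp, %subreg.sub_32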
3387 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3388 Register SubregToRegSrc =
3389 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3390 const Register ZReg = AArch64::WZR;
3391 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3392 .addImm(0);
3393
3394 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3395 .addImm(0)
3396 .addUse(SubregToRegSrc)
3397 .addImm(AArch64::sub_32);
3398
3399 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3400 MRI)) {
3401 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3402 return false;
3403 }
3404
3405 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3406 MRI)) {
3407 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3408 return false;
3409 }
3410
3411 I.eraseFromParent();
3412 return true;
3413 }
3414 }
3415
3416 if (DstSize == 64) {
3417 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3418 // FIXME: Can we avoid manually doing this?
3419 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3420 MRI)) {
3421 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3422 << " operand\n");
3423 return false;
3424 }
3425 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3426 {&AArch64::GPR64RegClass}, {})
3427 .addImm(0)
3428 .addUse(SrcReg)
3429 .addImm(AArch64::sub_32)
3430 .getReg(0);
3431 }
3432
3433 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3434 {DefReg}, {SrcReg})
3435 .addImm(0)
3436 .addImm(SrcSize - 1);
3437 } else if (DstSize <= 32) {
3438 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3439 {DefReg}, {SrcReg})
3440 .addImm(0)
3441 .addImm(SrcSize - 1);
3442 } else {
3443 return false;
3444 }
3445
3446 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3447 I.eraseFromParent();
3448 return true;
3449 }
3450
3451 case TargetOpcode::G_FREEZE:
3452 return selectCopy(I, TII, MRI, TRI, RBI);
3453
3454 case TargetOpcode::G_INTTOPTR:
3455 // The importer is currently unable to import pointer types since they
3456 // didn't exist in SelectionDAG.
3457 return selectCopy(I, TII, MRI, TRI, RBI);
3458
3459 case TargetOpcode::G_BITCAST:
3460 // Imported SelectionDAG rules can handle every bitcast except those that
3461 // bitcast from a type to the same type. Ideally, these shouldn't occur
3462 // but we might not run an optimizer that deletes them. The other exception
3463 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3464 // of them.
3465 return selectCopy(I, TII, MRI, TRI, RBI);
3466
3467 case TargetOpcode::G_SELECT: {
3468 auto &Sel = cast<GSelect>(I);
3469 const Register CondReg = Sel.getCondReg();
3470 const Register TReg = Sel.getTrueReg();
3471 const Register FReg = Sel.getFalseReg();
3472
3473 if (tryOptSelect(Sel))
3474 return true;
3475
3476 // Make sure to use an unused vreg instead of wzr, so that the peephole
3477 // optimizations will be able to optimize these.
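 // Roughly (a sketch only, hypothetical vregs):
 //   %dead:gpr32 = ANDSWri %cond, <logical imm 1>  ; test bit 0 of the condition, sets NZCV
 // followed by emitSelect below, which picks a CSEL/CSINC/FCSEL-style variant
 // predicated on NE.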
3478 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3479 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3480 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3481 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3482 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3483 return false;
3484 Sel.eraseFromParent();
3485 return true;
3486 }
3487 case TargetOpcode::G_ICMP: {
3488 if (Ty.isVector())
3489 return false;
3490
3491 if (Ty != LLT::scalar(32)) {
3492 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3493 << ", expected: " << LLT::scalar(32) << '\n');
3494 return false;
3495 }
3496
3497 auto &PredOp = I.getOperand(1);
3498 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3499 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3500 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3501 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3502 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3503 /*Src2=*/AArch64::WZR, InvCC, MIB);
3504 I.eraseFromParent();
3505 return true;
3506 }
3507
3508 case TargetOpcode::G_FCMP: {
3509 CmpInst::Predicate Pred =
3510 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3511 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3512 Pred) ||
3513 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3514 return false;
3515 I.eraseFromParent();
3516 return true;
3517 }
3518 case TargetOpcode::G_VASTART:
3519 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3520 : selectVaStartAAPCS(I, MF, MRI);
3521 case TargetOpcode::G_INTRINSIC:
3522 return selectIntrinsic(I, MRI);
3523 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3524 return selectIntrinsicWithSideEffects(I, MRI);
3525 case TargetOpcode::G_IMPLICIT_DEF: {
3526 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3527 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3528 const Register DstReg = I.getOperand(0).getReg();
3529 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3530 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3531 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3532 return true;
3533 }
3534 case TargetOpcode::G_BLOCK_ADDR: {
3535 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3536 if (std::optional<uint16_t> BADisc =
3537 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3538 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3539 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3540 MIB.buildInstr(AArch64::MOVaddrPAC)
3541 .addBlockAddress(I.getOperand(1).getBlockAddress())
3542 .addImm(AArch64PACKey::IA)
3543 .addReg(/*AddrDisc=*/AArch64::XZR)
3544 .addImm(*BADisc)
3545 .constrainAllUses(TII, TRI, RBI);
3546 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3547 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3548 AArch64::GPR64RegClass, MRI);
3549 I.eraseFromParent();
3550 return true;
3551 }
3552 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3553 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3554 I.eraseFromParent();
3555 return true;
3556 } else {
3557 I.setDesc(TII.get(AArch64::MOVaddrBA));
3558 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3559 I.getOperand(0).getReg())
3560 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3561 /* Offset */ 0, AArch64II::MO_PAGE)
3562 .addBlockAddress(
3563 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3564 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3565 I.eraseFromParent();
3566 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3567 }
3568 }
3569 case AArch64::G_DUP: {
3570 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3571 // imported patterns, so do it manually here. Avoiding the generation of s16
3572 // gprs is difficult because at regbank selection (RBS) we may end up
3573 // pessimizing the fpr case if we decide to add an anyextend to fix this.
3574 // Manual selection is the most robust solution for now.
3575 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3576 AArch64::GPRRegBankID)
3577 return false; // We expect the fpr regbank case to be imported.
3578 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3579 if (VecTy == LLT::fixed_vector(8, 8))
3580 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3581 else if (VecTy == LLT::fixed_vector(16, 8))
3582 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3583 else if (VecTy == LLT::fixed_vector(4, 16))
3584 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3585 else if (VecTy == LLT::fixed_vector(8, 16))
3586 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3587 else
3588 return false;
3589 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3590 }
3591 case TargetOpcode::G_BUILD_VECTOR:
3592 return selectBuildVector(I, MRI);
3593 case TargetOpcode::G_MERGE_VALUES:
3594 return selectMergeValues(I, MRI);
3595 case TargetOpcode::G_UNMERGE_VALUES:
3596 return selectUnmergeValues(I, MRI);
3597 case TargetOpcode::G_SHUFFLE_VECTOR:
3598 return selectShuffleVector(I, MRI);
3599 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3600 return selectExtractElt(I, MRI);
3601 case TargetOpcode::G_CONCAT_VECTORS:
3602 return selectConcatVectors(I, MRI);
3603 case TargetOpcode::G_JUMP_TABLE:
3604 return selectJumpTable(I, MRI);
3605 case TargetOpcode::G_MEMCPY:
3606 case TargetOpcode::G_MEMCPY_INLINE:
3607 case TargetOpcode::G_MEMMOVE:
3608 case TargetOpcode::G_MEMSET:
3609 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3610 return selectMOPS(I, MRI);
3611 }
3612
3613 return false;
3614}
3615
3616bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3617 MachineIRBuilderState OldMIBState = MIB.getState();
3618 bool Success = select(I);
3619 MIB.setState(OldMIBState);
3620 return Success;
3621}
3622
3623 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3624 MachineRegisterInfo &MRI) {
3625 unsigned Mopcode;
3626 switch (GI.getOpcode()) {
3627 case TargetOpcode::G_MEMCPY:
3628 case TargetOpcode::G_MEMCPY_INLINE:
3629 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3630 break;
3631 case TargetOpcode::G_MEMMOVE:
3632 Mopcode = AArch64::MOPSMemoryMovePseudo;
3633 break;
3634 case TargetOpcode::G_MEMSET:
3635 // For tagged memset see llvm.aarch64.mops.memset.tag
3636 Mopcode = AArch64::MOPSMemorySetPseudo;
3637 break;
3638 }
3639
3640 auto &DstPtr = GI.getOperand(0);
3641 auto &SrcOrVal = GI.getOperand(1);
3642 auto &Size = GI.getOperand(2);
3643
3644 // Create copies of the registers that can be clobbered.
3645 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3646 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3647 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3648
3649 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3650 const auto &SrcValRegClass =
3651 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3652
3653 // Constrain to specific registers
3654 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3655 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3656 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3657
3658 MIB.buildCopy(DstPtrCopy, DstPtr);
3659 MIB.buildCopy(SrcValCopy, SrcOrVal);
3660 MIB.buildCopy(SizeCopy, Size);
3661
3662 // The new instruction uses the copied registers because it must update them.
3663 // The defs are not used, since they don't exist in G_MEM*, but they are
3664 // still tied.
3665 // Note: the order of operands differs from G_MEMSET, G_MEMCPY, G_MEMMOVE.
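 // For reference (sketch only; the def names are hypothetical):
 //   G_MEMSET:            %d, %sz = MOPSMemorySetPseudo %DstPtrCopy, %SizeCopy, %SrcValCopy
 //   G_MEMCPY/G_MEMMOVE:  %d, %s, %sz = MOPSMemoryCopyPseudo %DstPtrCopy, %SrcValCopy, %SizeCopy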
3666 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3667 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3668 if (IsSet) {
3669 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3670 {DstPtrCopy, SizeCopy, SrcValCopy});
3671 } else {
3672 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3673 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3674 {DstPtrCopy, SrcValCopy, SizeCopy});
3675 }
3676
3677 GI.eraseFromParent();
3678 return true;
3679}
3680
3681 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3682 MachineRegisterInfo &MRI) {
3683 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3684 Register JTAddr = I.getOperand(0).getReg();
3685 unsigned JTI = I.getOperand(1).getIndex();
3686 Register Index = I.getOperand(2).getReg();
3687
3688 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3689
3690 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3691 // sequence later, to guarantee the integrity of the intermediate values.
3692 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3693 CodeModel::Model CM = TM.getCodeModel();
3694 if (STI.isTargetMachO()) {
3695 if (CM != CodeModel::Small && CM != CodeModel::Large)
3696 report_fatal_error("Unsupported code-model for hardened jump-table");
3697 } else {
3698 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3699 assert(STI.isTargetELF() &&
3700 "jump table hardening only supported on MachO/ELF");
3701 if (CM != CodeModel::Small)
3702 report_fatal_error("Unsupported code-model for hardened jump-table");
3703 }
3704
3705 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3706 MIB.buildInstr(AArch64::BR_JumpTable)
3707 .addJumpTableIndex(I.getOperand(1).getIndex());
3708 I.eraseFromParent();
3709 return true;
3710 }
3711
3712 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3713 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3714
3715 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3716 {TargetReg, ScratchReg}, {JTAddr, Index})
3717 .addJumpTableIndex(JTI);
3718 // Save the jump table info.
3719 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3720 {static_cast<int64_t>(JTI)});
3721 // Build the indirect branch.
3722 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3723 I.eraseFromParent();
3724 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3725}
3726
3727 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3728 MachineRegisterInfo &MRI) {
3729 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3730 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3731
3732 Register DstReg = I.getOperand(0).getReg();
3733 unsigned JTI = I.getOperand(1).getIndex();
3734 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
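 // The later expansion looks roughly like (illustrative only; the label name
 // depends on the actual jump table):
 //   adrp xN, .LJTI<n>
 //   add  xN, xN, :lo12:.LJTI<n>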
3735 auto MovMI =
3736 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3737 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3738 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3739 I.eraseFromParent();
3740 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3741}
3742
3743 bool AArch64InstructionSelector::selectTLSGlobalValue(
3744 MachineInstr &I, MachineRegisterInfo &MRI) {
3745 if (!STI.isTargetMachO())
3746 return false;
3747 MachineFunction &MF = *I.getParent()->getParent();
3748 MF.getFrameInfo().setAdjustsStack(true);
3749
3750 const auto &GlobalOp = I.getOperand(1);
3751 assert(GlobalOp.getOffset() == 0 &&
3752 "Shouldn't have an offset on TLS globals!");
3753 const GlobalValue &GV = *GlobalOp.getGlobal();
3754
3755 auto LoadGOT =
3756 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3757 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3758
3759 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3760 {LoadGOT.getReg(0)})
3761 .addImm(0);
3762
3763 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3764 // TLS calls preserve all registers except those that absolutely must be
3765 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3766 // silly).
3767 unsigned Opcode = getBLRCallOpcode(MF);
3768
3769 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3770 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3771 assert(Opcode == AArch64::BLR);
3772 Opcode = AArch64::BLRAAZ;
3773 }
3774
3775 MIB.buildInstr(Opcode, {}, {Load})
3776 .addUse(AArch64::X0, RegState::Implicit)
3777 .addDef(AArch64::X0, RegState::Implicit)
3778 .addRegMask(TRI.getTLSCallPreservedMask());
3779
3780 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3781 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3782 MRI);
3783 I.eraseFromParent();
3784 return true;
3785}
3786
3787MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3788 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3789 MachineIRBuilder &MIRBuilder) const {
3790 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3791
3792 auto BuildFn = [&](unsigned SubregIndex) {
3793 auto Ins =
3794 MIRBuilder
3795 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3796 .addImm(SubregIndex);
3797 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3798 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3799 return &*Ins;
3800 };
3801
3802 switch (EltSize) {
3803 case 8:
3804 return BuildFn(AArch64::bsub);
3805 case 16:
3806 return BuildFn(AArch64::hsub);
3807 case 32:
3808 return BuildFn(AArch64::ssub);
3809 case 64:
3810 return BuildFn(AArch64::dsub);
3811 default:
3812 return nullptr;
3813 }
3814}
3815
3816 MachineInstr *
3817AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3818 MachineIRBuilder &MIB,
3819 MachineRegisterInfo &MRI) const {
3820 LLT DstTy = MRI.getType(DstReg);
3821 const TargetRegisterClass *RC =
3822 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3823 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3824 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3825 return nullptr;
3826 }
3827 unsigned SubReg = 0;
3828 if (!getSubRegForClass(RC, TRI, SubReg))
3829 return nullptr;
3830 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3831 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3832 << DstTy.getSizeInBits() << "\n");
3833 return nullptr;
3834 }
3835 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3836 .addReg(SrcReg, 0, SubReg);
3837 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3838 return Copy;
3839}
3840
3841 bool AArch64InstructionSelector::selectMergeValues(
3842 MachineInstr &I, MachineRegisterInfo &MRI) {
3843 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3844 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3845 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3846 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3847 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3848
3849 if (I.getNumOperands() != 3)
3850 return false;
3851
3852 // Merging 2 s64s into an s128.
3853 if (DstTy == LLT::scalar(128)) {
3854 if (SrcTy.getSizeInBits() != 64)
3855 return false;
3856 Register DstReg = I.getOperand(0).getReg();
3857 Register Src1Reg = I.getOperand(1).getReg();
3858 Register Src2Reg = I.getOperand(2).getReg();
3859 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3860 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3861 /* LaneIdx */ 0, RB, MIB);
3862 if (!InsMI)
3863 return false;
3864 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3865 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3866 if (!Ins2MI)
3867 return false;
3868 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3869 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3870 I.eraseFromParent();
3871 return true;
3872 }
3873
3874 if (RB.getID() != AArch64::GPRRegBankID)
3875 return false;
3876
3877 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3878 return false;
3879
3880 auto *DstRC = &AArch64::GPR64RegClass;
3881 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3882 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3883 TII.get(TargetOpcode::SUBREG_TO_REG))
3884 .addDef(SubToRegDef)
3885 .addImm(0)
3886 .addUse(I.getOperand(1).getReg())
3887 .addImm(AArch64::sub_32);
3888 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3889 // Need to anyext the second scalar before we can use bfm
3890 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3891 TII.get(TargetOpcode::SUBREG_TO_REG))
3892 .addDef(SubToRegDef2)
3893 .addImm(0)
3894 .addUse(I.getOperand(2).getReg())
3895 .addImm(AArch64::sub_32);
3896 MachineInstr &BFM =
3897 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3898 .addDef(I.getOperand(0).getReg())
3899 .addUse(SubToRegDef)
3900 .addUse(SubToRegDef2)
3901 .addImm(32)
3902 .addImm(31);
3903 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3904 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3905 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3906 I.eraseFromParent();
3907 return true;
3908}
3909
3910static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3911 const unsigned EltSize) {
3912 // Choose a lane copy opcode and subregister based off of the size of the
3913 // vector's elements.
3914 switch (EltSize) {
3915 case 8:
3916 CopyOpc = AArch64::DUPi8;
3917 ExtractSubReg = AArch64::bsub;
3918 break;
3919 case 16:
3920 CopyOpc = AArch64::DUPi16;
3921 ExtractSubReg = AArch64::hsub;
3922 break;
3923 case 32:
3924 CopyOpc = AArch64::DUPi32;
3925 ExtractSubReg = AArch64::ssub;
3926 break;
3927 case 64:
3928 CopyOpc = AArch64::DUPi64;
3929 ExtractSubReg = AArch64::dsub;
3930 break;
3931 default:
3932 // Unknown size, bail out.
3933 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3934 return false;
3935 }
3936 return true;
3937}
3938
3939MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3940 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3941 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3942 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3943 unsigned CopyOpc = 0;
3944 unsigned ExtractSubReg = 0;
3945 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3946 LLVM_DEBUG(
3947 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3948 return nullptr;
3949 }
3950
3951 const TargetRegisterClass *DstRC =
3952 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3953 if (!DstRC) {
3954 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3955 return nullptr;
3956 }
3957
3958 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3959 const LLT &VecTy = MRI.getType(VecReg);
3960 const TargetRegisterClass *VecRC =
3961 getRegClassForTypeOnBank(VecTy, VecRB, true);
3962 if (!VecRC) {
3963 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3964 return nullptr;
3965 }
3966
3967 // The register that we're going to copy into.
3968 Register InsertReg = VecReg;
3969 if (!DstReg)
3970 DstReg = MRI.createVirtualRegister(DstRC);
3971 // If the lane index is 0, we just use a subregister COPY.
3972 if (LaneIdx == 0) {
3973 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3974 .addReg(VecReg, 0, ExtractSubReg);
3975 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3976 return &*Copy;
3977 }
3978
3979 // Lane copies require 128-bit wide registers. If we're dealing with an
3980 // unpacked vector, then we need to move up to that width. Insert an implicit
3981 // def and a subregister insert to get us there.
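 // Sketch (hypothetical vregs) for a 64-bit source vector:
 //   %undef:fpr128 = IMPLICIT_DEF
 //   %wide:fpr128 = INSERT_SUBREG %undef, %vec:fpr64, %subreg.dsub
 // The lane copy (e.g. DUPi32) then reads its lane from %wide.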
3982 if (VecTy.getSizeInBits() != 128) {
3983 MachineInstr *ScalarToVector = emitScalarToVector(
3984 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3985 if (!ScalarToVector)
3986 return nullptr;
3987 InsertReg = ScalarToVector->getOperand(0).getReg();
3988 }
3989
3990 MachineInstr *LaneCopyMI =
3991 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3992 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3993
3994 // Make sure that we actually constrain the initial copy.
3995 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3996 return LaneCopyMI;
3997}
3998
3999 bool AArch64InstructionSelector::selectExtractElt(
4000 MachineInstr &I, MachineRegisterInfo &MRI) {
4001 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4002 "unexpected opcode!");
4003 Register DstReg = I.getOperand(0).getReg();
4004 const LLT NarrowTy = MRI.getType(DstReg);
4005 const Register SrcReg = I.getOperand(1).getReg();
4006 const LLT WideTy = MRI.getType(SrcReg);
4007 (void)WideTy;
4008 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4009 "source register size too small!");
4010 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4011
4012 // Need the lane index to determine the correct copy opcode.
4013 MachineOperand &LaneIdxOp = I.getOperand(2);
4014 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4015
4016 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4017 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4018 return false;
4019 }
4020
4021 // Find the index to extract from.
4022 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4023 if (!VRegAndVal)
4024 return false;
4025 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4026
4027
4028 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4029 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4030 LaneIdx, MIB);
4031 if (!Extract)
4032 return false;
4033
4034 I.eraseFromParent();
4035 return true;
4036}
4037
4038 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4039 MachineInstr &I, MachineRegisterInfo &MRI) {
4040 unsigned NumElts = I.getNumOperands() - 1;
4041 Register SrcReg = I.getOperand(NumElts).getReg();
4042 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4043 const LLT SrcTy = MRI.getType(SrcReg);
4044
4045 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4046 if (SrcTy.getSizeInBits() > 128) {
4047 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4048 return false;
4049 }
4050
4051 // We implement a split vector operation by treating the sub-vectors as
4052 // scalars and extracting them.
4053 const RegisterBank &DstRB =
4054 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4055 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4056 Register Dst = I.getOperand(OpIdx).getReg();
4057 MachineInstr *Extract =
4058 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4059 if (!Extract)
4060 return false;
4061 }
4062 I.eraseFromParent();
4063 return true;
4064}
4065
4066 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4067 MachineRegisterInfo &MRI) {
4068 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4069 "unexpected opcode");
4070
4071 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4072 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4073 AArch64::FPRRegBankID ||
4074 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4075 AArch64::FPRRegBankID) {
4076 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4077 "currently unsupported.\n");
4078 return false;
4079 }
4080
4081 // The last operand is the vector source register, and every other operand is
4082 // a register to unpack into.
4083 unsigned NumElts = I.getNumOperands() - 1;
4084 Register SrcReg = I.getOperand(NumElts).getReg();
4085 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4086 const LLT WideTy = MRI.getType(SrcReg);
4087 (void)WideTy;
4088 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4089 "can only unmerge from vector or s128 types!");
4090 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4091 "source register size too small!");
4092
4093 if (!NarrowTy.isScalar())
4094 return selectSplitVectorUnmerge(I, MRI);
4095
4096 // Choose a lane copy opcode and subregister based off of the size of the
4097 // vector's elements.
4098 unsigned CopyOpc = 0;
4099 unsigned ExtractSubReg = 0;
4100 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4101 return false;
4102
4103 // Set up for the lane copies.
4104 MachineBasicBlock &MBB = *I.getParent();
4105
4106 // Stores the registers we'll be copying from.
4107 SmallVector<Register, 4> InsertRegs;
4108
4109 // We'll use the first register twice, so we only need NumElts-1 registers.
4110 unsigned NumInsertRegs = NumElts - 1;
4111
4112 // If our elements fit into exactly 128 bits, then we can copy from the source
4113 // directly. Otherwise, we need to do a bit of setup with some subregister
4114 // inserts.
4115 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4116 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4117 } else {
4118 // No. We have to perform subregister inserts. For each insert, create an
4119 // implicit def and a subregister insert, and save the register we create.
4120 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4121 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4122 *RBI.getRegBank(SrcReg, MRI, TRI));
4123 unsigned SubReg = 0;
4124 bool Found = getSubRegForClass(RC, TRI, SubReg);
4125 (void)Found;
4126 assert(Found && "expected to find last operand's subreg idx");
4127 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4128 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4129 MachineInstr &ImpDefMI =
4130 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4131 ImpDefReg);
4132
4133 // Now, create the subregister insert from SrcReg.
4134 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4135 MachineInstr &InsMI =
4136 *BuildMI(MBB, I, I.getDebugLoc(),
4137 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4138 .addUse(ImpDefReg)
4139 .addUse(SrcReg)
4140 .addImm(SubReg);
4141
4142 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4143 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4144
4145 // Save the register so that we can copy from it after.
4146 InsertRegs.push_back(InsertReg);
4147 }
4148 }
4149
4150 // Now that we've created any necessary subregister inserts, we can
4151 // create the copies.
4152 //
4153 // Perform the first copy separately as a subregister copy.
4154 Register CopyTo = I.getOperand(0).getReg();
4155 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4156 .addReg(InsertRegs[0], 0, ExtractSubReg);
4157 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4158
4159 // Now, perform the remaining copies as vector lane copies.
4160 unsigned LaneIdx = 1;
4161 for (Register InsReg : InsertRegs) {
4162 Register CopyTo = I.getOperand(LaneIdx).getReg();
4163 MachineInstr &CopyInst =
4164 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4165 .addUse(InsReg)
4166 .addImm(LaneIdx);
4167 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4168 ++LaneIdx;
4169 }
4170
4171 // Separately constrain the first copy's destination. Because of the
4172 // limitation in constrainOperandRegClass, we can't guarantee that this will
4173 // actually be constrained. So, do it ourselves using the second operand.
4174 const TargetRegisterClass *RC =
4175 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4176 if (!RC) {
4177 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4178 return false;
4179 }
4180
4181 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4182 I.eraseFromParent();
4183 return true;
4184}
4185
4186 bool AArch64InstructionSelector::selectConcatVectors(
4187 MachineInstr &I, MachineRegisterInfo &MRI) {
4188 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4189 "Unexpected opcode");
4190 Register Dst = I.getOperand(0).getReg();
4191 Register Op1 = I.getOperand(1).getReg();
4192 Register Op2 = I.getOperand(2).getReg();
4193 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4194 if (!ConcatMI)
4195 return false;
4196 I.eraseFromParent();
4197 return true;
4198}
4199
4200unsigned
4201AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4202 MachineFunction &MF) const {
4203 Type *CPTy = CPVal->getType();
4204 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4205
4206 MachineConstantPool *MCP = MF.getConstantPool();
4207 return MCP->getConstantPoolIndex(CPVal, Alignment);
4208}
4209
4210MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4211 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4212 const TargetRegisterClass *RC;
4213 unsigned Opc;
4214 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4215 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4216 switch (Size) {
4217 case 16:
4218 RC = &AArch64::FPR128RegClass;
4219 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4220 break;
4221 case 8:
4222 RC = &AArch64::FPR64RegClass;
4223 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4224 break;
4225 case 4:
4226 RC = &AArch64::FPR32RegClass;
4227 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4228 break;
4229 case 2:
4230 RC = &AArch64::FPR16RegClass;
4231 Opc = AArch64::LDRHui;
4232 break;
4233 default:
4234 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4235 << *CPVal->getType());
4236 return nullptr;
4237 }
4238
4239 MachineInstr *LoadMI = nullptr;
4240 auto &MF = MIRBuilder.getMF();
4241 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4242 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4243 // Use load(literal) for tiny code model.
4244 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4245 } else {
4246 auto Adrp =
4247 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4248 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4249
4250 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4251 .addConstantPoolIndex(
4252 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4253
4254 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4255
4256 }
4257 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4258 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4259 MachineMemOperand::MOLoad,
4260 Size, Align(Size)));
4261 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4262 return LoadMI;
4263}
4264
4265/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4266/// size and RB.
4267static std::pair<unsigned, unsigned>
4268getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4269 unsigned Opc, SubregIdx;
4270 if (RB.getID() == AArch64::GPRRegBankID) {
4271 if (EltSize == 8) {
4272 Opc = AArch64::INSvi8gpr;
4273 SubregIdx = AArch64::bsub;
4274 } else if (EltSize == 16) {
4275 Opc = AArch64::INSvi16gpr;
4276 SubregIdx = AArch64::ssub;
4277 } else if (EltSize == 32) {
4278 Opc = AArch64::INSvi32gpr;
4279 SubregIdx = AArch64::ssub;
4280 } else if (EltSize == 64) {
4281 Opc = AArch64::INSvi64gpr;
4282 SubregIdx = AArch64::dsub;
4283 } else {
4284 llvm_unreachable("invalid elt size!");
4285 }
4286 } else {
4287 if (EltSize == 8) {
4288 Opc = AArch64::INSvi8lane;
4289 SubregIdx = AArch64::bsub;
4290 } else if (EltSize == 16) {
4291 Opc = AArch64::INSvi16lane;
4292 SubregIdx = AArch64::hsub;
4293 } else if (EltSize == 32) {
4294 Opc = AArch64::INSvi32lane;
4295 SubregIdx = AArch64::ssub;
4296 } else if (EltSize == 64) {
4297 Opc = AArch64::INSvi64lane;
4298 SubregIdx = AArch64::dsub;
4299 } else {
4300 llvm_unreachable("invalid elt size!");
4301 }
4302 }
4303 return std::make_pair(Opc, SubregIdx);
4304}
4305
4306MachineInstr *AArch64InstructionSelector::emitInstr(
4307 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4308 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4309 const ComplexRendererFns &RenderFns) const {
4310 assert(Opcode && "Expected an opcode?");
4311 assert(!isPreISelGenericOpcode(Opcode) &&
4312 "Function should only be used to produce selected instructions!");
4313 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4314 if (RenderFns)
4315 for (auto &Fn : *RenderFns)
4316 Fn(MI);
4317 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4318 return &*MI;
4319}
4320
4321MachineInstr *AArch64InstructionSelector::emitAddSub(
4322 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4323 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4324 MachineIRBuilder &MIRBuilder) const {
4325 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4326 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4327 auto Ty = MRI.getType(LHS.getReg());
4328 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4329 unsigned Size = Ty.getSizeInBits();
4330 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4331 bool Is32Bit = Size == 32;
4332
4333 // INSTRri form with positive arithmetic immediate.
4334 if (auto Fns = selectArithImmed(RHS))
4335 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4336 MIRBuilder, Fns);
4337
4338 // INSTRri form with negative arithmetic immediate.
4339 if (auto Fns = selectNegArithImmed(RHS))
4340 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4341 MIRBuilder, Fns);
4342
4343 // INSTRrx form.
4344 if (auto Fns = selectArithExtendedRegister(RHS))
4345 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4346 MIRBuilder, Fns);
4347
4348 // INSTRrs form.
4349 if (auto Fns = selectShiftedRegister(RHS))
4350 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4351 MIRBuilder, Fns);
4352 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4353 MIRBuilder);
4354}
4355
4356 MachineInstr *
4357AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4358 MachineOperand &RHS,
4359 MachineIRBuilder &MIRBuilder) const {
4360 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4361 {{AArch64::ADDXri, AArch64::ADDWri},
4362 {AArch64::ADDXrs, AArch64::ADDWrs},
4363 {AArch64::ADDXrr, AArch64::ADDWrr},
4364 {AArch64::SUBXri, AArch64::SUBWri},
4365 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4366 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4367}
4368
4369 MachineInstr *
4370AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4371 MachineOperand &RHS,
4372 MachineIRBuilder &MIRBuilder) const {
4373 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4374 {{AArch64::ADDSXri, AArch64::ADDSWri},
4375 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4376 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4377 {AArch64::SUBSXri, AArch64::SUBSWri},
4378 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4379 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4380}
4381
4382 MachineInstr *
4383AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4384 MachineOperand &RHS,
4385 MachineIRBuilder &MIRBuilder) const {
4386 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4387 {{AArch64::SUBSXri, AArch64::SUBSWri},
4388 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4389 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4390 {AArch64::ADDSXri, AArch64::ADDSWri},
4391 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4392 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4393}
4394
4395 MachineInstr *
4396AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4397 MachineOperand &RHS,
4398 MachineIRBuilder &MIRBuilder) const {
4399 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4400 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4401 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4402 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4403 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4404}
4405
4406 MachineInstr *
4407AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4408 MachineOperand &RHS,
4409 MachineIRBuilder &MIRBuilder) const {
4410 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4411 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4412 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4413 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4414 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4415}
4416
4417 MachineInstr *
4418AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4419 MachineIRBuilder &MIRBuilder) const {
4420 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4421 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4422 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4423 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4424}
4425
4426 MachineInstr *
4427AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4428 MachineIRBuilder &MIRBuilder) const {
4429 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4430 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4431 LLT Ty = MRI.getType(LHS.getReg());
4432 unsigned RegSize = Ty.getSizeInBits();
4433 bool Is32Bit = (RegSize == 32);
4434 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4435 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4436 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4437 // ANDS needs a logical immediate for its immediate form. Check if we can
4438 // fold one in.
4439 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4440 int64_t Imm = ValAndVReg->Value.getSExtValue();
4441
4442 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4443 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4444 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4445 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4446 return &*TstMI;
4447 }
4448 }
4449
4450 if (auto Fns = selectLogicalShiftedRegister(RHS))
4451 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4452 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4453}
4454
4455 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4456 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4457 MachineIRBuilder &MIRBuilder) const {
4458 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4459 assert(Predicate.isPredicate() && "Expected predicate?");
4460 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4461 LLT CmpTy = MRI.getType(LHS.getReg());
4462 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4463 unsigned Size = CmpTy.getSizeInBits();
4464 (void)Size;
4465 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4466 // Fold the compare into a cmn or tst if possible.
4467 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4468 return FoldCmp;
4469 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4470 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4471}
4472
4473MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4474 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4475 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4476#ifndef NDEBUG
4477 LLT Ty = MRI.getType(Dst);
4478 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4479 "Expected a 32-bit scalar register?");
4480#endif
4481 const Register ZReg = AArch64::WZR;
4482 AArch64CC::CondCode CC1, CC2;
4483 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4484 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4485 if (CC2 == AArch64CC::AL)
4486 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4487 MIRBuilder);
4488 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4489 Register Def1Reg = MRI.createVirtualRegister(RC);
4490 Register Def2Reg = MRI.createVirtualRegister(RC);
4491 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4492 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4493 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4494 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4495 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4496 return &*OrMI;
4497}
4498
4499MachineInstr *AArch64InstructionSelector::emitFPCompare(
4500 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4501 std::optional<CmpInst::Predicate> Pred) const {
4502 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4503 LLT Ty = MRI.getType(LHS);
4504 if (Ty.isVector())
4505 return nullptr;
4506 unsigned OpSize = Ty.getSizeInBits();
4507 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4508
4509 // If this is a compare against +0.0, then we don't have
4510 // to explicitly materialize a constant.
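 // i.e. (sketch) prefer "FCMPDri %lhs" (implicit #0.0 operand) over
 // materializing a zero into a register and emitting "FCMPDrr %lhs, %zero".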
4511 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4512 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4513
4514 auto IsEqualityPred = [](CmpInst::Predicate P) {
4515 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4516 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4517 };
4518 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4519 // Try commutating the operands.
4520 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4521 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4522 ShouldUseImm = true;
4523 std::swap(LHS, RHS);
4524 }
4525 }
4526 unsigned CmpOpcTbl[2][3] = {
4527 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4528 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4529 unsigned CmpOpc =
4530 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4531
4532 // Partially build the compare. Decide if we need to add a use for the
4533 // third operand based off whether or not we're comparing against 0.0.
4534 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4536 if (!ShouldUseImm)
4537 CmpMI.addUse(RHS);
4538 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4539 return &*CmpMI;
4540}
4541
4542MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4543 std::optional<Register> Dst, Register Op1, Register Op2,
4544 MachineIRBuilder &MIRBuilder) const {
4545 // We implement a vector concat by:
4546 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4547 // 2. Insert the upper vector into the destination's upper element
4548 // TODO: some of this code is common with G_BUILD_VECTOR handling.
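 // Sketch for two 64-bit vectors (hypothetical vregs):
 //   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1:fpr64, dsub
 //   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2:fpr64, dsub
 //   %dst:fpr128 = INSvi64lane %w1, 1, %w2, 0   ; move %op2 into the upper half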
4549 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4550
4551 const LLT Op1Ty = MRI.getType(Op1);
4552 const LLT Op2Ty = MRI.getType(Op2);
4553
4554 if (Op1Ty != Op2Ty) {
4555 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4556 return nullptr;
4557 }
4558 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4559
4560 if (Op1Ty.getSizeInBits() >= 128) {
4561 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4562 return nullptr;
4563 }
4564
4565 // At the moment we just support 64 bit vector concats.
4566 if (Op1Ty.getSizeInBits() != 64) {
4567 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4568 return nullptr;
4569 }
4570
4571 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4572 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4573 const TargetRegisterClass *DstRC =
4574 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4575
4576 MachineInstr *WidenedOp1 =
4577 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4578 MachineInstr *WidenedOp2 =
4579 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4580 if (!WidenedOp1 || !WidenedOp2) {
4581 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4582 return nullptr;
4583 }
4584
4585 // Now do the insert of the upper element.
4586 unsigned InsertOpc, InsSubRegIdx;
4587 std::tie(InsertOpc, InsSubRegIdx) =
4588 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4589
4590 if (!Dst)
4591 Dst = MRI.createVirtualRegister(DstRC);
4592 auto InsElt =
4593 MIRBuilder
4594 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4595 .addImm(1) /* Lane index */
4596 .addUse(WidenedOp2->getOperand(0).getReg())
4597 .addImm(0);
4598 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4599 return &*InsElt;
4600}
4601
4602 MachineInstr *
4603AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4604 Register Src2, AArch64CC::CondCode Pred,
4605 MachineIRBuilder &MIRBuilder) const {
4606 auto &MRI = *MIRBuilder.getMRI();
4607 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4608 // If we used a register class, then this won't necessarily have an LLT.
4609 // Compute the size based off whether or not we have a class or bank.
4610 unsigned Size;
4611 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4612 Size = TRI.getRegSizeInBits(*RC);
4613 else
4614 Size = MRI.getType(Dst).getSizeInBits();
4615 // Some opcodes use s1.
4616 assert(Size <= 64 && "Expected 64 bits or less only!");
4617 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4618 unsigned Opc = OpcTable[Size == 64];
4619 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4620 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4621 return &*CSINC;
4622}
4623
4624MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4625 Register CarryReg) {
4626 MachineRegisterInfo *MRI = MIB.getMRI();
4627 unsigned Opcode = I.getOpcode();
4628
4629 // If the instruction is a SUB, we need to negate the carry,
4630 // because borrowing is indicated by carry-flag == 0.
4631 bool NeedsNegatedCarry =
4632 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4633
4634 // If the previous instruction will already produce the correct carry, do not
4635 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4636 // generated during legalization of wide add/sub. This optimization depends on
4637 // these sequences not being interrupted by other instructions.
4638 // We have to select the previous instruction before the carry-using
4639 // instruction is deleted by the calling function, otherwise the previous
4640 // instruction might become dead and would get deleted.
4641 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4642 if (SrcMI == I.getPrevNode()) {
4643 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4644 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4645 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4646 CarrySrcMI->isUnsigned() &&
4647 CarrySrcMI->getCarryOutReg() == CarryReg &&
4648 selectAndRestoreState(*SrcMI))
4649 return nullptr;
4650 }
4651 }
4652
4653 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4654
4655 if (NeedsNegatedCarry) {
4656 // (0 - Carry) sets !C in NZCV when Carry == 1
4657 Register ZReg = AArch64::WZR;
4658 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4659 }
4660
4661 // (Carry - 1) sets !C in NZCV when Carry == 0
4662 auto Fns = select12BitValueWithLeftShift(1);
4663 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4664}
4665
4666 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4667 MachineRegisterInfo &MRI) {
4668 auto &CarryMI = cast<GAddSubCarryOut>(I);
4669
4670 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4671 // Set NZCV carry according to carry-in VReg
4672 emitCarryIn(I, CarryInMI->getCarryInReg());
4673 }
4674
4675 // Emit the operation and get the correct condition code.
4676 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4677 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4678
4679 Register CarryOutReg = CarryMI.getCarryOutReg();
4680
4681 // Don't convert carry-out to VReg if it is never used
4682 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4683 // Now, put the overflow result in the register given by the first operand
4684 // to the overflow op. CSINC increments the result when the predicate is
4685 // false, so to get the increment when it's true, we need to use the
4686 // inverse. In this case, we want to increment when carry is set.
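 // e.g. (sketch) for G_UADDO, whose overflow condition is AArch64CC::HS, we
 // emit
 //   %carry:gpr32 = CSINCWr $wzr, $wzr, LO   ; 1 when HS holds, 0 otherwise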
4687 Register ZReg = AArch64::WZR;
4688 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4689 getInvertedCondCode(OpAndCC.second), MIB);
4690 }
4691
4692 I.eraseFromParent();
4693 return true;
4694}
4695
4696std::pair<MachineInstr *, AArch64CC::CondCode>
4697AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4698 MachineOperand &LHS,
4699 MachineOperand &RHS,
4700 MachineIRBuilder &MIRBuilder) const {
4701 switch (Opcode) {
4702 default:
4703 llvm_unreachable("Unexpected opcode!");
4704 case TargetOpcode::G_SADDO:
4705 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4706 case TargetOpcode::G_UADDO:
4707 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4708 case TargetOpcode::G_SSUBO:
4709 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4710 case TargetOpcode::G_USUBO:
4711 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4712 case TargetOpcode::G_SADDE:
4713 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4714 case TargetOpcode::G_UADDE:
4715 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4716 case TargetOpcode::G_SSUBE:
4717 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4718 case TargetOpcode::G_USUBE:
4719 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4720 }
4721}
4722
4723/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4724/// expressed as a conjunction.
4725/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4726/// changing the conditions on the CMP tests.
4727/// (this means we can call emitConjunctionRec() with
4728/// Negate==true on this sub-tree)
4729/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4730/// cannot do the negation naturally. We are required to
4731/// emit the subtree first in this case.
4732/// \param WillNegate Is true if we are called when the result of this
4733/// subexpression must be negated. This happens when the
4734/// outer expression is an OR. We can use this fact to know
4735/// that we have a double negation (or (or ...) ...) that
4736/// can be implemented for free.
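/// For example (illustrative only), a chain such as
///   %x = G_ICMP slt, %a, %b
///   %y = G_ICMP eq, %c, %d
///   %z = G_AND %x, %y
/// satisfies this check and can later be emitted as a compare followed by a
/// conditional compare (CCMP), with the final result read out of NZCV.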
4737static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4738 bool WillNegate, MachineRegisterInfo &MRI,
4739 unsigned Depth = 0) {
4740 if (!MRI.hasOneNonDBGUse(Val))
4741 return false;
4742 MachineInstr *ValDef = MRI.getVRegDef(Val);
4743 unsigned Opcode = ValDef->getOpcode();
4744 if (isa<GAnyCmp>(ValDef)) {
4745 CanNegate = true;
4746 MustBeFirst = false;
4747 return true;
4748 }
4749 // Protect against exponential runtime and stack overflow.
4750 if (Depth > 6)
4751 return false;
4752 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4753 bool IsOR = Opcode == TargetOpcode::G_OR;
4754 Register O0 = ValDef->getOperand(1).getReg();
4755 Register O1 = ValDef->getOperand(2).getReg();
4756 bool CanNegateL;
4757 bool MustBeFirstL;
4758 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4759 return false;
4760 bool CanNegateR;
4761 bool MustBeFirstR;
4762 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4763 return false;
4764
4765 if (MustBeFirstL && MustBeFirstR)
4766 return false;
4767
4768 if (IsOR) {
4769 // For an OR expression we need to be able to naturally negate at least
4770 // one side or we cannot do the transformation at all.
4771 if (!CanNegateL && !CanNegateR)
4772 return false;
4773 // If the result of the OR will be negated and we can naturally negate
4774 // the leaves, then this sub-tree as a whole negates naturally.
4775 CanNegate = WillNegate && CanNegateL && CanNegateR;
4776 // If we cannot naturally negate the whole sub-tree, then this must be
4777 // emitted first.
4778 MustBeFirst = !CanNegate;
4779 } else {
4780 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4781 // We cannot naturally negate an AND operation.
4782 CanNegate = false;
4783 MustBeFirst = MustBeFirstL || MustBeFirstR;
4784 }
4785 return true;
4786 }
4787 return false;
4788}
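// For example (illustrative):
//   %v = G_AND (G_ICMP eq, %a, 0), (G_ICMP slt, %b, %c)
// is a valid conjunction tree (two compare leaves), so emitConjunction()
// below can lower it to a CMP + CCMP chain. An OR of compares qualifies too,
// provided at least one side can be negated just by inverting its condition
// code.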
4789
4790MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4791 Register LHS, Register RHS, CmpInst::Predicate CC,
4792 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4793 MachineIRBuilder &MIB) const {
4794 auto &MRI = *MIB.getMRI();
4795 LLT OpTy = MRI.getType(LHS);
4796 unsigned CCmpOpc;
4797 std::optional<ValueAndVReg> C;
4798 if (CmpInst::isIntPredicate(CC)) {
4799 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4800 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4801 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4802 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4803 else if (C->Value.ule(31))
4804 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4805 else
4806 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4807 } else {
4808 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4809 OpTy.getSizeInBits() == 64);
4810 switch (OpTy.getSizeInBits()) {
4811 case 16:
4812 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4813 CCmpOpc = AArch64::FCCMPHrr;
4814 break;
4815 case 32:
4816 CCmpOpc = AArch64::FCCMPSrr;
4817 break;
4818 case 64:
4819 CCmpOpc = AArch64::FCCMPDrr;
4820 break;
4821 default:
4822 return nullptr;
4823 }
4824 }
4825 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4826 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4827 auto CCmp =
4828 MIB.buildInstr(CCmpOpc, {}, {LHS});
4829 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4830 CCmp.addImm(C->Value.getZExtValue());
4831 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4832 CCmp.addImm(C->Value.abs().getZExtValue());
4833 else
4834 CCmp.addReg(RHS);
4835 CCmp.addImm(NZCV).addImm(Predicate);
4836 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4837 return &*CCmp;
4838}
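// Illustrative example of the CCMP form built above: continuing a chain whose
// previous test succeeds on EQ, comparing %b against 1 for another EQ leaf
// emits roughly
//   CCMPWi %b, 1, 0, eq      ; ccmp wB, #1, #0, eq
// i.e. if the prior condition (eq) held, really compare %b with 1; otherwise
// load NZCV with the immediate #0, which makes the final EQ test fail.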
4839
4840MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4841 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4842 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4843 // We're at a tree leaf, produce a conditional comparison operation.
4844 auto &MRI = *MIB.getMRI();
4845 MachineInstr *ValDef = MRI.getVRegDef(Val);
4846 unsigned Opcode = ValDef->getOpcode();
4847 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4848 Register LHS = Cmp->getLHSReg();
4849 Register RHS = Cmp->getRHSReg();
4850 CmpInst::Predicate CC = Cmp->getCond();
4851 if (Negate)
4852 CC = CmpInst::getInversePredicate(CC);
4853 if (isa<GICmp>(Cmp)) {
4854 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4855 } else {
4856 // Handle special FP cases.
4857 AArch64CC::CondCode ExtraCC;
4858 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4859 // Some floating point conditions can't be tested with a single condition
4860 // code. Construct an additional comparison in this case.
4861 if (ExtraCC != AArch64CC::AL) {
4862 MachineInstr *ExtraCmp;
4863 if (!CCOp)
4864 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4865 else
4866 ExtraCmp =
4867 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4868 CCOp = ExtraCmp->getOperand(0).getReg();
4869 Predicate = ExtraCC;
4870 }
4871 }
4872
4873 // Produce a normal comparison if we are first in the chain
4874 if (!CCOp) {
4875 auto Dst = MRI.cloneVirtualRegister(LHS);
4876 if (isa<GICmp>(Cmp))
4877 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4878 return emitFPCompare(Cmp->getOperand(2).getReg(),
4879 Cmp->getOperand(3).getReg(), MIB);
4880 }
4881 // Otherwise produce a ccmp.
4882 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4883 }
4884 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4885
4886 bool IsOR = Opcode == TargetOpcode::G_OR;
4887
4888 Register LHS = ValDef->getOperand(1).getReg();
4889 bool CanNegateL;
4890 bool MustBeFirstL;
4891 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4892 assert(ValidL && "Valid conjunction/disjunction tree");
4893 (void)ValidL;
4894
4895 Register RHS = ValDef->getOperand(2).getReg();
4896 bool CanNegateR;
4897 bool MustBeFirstR;
4898 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4899 assert(ValidR && "Valid conjunction/disjunction tree");
4900 (void)ValidR;
4901
4902 // Swap sub-tree that must come first to the right side.
4903 if (MustBeFirstL) {
4904 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4905 std::swap(LHS, RHS);
4906 std::swap(CanNegateL, CanNegateR);
4907 std::swap(MustBeFirstL, MustBeFirstR);
4908 }
4909
4910 bool NegateR;
4911 bool NegateAfterR;
4912 bool NegateL;
4913 bool NegateAfterAll;
4914 if (Opcode == TargetOpcode::G_OR) {
4915 // Swap the sub-tree that we can negate naturally to the left.
4916 if (!CanNegateL) {
4917 assert(CanNegateR && "at least one side must be negatable");
4918 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4919 assert(!Negate);
4920 std::swap(LHS, RHS);
4921 NegateR = false;
4922 NegateAfterR = true;
4923 } else {
4924 // Negate the left sub-tree if possible, otherwise negate the result.
4925 NegateR = CanNegateR;
4926 NegateAfterR = !CanNegateR;
4927 }
4928 NegateL = true;
4929 NegateAfterAll = !Negate;
4930 } else {
4931 assert(Opcode == TargetOpcode::G_AND &&
4932 "Valid conjunction/disjunction tree");
4933 assert(!Negate && "Valid conjunction/disjunction tree");
4934
4935 NegateL = false;
4936 NegateR = false;
4937 NegateAfterR = false;
4938 NegateAfterAll = false;
4939 }
4940
4941 // Emit sub-trees.
4942 AArch64CC::CondCode RHSCC;
4943 MachineInstr *CmpR =
4944 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4945 if (NegateAfterR)
4946 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4947 MachineInstr *CmpL = emitConjunctionRec(
4948 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4949 if (NegateAfterAll)
4950 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4951 return CmpL;
4952}
4953
4954MachineInstr *AArch64InstructionSelector::emitConjunction(
4955 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4956 bool DummyCanNegate;
4957 bool DummyMustBeFirst;
4958 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4959 *MIB.getMRI()))
4960 return nullptr;
4961 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4962}
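// Illustrative end-to-end sketch: with tryOptSelectConjunction() below,
//   %c:gpr(s1) = G_OR (G_ICMP eq, %a, 0), (G_ICMP eq, %b, 0)
//   %r:gpr(s32) = G_SELECT %c, %x, %y
// becomes, modulo the exact condition/NZCV choices the recursion makes,
//   cmp  wA, #0
//   ccmp wB, #0, #4, ne      ; if a != 0, test b; else force Z=1
//   csel wR, wX, wY, eq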
4963
4964bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4965 MachineInstr &CondMI) {
4966 AArch64CC::CondCode AArch64CC;
4967 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4968 if (!ConjMI)
4969 return false;
4970
4971 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4972 SelI.eraseFromParent();
4973 return true;
4974}
4975
4976bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4977 MachineRegisterInfo &MRI = *MIB.getMRI();
4978 // We want to recognize this pattern:
4979 //
4980 // $z = G_FCMP pred, $x, $y
4981 // ...
4982 // $w = G_SELECT $z, $a, $b
4983 //
4984 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4985 // some copies/truncs in between.)
4986 //
4987 // If we see this, then we can emit something like this:
4988 //
4989 // fcmp $x, $y
4990 // fcsel $w, $a, $b, pred
4991 //
4992 // Rather than emitting both of the rather long sequences in the standard
4993 // G_FCMP/G_SELECT select methods.
4994
4995 // First, check if the condition is defined by a compare.
4996 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4997
4998 // We can only fold if all of the defs have one use.
4999 Register CondDefReg = CondDef->getOperand(0).getReg();
5000 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5001 // Unless it's another select.
5002 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5003 if (CondDef == &UI)
5004 continue;
5005 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5006 return false;
5007 }
5008 }
5009
5010 // Is the condition defined by a compare?
5011 unsigned CondOpc = CondDef->getOpcode();
5012 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5013 if (tryOptSelectConjunction(I, *CondDef))
5014 return true;
5015 return false;
5016 }
5017
5018 AArch64CC::CondCode CondCode;
5019 if (CondOpc == TargetOpcode::G_ICMP) {
5020 auto &PredOp = CondDef->getOperand(1);
5021 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5022 MIB);
5023 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5024 CondCode =
5025 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5026 } else {
5027 // Get the condition code for the select.
5028 auto Pred =
5029 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5030 AArch64CC::CondCode CondCode2;
5031 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5032
5033 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5034 // instructions to emit the comparison.
5035 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5036 // unnecessary.
5037 if (CondCode2 != AArch64CC::AL)
5038 return false;
5039
5040 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5041 CondDef->getOperand(3).getReg(), MIB)) {
5042 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5043 return false;
5044 }
5045 }
5046
5047 // Emit the select.
5048 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5049 I.getOperand(3).getReg(), CondCode, MIB);
5050 I.eraseFromParent();
5051 return true;
5052}
5053
5054MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5055 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5056 MachineIRBuilder &MIRBuilder) const {
5057 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5058 "Unexpected MachineOperand");
5059 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5060 // We want to find this sort of thing:
5061 // x = G_SUB 0, y
5062 // G_ICMP z, x
5063 //
5064 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5065 // e.g:
5066 //
5067 // cmn z, y
5068
5069 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5070 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5071 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5072 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5073
5074 // Given this:
5075 //
5076 // x = G_SUB 0, y
5077 // G_ICMP z, x
5078 //
5079 // Produce this:
5080 //
5081 // cmn z, y
5082 if (isCMN(RHSDef, P, MRI))
5083 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5084
5085 // Same idea here, but with the LHS of the compare instead:
5086 //
5087 // Given this:
5088 //
5089 // x = G_SUB 0, y
5090 // G_ICMP x, z
5091 //
5092 // Produce this:
5093 //
5094 // cmn y, z
5095 //
5096 // But be careful! We need to swap the predicate!
5097 if (isCMN(LHSDef, P, MRI)) {
5098 if (!CmpInst::isEquality(P)) {
5099 P = CmpInst::getSwappedPredicate(P);
5100 Predicate.setPredicate(P);
5101 }
5102 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5103 }
5104
5105 // Given this:
5106 //
5107 // z = G_AND x, y
5108 // G_ICMP z, 0
5109 //
5110 // Produce this if the compare is signed or an equality compare:
5111 //
5112 // tst x, y
5113 if (!CmpInst::isUnsigned(P) && LHSDef &&
5114 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5115 // Make sure that the RHS is 0.
5116 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5117 if (!ValAndVReg || ValAndVReg->Value != 0)
5118 return nullptr;
5119
5120 return emitTST(LHSDef->getOperand(1),
5121 LHSDef->getOperand(2), MIRBuilder);
5122 }
5123
5124 return nullptr;
5125}
5126
5127bool AArch64InstructionSelector::selectShuffleVector(
5128 MachineInstr &I, MachineRegisterInfo &MRI) {
5129 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5130 Register Src1Reg = I.getOperand(1).getReg();
5131 const LLT Src1Ty = MRI.getType(Src1Reg);
5132 Register Src2Reg = I.getOperand(2).getReg();
5133 const LLT Src2Ty = MRI.getType(Src2Reg);
5134 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5135
5136 MachineBasicBlock &MBB = *I.getParent();
5137 MachineFunction &MF = *MBB.getParent();
5138 LLVMContext &Ctx = MF.getFunction().getContext();
5139
5140 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5141 // it's originated from a <1 x T> type. Those should have been lowered into
5142 // G_BUILD_VECTOR earlier.
5143 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5144 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5145 return false;
5146 }
5147
5148 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5149
5150 SmallVector<Constant *, 64> CstIdxs;
5151 for (int Val : Mask) {
5152 // For now, any undef indexes we'll just assume to be 0. This should be
5153 // optimized in future, e.g. to select DUP etc.
5154 Val = Val < 0 ? 0 : Val;
5155 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5156 unsigned Offset = Byte + Val * BytesPerElt;
5157 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5158 }
5159 }
5160
5161 // Use a constant pool to load the index vector for TBL.
5162 Constant *CPVal = ConstantVector::get(CstIdxs);
5163 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5164 if (!IndexLoad) {
5165 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5166 return false;
5167 }
5168
5169 if (DstTy.getSizeInBits() != 128) {
5170 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5171 // This case can be done with TBL1.
5172 MachineInstr *Concat =
5173 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5174 if (!Concat) {
5175 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5176 return false;
5177 }
5178
5179 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5180 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5181 IndexLoad->getOperand(0).getReg(), MIB);
5182
5183 auto TBL1 = MIB.buildInstr(
5184 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5185 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5186 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5187
5188 auto Copy =
5189 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5190 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5191 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5192 I.eraseFromParent();
5193 return true;
5194 }
5195
5196 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5197 // Q registers for regalloc.
5198 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5199 auto RegSeq = createQTuple(Regs, MIB);
5200 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5201 {RegSeq, IndexLoad->getOperand(0)});
5202 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5203 I.eraseFromParent();
5204 return true;
5205}
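// Illustrative sketch of the TBL path above: a general <8 x s8> shuffle of
// %a and %b builds an 8-byte index vector in the constant pool, loads it
// (widening it to an FPR128), concatenates the sources into one FPR128, and
// emits roughly
//   tbl v0.16b, { v1.16b }, v2.16b
// with the low 64 bits copied out via the dsub subregister. 128-bit results
// use TBLv16i8Two on a QQ register tuple instead.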
5206
5207MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5208 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5209 unsigned LaneIdx, const RegisterBank &RB,
5210 MachineIRBuilder &MIRBuilder) const {
5211 MachineInstr *InsElt = nullptr;
5212 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5213 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5214
5215 // Create a register to define with the insert if one wasn't passed in.
5216 if (!DstReg)
5217 DstReg = MRI.createVirtualRegister(DstRC);
5218
5219 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5220 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5221
5222 if (RB.getID() == AArch64::FPRRegBankID) {
5223 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5224 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5225 .addImm(LaneIdx)
5226 .addUse(InsSub->getOperand(0).getReg())
5227 .addImm(0);
5228 } else {
5229 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5230 .addImm(LaneIdx)
5231 .addUse(EltReg);
5232 }
5233
5234 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5235 return InsElt;
5236}
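// For example (illustrative): inserting a gpr(s32) element into lane 1
// selects INSvi32gpr (ins v0.s[1], wN), while an FPR element is first
// widened with emitScalarToVector and then inserted element-to-element with
// INSvi32lane (ins v0.s[1], v1.s[0]), matching the extra lane-0 immediate
// added above.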
5237
5238bool AArch64InstructionSelector::selectUSMovFromExtend(
5239 MachineInstr &MI, MachineRegisterInfo &MRI) {
5240 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5241 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5242 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5243 return false;
5244 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5245 const Register DefReg = MI.getOperand(0).getReg();
5246 const LLT DstTy = MRI.getType(DefReg);
5247 unsigned DstSize = DstTy.getSizeInBits();
5248
5249 if (DstSize != 32 && DstSize != 64)
5250 return false;
5251
5252 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5253 MI.getOperand(1).getReg(), MRI);
5254 int64_t Lane;
5255 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5256 return false;
5257 Register Src0 = Extract->getOperand(1).getReg();
5258
5259 const LLT VecTy = MRI.getType(Src0);
5260 if (VecTy.isScalableVector())
5261 return false;
5262
5263 if (VecTy.getSizeInBits() != 128) {
5264 const MachineInstr *ScalarToVector = emitScalarToVector(
5265 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5266 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5267 Src0 = ScalarToVector->getOperand(0).getReg();
5268 }
5269
5270 unsigned Opcode;
5271 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5272 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5273 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5274 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5275 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5276 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5277 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5278 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5279 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5280 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5281 else
5282 llvm_unreachable("Unexpected type combo for S/UMov!");
5283
5284 // We may need to generate one of these, depending on the type and sign of the
5285 // input:
5286 // DstReg = SMOV Src0, Lane;
5287 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5288 MachineInstr *ExtI = nullptr;
5289 if (DstSize == 64 && !IsSigned) {
5290 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5291 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5292 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5293 .addImm(0)
5294 .addUse(NewReg)
5295 .addImm(AArch64::sub_32);
5296 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5297 } else
5298 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5299
5300 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5301 MI.eraseFromParent();
5302 return true;
5303}
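// Illustrative examples of the cases above:
//   s32 = G_SEXT (G_EXTRACT_VECTOR_ELT <8 x s16> %v, 2) -> SMOVvi16to32
//         (smov wD, v.h[2])
//   s64 = G_ZEXT (G_EXTRACT_VECTOR_ELT <4 x s32> %v, 1) -> UMOVvi32 into a
//         W register followed by SUBREG_TO_REG through sub_32, since the
//         32-bit umov already zeroes the upper half.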
5304
5305MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5306 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5307 unsigned int Op;
5308 if (DstSize == 128) {
5309 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5310 return nullptr;
5311 Op = AArch64::MOVIv16b_ns;
5312 } else {
5313 Op = AArch64::MOVIv8b_ns;
5314 }
5315
5316 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5317
5318 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5319 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5320 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5321 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5322 return &*Mov;
5323 }
5324 return nullptr;
5325}
5326
5327MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5328 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5329 bool Inv) {
5330
5331 unsigned int Op;
5332 if (DstSize == 128) {
5333 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5334 return nullptr;
5335 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5336 } else {
5337 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5338 }
5339
5340 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5341 uint64_t Shift;
5342
5343 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5344 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5345 Shift = 0;
5346 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5347 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5348 Shift = 8;
5349 } else
5350 return nullptr;
5351
5352 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5353 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5354 return &*Mov;
5355}
5356
5357MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5358 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5359 bool Inv) {
5360
5361 unsigned int Op;
5362 if (DstSize == 128) {
5363 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5364 return nullptr;
5365 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5366 } else {
5367 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5368 }
5369
5370 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5371 uint64_t Shift;
5372
5373 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5374 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5375 Shift = 0;
5376 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5377 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5378 Shift = 8;
5379 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5380 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5381 Shift = 16;
5382 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5383 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5384 Shift = 24;
5385 } else
5386 return nullptr;
5387
5388 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5389 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5390 return &*Mov;
5391}
5392
5393MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5394 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5395
5396 unsigned int Op;
5397 if (DstSize == 128) {
5398 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5399 return nullptr;
5400 Op = AArch64::MOVIv2d_ns;
5401 } else {
5402 Op = AArch64::MOVID;
5403 }
5404
5405 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5406 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5407 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5408 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5409 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5410 return &*Mov;
5411 }
5412 return nullptr;
5413}
5414
5415MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5416 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5417 bool Inv) {
5418
5419 unsigned int Op;
5420 if (DstSize == 128) {
5421 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5422 return nullptr;
5423 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5424 } else {
5425 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5426 }
5427
5428 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5429 uint64_t Shift;
5430
5431 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5432 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5433 Shift = 264;
5434 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5435 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5436 Shift = 272;
5437 } else
5438 return nullptr;
5439
5440 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5441 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5442 return &*Mov;
5443}
5444
5445MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5446 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5447
5448 unsigned int Op;
5449 bool IsWide = false;
5450 if (DstSize == 128) {
5451 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5452 return nullptr;
5453 Op = AArch64::FMOVv4f32_ns;
5454 IsWide = true;
5455 } else {
5456 Op = AArch64::FMOVv2f32_ns;
5457 }
5458
5459 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5460
5461 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5462 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5463 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5464 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5465 Op = AArch64::FMOVv2f64_ns;
5466 } else
5467 return nullptr;
5468
5469 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5470 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5471 return &*Mov;
5472}
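// Illustrative examples for the tryAdvSIMDModImm* helpers above (assuming the
// splat fits the corresponding encoding):
//   16-bit splat of 0x0040 in a 128-bit vector -> MOVIv8i16 (movi v0.8h, #64)
//   byte splat of 0x2a                         -> MOVIv16b_ns (movi v0.16b, #42)
//   float splat of 1.0 in <4 x s32>            -> FMOVv4f32_ns (fmov v0.4s, #1.0)
// Splats whose bitwise NOT is encodable are handled by the Inv/MVNI forms.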
5473
5474bool AArch64InstructionSelector::selectIndexedExtLoad(
5475 MachineInstr &MI, MachineRegisterInfo &MRI) {
5476 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5477 Register Dst = ExtLd.getDstReg();
5478 Register WriteBack = ExtLd.getWritebackReg();
5479 Register Base = ExtLd.getBaseReg();
5480 Register Offset = ExtLd.getOffsetReg();
5481 LLT Ty = MRI.getType(Dst);
5482 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5483 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5484 bool IsPre = ExtLd.isPre();
5485 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5486 unsigned InsertIntoSubReg = 0;
5487 bool IsDst64 = Ty.getSizeInBits() == 64;
5488
5489 // ZExt/SExt results should be on GPR, but we can also handle extending and
5490 // zero-extending loads into FPR, as long as they are scalar.
5491 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5492 if ((IsSExt && IsFPR) || Ty.isVector())
5493 return false;
5494
5495 unsigned Opc = 0;
5496 LLT NewLdDstTy;
5497 LLT s32 = LLT::scalar(32);
5498 LLT s64 = LLT::scalar(64);
5499
5500 if (MemSizeBits == 8) {
5501 if (IsSExt) {
5502 if (IsDst64)
5503 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5504 else
5505 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5506 NewLdDstTy = IsDst64 ? s64 : s32;
5507 } else if (IsFPR) {
5508 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5509 InsertIntoSubReg = AArch64::bsub;
5510 NewLdDstTy = LLT::scalar(MemSizeBits);
5511 } else {
5512 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5513 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5514 NewLdDstTy = s32;
5515 }
5516 } else if (MemSizeBits == 16) {
5517 if (IsSExt) {
5518 if (IsDst64)
5519 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5520 else
5521 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5522 NewLdDstTy = IsDst64 ? s64 : s32;
5523 } else if (IsFPR) {
5524 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5525 InsertIntoSubReg = AArch64::hsub;
5526 NewLdDstTy = LLT::scalar(MemSizeBits);
5527 } else {
5528 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5529 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5530 NewLdDstTy = s32;
5531 }
5532 } else if (MemSizeBits == 32) {
5533 if (IsSExt) {
5534 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5535 NewLdDstTy = s64;
5536 } else if (IsFPR) {
5537 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5538 InsertIntoSubReg = AArch64::ssub;
5539 NewLdDstTy = LLT::scalar(MemSizeBits);
5540 } else {
5541 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5542 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5543 NewLdDstTy = s32;
5544 }
5545 } else {
5546 llvm_unreachable("Unexpected size for indexed load");
5547 }
5548
5549 auto Cst = getIConstantVRegVal(Offset, MRI);
5550 if (!Cst)
5551 return false; // Shouldn't happen, but just in case.
5552
5553 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5554 .addImm(Cst->getSExtValue());
5555 LdMI.cloneMemRefs(ExtLd);
5556 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5557 // Make sure to select the load with the MemTy as the dest type, and then
5558 // insert into a larger reg if needed.
5559 if (InsertIntoSubReg) {
5560 // Generate a SUBREG_TO_REG.
5561 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5562 .addImm(0)
5563 .addUse(LdMI.getReg(1))
5564 .addImm(InsertIntoSubReg);
5565 RBI.constrainGenericRegister(
5566 SubToReg.getReg(0),
5567 *getRegClassForTypeOnBank(MRI.getType(Dst),
5568 *RBI.getRegBank(Dst, MRI, TRI)),
5569 MRI);
5570 } else {
5571 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5572 selectCopy(*Copy, TII, MRI, TRI, RBI);
5573 }
5574 MI.eraseFromParent();
5575
5576 return true;
5577}
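// For example (illustrative): a pre-indexed sign-extending load of s8 into a
// gpr(s32) with constant offset 1 selects LDRSBWpre, roughly
//   ldrsb w1, [x0, #1]!
// defining both the loaded value and the written-back base. The non-sext GPR
// cases that need an s64 result are widened with SUBREG_TO_REG on sub_32, as
// above.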
5578
5579bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5580 MachineRegisterInfo &MRI) {
5581 auto &Ld = cast<GIndexedLoad>(MI);
5582 Register Dst = Ld.getDstReg();
5583 Register WriteBack = Ld.getWritebackReg();
5584 Register Base = Ld.getBaseReg();
5585 Register Offset = Ld.getOffsetReg();
5586 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5587 "Unexpected type for indexed load");
5588 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5589
5590 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5591 return selectIndexedExtLoad(MI, MRI);
5592
5593 unsigned Opc = 0;
5594 if (Ld.isPre()) {
5595 static constexpr unsigned GPROpcodes[] = {
5596 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5597 AArch64::LDRXpre};
5598 static constexpr unsigned FPROpcodes[] = {
5599 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5600 AArch64::LDRQpre};
5601 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5602 Opc = FPROpcodes[Log2_32(MemSize)];
5603 else
5604 Opc = GPROpcodes[Log2_32(MemSize)];
5605 } else {
5606 static constexpr unsigned GPROpcodes[] = {
5607 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5608 AArch64::LDRXpost};
5609 static constexpr unsigned FPROpcodes[] = {
5610 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5611 AArch64::LDRDpost, AArch64::LDRQpost};
5612 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5613 Opc = FPROpcodes[Log2_32(MemSize)];
5614 else
5615 Opc = GPROpcodes[Log2_32(MemSize)];
5616 }
5617 auto Cst = getIConstantVRegVal(Offset, MRI);
5618 if (!Cst)
5619 return false; // Shouldn't happen, but just in case.
5620 auto LdMI =
5621 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5622 LdMI.cloneMemRefs(Ld);
5623 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5624 MI.eraseFromParent();
5625 return true;
5626}
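// For example (illustrative): a post-indexed 64-bit GPR load with offset 8
// selects LDRXpost, roughly  ldr x1, [x0], #8 , where the writeback vreg is
// the first def of the selected instruction.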
5627
5628bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5629 MachineRegisterInfo &MRI) {
5630 Register Dst = I.getWritebackReg();
5631 Register Val = I.getValueReg();
5632 Register Base = I.getBaseReg();
5633 Register Offset = I.getOffsetReg();
5634 LLT ValTy = MRI.getType(Val);
5635 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5636
5637 unsigned Opc = 0;
5638 if (I.isPre()) {
5639 static constexpr unsigned GPROpcodes[] = {
5640 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5641 AArch64::STRXpre};
5642 static constexpr unsigned FPROpcodes[] = {
5643 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5644 AArch64::STRQpre};
5645
5646 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5647 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5648 else
5649 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5650 } else {
5651 static constexpr unsigned GPROpcodes[] = {
5652 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5653 AArch64::STRXpost};
5654 static constexpr unsigned FPROpcodes[] = {
5655 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5656 AArch64::STRDpost, AArch64::STRQpost};
5657
5658 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5659 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5660 else
5661 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5662 }
5663
5664 auto Cst = getIConstantVRegVal(Offset, MRI);
5665 if (!Cst)
5666 return false; // Shouldn't happen, but just in case.
5667 auto Str =
5668 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5669 Str.cloneMemRefs(I);
5670 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5671 I.eraseFromParent();
5672 return true;
5673}
5674
5675MachineInstr *
5676AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5677 MachineIRBuilder &MIRBuilder,
5678 MachineRegisterInfo &MRI) {
5679 LLT DstTy = MRI.getType(Dst);
5680 unsigned DstSize = DstTy.getSizeInBits();
5681 if (CV->isNullValue()) {
5682 if (DstSize == 128) {
5683 auto Mov =
5684 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5685 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5686 return &*Mov;
5687 }
5688
5689 if (DstSize == 64) {
5690 auto Mov =
5691 MIRBuilder
5692 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5693 .addImm(0);
5694 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5695 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5696 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5697 return &*Copy;
5698 }
5699 }
5700
5701 if (Constant *SplatValue = CV->getSplatValue()) {
5702 APInt SplatValueAsInt =
5703 isa<ConstantFP>(SplatValue)
5704 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5705 : SplatValue->getUniqueInteger();
5706 APInt DefBits = APInt::getSplat(
5707 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
5708 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5709 MachineInstr *NewOp;
5710 bool Inv = false;
5711 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5712 (NewOp =
5713 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5716 (NewOp =
5717 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5718 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5719 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5720 return NewOp;
5721
5722 DefBits = ~DefBits;
5723 Inv = true;
5724 if ((NewOp =
5725 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5726 (NewOp =
5727 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5728 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5729 return NewOp;
5730 return nullptr;
5731 };
5732
5733 if (auto *NewOp = TryMOVIWithBits(DefBits))
5734 return NewOp;
5735
5736 // See if a fneg of the constant can be materialized with a MOVI, etc
5737 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5738 unsigned NegOpc) -> MachineInstr * {
5739 // FNegate each sub-element of the constant
5740 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5741 APInt NegBits(DstSize, 0);
5742 unsigned NumElts = DstSize / NumBits;
5743 for (unsigned i = 0; i < NumElts; i++)
5744 NegBits |= Neg << (NumBits * i);
5745 NegBits = DefBits ^ NegBits;
5746
5747 // Try to create the new constants with MOVI, and if so generate a fneg
5748 // for it.
5749 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5750 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5751 NewOp->getOperand(0).setReg(NewDst);
5752 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5753 }
5754 return nullptr;
5755 };
5756 MachineInstr *R;
5757 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5758 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5759 (STI.hasFullFP16() &&
5760 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5761 return R;
5762 }
5763
5764 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5765 if (!CPLoad) {
5766 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5767 return nullptr;
5768 }
5769
5770 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5771 RBI.constrainGenericRegister(
5772 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5773 return &*Copy;
5774}
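// Illustrative examples of the paths above: an all-zero <2 x s64> becomes
// MOVIv2d_ns #0 (movi v0.2d, #0), with a dsub copy for 64-bit destinations;
// an encodable splat goes through the MOVI/MVNI helpers; a splat that only
// becomes encodable after flipping the per-lane sign bit is built as a MOVI
// followed by the matching FNEG; anything else falls back to a constant-pool
// load plus a copy.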
5775
5776bool AArch64InstructionSelector::tryOptConstantBuildVec(
5777 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5778 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5779 unsigned DstSize = DstTy.getSizeInBits();
5780 assert(DstSize <= 128 && "Unexpected build_vec type!");
5781 if (DstSize < 32)
5782 return false;
5783 // Check if we're building a constant vector, in which case we want to
5784 // generate a constant pool load instead of a vector insert sequence.
5785 SmallVector<Constant *, 16> Csts;
5786 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5787 // Try to find G_CONSTANT or G_FCONSTANT
5788 auto *OpMI =
5789 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5790 if (OpMI)
5791 Csts.emplace_back(
5792 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5793 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5794 I.getOperand(Idx).getReg(), MRI)))
5795 Csts.emplace_back(
5796 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5797 else
5798 return false;
5799 }
5800 Constant *CV = ConstantVector::get(Csts);
5801 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5802 return false;
5803 I.eraseFromParent();
5804 return true;
5805}
5806
5807bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5808 MachineInstr &I, MachineRegisterInfo &MRI) {
5809 // Given:
5810 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5811 //
5812 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5813 Register Dst = I.getOperand(0).getReg();
5814 Register EltReg = I.getOperand(1).getReg();
5815 LLT EltTy = MRI.getType(EltReg);
5816 // If the index isn't on the same bank as its elements, then this can't be a
5817 // SUBREG_TO_REG.
5818 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5819 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5820 if (EltRB != DstRB)
5821 return false;
5822 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5823 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5824 }))
5825 return false;
5826 unsigned SubReg;
5827 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5828 if (!EltRC)
5829 return false;
5830 const TargetRegisterClass *DstRC =
5831 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5832 if (!DstRC)
5833 return false;
5834 if (!getSubRegForClass(EltRC, TRI, SubReg))
5835 return false;
5836 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5837 .addImm(0)
5838 .addUse(EltReg)
5839 .addImm(SubReg);
5840 I.eraseFromParent();
5841 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5842 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5843}
5844
5845bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5846 MachineRegisterInfo &MRI) {
5847 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5848 // Until we port more of the optimized selections, for now just use a vector
5849 // insert sequence.
5850 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5851 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5852 unsigned EltSize = EltTy.getSizeInBits();
5853
5854 if (tryOptConstantBuildVec(I, DstTy, MRI))
5855 return true;
5856 if (tryOptBuildVecToSubregToReg(I, MRI))
5857 return true;
5858
5859 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5860 return false; // Don't support all element types yet.
5861 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5862
5863 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5864 MachineInstr *ScalarToVec =
5865 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5866 I.getOperand(1).getReg(), MIB);
5867 if (!ScalarToVec)
5868 return false;
5869
5870 Register DstVec = ScalarToVec->getOperand(0).getReg();
5871 unsigned DstSize = DstTy.getSizeInBits();
5872
5873 // Keep track of the last MI we inserted. Later on, we might be able to save
5874 // a copy using it.
5875 MachineInstr *PrevMI = ScalarToVec;
5876 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5877 // Note that if we don't do a subregister copy, we can end up making an
5878 // extra register.
5879 Register OpReg = I.getOperand(i).getReg();
5880 // Do not emit inserts for undefs
5881 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5882 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5883 DstVec = PrevMI->getOperand(0).getReg();
5884 }
5885 }
5886
5887 // If DstTy's size in bits is less than 128, then emit a subregister copy
5888 // from DstVec to the last register we've defined.
5889 if (DstSize < 128) {
5890 // Force this to be FPR using the destination vector.
5891 const TargetRegisterClass *RC =
5892 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5893 if (!RC)
5894 return false;
5895 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5896 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5897 return false;
5898 }
5899
5900 unsigned SubReg = 0;
5901 if (!getSubRegForClass(RC, TRI, SubReg))
5902 return false;
5903 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5904 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5905 << ")\n");
5906 return false;
5907 }
5908
5909 Register Reg = MRI.createVirtualRegister(RC);
5910 Register DstReg = I.getOperand(0).getReg();
5911
5912 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5913 MachineOperand &RegOp = I.getOperand(1);
5914 RegOp.setReg(Reg);
5915 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5916 } else {
5917 // We either have a vector with all elements (except the first one) undef or
5918 // at least one non-undef non-first element. In the first case, we need to
5919 // constrain the output register ourselves as we may have generated an
5920 // INSERT_SUBREG operation which is a generic operation for which the
5921 // output regclass cannot be automatically chosen.
5922 //
5923 // In the second case, there is no need to do this as it may generate an
5924 // instruction like INSvi32gpr where the regclass can be automatically
5925 // chosen.
5926 //
5927 // Also, we save a copy by re-using the destination register on the final
5928 // insert.
5929 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5930 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5931
5932 Register DstReg = PrevMI->getOperand(0).getReg();
5933 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5934 const TargetRegisterClass *RC =
5935 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5936 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5937 }
5938 }
5939
5940 I.eraseFromParent();
5941 return true;
5942}
5943
5944bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5945 unsigned NumVecs,
5946 MachineInstr &I) {
5947 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5948 assert(Opc && "Expected an opcode?");
5949 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5950 auto &MRI = *MIB.getMRI();
5951 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5952 unsigned Size = Ty.getSizeInBits();
5953 assert((Size == 64 || Size == 128) &&
5954 "Destination must be 64 bits or 128 bits?");
5955 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5956 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5957 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5958 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5959 Load.cloneMemRefs(I);
5960 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5961 Register SelectedLoadDst = Load->getOperand(0).getReg();
5962 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5963 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5964 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5965 // Emit the subreg copies and immediately select them.
5966 // FIXME: We should refactor our copy code into an emitCopy helper and
5967 // clean up uses of this pattern elsewhere in the selector.
5968 selectCopy(*Vec, TII, MRI, TRI, RBI);
5969 }
5970 return true;
5971}
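// For example (illustrative): @llvm.aarch64.neon.ld1x2 of two <4 x s32>
// values selects LD1Twov4s, whose single def is a consecutive register
// tuple; the loop above then copies qsub0 and qsub1 out into the two
// destination vregs and selects those copies immediately.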
5972
5973bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5974 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5975 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5976 assert(Opc && "Expected an opcode?");
5977 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5978 auto &MRI = *MIB.getMRI();
5979 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5980 bool Narrow = Ty.getSizeInBits() == 64;
5981
5982 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5983 SmallVector<Register, 4> Regs(NumVecs);
5984 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5985 [](auto MO) { return MO.getReg(); });
5986
5987 if (Narrow) {
5988 transform(Regs, Regs.begin(), [this](Register Reg) {
5989 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5990 ->getOperand(0)
5991 .getReg();
5992 });
5993 Ty = Ty.multiplyElements(2);
5994 }
5995
5996 Register Tuple = createQTuple(Regs, MIB);
5997 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5998 if (!LaneNo)
5999 return false;
6000
6001 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6002 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6003 .addReg(Tuple)
6004 .addImm(LaneNo->getZExtValue())
6005 .addReg(Ptr);
6006 Load.cloneMemRefs(I);
6007 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
6008 Register SelectedLoadDst = Load->getOperand(0).getReg();
6009 unsigned SubReg = AArch64::qsub0;
6010 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6011 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6012 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6013 : DstOp(I.getOperand(Idx).getReg())},
6014 {})
6015 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6016 Register WideReg = Vec.getReg(0);
6017 // Emit the subreg copies and immediately select them.
6018 selectCopy(*Vec, TII, MRI, TRI, RBI);
6019 if (Narrow &&
6020 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6021 return false;
6022 }
6023 return true;
6024}
6025
6026void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6027 unsigned NumVecs,
6028 unsigned Opc) {
6029 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6030 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6031 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6032
6033 SmallVector<Register, 2> Regs(NumVecs);
6034 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6035 Regs.begin(), [](auto MO) { return MO.getReg(); });
6036
6037 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6038 : createDTuple(Regs, MIB);
6039 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6040 Store.cloneMemRefs(I);
6041 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6042}
6043
6044bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6045 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6046 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6047 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6048 bool Narrow = Ty.getSizeInBits() == 64;
6049
6050 SmallVector<Register, 2> Regs(NumVecs);
6051 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6052 Regs.begin(), [](auto MO) { return MO.getReg(); });
6053
6054 if (Narrow)
6055 transform(Regs, Regs.begin(), [this](Register Reg) {
6056 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6057 ->getOperand(0)
6058 .getReg();
6059 });
6060
6061 Register Tuple = createQTuple(Regs, MIB);
6062
6063 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6064 if (!LaneNo)
6065 return false;
6066 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6067 auto Store = MIB.buildInstr(Opc, {}, {})
6068 .addReg(Tuple)
6069 .addImm(LaneNo->getZExtValue())
6070 .addReg(Ptr);
6071 Store.cloneMemRefs(I);
6072 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6073 return true;
6074}
6075
6076bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6077 MachineInstr &I, MachineRegisterInfo &MRI) {
6078 // Find the intrinsic ID.
6079 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6080
6081 const LLT S8 = LLT::scalar(8);
6082 const LLT S16 = LLT::scalar(16);
6083 const LLT S32 = LLT::scalar(32);
6084 const LLT S64 = LLT::scalar(64);
6085 const LLT P0 = LLT::pointer(0, 64);
6086 // Select the instruction.
6087 switch (IntrinID) {
6088 default:
6089 return false;
6090 case Intrinsic::aarch64_ldxp:
6091 case Intrinsic::aarch64_ldaxp: {
6092 auto NewI = MIB.buildInstr(
6093 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6094 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6095 {I.getOperand(3)});
6096 NewI.cloneMemRefs(I);
6097 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
6098 break;
6099 }
6100 case Intrinsic::aarch64_neon_ld1x2: {
6101 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6102 unsigned Opc = 0;
6103 if (Ty == LLT::fixed_vector(8, S8))
6104 Opc = AArch64::LD1Twov8b;
6105 else if (Ty == LLT::fixed_vector(16, S8))
6106 Opc = AArch64::LD1Twov16b;
6107 else if (Ty == LLT::fixed_vector(4, S16))
6108 Opc = AArch64::LD1Twov4h;
6109 else if (Ty == LLT::fixed_vector(8, S16))
6110 Opc = AArch64::LD1Twov8h;
6111 else if (Ty == LLT::fixed_vector(2, S32))
6112 Opc = AArch64::LD1Twov2s;
6113 else if (Ty == LLT::fixed_vector(4, S32))
6114 Opc = AArch64::LD1Twov4s;
6115 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6116 Opc = AArch64::LD1Twov2d;
6117 else if (Ty == S64 || Ty == P0)
6118 Opc = AArch64::LD1Twov1d;
6119 else
6120 llvm_unreachable("Unexpected type for ld1x2!");
6121 selectVectorLoadIntrinsic(Opc, 2, I);
6122 break;
6123 }
6124 case Intrinsic::aarch64_neon_ld1x3: {
6125 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6126 unsigned Opc = 0;
6127 if (Ty == LLT::fixed_vector(8, S8))
6128 Opc = AArch64::LD1Threev8b;
6129 else if (Ty == LLT::fixed_vector(16, S8))
6130 Opc = AArch64::LD1Threev16b;
6131 else if (Ty == LLT::fixed_vector(4, S16))
6132 Opc = AArch64::LD1Threev4h;
6133 else if (Ty == LLT::fixed_vector(8, S16))
6134 Opc = AArch64::LD1Threev8h;
6135 else if (Ty == LLT::fixed_vector(2, S32))
6136 Opc = AArch64::LD1Threev2s;
6137 else if (Ty == LLT::fixed_vector(4, S32))
6138 Opc = AArch64::LD1Threev4s;
6139 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6140 Opc = AArch64::LD1Threev2d;
6141 else if (Ty == S64 || Ty == P0)
6142 Opc = AArch64::LD1Threev1d;
6143 else
6144 llvm_unreachable("Unexpected type for ld1x3!");
6145 selectVectorLoadIntrinsic(Opc, 3, I);
6146 break;
6147 }
6148 case Intrinsic::aarch64_neon_ld1x4: {
6149 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6150 unsigned Opc = 0;
6151 if (Ty == LLT::fixed_vector(8, S8))
6152 Opc = AArch64::LD1Fourv8b;
6153 else if (Ty == LLT::fixed_vector(16, S8))
6154 Opc = AArch64::LD1Fourv16b;
6155 else if (Ty == LLT::fixed_vector(4, S16))
6156 Opc = AArch64::LD1Fourv4h;
6157 else if (Ty == LLT::fixed_vector(8, S16))
6158 Opc = AArch64::LD1Fourv8h;
6159 else if (Ty == LLT::fixed_vector(2, S32))
6160 Opc = AArch64::LD1Fourv2s;
6161 else if (Ty == LLT::fixed_vector(4, S32))
6162 Opc = AArch64::LD1Fourv4s;
6163 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6164 Opc = AArch64::LD1Fourv2d;
6165 else if (Ty == S64 || Ty == P0)
6166 Opc = AArch64::LD1Fourv1d;
6167 else
6168 llvm_unreachable("Unexpected type for ld1x4!");
6169 selectVectorLoadIntrinsic(Opc, 4, I);
6170 break;
6171 }
6172 case Intrinsic::aarch64_neon_ld2: {
6173 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6174 unsigned Opc = 0;
6175 if (Ty == LLT::fixed_vector(8, S8))
6176 Opc = AArch64::LD2Twov8b;
6177 else if (Ty == LLT::fixed_vector(16, S8))
6178 Opc = AArch64::LD2Twov16b;
6179 else if (Ty == LLT::fixed_vector(4, S16))
6180 Opc = AArch64::LD2Twov4h;
6181 else if (Ty == LLT::fixed_vector(8, S16))
6182 Opc = AArch64::LD2Twov8h;
6183 else if (Ty == LLT::fixed_vector(2, S32))
6184 Opc = AArch64::LD2Twov2s;
6185 else if (Ty == LLT::fixed_vector(4, S32))
6186 Opc = AArch64::LD2Twov4s;
6187 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6188 Opc = AArch64::LD2Twov2d;
6189 else if (Ty == S64 || Ty == P0)
6190 Opc = AArch64::LD1Twov1d;
6191 else
6192 llvm_unreachable("Unexpected type for ld2!");
6193 selectVectorLoadIntrinsic(Opc, 2, I);
6194 break;
6195 }
6196 case Intrinsic::aarch64_neon_ld2lane: {
6197 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6198 unsigned Opc;
6199 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6200 Opc = AArch64::LD2i8;
6201 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6202 Opc = AArch64::LD2i16;
6203 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6204 Opc = AArch64::LD2i32;
6205 else if (Ty == LLT::fixed_vector(2, S64) ||
6206 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6207 Opc = AArch64::LD2i64;
6208 else
6209 llvm_unreachable("Unexpected type for ld2lane!");
6210 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6211 return false;
6212 break;
6213 }
6214 case Intrinsic::aarch64_neon_ld2r: {
6215 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6216 unsigned Opc = 0;
6217 if (Ty == LLT::fixed_vector(8, S8))
6218 Opc = AArch64::LD2Rv8b;
6219 else if (Ty == LLT::fixed_vector(16, S8))
6220 Opc = AArch64::LD2Rv16b;
6221 else if (Ty == LLT::fixed_vector(4, S16))
6222 Opc = AArch64::LD2Rv4h;
6223 else if (Ty == LLT::fixed_vector(8, S16))
6224 Opc = AArch64::LD2Rv8h;
6225 else if (Ty == LLT::fixed_vector(2, S32))
6226 Opc = AArch64::LD2Rv2s;
6227 else if (Ty == LLT::fixed_vector(4, S32))
6228 Opc = AArch64::LD2Rv4s;
6229 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6230 Opc = AArch64::LD2Rv2d;
6231 else if (Ty == S64 || Ty == P0)
6232 Opc = AArch64::LD2Rv1d;
6233 else
6234 llvm_unreachable("Unexpected type for ld2r!");
6235 selectVectorLoadIntrinsic(Opc, 2, I);
6236 break;
6237 }
6238 case Intrinsic::aarch64_neon_ld3: {
6239 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6240 unsigned Opc = 0;
6241 if (Ty == LLT::fixed_vector(8, S8))
6242 Opc = AArch64::LD3Threev8b;
6243 else if (Ty == LLT::fixed_vector(16, S8))
6244 Opc = AArch64::LD3Threev16b;
6245 else if (Ty == LLT::fixed_vector(4, S16))
6246 Opc = AArch64::LD3Threev4h;
6247 else if (Ty == LLT::fixed_vector(8, S16))
6248 Opc = AArch64::LD3Threev8h;
6249 else if (Ty == LLT::fixed_vector(2, S32))
6250 Opc = AArch64::LD3Threev2s;
6251 else if (Ty == LLT::fixed_vector(4, S32))
6252 Opc = AArch64::LD3Threev4s;
6253 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6254 Opc = AArch64::LD3Threev2d;
6255 else if (Ty == S64 || Ty == P0)
6256 Opc = AArch64::LD1Threev1d;
6257 else
6258 llvm_unreachable("Unexpected type for ld3!");
6259 selectVectorLoadIntrinsic(Opc, 3, I);
6260 break;
6261 }
6262 case Intrinsic::aarch64_neon_ld3lane: {
6263 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6264 unsigned Opc;
6265 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6266 Opc = AArch64::LD3i8;
6267 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6268 Opc = AArch64::LD3i16;
6269 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6270 Opc = AArch64::LD3i32;
6271 else if (Ty == LLT::fixed_vector(2, S64) ||
6272 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6273 Opc = AArch64::LD3i64;
6274 else
6275 llvm_unreachable("Unexpected type for ld3lane!");
6276 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6277 return false;
6278 break;
6279 }
6280 case Intrinsic::aarch64_neon_ld3r: {
6281 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6282 unsigned Opc = 0;
6283 if (Ty == LLT::fixed_vector(8, S8))
6284 Opc = AArch64::LD3Rv8b;
6285 else if (Ty == LLT::fixed_vector(16, S8))
6286 Opc = AArch64::LD3Rv16b;
6287 else if (Ty == LLT::fixed_vector(4, S16))
6288 Opc = AArch64::LD3Rv4h;
6289 else if (Ty == LLT::fixed_vector(8, S16))
6290 Opc = AArch64::LD3Rv8h;
6291 else if (Ty == LLT::fixed_vector(2, S32))
6292 Opc = AArch64::LD3Rv2s;
6293 else if (Ty == LLT::fixed_vector(4, S32))
6294 Opc = AArch64::LD3Rv4s;
6295 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6296 Opc = AArch64::LD3Rv2d;
6297 else if (Ty == S64 || Ty == P0)
6298 Opc = AArch64::LD3Rv1d;
6299 else
6300 llvm_unreachable("Unexpected type for ld3r!");
6301 selectVectorLoadIntrinsic(Opc, 3, I);
6302 break;
6303 }
6304 case Intrinsic::aarch64_neon_ld4: {
6305 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6306 unsigned Opc = 0;
6307 if (Ty == LLT::fixed_vector(8, S8))
6308 Opc = AArch64::LD4Fourv8b;
6309 else if (Ty == LLT::fixed_vector(16, S8))
6310 Opc = AArch64::LD4Fourv16b;
6311 else if (Ty == LLT::fixed_vector(4, S16))
6312 Opc = AArch64::LD4Fourv4h;
6313 else if (Ty == LLT::fixed_vector(8, S16))
6314 Opc = AArch64::LD4Fourv8h;
6315 else if (Ty == LLT::fixed_vector(2, S32))
6316 Opc = AArch64::LD4Fourv2s;
6317 else if (Ty == LLT::fixed_vector(4, S32))
6318 Opc = AArch64::LD4Fourv4s;
6319 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6320 Opc = AArch64::LD4Fourv2d;
6321 else if (Ty == S64 || Ty == P0)
6322 Opc = AArch64::LD1Fourv1d;
6323 else
6324 llvm_unreachable("Unexpected type for ld4!");
6325 selectVectorLoadIntrinsic(Opc, 4, I);
6326 break;
6327 }
6328 case Intrinsic::aarch64_neon_ld4lane: {
6329 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6330 unsigned Opc;
6331 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6332 Opc = AArch64::LD4i8;
6333 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6334 Opc = AArch64::LD4i16;
6335 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6336 Opc = AArch64::LD4i32;
6337 else if (Ty == LLT::fixed_vector(2, S64) ||
6338 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6339 Opc = AArch64::LD4i64;
6340 else
6341 llvm_unreachable("Unexpected type for ld4lane!");
6342 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6343 return false;
6344 break;
6345 }
6346 case Intrinsic::aarch64_neon_ld4r: {
6347 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6348 unsigned Opc = 0;
6349 if (Ty == LLT::fixed_vector(8, S8))
6350 Opc = AArch64::LD4Rv8b;
6351 else if (Ty == LLT::fixed_vector(16, S8))
6352 Opc = AArch64::LD4Rv16b;
6353 else if (Ty == LLT::fixed_vector(4, S16))
6354 Opc = AArch64::LD4Rv4h;
6355 else if (Ty == LLT::fixed_vector(8, S16))
6356 Opc = AArch64::LD4Rv8h;
6357 else if (Ty == LLT::fixed_vector(2, S32))
6358 Opc = AArch64::LD4Rv2s;
6359 else if (Ty == LLT::fixed_vector(4, S32))
6360 Opc = AArch64::LD4Rv4s;
6361 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6362 Opc = AArch64::LD4Rv2d;
6363 else if (Ty == S64 || Ty == P0)
6364 Opc = AArch64::LD4Rv1d;
6365 else
6366 llvm_unreachable("Unexpected type for ld4r!");
6367 selectVectorLoadIntrinsic(Opc, 4, I);
6368 break;
6369 }
6370 case Intrinsic::aarch64_neon_st1x2: {
6371 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6372 unsigned Opc;
6373 if (Ty == LLT::fixed_vector(8, S8))
6374 Opc = AArch64::ST1Twov8b;
6375 else if (Ty == LLT::fixed_vector(16, S8))
6376 Opc = AArch64::ST1Twov16b;
6377 else if (Ty == LLT::fixed_vector(4, S16))
6378 Opc = AArch64::ST1Twov4h;
6379 else if (Ty == LLT::fixed_vector(8, S16))
6380 Opc = AArch64::ST1Twov8h;
6381 else if (Ty == LLT::fixed_vector(2, S32))
6382 Opc = AArch64::ST1Twov2s;
6383 else if (Ty == LLT::fixed_vector(4, S32))
6384 Opc = AArch64::ST1Twov4s;
6385 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6386 Opc = AArch64::ST1Twov2d;
6387 else if (Ty == S64 || Ty == P0)
6388 Opc = AArch64::ST1Twov1d;
6389 else
6390 llvm_unreachable("Unexpected type for st1x2!");
6391 selectVectorStoreIntrinsic(I, 2, Opc);
6392 break;
6393 }
6394 case Intrinsic::aarch64_neon_st1x3: {
6395 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6396 unsigned Opc;
6397 if (Ty == LLT::fixed_vector(8, S8))
6398 Opc = AArch64::ST1Threev8b;
6399 else if (Ty == LLT::fixed_vector(16, S8))
6400 Opc = AArch64::ST1Threev16b;
6401 else if (Ty == LLT::fixed_vector(4, S16))
6402 Opc = AArch64::ST1Threev4h;
6403 else if (Ty == LLT::fixed_vector(8, S16))
6404 Opc = AArch64::ST1Threev8h;
6405 else if (Ty == LLT::fixed_vector(2, S32))
6406 Opc = AArch64::ST1Threev2s;
6407 else if (Ty == LLT::fixed_vector(4, S32))
6408 Opc = AArch64::ST1Threev4s;
6409 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6410 Opc = AArch64::ST1Threev2d;
6411 else if (Ty == S64 || Ty == P0)
6412 Opc = AArch64::ST1Threev1d;
6413 else
6414 llvm_unreachable("Unexpected type for st1x3!");
6415 selectVectorStoreIntrinsic(I, 3, Opc);
6416 break;
6417 }
6418 case Intrinsic::aarch64_neon_st1x4: {
6419 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6420 unsigned Opc;
6421 if (Ty == LLT::fixed_vector(8, S8))
6422 Opc = AArch64::ST1Fourv8b;
6423 else if (Ty == LLT::fixed_vector(16, S8))
6424 Opc = AArch64::ST1Fourv16b;
6425 else if (Ty == LLT::fixed_vector(4, S16))
6426 Opc = AArch64::ST1Fourv4h;
6427 else if (Ty == LLT::fixed_vector(8, S16))
6428 Opc = AArch64::ST1Fourv8h;
6429 else if (Ty == LLT::fixed_vector(2, S32))
6430 Opc = AArch64::ST1Fourv2s;
6431 else if (Ty == LLT::fixed_vector(4, S32))
6432 Opc = AArch64::ST1Fourv4s;
6433 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6434 Opc = AArch64::ST1Fourv2d;
6435 else if (Ty == S64 || Ty == P0)
6436 Opc = AArch64::ST1Fourv1d;
6437 else
6438 llvm_unreachable("Unexpected type for st1x4!");
6439 selectVectorStoreIntrinsic(I, 4, Opc);
6440 break;
6441 }
6442 case Intrinsic::aarch64_neon_st2: {
6443 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6444 unsigned Opc;
6445 if (Ty == LLT::fixed_vector(8, S8))
6446 Opc = AArch64::ST2Twov8b;
6447 else if (Ty == LLT::fixed_vector(16, S8))
6448 Opc = AArch64::ST2Twov16b;
6449 else if (Ty == LLT::fixed_vector(4, S16))
6450 Opc = AArch64::ST2Twov4h;
6451 else if (Ty == LLT::fixed_vector(8, S16))
6452 Opc = AArch64::ST2Twov8h;
6453 else if (Ty == LLT::fixed_vector(2, S32))
6454 Opc = AArch64::ST2Twov2s;
6455 else if (Ty == LLT::fixed_vector(4, S32))
6456 Opc = AArch64::ST2Twov4s;
6457 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6458 Opc = AArch64::ST2Twov2d;
6459 else if (Ty == S64 || Ty == P0)
6460 Opc = AArch64::ST1Twov1d;
6461 else
6462 llvm_unreachable("Unexpected type for st2!");
6463 selectVectorStoreIntrinsic(I, 2, Opc);
6464 break;
6465 }
6466 case Intrinsic::aarch64_neon_st3: {
6467 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6468 unsigned Opc;
6469 if (Ty == LLT::fixed_vector(8, S8))
6470 Opc = AArch64::ST3Threev8b;
6471 else if (Ty == LLT::fixed_vector(16, S8))
6472 Opc = AArch64::ST3Threev16b;
6473 else if (Ty == LLT::fixed_vector(4, S16))
6474 Opc = AArch64::ST3Threev4h;
6475 else if (Ty == LLT::fixed_vector(8, S16))
6476 Opc = AArch64::ST3Threev8h;
6477 else if (Ty == LLT::fixed_vector(2, S32))
6478 Opc = AArch64::ST3Threev2s;
6479 else if (Ty == LLT::fixed_vector(4, S32))
6480 Opc = AArch64::ST3Threev4s;
6481 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6482 Opc = AArch64::ST3Threev2d;
6483 else if (Ty == S64 || Ty == P0)
6484 Opc = AArch64::ST1Threev1d;
6485 else
6486 llvm_unreachable("Unexpected type for st3!");
6487 selectVectorStoreIntrinsic(I, 3, Opc);
6488 break;
6489 }
6490 case Intrinsic::aarch64_neon_st4: {
6491 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6492 unsigned Opc;
6493 if (Ty == LLT::fixed_vector(8, S8))
6494 Opc = AArch64::ST4Fourv8b;
6495 else if (Ty == LLT::fixed_vector(16, S8))
6496 Opc = AArch64::ST4Fourv16b;
6497 else if (Ty == LLT::fixed_vector(4, S16))
6498 Opc = AArch64::ST4Fourv4h;
6499 else if (Ty == LLT::fixed_vector(8, S16))
6500 Opc = AArch64::ST4Fourv8h;
6501 else if (Ty == LLT::fixed_vector(2, S32))
6502 Opc = AArch64::ST4Fourv2s;
6503 else if (Ty == LLT::fixed_vector(4, S32))
6504 Opc = AArch64::ST4Fourv4s;
6505 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6506 Opc = AArch64::ST4Fourv2d;
6507 else if (Ty == S64 || Ty == P0)
6508 Opc = AArch64::ST1Fourv1d;
6509 else
6510 llvm_unreachable("Unexpected type for st4!");
6511 selectVectorStoreIntrinsic(I, 4, Opc);
6512 break;
6513 }
6514 case Intrinsic::aarch64_neon_st2lane: {
6515 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6516 unsigned Opc;
6517 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6518 Opc = AArch64::ST2i8;
6519 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6520 Opc = AArch64::ST2i16;
6521 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6522 Opc = AArch64::ST2i32;
6523 else if (Ty == LLT::fixed_vector(2, S64) ||
6524 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6525 Opc = AArch64::ST2i64;
6526 else
6527 llvm_unreachable("Unexpected type for st2lane!");
6528 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6529 return false;
6530 break;
6531 }
6532 case Intrinsic::aarch64_neon_st3lane: {
6533 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6534 unsigned Opc;
6535 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6536 Opc = AArch64::ST3i8;
6537 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6538 Opc = AArch64::ST3i16;
6539 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6540 Opc = AArch64::ST3i32;
6541 else if (Ty == LLT::fixed_vector(2, S64) ||
6542 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6543 Opc = AArch64::ST3i64;
6544 else
6545 llvm_unreachable("Unexpected type for st3lane!");
6546 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6547 return false;
6548 break;
6549 }
6550 case Intrinsic::aarch64_neon_st4lane: {
6551 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6552 unsigned Opc;
6553 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6554 Opc = AArch64::ST4i8;
6555 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6556 Opc = AArch64::ST4i16;
6557 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6558 Opc = AArch64::ST4i32;
6559 else if (Ty == LLT::fixed_vector(2, S64) ||
6560 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6561 Opc = AArch64::ST4i64;
6562 else
6563 llvm_unreachable("Unexpected type for st4lane!");
6564 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6565 return false;
6566 break;
6567 }
6568 case Intrinsic::aarch64_mops_memset_tag: {
6569 // Transform
6570    //   %dst:gpr(p0) =
6571    //       G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6572    //       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6573    //   where %dst is updated, into
6574    //   (%Rd:GPR64common, %Rn:GPR64) =
6575    //       MOPSMemorySetTaggingPseudo
6576    //       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6577    //   where Rd and Rn are tied.
6578    // It is expected that %val has been extended to s64 in legalization.
6579    // Note that the order of the size and value operands is swapped.
6580
6581 Register DstDef = I.getOperand(0).getReg();
6582 // I.getOperand(1) is the intrinsic function
6583 Register DstUse = I.getOperand(2).getReg();
6584 Register ValUse = I.getOperand(3).getReg();
6585 Register SizeUse = I.getOperand(4).getReg();
6586
6587 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6588 // Therefore an additional virtual register is required for the updated size
6589 // operand. This value is not accessible via the semantics of the intrinsic.
6590 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6591
6592 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6593 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6594    Memset.cloneMemRefs(I);
6595    constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6596 break;
6597 }
6598 }
6599
6600 I.eraseFromParent();
6601 return true;
6602}
6603
6604bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6605                                                 MachineRegisterInfo &MRI) {
6606 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6607
6608 switch (IntrinID) {
6609 default:
6610 break;
6611 case Intrinsic::aarch64_crypto_sha1h: {
6612 Register DstReg = I.getOperand(0).getReg();
6613 Register SrcReg = I.getOperand(2).getReg();
6614
6615 // FIXME: Should this be an assert?
6616 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6617 MRI.getType(SrcReg).getSizeInBits() != 32)
6618 return false;
6619
6620 // The operation has to happen on FPRs. Set up some new FPR registers for
6621 // the source and destination if they are on GPRs.
6622 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6623 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6624 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6625
6626 // Make sure the copy ends up getting constrained properly.
6627 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6628 AArch64::GPR32RegClass, MRI);
6629 }
6630
6631 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6632 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6633
6634 // Actually insert the instruction.
6635 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6636 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6637
6638 // Did we create a new register for the destination?
6639 if (DstReg != I.getOperand(0).getReg()) {
6640 // Yep. Copy the result of the instruction back into the original
6641 // destination.
6642 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6643 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6644 AArch64::GPR32RegClass, MRI);
6645 }
6646
6647 I.eraseFromParent();
6648 return true;
6649 }
6650 case Intrinsic::ptrauth_resign: {
6651 Register DstReg = I.getOperand(0).getReg();
6652 Register ValReg = I.getOperand(2).getReg();
6653 uint64_t AUTKey = I.getOperand(3).getImm();
6654 Register AUTDisc = I.getOperand(4).getReg();
6655 uint64_t PACKey = I.getOperand(5).getImm();
6656 Register PACDisc = I.getOperand(6).getReg();
6657
6658 Register AUTAddrDisc = AUTDisc;
6659 uint16_t AUTConstDiscC = 0;
6660    std::tie(AUTConstDiscC, AUTAddrDisc) =
6661        extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6662
6663 Register PACAddrDisc = PACDisc;
6664 uint16_t PACConstDiscC = 0;
6665    std::tie(PACConstDiscC, PACAddrDisc) =
6666        extractPtrauthBlendDiscriminators(PACDisc, MRI);
6667
6668 MIB.buildCopy({AArch64::X16}, {ValReg});
6669 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6670 MIB.buildInstr(AArch64::AUTPAC)
6671 .addImm(AUTKey)
6672 .addImm(AUTConstDiscC)
6673 .addUse(AUTAddrDisc)
6674 .addImm(PACKey)
6675 .addImm(PACConstDiscC)
6676 .addUse(PACAddrDisc)
6677 .constrainAllUses(TII, TRI, RBI);
6678 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6679
6680 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6681 I.eraseFromParent();
6682 return true;
6683 }
6684 case Intrinsic::ptrauth_auth: {
6685 Register DstReg = I.getOperand(0).getReg();
6686 Register ValReg = I.getOperand(2).getReg();
6687 uint64_t AUTKey = I.getOperand(3).getImm();
6688 Register AUTDisc = I.getOperand(4).getReg();
6689
6690 Register AUTAddrDisc = AUTDisc;
6691 uint16_t AUTConstDiscC = 0;
6692    std::tie(AUTConstDiscC, AUTAddrDisc) =
6693        extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6694
6695 if (STI.isX16X17Safer()) {
6696 MIB.buildCopy({AArch64::X16}, {ValReg});
6697 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6698 MIB.buildInstr(AArch64::AUTx16x17)
6699 .addImm(AUTKey)
6700 .addImm(AUTConstDiscC)
6701 .addUse(AUTAddrDisc)
6702 .constrainAllUses(TII, TRI, RBI);
6703 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6704 } else {
6705 Register ScratchReg =
6706 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6707 MIB.buildInstr(AArch64::AUTxMxN)
6708 .addDef(DstReg)
6709 .addDef(ScratchReg)
6710 .addUse(ValReg)
6711 .addImm(AUTKey)
6712 .addImm(AUTConstDiscC)
6713 .addUse(AUTAddrDisc)
6714 .constrainAllUses(TII, TRI, RBI);
6715 }
6716
6717 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6718 I.eraseFromParent();
6719 return true;
6720 }
6721 case Intrinsic::frameaddress:
6722 case Intrinsic::returnaddress: {
6723 MachineFunction &MF = *I.getParent()->getParent();
6724 MachineFrameInfo &MFI = MF.getFrameInfo();
6725
6726 unsigned Depth = I.getOperand(2).getImm();
6727 Register DstReg = I.getOperand(0).getReg();
6728 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6729
6730 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6731 if (!MFReturnAddr) {
6732 // Insert the copy from LR/X30 into the entry block, before it can be
6733 // clobbered by anything.
6734 MFI.setReturnAddressIsTaken(true);
6735 MFReturnAddr = getFunctionLiveInPhysReg(
6736 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6737 }
6738
6739 if (STI.hasPAuth()) {
6740 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6741 } else {
6742 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6743 MIB.buildInstr(AArch64::XPACLRI);
6744 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6745 }
6746
6747 I.eraseFromParent();
6748 return true;
6749 }
6750
6751 MFI.setFrameAddressIsTaken(true);
6752 Register FrameAddr(AArch64::FP);
6753 while (Depth--) {
6754 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6755 auto Ldr =
6756        MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6757    constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6758 FrameAddr = NextFrame;
6759 }
6760
6761 if (IntrinID == Intrinsic::frameaddress)
6762 MIB.buildCopy({DstReg}, {FrameAddr});
6763 else {
6764 MFI.setReturnAddressIsTaken(true);
6765
6766 if (STI.hasPAuth()) {
6767 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6768 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6769 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6770 } else {
6771 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6772 .addImm(1);
6773 MIB.buildInstr(AArch64::XPACLRI);
6774 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6775 }
6776 }
6777
6778 I.eraseFromParent();
6779 return true;
6780 }
6781 case Intrinsic::aarch64_neon_tbl2:
6782 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6783 return true;
6784 case Intrinsic::aarch64_neon_tbl3:
6785 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6786 false);
6787 return true;
6788 case Intrinsic::aarch64_neon_tbl4:
6789 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6790 return true;
6791 case Intrinsic::aarch64_neon_tbx2:
6792 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6793 return true;
6794 case Intrinsic::aarch64_neon_tbx3:
6795 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6796 return true;
6797 case Intrinsic::aarch64_neon_tbx4:
6798 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6799 return true;
6800 case Intrinsic::swift_async_context_addr:
6801 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6802 {Register(AArch64::FP)})
6803 .addImm(8)
6804 .addImm(0);
6805    constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6806
6807    MachineFunction *MF = I.getParent()->getParent();
6808 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6809 I.eraseFromParent();
6810 return true;
6811 }
6812 return false;
6813}
6814
6815// G_PTRAUTH_GLOBAL_VALUE lowering
6816//
6817// We have 3 lowering alternatives to choose from:
6818// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6819// If the GV doesn't need a GOT load (i.e., is locally defined)
6820// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6821//
6822// - LOADgotPAC: similar to LOADgot, with added PAC.
6823// If the GV needs a GOT load, materialize the pointer using the usual
6824//   GOT adrp+ldr, plus pac. Pointers in the GOT are assumed not to be signed;
6825//   the GOT section is assumed to be read-only (e.g., via the relro mechanism). See
6826// LowerMOVaddrPAC.
6827//
6828// - LOADauthptrstatic: similar to LOADgot, but use a
6829// special stub slot instead of a GOT slot.
6830// Load a signed pointer for symbol 'sym' from a stub slot named
6831// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6832// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6833// .data with an
6834// @AUTH relocation. See LowerLOADauthptrstatic.
6835//
6836// All 3 are pseudos that are expanded late into longer sequences: this lets us
6837// provide integrity guarantees on the to-be-signed intermediate values.
6838//
6839// LOADauthptrstatic is undesirable because it requires a large section filled
6840// with often similarly-signed pointers, making it a good harvesting target.
6841// Thus, it's only used for ptrauth references to extern_weak to avoid null
6842// checks.
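//
// Illustrative mapping (a sketch of the choice made below, not an exhaustive
// list):
//   ptrauth reference to a locally-defined global  -> MOVaddrPAC
//   ptrauth reference that needs a GOT load        -> LOADgotPAC
//   ptrauth reference to an extern_weak global     -> LOADauthptrstatic
// In GMIR terms the input looks roughly like
//   %g:gpr(p0) = G_PTRAUTH_GLOBAL_VALUE %addr(p0), <key>, %addrdisc(s64), <disc>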
6843
6844bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6845    MachineInstr &I, MachineRegisterInfo &MRI) const {
6846 Register DefReg = I.getOperand(0).getReg();
6847 Register Addr = I.getOperand(1).getReg();
6848 uint64_t Key = I.getOperand(2).getImm();
6849 Register AddrDisc = I.getOperand(3).getReg();
6850 uint64_t Disc = I.getOperand(4).getImm();
6851 int64_t Offset = 0;
6852
6853 if (Key > AArch64PACKey::LAST)
6854 report_fatal_error("key in ptrauth global out of range [0, " +
6855 Twine((int)AArch64PACKey::LAST) + "]");
6856
6857 // Blend only works if the integer discriminator is 16-bit wide.
6858 if (!isUInt<16>(Disc))
6859    report_fatal_error(
6860        "constant discriminator in ptrauth global out of range [0, 0xffff]");
6861
6862 // Choosing between 3 lowering alternatives is target-specific.
6863 if (!STI.isTargetELF() && !STI.isTargetMachO())
6864 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6865
6866 if (!MRI.hasOneDef(Addr))
6867 return false;
6868
6869 // First match any offset we take from the real global.
6870 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6871 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6872 Register OffsetReg = DefMI->getOperand(2).getReg();
6873 if (!MRI.hasOneDef(OffsetReg))
6874 return false;
6875 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6876 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6877 return false;
6878
6879 Addr = DefMI->getOperand(1).getReg();
6880 if (!MRI.hasOneDef(Addr))
6881 return false;
6882
6883 DefMI = &*MRI.def_instr_begin(Addr);
6884 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6885 }
6886
6887 // We should be left with a genuine unauthenticated GlobalValue.
6888 const GlobalValue *GV;
6889 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6890    GV = DefMI->getOperand(1).getGlobal();
6891    Offset += DefMI->getOperand(1).getOffset();
6892 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6893    GV = DefMI->getOperand(2).getGlobal();
6894    Offset += DefMI->getOperand(2).getOffset();
6895 } else {
6896 return false;
6897 }
6898
6899 MachineIRBuilder MIB(I);
6900
6901 // Classify the reference to determine whether it needs a GOT load.
6902 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6903 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6904 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6905 "unsupported non-GOT op flags on ptrauth global reference");
6906 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6907 "unsupported non-GOT reference to weak ptrauth global");
6908
6909 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6910 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6911
6912 // Non-extern_weak:
6913 // - No GOT load needed -> MOVaddrPAC
6914 // - GOT load for non-extern_weak -> LOADgotPAC
6915 // Note that we disallow extern_weak refs to avoid null checks later.
6916 if (!GV->hasExternalWeakLinkage()) {
6917 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6918 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6919    MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6920        .addGlobalAddress(GV, Offset)
6921 .addImm(Key)
6922 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6923 .addImm(Disc)
6924 .constrainAllUses(TII, TRI, RBI);
6925 MIB.buildCopy(DefReg, Register(AArch64::X16));
6926 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6927 I.eraseFromParent();
6928 return true;
6929 }
6930
6931 // extern_weak -> LOADauthptrstatic
6932
6933 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6934 // offset alone as a pointer if the symbol wasn't available, which would
6935 // probably break null checks in users. Ptrauth complicates things further:
6936 // error out.
6937 if (Offset != 0)
6938    report_fatal_error(
6939        "unsupported non-zero offset in weak ptrauth global reference");
6940
6941 if (HasAddrDisc)
6942 report_fatal_error("unsupported weak addr-div ptrauth global");
6943
6944 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6945 .addGlobalAddress(GV, Offset)
6946 .addImm(Key)
6947 .addImm(Disc);
6948 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6949
6950 I.eraseFromParent();
6951 return true;
6952}
6953
6954void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6955                                             MachineRegisterInfo &MRI,
6956 unsigned NumVec, unsigned Opc1,
6957 unsigned Opc2, bool isExt) {
6958 Register DstReg = I.getOperand(0).getReg();
6959 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6960
6961  // Create the REG_SEQUENCE
6962  SmallVector<Register, 4> Regs;
6963 for (unsigned i = 0; i < NumVec; i++)
6964 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6965 Register RegSeq = createQTuple(Regs, MIB);
6966
6967  Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6968  MachineInstrBuilder Instr;
6969 if (isExt) {
6970 Register Reg = I.getOperand(2).getReg();
6971 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6972 } else
6973    Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6974  constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
6975 I.eraseFromParent();
6976}
6977
6979AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6980 auto MaybeImmed = getImmedFromMO(Root);
6981 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6982 return std::nullopt;
6983 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6984 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6985}
6986
6988AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6989 auto MaybeImmed = getImmedFromMO(Root);
6990 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6991 return std::nullopt;
6992 uint64_t Enc = 31 - *MaybeImmed;
6993 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6994}
6995
6997AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6998 auto MaybeImmed = getImmedFromMO(Root);
6999 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7000 return std::nullopt;
7001 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7002 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7003}
7004
7006AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7007 auto MaybeImmed = getImmedFromMO(Root);
7008 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7009 return std::nullopt;
7010 uint64_t Enc = 63 - *MaybeImmed;
7011 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7012}
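// Worked example for the four shift renderers above (illustrative only): a
// 32-bit left shift by 8 is encoded with immr = (32 - 8) & 0x1f = 24
// (selectShiftA_32) and imms = 31 - 8 = 23 (selectShiftB_32), i.e. the usual
// "lsl w0, w1, #8" == "ubfm w0, w1, #24, #23" form; the *_64 variants perform
// the same arithmetic modulo 64.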
7013
7014/// Helper to select an immediate value that can be represented as a 12-bit
7015/// value shifted left by either 0 or 12. If it is possible to do so, return
7016/// the immediate and shift value. If not, return std::nullopt.
7017///
7018/// Used by selectArithImmed and selectNegArithImmed.
7020AArch64InstructionSelector::select12BitValueWithLeftShift(
7021 uint64_t Immed) const {
7022 unsigned ShiftAmt;
7023 if (Immed >> 12 == 0) {
7024 ShiftAmt = 0;
7025 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7026 ShiftAmt = 12;
7027 Immed = Immed >> 12;
7028 } else
7029 return std::nullopt;
7030
7031 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7032 return {{
7033 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7034 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7035 }};
7036}
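// Worked example (illustrative): 0xabc yields {imm = 0xabc, shift = LSL 0};
// 0x123000 yields {imm = 0x123, shift = LSL 12}; 0x123456 has low bits set and
// does not fit in 12 bits, so std::nullopt is returned and the add/sub
// immediate form is not used.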
7037
7038/// SelectArithImmed - Select an immediate value that can be represented as
7039/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7040/// Val set to the 12-bit value and Shift set to the shifter operand.
7042AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7043 // This function is called from the addsub_shifted_imm ComplexPattern,
7044  // which lists [imm] as the list of opcodes it's interested in. However,
7045 // we still need to check whether the operand is actually an immediate
7046 // here because the ComplexPattern opcode list is only used in
7047 // root-level opcode matching.
7048 auto MaybeImmed = getImmedFromMO(Root);
7049 if (MaybeImmed == std::nullopt)
7050 return std::nullopt;
7051 return select12BitValueWithLeftShift(*MaybeImmed);
7052}
7053
7054/// SelectNegArithImmed - As above, but negates the value before trying to
7055/// select it.
7057AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7058 // We need a register here, because we need to know if we have a 64 or 32
7059 // bit immediate.
7060 if (!Root.isReg())
7061 return std::nullopt;
7062 auto MaybeImmed = getImmedFromMO(Root);
7063 if (MaybeImmed == std::nullopt)
7064 return std::nullopt;
7065 uint64_t Immed = *MaybeImmed;
7066
7067 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7068 // have the opposite effect on the C flag, so this pattern mustn't match under
7069 // those circumstances.
7070 if (Immed == 0)
7071 return std::nullopt;
7072
7073  // Check whether the root operand is 32 bits or 64 bits wide.
7076 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7077 Immed = ~((uint32_t)Immed) + 1;
7078 else
7079 Immed = ~Immed + 1ULL;
7080
7081 if (Immed & 0xFFFFFFFFFF000000ULL)
7082 return std::nullopt;
7083
7084 Immed &= 0xFFFFFFULL;
7085 return select12BitValueWithLeftShift(Immed);
7086}
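// Worked example (illustrative): for a compare of a 32-bit register against
// the constant -5, Immed = -5 is negated to 5, which fits in 12 bits, so the
// comparison can be selected as "cmn w0, #5" instead of materializing -5.
// The Immed == 0 bail-out above keeps "cmp wN, #0" from being rewritten as
// "cmn wN, #0", which would set the C flag differently.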
7087
7088/// Checks if we are sure that folding MI into load/store addressing mode is
7089/// beneficial or not.
7090///
7091/// Returns:
7092/// - true if folding MI would be beneficial.
7093/// - false if folding MI would be bad.
7094/// - std::nullopt if it is not sure whether folding MI is beneficial.
7095///
7096/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7097///
7098/// %13:gpr(s64) = G_CONSTANT i64 1
7099/// %8:gpr(s64) = G_SHL %6, %13(s64)
7100/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7101/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7102std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7103 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7104 if (MI.getOpcode() == AArch64::G_SHL) {
7105 // Address operands with shifts are free, except for running on subtargets
7106 // with AddrLSLSlow14.
7107 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7108 MI.getOperand(2).getReg(), MRI)) {
7109 const APInt ShiftVal = ValAndVeg->Value;
7110
7111 // Don't fold if we know this will be slow.
7112 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7113 }
7114 }
7115 return std::nullopt;
7116}
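// Illustrative example: with "%8 = G_SHL %6, 3" feeding the G_PTR_ADD of an
// 8-byte load, the shift is free on most subtargets and folds into
// "ldr x0, [xBase, xOff, lsl #3]", so this returns true. On a subtarget with
// AddrLSLSlow14, shift amounts of 1 and 4 (2- and 16-byte accesses) return
// false and the shift is kept as a separate instruction.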
7117
7118/// Return true if it is worth folding MI into an extended register. That is,
7119/// if it's safe to pull it into the addressing mode of a load or store as a
7120/// shift.
7121/// \p IsAddrOperand whether the def of MI is used as an address operand
7122/// (e.g. feeding into an LDR/STR).
7123bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7125 bool IsAddrOperand) const {
7126
7127 // Always fold if there is one use, or if we're optimizing for size.
7128 Register DefReg = MI.getOperand(0).getReg();
7129 if (MRI.hasOneNonDBGUse(DefReg) ||
7130 MI.getParent()->getParent()->getFunction().hasOptSize())
7131 return true;
7132
7133 if (IsAddrOperand) {
7134 // If we are already sure that folding MI is good or bad, return the result.
7135 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7136 return *Worth;
7137
7138 // Fold G_PTR_ADD if its offset operand can be folded
7139 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7140 MachineInstr *OffsetInst =
7141 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7142
7143 // Note, we already know G_PTR_ADD is used by at least two instructions.
7144 // If we are also sure about whether folding is beneficial or not,
7145 // return the result.
7146 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7147 return *Worth;
7148 }
7149 }
7150
7151 // FIXME: Consider checking HasALULSLFast as appropriate.
7152
7153 // We have a fastpath, so folding a shift in and potentially computing it
7154 // many times may be beneficial. Check if this is only used in memory ops.
7155 // If it is, then we should fold.
7156 return all_of(MRI.use_nodbg_instructions(DefReg),
7157 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7158}
7159
7161 switch (Type) {
7162 case AArch64_AM::SXTB:
7163 case AArch64_AM::SXTH:
7164 case AArch64_AM::SXTW:
7165 return true;
7166 default:
7167 return false;
7168 }
7169}
7170
7172AArch64InstructionSelector::selectExtendedSHL(
7174 unsigned SizeInBytes, bool WantsExt) const {
7175 assert(Base.isReg() && "Expected base to be a register operand");
7176 assert(Offset.isReg() && "Expected offset to be a register operand");
7177
7179 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7180
7181 unsigned OffsetOpc = OffsetInst->getOpcode();
7182 bool LookedThroughZExt = false;
7183 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7184 // Try to look through a ZEXT.
7185 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7186 return std::nullopt;
7187
7188 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7189 OffsetOpc = OffsetInst->getOpcode();
7190 LookedThroughZExt = true;
7191
7192 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7193 return std::nullopt;
7194 }
7195 // Make sure that the memory op is a valid size.
7196 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7197 if (LegalShiftVal == 0)
7198 return std::nullopt;
7199 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7200 return std::nullopt;
7201
7202 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7203 // register we will offset is the LHS, and the register containing the
7204 // constant is the RHS.
7205 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7206 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7207 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7208 if (!ValAndVReg) {
7209 // We didn't get a constant on the RHS. If the opcode is a shift, then
7210 // we're done.
7211 if (OffsetOpc == TargetOpcode::G_SHL)
7212 return std::nullopt;
7213
7214 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7215 std::swap(OffsetReg, ConstantReg);
7216 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7217 if (!ValAndVReg)
7218 return std::nullopt;
7219 }
7220
7221 // The value must fit into 3 bits, and must be positive. Make sure that is
7222 // true.
7223 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7224
7225 // Since we're going to pull this into a shift, the constant value must be
7226 // a power of 2. If we got a multiply, then we need to check this.
7227 if (OffsetOpc == TargetOpcode::G_MUL) {
7228 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7229 return std::nullopt;
7230
7231 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7232 ImmVal = Log2_32(ImmVal);
7233 }
7234
7235 if ((ImmVal & 0x7) != ImmVal)
7236 return std::nullopt;
7237
7238 // We are only allowed to shift by LegalShiftVal. This shift value is built
7239 // into the instruction, so we can't just use whatever we want.
7240 if (ImmVal != LegalShiftVal)
7241 return std::nullopt;
7242
7243 unsigned SignExtend = 0;
7244 if (WantsExt) {
7245 // Check if the offset is defined by an extend, unless we looked through a
7246 // G_ZEXT earlier.
7247 if (!LookedThroughZExt) {
7248 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7249      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7250      if (Ext == AArch64_AM::InvalidShiftExtend)
7251 return std::nullopt;
7252
7253 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7254 // We only support SXTW for signed extension here.
7255 if (SignExtend && Ext != AArch64_AM::SXTW)
7256 return std::nullopt;
7257 OffsetReg = ExtInst->getOperand(1).getReg();
7258 }
7259
7260 // Need a 32-bit wide register here.
7261 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7262 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7263 }
7264
7265 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7266 // offset. Signify that we are shifting by setting the shift flag to 1.
7267 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7268 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7269 [=](MachineInstrBuilder &MIB) {
7270 // Need to add both immediates here to make sure that they are both
7271 // added to the instruction.
7272 MIB.addImm(SignExtend);
7273 MIB.addImm(1);
7274 }}};
7275}
7276
7277/// This is used for computing addresses like this:
7278///
7279/// ldr x1, [x2, x3, lsl #3]
7280///
7281/// Where x2 is the base register, and x3 is an offset register. The shift-left
7282/// is a constant value specific to this load instruction. That is, we'll never
7283/// see anything other than a 3 here (which corresponds to the size of the
7284/// element being loaded.)
7286AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7287 MachineOperand &Root, unsigned SizeInBytes) const {
7288 if (!Root.isReg())
7289 return std::nullopt;
7291
7292 // We want to find something like this:
7293 //
7294 // val = G_CONSTANT LegalShiftVal
7295 // shift = G_SHL off_reg val
7296 // ptr = G_PTR_ADD base_reg shift
7297 // x = G_LOAD ptr
7298 //
7299 // And fold it into this addressing mode:
7300 //
7301 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7302
7303 // Check if we can find the G_PTR_ADD.
7304 MachineInstr *PtrAdd =
7305 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7306 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7307 return std::nullopt;
7308
7309 // Now, try to match an opcode which will match our specific offset.
7310 // We want a G_SHL or a G_MUL.
7311  MachineInstr *OffsetInst =
7312      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7313 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7314 OffsetInst->getOperand(0), SizeInBytes,
7315 /*WantsExt=*/false);
7316}
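// Illustrative end-to-end example for the fold above (assuming an 8-byte
// load, i.e. SizeInBytes == 8):
//   %c:gpr(s64) = G_CONSTANT i64 3
//   %off:gpr(s64) = G_SHL %idx, %c
//   %addr:gpr(p0) = G_PTR_ADD %base, %off
//   %val:gpr(s64) = G_LOAD %addr
// becomes "ldr x0, [xBase, xIdx, lsl #3]". A G_MUL by 8 is accepted as well,
// since it is rewritten into the equivalent shift by log2(8).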
7317
7318/// This is used for computing addresses like this:
7319///
7320/// ldr x1, [x2, x3]
7321///
7322/// Where x2 is the base register, and x3 is an offset register.
7323///
7324/// When possible (or profitable) to fold a G_PTR_ADD into the address
7325/// calculation, this will do so. Otherwise, it will return std::nullopt.
7327AArch64InstructionSelector::selectAddrModeRegisterOffset(
7328 MachineOperand &Root) const {
7330
7331 // We need a GEP.
7332 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7333 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7334 return std::nullopt;
7335
7336 // If this is used more than once, let's not bother folding.
7337 // TODO: Check if they are memory ops. If they are, then we can still fold
7338 // without having to recompute anything.
7339 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7340 return std::nullopt;
7341
7342 // Base is the GEP's LHS, offset is its RHS.
7343 return {{[=](MachineInstrBuilder &MIB) {
7344 MIB.addUse(Gep->getOperand(1).getReg());
7345 },
7346 [=](MachineInstrBuilder &MIB) {
7347 MIB.addUse(Gep->getOperand(2).getReg());
7348 },
7349 [=](MachineInstrBuilder &MIB) {
7350 // Need to add both immediates here to make sure that they are both
7351 // added to the instruction.
7352 MIB.addImm(0);
7353 MIB.addImm(0);
7354 }}};
7355}
7356
7357/// This is intended to be equivalent to selectAddrModeXRO in
7358/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7360AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7361 unsigned SizeInBytes) const {
7363 if (!Root.isReg())
7364 return std::nullopt;
7365 MachineInstr *PtrAdd =
7366 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7367 if (!PtrAdd)
7368 return std::nullopt;
7369
7370  // Check for an immediate which cannot be encoded in the [base + imm]
7371 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7372 // end up with code like:
7373 //
7374 // mov x0, wide
7375 // add x1 base, x0
7376 // ldr x2, [x1, x0]
7377 //
7378 // In this situation, we can use the [base, xreg] addressing mode to save an
7379 // add/sub:
7380 //
7381 // mov x0, wide
7382 // ldr x2, [base, x0]
7383  auto ValAndVReg =
7384      getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7385 if (ValAndVReg) {
7386 unsigned Scale = Log2_32(SizeInBytes);
7387 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7388
7389 // Skip immediates that can be selected in the load/store addressing
7390 // mode.
7391 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7392 ImmOff < (0x1000 << Scale))
7393 return std::nullopt;
7394
7395 // Helper lambda to decide whether or not it is preferable to emit an add.
7396 auto isPreferredADD = [](int64_t ImmOff) {
7397 // Constants in [0x0, 0xfff] can be encoded in an add.
7398 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7399 return true;
7400
7401 // Can it be encoded in an add lsl #12?
7402 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7403 return false;
7404
7405 // It can be encoded in an add lsl #12, but we may not want to. If it is
7406 // possible to select this as a single movz, then prefer that. A single
7407 // movz is faster than an add with a shift.
7408 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7409 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7410 };
7411
7412 // If the immediate can be encoded in a single add/sub, then bail out.
7413 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7414 return std::nullopt;
7415 }
7416
7417 // Try to fold shifts into the addressing mode.
7418 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7419 if (AddrModeFns)
7420 return AddrModeFns;
7421
7422 // If that doesn't work, see if it's possible to fold in registers from
7423 // a GEP.
7424 return selectAddrModeRegisterOffset(Root);
7425}
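// Illustrative example: an 8-byte load at "base + 0x40000" is out of range for
// the scaled unsigned 12-bit form, and 0x40000 can be materialized with a
// single movz, so an add/sub of the offset is not preferred; the offset stays
// in a register and the access is selected as "ldr x2, [xBase, xOff]".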
7426
7427/// This is used for computing addresses like this:
7428///
7429/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7430///
7431/// Where we have a 64-bit base register, a 32-bit offset register, and an
7432/// extend (which may or may not be signed).
7434AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7435 unsigned SizeInBytes) const {
7437
7438 MachineInstr *PtrAdd =
7439 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7440 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7441 return std::nullopt;
7442
7443 MachineOperand &LHS = PtrAdd->getOperand(1);
7444 MachineOperand &RHS = PtrAdd->getOperand(2);
7445 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7446
7447 // The first case is the same as selectAddrModeXRO, except we need an extend.
7448 // In this case, we try to find a shift and extend, and fold them into the
7449 // addressing mode.
7450 //
7451 // E.g.
7452 //
7453 // off_reg = G_Z/S/ANYEXT ext_reg
7454 // val = G_CONSTANT LegalShiftVal
7455 // shift = G_SHL off_reg val
7456 // ptr = G_PTR_ADD base_reg shift
7457 // x = G_LOAD ptr
7458 //
7459 // In this case we can get a load like this:
7460 //
7461 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7462 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7463 SizeInBytes, /*WantsExt=*/true);
7464 if (ExtendedShl)
7465 return ExtendedShl;
7466
7467 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7468 //
7469 // e.g.
7470 // ldr something, [base_reg, ext_reg, sxtw]
7471 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7472 return std::nullopt;
7473
7474 // Check if this is an extend. We'll get an extend type if it is.
7475  AArch64_AM::ShiftExtendType Ext =
7476      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7477  if (Ext == AArch64_AM::InvalidShiftExtend)
7478 return std::nullopt;
7479
7480 // Need a 32-bit wide register.
7481 MachineIRBuilder MIB(*PtrAdd);
7482 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7483 AArch64::GPR32RegClass, MIB);
7484 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7485
7486 // Base is LHS, offset is ExtReg.
7487 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7488 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7489 [=](MachineInstrBuilder &MIB) {
7490 MIB.addImm(SignExtend);
7491 MIB.addImm(0);
7492 }}};
7493}
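// Illustrative example for the WRO mode (assuming a 4-byte load):
//   %ext:gpr(s64) = G_SEXT %idx:gpr(s32)
//   %c:gpr(s64)   = G_CONSTANT i64 2
//   %off:gpr(s64) = G_SHL %ext, %c
//   %addr:gpr(p0) = G_PTR_ADD %base, %off
//   %val:gpr(s32) = G_LOAD %addr
// becomes "ldr w0, [xBase, wIdx, sxtw #2]"; with a G_ZEXT instead of the
// G_SEXT the extend field would be uxtw.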
7494
7495/// Select a "register plus unscaled signed 9-bit immediate" address. This
7496/// should only match when there is an offset that is not valid for a scaled
7497/// immediate addressing mode. The "Size" argument is the size in bytes of the
7498/// memory reference, which is needed here to know what is valid for a scaled
7499/// immediate.
7501AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7502 unsigned Size) const {
7503  MachineRegisterInfo &MRI =
7504      Root.getParent()->getParent()->getParent()->getRegInfo();
7505
7506 if (!Root.isReg())
7507 return std::nullopt;
7508
7509 if (!isBaseWithConstantOffset(Root, MRI))
7510 return std::nullopt;
7511
7512 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7513
7514 MachineOperand &OffImm = RootDef->getOperand(2);
7515 if (!OffImm.isReg())
7516 return std::nullopt;
7517 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7518 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7519 return std::nullopt;
7520 int64_t RHSC;
7521 MachineOperand &RHSOp1 = RHS->getOperand(1);
7522 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7523 return std::nullopt;
7524 RHSC = RHSOp1.getCImm()->getSExtValue();
7525
7526 if (RHSC >= -256 && RHSC < 256) {
7527 MachineOperand &Base = RootDef->getOperand(1);
7528 return {{
7529 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7530 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7531 }};
7532 }
7533 return std::nullopt;
7534}
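// Illustrative example: a 4-byte load at "base - 8" cannot use the scaled
// unsigned form (the offset is negative), but -8 lies within [-256, 256), so
// this pattern matches and the access is selected as "ldur w0, [xBase, #-8]".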
7535
7537AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7538 unsigned Size,
7539 MachineRegisterInfo &MRI) const {
7540 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7541 return std::nullopt;
7542 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7543 if (Adrp.getOpcode() != AArch64::ADRP)
7544 return std::nullopt;
7545
7546 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7547 auto Offset = Adrp.getOperand(1).getOffset();
7548 if (Offset % Size != 0)
7549 return std::nullopt;
7550
7551 auto GV = Adrp.getOperand(1).getGlobal();
7552 if (GV->isThreadLocal())
7553 return std::nullopt;
7554
7555 auto &MF = *RootDef.getParent()->getParent();
7556 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7557 return std::nullopt;
7558
7559 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7560 MachineIRBuilder MIRBuilder(RootDef);
7561 Register AdrpReg = Adrp.getOperand(0).getReg();
7562 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7563 [=](MachineInstrBuilder &MIB) {
7564 MIB.addGlobalAddress(GV, Offset,
7565 OpFlags | AArch64II::MO_PAGEOFF |
7567 }}};
7568}
7569
7570/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7571/// "Size" argument is the size in bytes of the memory reference, which
7572/// determines the scale.
7574AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7575 unsigned Size) const {
7576 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7577  MachineRegisterInfo &MRI = MF.getRegInfo();
7578
7579 if (!Root.isReg())
7580 return std::nullopt;
7581
7582 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7583 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7584 return {{
7585 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7586 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7587 }};
7588 }
7589
7590  CodeModel::Model CM = MF.getTarget().getCodeModel();
7591  // Check if we can fold in the ADD of small code model ADRP + ADD address.
7592 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7593 // globals into the offset.
7594 MachineInstr *RootParent = Root.getParent();
7595 if (CM == CodeModel::Small &&
7596 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7597 STI.isTargetDarwin())) {
7598 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7599 if (OpFns)
7600 return OpFns;
7601 }
7602
7603 if (isBaseWithConstantOffset(Root, MRI)) {
7604 MachineOperand &LHS = RootDef->getOperand(1);
7605 MachineOperand &RHS = RootDef->getOperand(2);
7606 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7607 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7608
7609 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7610 unsigned Scale = Log2_32(Size);
7611 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7612 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7613 return {{
7614 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7615 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7616 }};
7617
7618 return {{
7619 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7620 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7621 }};
7622 }
7623 }
7624
7625 // Before falling back to our general case, check if the unscaled
7626 // instructions can handle this. If so, that's preferable.
7627 if (selectAddrModeUnscaled(Root, Size))
7628 return std::nullopt;
7629
7630 return {{
7631 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7632 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7633 }};
7634}
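// Illustrative example: a 4-byte load at "base + 8" has Scale == 2, so the
// rendered operands are the base register and the scaled immediate
// 8 >> 2 == 2, i.e. the encoding behind "ldr w0, [xBase, #8]". An offset of 6
// fails the alignment check above and is left to the unscaled (ldur) path.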
7635
7636/// Given a shift instruction, return the correct shift type for that
7637/// instruction.
7638static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7639  switch (MI.getOpcode()) {
7640  default:
7641    return AArch64_AM::InvalidShiftExtend;
7642 case TargetOpcode::G_SHL:
7643 return AArch64_AM::LSL;
7644 case TargetOpcode::G_LSHR:
7645 return AArch64_AM::LSR;
7646 case TargetOpcode::G_ASHR:
7647 return AArch64_AM::ASR;
7648 case TargetOpcode::G_ROTR:
7649 return AArch64_AM::ROR;
7650 }
7651}
7652
7653/// Select a "shifted register" operand. If the value is not shifted, set the
7654/// shift operand to a default value of "lsl 0".
7656AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7657 bool AllowROR) const {
7658 if (!Root.isReg())
7659 return std::nullopt;
7660  MachineRegisterInfo &MRI =
7661      Root.getParent()->getParent()->getParent()->getRegInfo();
7662
7663 // Check if the operand is defined by an instruction which corresponds to
7664 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7665  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7666  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7667 if (ShType == AArch64_AM::InvalidShiftExtend)
7668 return std::nullopt;
7669 if (ShType == AArch64_AM::ROR && !AllowROR)
7670 return std::nullopt;
7671 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7672 return std::nullopt;
7673
7674 // Need an immediate on the RHS.
7675 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7676 auto Immed = getImmedFromMO(ShiftRHS);
7677 if (!Immed)
7678 return std::nullopt;
7679
7680 // We have something that we can fold. Fold in the shift's LHS and RHS into
7681 // the instruction.
7682 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7683 Register ShiftReg = ShiftLHS.getReg();
7684
7685 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7686 unsigned Val = *Immed & (NumBits - 1);
7687 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7688
7689 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7690 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7691}
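// Illustrative example: with "%s:gpr(s64) = G_SHL %x, G_CONSTANT i64 3" as an
// operand of a G_ADD, the fold above renders %x plus the shifter immediate
// LSL #3, giving "add x0, x1, x2, lsl #3". A G_ROTR is only folded when the
// caller passes AllowROR, which is typically the case for the
// logical-instruction patterns.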
7692
7693AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7694 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7695 unsigned Opc = MI.getOpcode();
7696
7697 // Handle explicit extend instructions first.
7698 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7699 unsigned Size;
7700 if (Opc == TargetOpcode::G_SEXT)
7701 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7702 else
7703 Size = MI.getOperand(2).getImm();
7704 assert(Size != 64 && "Extend from 64 bits?");
7705 switch (Size) {
7706 case 8:
7707 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7708 case 16:
7709 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7710 case 32:
7711 return AArch64_AM::SXTW;
7712    default:
7713      return AArch64_AM::InvalidShiftExtend;
7714 }
7715 }
7716
7717 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7718 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7719 assert(Size != 64 && "Extend from 64 bits?");
7720 switch (Size) {
7721 case 8:
7722 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7723 case 16:
7724 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7725 case 32:
7726 return AArch64_AM::UXTW;
7727    default:
7728      return AArch64_AM::InvalidShiftExtend;
7729 }
7730 }
7731
7732 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7733 // on the RHS.
7734  if (Opc != TargetOpcode::G_AND)
7735    return AArch64_AM::InvalidShiftExtend;
7736
7737 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7738  if (!MaybeAndMask)
7739    return AArch64_AM::InvalidShiftExtend;
7740 uint64_t AndMask = *MaybeAndMask;
7741 switch (AndMask) {
7742  default:
7743    return AArch64_AM::InvalidShiftExtend;
7744 case 0xFF:
7745 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7746 case 0xFFFF:
7747 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7748 case 0xFFFFFFFF:
7749 return AArch64_AM::UXTW;
7750 }
7751}
7752
7753Register AArch64InstructionSelector::moveScalarRegClass(
7754 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7755 MachineRegisterInfo &MRI = *MIB.getMRI();
7756 auto Ty = MRI.getType(Reg);
7757 assert(!Ty.isVector() && "Expected scalars only!");
7758 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7759 return Reg;
7760
7761 // Create a copy and immediately select it.
7762 // FIXME: We should have an emitCopy function?
7763 auto Copy = MIB.buildCopy({&RC}, {Reg});
7764 selectCopy(*Copy, TII, MRI, TRI, RBI);
7765 return Copy.getReg(0);
7766}
7767
7768/// Select an "extended register" operand. This operand folds in an extend
7769/// followed by an optional left shift.
7771AArch64InstructionSelector::selectArithExtendedRegister(
7772 MachineOperand &Root) const {
7773 if (!Root.isReg())
7774 return std::nullopt;
7775  MachineRegisterInfo &MRI =
7776      Root.getParent()->getParent()->getParent()->getRegInfo();
7777
7778 uint64_t ShiftVal = 0;
7779  Register ExtReg;
7780  AArch64_AM::ShiftExtendType Ext;
7781 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7782 if (!RootDef)
7783 return std::nullopt;
7784
7785 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7786 return std::nullopt;
7787
7788 // Check if we can fold a shift and an extend.
7789 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7790 // Look for a constant on the RHS of the shift.
7791 MachineOperand &RHS = RootDef->getOperand(2);
7792 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7793 if (!MaybeShiftVal)
7794 return std::nullopt;
7795 ShiftVal = *MaybeShiftVal;
7796 if (ShiftVal > 4)
7797 return std::nullopt;
7798 // Look for a valid extend instruction on the LHS of the shift.
7799 MachineOperand &LHS = RootDef->getOperand(1);
7800 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7801 if (!ExtDef)
7802 return std::nullopt;
7803    Ext = getExtendTypeForInst(*ExtDef, MRI);
7804    if (Ext == AArch64_AM::InvalidShiftExtend)
7805 return std::nullopt;
7806 ExtReg = ExtDef->getOperand(1).getReg();
7807 } else {
7808 // Didn't get a shift. Try just folding an extend.
7809    Ext = getExtendTypeForInst(*RootDef, MRI);
7810    if (Ext == AArch64_AM::InvalidShiftExtend)
7811 return std::nullopt;
7812 ExtReg = RootDef->getOperand(1).getReg();
7813
7814 // If we have a 32 bit instruction which zeroes out the high half of a
7815 // register, we get an implicit zero extend for free. Check if we have one.
7816 // FIXME: We actually emit the extend right now even though we don't have
7817 // to.
7818 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7819 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7820 if (isDef32(*ExtInst))
7821 return std::nullopt;
7822 }
7823 }
7824
7825 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7826 // copy.
7827 MachineIRBuilder MIB(*RootDef);
7828 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7829
7830 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7831 [=](MachineInstrBuilder &MIB) {
7832 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7833 }}};
7834}
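// Illustrative example: with
//   %e:gpr(s64) = G_SEXT %idx:gpr(s32)
//   %s:gpr(s64) = G_SHL %e, G_CONSTANT i64 2
// used by a G_ADD, the renderers above produce the 32-bit source register and
// an arith-extend immediate encoding sxtw with a left shift of 2, i.e.
// "add x0, x1, w2, sxtw #2". Shift amounts greater than 4 are rejected.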
7835
7837AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7838 if (!Root.isReg())
7839 return std::nullopt;
7840  MachineRegisterInfo &MRI =
7841      Root.getParent()->getParent()->getParent()->getRegInfo();
7842
7843 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7844 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7845 STI.isLittleEndian())
7846 Extract =
7847 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7848 if (!Extract)
7849 return std::nullopt;
7850
7851 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7852 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7853 Register ExtReg = Extract->MI->getOperand(2).getReg();
7854 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7855 }
7856 }
7857 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7858    LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7859    auto LaneIdx = getIConstantVRegValWithLookThrough(
7860 Extract->MI->getOperand(2).getReg(), MRI);
7861 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7862 LaneIdx->Value.getSExtValue() == 1) {
7863 Register ExtReg = Extract->MI->getOperand(1).getReg();
7864 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7865 }
7866 }
7867
7868 return std::nullopt;
7869}
7870
7871void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7872 const MachineInstr &MI,
7873 int OpIdx) const {
7874 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7875 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7876 "Expected G_CONSTANT");
7877 std::optional<int64_t> CstVal =
7878 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7879 assert(CstVal && "Expected constant value");
7880 MIB.addImm(*CstVal);
7881}
7882
7883void AArch64InstructionSelector::renderLogicalImm32(
7884 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7885 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7886 "Expected G_CONSTANT");
7887  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7888  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7889 MIB.addImm(Enc);
7890}
7891
7892void AArch64InstructionSelector::renderLogicalImm64(
7893 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7894 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7895 "Expected G_CONSTANT");
7896  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7897  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7898 MIB.addImm(Enc);
7899}
7900
7901void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7902 const MachineInstr &MI,
7903 int OpIdx) const {
7904 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7905 "Expected G_UBSANTRAP");
7906 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7907}
7908
7909void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7910 const MachineInstr &MI,
7911 int OpIdx) const {
7912 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7913 "Expected G_FCONSTANT");
7914 MIB.addImm(
7915 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7916}
7917
7918void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7919 const MachineInstr &MI,
7920 int OpIdx) const {
7921 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7922 "Expected G_FCONSTANT");
7923 MIB.addImm(
7924 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7925}
7926
7927void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7928 const MachineInstr &MI,
7929 int OpIdx) const {
7930 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7931 "Expected G_FCONSTANT");
7932 MIB.addImm(
7933 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7934}
7935
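// Unlike the plain FP renderers above, this one bitcasts the f32 constant to
// its integer representation and emits it using the AdvSIMD "modified
// immediate" type-4 encoding (AArch64_AM::encodeAdvSIMDModImmType4).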
7936void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7937 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7938 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7939 "Expected G_FCONSTANT");
7941 .getFPImm()
7942 ->getValueAPF()
7943 .bitcastToAPInt()
7944 .getZExtValue()));
7945}
7946
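// Returns true if MI is a load or store whose single memory operand accesses
// exactly NumBytes bytes; presumably used as a predicate by the imported
// selection patterns.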
7947bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7948 const MachineInstr &MI, unsigned NumBytes) const {
7949 if (!MI.mayLoadOrStore())
7950 return false;
7951 assert(MI.hasOneMemOperand() &&
7952 "Expected load/store to have only one mem op!");
7953 return (*MI.memoperands_begin())->getSize() == NumBytes;
7954}
7955
7956bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7957 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7958 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7959 return false;
7960
7961 // Only return true if we know the operation will zero-out the high half of
7962 // the 64-bit register. Truncates can be subregister copies, which don't
7963 // zero out the high bits. Copies and other copy-like instructions can be
7964 // fed by truncates, or could be lowered as subregister copies.
7965 switch (MI.getOpcode()) {
7966 default:
7967 return true;
7968 case TargetOpcode::COPY:
7969 case TargetOpcode::G_BITCAST:
7970 case TargetOpcode::G_TRUNC:
7971 case TargetOpcode::G_PHI:
7972 return false;
7973 }
7974}
7975
7976
7977// Perform fixups on the given PHI instruction's operands to force them all
7978 // to be the same as the destination regbank.
7979 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7980 const AArch64RegisterBankInfo &RBI) {
7981 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7982 Register DstReg = MI.getOperand(0).getReg();
7983 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7984 assert(DstRB && "Expected PHI dst to have regbank assigned");
7985 MachineIRBuilder MIB(MI);
7986
7987 // Go through each operand and ensure it has the same regbank.
7988 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7989 if (!MO.isReg())
7990 continue;
7991 Register OpReg = MO.getReg();
7992 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7993 if (RB != DstRB) {
7994 // Insert a cross-bank copy.
7995 auto *OpDef = MRI.getVRegDef(OpReg);
7996 const LLT &Ty = MRI.getType(OpReg);
7997 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7998
7999 // Any instruction we insert must appear after all PHIs in the block
8000 // for the block to be valid MIR.
8001 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
8002 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8003 InsertPt = OpDefBB.getFirstNonPHI();
8004 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
8005 auto Copy = MIB.buildCopy(Ty, OpReg);
8006 MRI.setRegBank(Copy.getReg(0), *DstRB);
8007 MO.setReg(Copy.getReg(0));
8008 }
8009 }
8010}
8011
8012void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8013 // We're looking for PHIs, build a list so we don't invalidate iterators.
8014 MachineRegisterInfo &MRI = MF.getRegInfo();
8015 SmallVector<MachineInstr *, 32> Phis;
8016 for (auto &BB : MF) {
8017 for (auto &MI : BB) {
8018 if (MI.getOpcode() == TargetOpcode::G_PHI)
8019 Phis.emplace_back(&MI);
8020 }
8021 }
8022
8023 for (auto *MI : Phis) {
8024 // We need to do some work here if the operand types are < 16 bit and they
8025 // are split across fpr/gpr banks. Since all types <32b on gpr
8026 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8027 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8028 // be selecting heterogeneous regbanks for operands if possible, but we
8029 // still need to be able to deal with it here.
8030 //
8031 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8032 // one other operand is on the fpr bank, then we add cross-bank copies
8033 // to homogenize the operand banks. For simplicity the bank that we choose
8034 // to settle on is whatever bank the def operand has. For example:
8035 //
8036 // %endbb:
8037 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8038 // =>
8039 // %bb2:
8040 // ...
8041 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8042 // ...
8043 // %endbb:
8044 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8045 bool HasGPROp = false, HasFPROp = false;
8046 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8047 if (!MO.isReg())
8048 continue;
8049 const LLT &Ty = MRI.getType(MO.getReg());
8050 if (!Ty.isValid() || !Ty.isScalar())
8051 break;
8052 if (Ty.getSizeInBits() >= 32)
8053 break;
8054 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8055 // If for some reason we don't have a regbank yet, don't try anything.
8056 if (!RB)
8057 break;
8058
8059 if (RB->getID() == AArch64::GPRRegBankID)
8060 HasGPROp = true;
8061 else
8062 HasFPROp = true;
8063 }
8064 // We have heterogeneous regbanks, so fix them up.
8065 if (HasGPROp && HasFPROp)
8066 fixupPHIOpBanks(*MI, MRI, RBI);
8067 }
8068}
8069
8070 namespace llvm {
8071 InstructionSelector *
8072 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8073 const AArch64Subtarget &Subtarget,
8074 const AArch64RegisterBankInfo &RBI) {
8075 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8076}
8077}