#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
  InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
  ProduceNonFlagSettingCondBr =
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,

  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
                   unsigned Opc1, unsigned Opc2, bool isExt);

  unsigned emitConstantPoolEntry(const Constant *CPVal,
      std::optional<CmpInst::Predicate> = std::nullopt) const;
  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            const ComplexRendererFns &RenderFns = std::nullopt) const;
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
  std::pair<MachineInstr *, AArch64CC::CondCode>
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
  bool IsAddrOperand) const;
  unsigned SizeInBytes) const;
  bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }
  unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }
  bool AllowROR = false) const;
  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }
  bool IsLoadStore = false) const;
  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  bool tryOptSelect(GSelect &Sel);
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
#include "AArch64GenGlobalISel.inc"
#include "AArch64GenGlobalISel.inc"
                        bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    return GetAllRegSet ? &AArch64::GPR32allRegClass
                        : &AArch64::GPR32RegClass;
    return GetAllRegSet ? &AArch64::GPR64allRegClass
                        : &AArch64::GPR64RegClass;
    return &AArch64::XSeqPairsClassRegClass;

  if (RB.getID() == AArch64::FPRRegBankID) {
    return &AArch64::FPR8RegClass;
    return &AArch64::FPR16RegClass;
    return &AArch64::FPR32RegClass;
    return &AArch64::FPR64RegClass;
    return &AArch64::FPR128RegClass;
                        bool GetAllRegSet = false) {
           "Expected FPR regbank for scalable type size");
    return &AArch64::ZPRRegClass;
  }

  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
           "Unexpected scalable register size");
    return &AArch64::ZPRRegClass;

    switch (SizeInBits) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }
  switch (TRI.getRegSizeInBits(*RC)) {
  if (RC != &AArch64::FPR32RegClass)
  LLVM_DEBUG(
      dbgs() << "Couldn't find appropriate subregister for register class.");
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
  case AArch64::FPRRegBankID:
                           const unsigned RegClassIDs[],
  unsigned NumRegs = Regs.size();
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
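// createDTuple/createQTuple: wrap createTuple with the D- and Q-register tuple
// classes (DD..DDDD / QQ..QQQQ) and their dsub*/qsub* sub-register indices,
// used when selecting the multi-register NEON load/store intrinsics.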
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
  auto &MBB = *MI.getParent();
  auto &MRI = MF.getRegInfo();

  else if (Root.isReg()) {
    Immed = ValAndVReg->Value.getSExtValue();

  LLT Ty = MRI.getType(I.getOperand(0).getReg());

  for (auto &MO : I.operands()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
  case AArch64::GPRRegBankID:
    switch (GenericOpc) {
    case TargetOpcode::G_SHL:
      return AArch64::LSLVWr;
    case TargetOpcode::G_LSHR:
      return AArch64::LSRVWr;
    case TargetOpcode::G_ASHR:
      return AArch64::ASRVWr;
  } else if (OpSize == 64) {
    switch (GenericOpc) {
    case TargetOpcode::G_PTR_ADD:
      return AArch64::ADDXrr;
    case TargetOpcode::G_SHL:
      return AArch64::LSLVXr;
    case TargetOpcode::G_LSHR:
      return AArch64::LSRVXr;
    case TargetOpcode::G_ASHR:
      return AArch64::ASRVXr;
  case AArch64::FPRRegBankID:
    switch (GenericOpc) {
    case TargetOpcode::G_FADD:
      return AArch64::FADDSrr;
    case TargetOpcode::G_FSUB:
      return AArch64::FSUBSrr;
    case TargetOpcode::G_FMUL:
      return AArch64::FMULSrr;
    case TargetOpcode::G_FDIV:
      return AArch64::FDIVSrr;
    switch (GenericOpc) {
    case TargetOpcode::G_FADD:
      return AArch64::FADDDrr;
    case TargetOpcode::G_FSUB:
      return AArch64::FSUBDrr;
    case TargetOpcode::G_FMUL:
      return AArch64::FMULDrr;
    case TargetOpcode::G_FDIV:
      return AArch64::FDIVDrr;
    case TargetOpcode::G_OR:
      return AArch64::ORRv8i8;
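// Select the unsigned-offset (".ui") load/store opcode for a given register
// bank and access size; GPR accesses use the integer LDR*/STR* forms and FPR
// accesses the scalar FP/vector forms.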
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  case AArch64::GPRRegBankID:
    return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    return isStore ? AArch64::STRWui : AArch64::LDRWui;
    return isStore ? AArch64::STRXui : AArch64::LDRXui;
  case AArch64::FPRRegBankID:
    return isStore ? AArch64::STRBui : AArch64::LDRBui;
    return isStore ? AArch64::STRHui : AArch64::LDRHui;
    return isStore ? AArch64::STRSui : AArch64::LDRSui;
    return isStore ? AArch64::STRDui : AArch64::LDRDui;
    return isStore ? AArch64::STRQui : AArch64::LDRQui;
932 assert(SrcReg.
isValid() &&
"Expected a valid source register?");
933 assert(To &&
"Destination register class cannot be null");
940 RegOp.
setReg(SubRegCopy.getReg(0));
944 if (!
I.getOperand(0).getReg().isPhysical())
954static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
958 Register DstReg =
I.getOperand(0).getReg();
959 Register SrcReg =
I.getOperand(1).getReg();
974 if (SrcRegBank != DstRegBank &&
993 if (Reg.isPhysical())
995 LLT Ty =
MRI.getType(Reg);
998 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
1000 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1001 RC = getRegClassForTypeOnBank(Ty, RB);
1004 dbgs() <<
"Warning: DBG_VALUE operand has unexpected size/bank\n");
1017 Register DstReg =
I.getOperand(0).getReg();
1018 Register SrcReg =
I.getOperand(1).getReg();
1037 LLVM_DEBUG(
dbgs() <<
"Couldn't determine source register class\n");
1041 const TypeSize SrcSize =
TRI.getRegSizeInBits(*SrcRC);
1042 const TypeSize DstSize =
TRI.getRegSizeInBits(*DstRC);
1053 auto Copy = MIB.
buildCopy({DstTempRC}, {SrcReg});
1055 }
else if (SrcSize > DstSize) {
1062 }
else if (DstSize > SrcSize) {
1069 Register PromoteReg =
MRI.createVirtualRegister(PromotionRC);
1071 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1076 RegOp.
setReg(PromoteReg);
1095 if (
I.getOpcode() == TargetOpcode::G_ZEXT) {
1096 I.setDesc(
TII.get(AArch64::COPY));
1097 assert(SrcRegBank.
getID() == AArch64::GPRRegBankID);
1101 I.setDesc(
TII.get(AArch64::COPY));
1111 RBI.getRegBank(True,
MRI,
TRI)->getID() &&
1112 "Expected both select operands to have the same regbank?");
1113 LLT Ty =
MRI.getType(True);
1118 "Expected 32 bit or 64 bit select only?");
1119 const bool Is32Bit =
Size == 32;
1120 if (RBI.getRegBank(True,
MRI,
TRI)->getID() != AArch64::GPRRegBankID) {
1121 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1122 auto FCSel = MIB.
buildInstr(
Opc, {Dst}, {True, False}).addImm(CC);
1128 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1130 auto TryFoldBinOpIntoSelect = [&
Opc, Is32Bit, &CC, &
MRI,
1145 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1162 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1181 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1197 auto TryOptSelectCst = [&
Opc, &True, &False, &CC, Is32Bit, &
MRI,
1203 if (!TrueCst && !FalseCst)
1206 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1207 if (TrueCst && FalseCst) {
1208 int64_t
T = TrueCst->Value.getSExtValue();
1209 int64_t
F = FalseCst->Value.getSExtValue();
1211 if (
T == 0 &&
F == 1) {
1213 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1219 if (
T == 0 &&
F == -1) {
1221 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1229 int64_t
T = TrueCst->Value.getSExtValue();
1232 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1241 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1250 int64_t
F = FalseCst->Value.getSExtValue();
1253 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1260 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1268 Optimized |= TryFoldBinOpIntoSelect(False, True,
false);
1269 Optimized |= TryFoldBinOpIntoSelect(True, False,
true);
1291 if (ValAndVReg && ValAndVReg->Value == 0)
1298 if (ValAndVReg && ValAndVReg->Value == 0)
1402 assert(Reg.isValid() &&
"Expected valid register!");
1403 bool HasZext =
false;
1405 unsigned Opc =
MI->getOpcode();
1407 if (!
MI->getOperand(0).isReg() ||
1408 !
MRI.hasOneNonDBGUse(
MI->getOperand(0).getReg()))
1415 if (
Opc == TargetOpcode::G_ANYEXT ||
Opc == TargetOpcode::G_ZEXT ||
1416 Opc == TargetOpcode::G_TRUNC) {
1417 if (
Opc == TargetOpcode::G_ZEXT)
1420 Register NextReg =
MI->getOperand(1).getReg();
1422 if (!NextReg.
isValid() || !
MRI.hasOneNonDBGUse(NextReg))
1431 std::optional<uint64_t>
C;
1436 case TargetOpcode::G_AND:
1437 case TargetOpcode::G_XOR: {
1438 TestReg =
MI->getOperand(1).getReg();
1439 Register ConstantReg =
MI->getOperand(2).getReg();
1450 C = VRegAndVal->Value.getZExtValue();
1452 C = VRegAndVal->Value.getSExtValue();
1456 case TargetOpcode::G_ASHR:
1457 case TargetOpcode::G_LSHR:
1458 case TargetOpcode::G_SHL: {
1459 TestReg =
MI->getOperand(1).getReg();
1463 C = VRegAndVal->Value.getSExtValue();
1475 unsigned TestRegSize =
MRI.getType(TestReg).getSizeInBits();
1479 case TargetOpcode::G_AND:
1481 if ((*
C >> Bit) & 1)
1484 case TargetOpcode::G_SHL:
1487 if (*
C <= Bit && (Bit - *
C) < TestRegSize) {
1492 case TargetOpcode::G_ASHR:
1497 if (Bit >= TestRegSize)
1498 Bit = TestRegSize - 1;
1500 case TargetOpcode::G_LSHR:
1502 if ((Bit + *
C) < TestRegSize) {
1507 case TargetOpcode::G_XOR:
1516 if ((*
C >> Bit) & 1)
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  LLT Ty = MRI.getType(TestReg);
  assert(Bit < 64 && "Bit is too large!");

  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
        MIB);

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
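// tryOptAndIntoCompareBranch: when a G_AND with a power-of-two mask feeds a
// compare-against-zero branch, test that single bit directly with TBZ/TBNZ
// via emitTestBit instead of materializing the AND.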
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");

  int32_t Bit = MaybeBit->Value.exactLogBase2();
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);

  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
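// Compare-and-branch selection: G_BRCOND fed by G_FCMP becomes a conditional
// branch on the FP flags, while the G_ICMP path first tries TBZ/TBNZ and
// CBZ/CBNZ folds before falling back to an integer compare plus B.cond.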
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  I.eraseFromParent();

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1654 if (!ProduceNonFlagSettingCondBr)
1673 if (VRegAndVal && !AndInst) {
1674 int64_t
C = VRegAndVal->Value.getSExtValue();
1680 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1681 I.eraseFromParent();
1689 emitTestBit(LHS, Bit,
true, DestMBB, MIB);
1690 I.eraseFromParent();
1698 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1699 I.eraseFromParent();
1713 if (VRegAndVal && VRegAndVal->Value == 0) {
1721 tryOptAndIntoCompareBranch(
1723 I.eraseFromParent();
1728 auto LHSTy =
MRI.getType(LHS);
1729 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1731 I.eraseFromParent();
1740bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1743 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1744 if (tryOptCompareBranchFedByICmp(
I, ICmp, MIB))
1755 I.eraseFromParent();
1759bool AArch64InstructionSelector::selectCompareBranch(
1761 Register CondReg =
I.getOperand(0).getReg();
1766 if (CCMIOpc == TargetOpcode::G_FCMP)
1767 return selectCompareBranchFedByFCmp(
I, *CCMI, MIB);
1768 if (CCMIOpc == TargetOpcode::G_ICMP)
1769 return selectCompareBranchFedByICmp(
I, *CCMI, MIB);
1774 if (ProduceNonFlagSettingCondBr) {
1775 emitTestBit(CondReg, 0,
true,
1776 I.getOperand(1).getMBB(), MIB);
1777 I.eraseFromParent();
1787 .
addMBB(
I.getOperand(1).getMBB());
1788 I.eraseFromParent();
1796 assert(
MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand");
1807 return std::nullopt;
1809 int64_t Imm = *ShiftImm;
1811 return std::nullopt;
1815 return std::nullopt;
1818 return std::nullopt;
1822 return std::nullopt;
1826 return std::nullopt;
1830 return std::nullopt;
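// selectVectorSHL: when the shift amount is a splat constant, use the
// immediate SHLv*_shift encoding for the element type; otherwise fall back to
// the register-operand USHLv* form.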
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;

  I.eraseFromParent();
bool AArch64InstructionSelector::selectVectorAshrLshr(
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;

  unsigned NegOpc = 0;
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;

  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  I.eraseFromParent();
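// selectVaStartAAPCS builds the five-field AAPCS64 va_list (stack, gr_top,
// vr_top, gr_offs, vr_offs) with ADD/STR pairs; the Darwin variant only needs
// to store the on-stack argument address into the single va_list slot.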
bool AArch64InstructionSelector::selectVaStartAAPCS(
  const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
  const auto *PtrRegClass =
      STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
      TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
      TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1969 const auto VAList =
I.getOperand(0).getReg();
1972 unsigned OffsetBytes = 0;
1976 const auto PushAddress = [&](
const int FrameIndex,
const int64_t
Imm) {
1977 const Register Top =
MRI.createVirtualRegister(PtrRegClass);
1978 auto MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(), MCIDAddAddr)
1985 const auto *MMO = *
I.memoperands_begin();
1986 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(), MCIDStoreAddr)
1989 .
addImm(OffsetBytes / PtrSize)
1991 MMO->getPointerInfo().getWithOffset(OffsetBytes),
1995 OffsetBytes += PtrSize;
2011 const auto PushIntConstant = [&](
const int32_t
Value) {
2012 constexpr int IntSize = 4;
2013 const Register Temp =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2015 BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::MOVi32imm))
2020 const auto *MMO = *
I.memoperands_begin();
2021 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::STRWui))
2024 .
addImm(OffsetBytes / IntSize)
2026 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2029 OffsetBytes += IntSize;
2033 PushIntConstant(-
static_cast<int32_t
>(GPRSize));
2036 PushIntConstant(-
static_cast<int32_t
>(FPRSize));
2038 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) &&
"Unexpected offset");
2040 I.eraseFromParent();
2044bool AArch64InstructionSelector::selectVaStartDarwin(
2047 Register ListReg =
I.getOperand(0).getReg();
2049 Register ArgsAddrReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2060 BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::ADDXri))
2068 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::STRXui))
2075 I.eraseFromParent();
2079void AArch64InstructionSelector::materializeLargeCMVal(
2085 auto MovZ = MIB.
buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2096 :
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2098 if (
auto *GV = dyn_cast<GlobalValue>(V)) {
2100 GV, MovZ->getOperand(1).getOffset(), Flags));
2104 MovZ->getOperand(1).getOffset(), Flags));
2110 Register DstReg = BuildMovK(MovZ.getReg(0),
2116bool AArch64InstructionSelector::preISelLower(
MachineInstr &
I) {
2121 switch (
I.getOpcode()) {
2122 case TargetOpcode::G_STORE: {
2123 bool Changed = contractCrossBankCopyIntoStore(
I,
MRI);
2131 SrcOp.setReg(NewSrc);
2132 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass,
MRI);
2137 case TargetOpcode::G_PTR_ADD: {
2140 const auto &TL = STI.getTargetLowering();
2143 return convertPtrAddToAdd(
I,
MRI);
2145 case TargetOpcode::G_LOAD: {
2150 Register DstReg =
I.getOperand(0).getReg();
2151 const LLT DstTy =
MRI.getType(DstReg);
2157 case AArch64::G_DUP: {
2159 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2163 MRI.setType(
I.getOperand(0).getReg(),
2165 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2166 I.getOperand(1).setReg(NewSrc.getReg(0));
2169 case AArch64::G_INSERT_VECTOR_ELT: {
2171 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2172 LLT SrcVecTy =
MRI.getType(
I.getOperand(1).getReg());
2176 MRI.setType(
I.getOperand(1).getReg(),
2178 MRI.setType(
I.getOperand(0).getReg(),
2180 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2181 I.getOperand(2).setReg(NewSrc.getReg(0));
2184 case TargetOpcode::G_UITOFP:
2185 case TargetOpcode::G_SITOFP: {
2190 Register SrcReg =
I.getOperand(1).getReg();
2191 LLT SrcTy =
MRI.getType(SrcReg);
2192 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2196 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::FPRRegBankID) {
2197 if (
I.getOpcode() == TargetOpcode::G_SITOFP)
2198 I.setDesc(
TII.get(AArch64::G_SITOF));
2200 I.setDesc(
TII.get(AArch64::G_UITOF));
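// convertPtrAddToAdd: rewrite G_PTR_ADD as G_PTRTOINT + G_ADD (or G_SUB when
// the offset is a negated constant) so the imported ADD/SUB patterns can fold
// immediate and shifted-register addressing forms.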
bool AArch64InstructionSelector::convertPtrAddToAdd(
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);

  const LLT CastPtrTy =
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");

    I.getOperand(2).setReg(NegatedReg);
    I.setDesc(TII.get(TargetOpcode::G_SUB));
2256bool AArch64InstructionSelector::earlySelectSHL(
MachineInstr &
I,
2261 assert(
I.getOpcode() == TargetOpcode::G_SHL &&
"unexpected op");
2262 const auto &MO =
I.getOperand(2);
2267 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2271 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2272 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2274 if (!Imm1Fn || !Imm2Fn)
2278 MIB.
buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2281 for (
auto &RenderFn : *Imm1Fn)
2283 for (
auto &RenderFn : *Imm2Fn)
2286 I.eraseFromParent();
2290bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2292 assert(
I.getOpcode() == TargetOpcode::G_STORE &&
"Expected G_STORE");
2310 LLT DefDstTy =
MRI.getType(DefDstReg);
2311 Register StoreSrcReg =
I.getOperand(0).getReg();
2312 LLT StoreSrcTy =
MRI.getType(StoreSrcReg);
2323 if (RBI.getRegBank(StoreSrcReg,
MRI,
TRI) ==
2324 RBI.getRegBank(DefDstReg,
MRI,
TRI))
2328 I.getOperand(0).setReg(DefDstReg);
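// earlySelect handles a few opcodes before the imported tablegen patterns run:
// constant G_DUPs, zero constants that can reuse WZR/XZR, add-of-compare folds
// into CSINC, or-of-shift folds into BFM, and DMB fences, among others.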
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2340 switch (
I.getOpcode()) {
2341 case AArch64::G_DUP: {
2344 Register Src =
I.getOperand(1).getReg();
2349 Register Dst =
I.getOperand(0).getReg();
2351 MRI.getType(Dst).getNumElements(),
2354 ValAndVReg->Value.trunc(
MRI.getType(Dst).getScalarSizeInBits())));
2355 if (!emitConstantVector(Dst, CV, MIB,
MRI))
2357 I.eraseFromParent();
2360 case TargetOpcode::G_SEXT:
2363 if (selectUSMovFromExtend(
I,
MRI))
2366 case TargetOpcode::G_BR:
2368 case TargetOpcode::G_SHL:
2369 return earlySelectSHL(
I,
MRI);
2370 case TargetOpcode::G_CONSTANT: {
2371 bool IsZero =
false;
2372 if (
I.getOperand(1).isCImm())
2373 IsZero =
I.getOperand(1).getCImm()->isZero();
2374 else if (
I.getOperand(1).isImm())
2375 IsZero =
I.getOperand(1).getImm() == 0;
2380 Register DefReg =
I.getOperand(0).getReg();
2381 LLT Ty =
MRI.getType(DefReg);
2383 I.getOperand(1).ChangeToRegister(AArch64::XZR,
false);
2384 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
2386 I.getOperand(1).ChangeToRegister(AArch64::WZR,
false);
2387 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass,
MRI);
2391 I.setDesc(
TII.get(TargetOpcode::COPY));
2395 case TargetOpcode::G_ADD: {
2404 Register AddDst =
I.getOperand(0).getReg();
2405 Register AddLHS =
I.getOperand(1).getReg();
2406 Register AddRHS =
I.getOperand(2).getReg();
2408 LLT Ty =
MRI.getType(AddLHS);
2417 if (!
MRI.hasOneNonDBGUse(Reg))
2431 MRI.getType(
Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2441 Cmp = MatchCmp(AddRHS);
2445 auto &PredOp =
Cmp->getOperand(1);
2447 emitIntegerCompare(
Cmp->getOperand(2),
2448 Cmp->getOperand(3), PredOp, MIB);
2452 emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
2453 I.eraseFromParent();
2456 case TargetOpcode::G_OR: {
2460 Register Dst =
I.getOperand(0).getReg();
2461 LLT Ty =
MRI.getType(Dst);
2480 if (ShiftImm >
Size || ((1ULL << ShiftImm) - 1ULL) !=
uint64_t(MaskImm))
2483 int64_t Immr =
Size - ShiftImm;
2484 int64_t Imms =
Size - ShiftImm - 1;
2485 unsigned Opc =
Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2486 emitInstr(
Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2487 I.eraseFromParent();
2490 case TargetOpcode::G_FENCE: {
2491 if (
I.getOperand(1).getImm() == 0)
2495 .
addImm(
I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2496 I.eraseFromParent();
2505 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2506 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2513 if (Subtarget->requiresStrictAlign()) {
2515 LLVM_DEBUG(
dbgs() <<
"AArch64 GISel does not support strict-align yet\n");
2521 unsigned Opcode =
I.getOpcode();
2523 if (!
I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2526 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2529 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2530 const Register DefReg =
I.getOperand(0).getReg();
2531 const LLT DefTy =
MRI.getType(DefReg);
2534 MRI.getRegClassOrRegBank(DefReg);
2537 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
2543 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2544 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2551 I.setDesc(
TII.get(TargetOpcode::PHI));
2553 return RBI.constrainGenericRegister(DefReg, *DefRC,
MRI);
2559 if (
I.isDebugInstr())
2566 if (
I.getNumOperands() !=
I.getNumExplicitOperands()) {
2568 dbgs() <<
"Generic instruction has unexpected implicit operands\n");
2575 if (preISelLower(
I)) {
2576 Opcode =
I.getOpcode();
2587 if (selectImpl(
I, *CoverageInfo))
2591 I.getOperand(0).isReg() ?
MRI.getType(
I.getOperand(0).getReg()) :
LLT{};
2594 case TargetOpcode::G_SBFX:
2595 case TargetOpcode::G_UBFX: {
2596 static const unsigned OpcTable[2][2] = {
2597 {AArch64::UBFMWri, AArch64::UBFMXri},
2598 {AArch64::SBFMWri, AArch64::SBFMXri}};
2599 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2601 unsigned Opc = OpcTable[IsSigned][
Size == 64];
2604 assert(Cst1 &&
"Should have gotten a constant for src 1?");
2607 assert(Cst2 &&
"Should have gotten a constant for src 2?");
2608 auto LSB = Cst1->Value.getZExtValue();
2609 auto Width = Cst2->Value.getZExtValue();
2613 .
addImm(LSB + Width - 1);
2614 I.eraseFromParent();
2617 case TargetOpcode::G_BRCOND:
2618 return selectCompareBranch(
I, MF,
MRI);
2620 case TargetOpcode::G_BRINDIRECT: {
2622 if (std::optional<uint16_t> BADisc =
2623 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2627 MI.addReg(AArch64::XZR);
2628 I.eraseFromParent();
2631 I.setDesc(
TII.get(AArch64::BR));
2635 case TargetOpcode::G_BRJT:
2636 return selectBrJT(
I,
MRI);
2638 case AArch64::G_ADD_LOW: {
2644 if (BaseMI->
getOpcode() != AArch64::ADRP) {
2645 I.setDesc(
TII.get(AArch64::ADDXri));
2650 "Expected small code model");
2652 auto Op2 =
I.getOperand(2);
2653 auto MovAddr = MIB.
buildInstr(AArch64::MOVaddr, {
I.getOperand(0)}, {})
2654 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2655 Op1.getTargetFlags())
2657 Op2.getTargetFlags());
2658 I.eraseFromParent();
2662 case TargetOpcode::G_FCONSTANT:
2663 case TargetOpcode::G_CONSTANT: {
2664 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2673 const Register DefReg =
I.getOperand(0).getReg();
2674 const LLT DefTy =
MRI.getType(DefReg);
2680 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2682 <<
" constant, expected: " << s16 <<
" or " << s32
2683 <<
" or " << s64 <<
" or " << s128 <<
'\n');
2687 if (RB.
getID() != AArch64::FPRRegBankID) {
2689 <<
" constant on bank: " << RB
2690 <<
", expected: FPR\n");
2698 if (DefSize != 128 &&
I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2702 if (Ty != p0 && Ty != s8 && Ty != s16) {
2704 <<
" constant, expected: " << s32 <<
", " << s64
2705 <<
", or " << p0 <<
'\n');
2709 if (RB.
getID() != AArch64::GPRRegBankID) {
2711 <<
" constant on bank: " << RB
2712 <<
", expected: GPR\n");
2725 bool OptForSize = shouldOptForSize(&MF);
2729 if (TLI->isFPImmLegal(
I.getOperand(1).getFPImm()->getValueAPF(),
2736 auto *FPImm =
I.getOperand(1).getFPImm();
2739 LLVM_DEBUG(
dbgs() <<
"Failed to load double constant pool entry\n");
2743 I.eraseFromParent();
2744 return RBI.constrainGenericRegister(DefReg, FPRRC,
MRI);
2748 assert((DefSize == 32 || DefSize == 64) &&
"Unexpected const def size");
2750 const Register DefGPRReg =
MRI.createVirtualRegister(
2751 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2757 if (!RBI.constrainGenericRegister(DefReg, FPRRC,
MRI)) {
2758 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_FCONSTANT def operand\n");
2766 }
else if (
I.getOperand(1).isCImm()) {
2767 uint64_t Val =
I.getOperand(1).getCImm()->getZExtValue();
2768 I.getOperand(1).ChangeToImmediate(Val);
2769 }
else if (
I.getOperand(1).isImm()) {
2770 uint64_t Val =
I.getOperand(1).getImm();
2771 I.getOperand(1).ChangeToImmediate(Val);
2774 const unsigned MovOpc =
2775 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2776 I.setDesc(
TII.get(MovOpc));
2780 case TargetOpcode::G_EXTRACT: {
2781 Register DstReg =
I.getOperand(0).getReg();
2782 Register SrcReg =
I.getOperand(1).getReg();
2783 LLT SrcTy =
MRI.getType(SrcReg);
2784 LLT DstTy =
MRI.getType(DstReg);
2796 unsigned Offset =
I.getOperand(2).getImm();
2805 if (SrcRB.
getID() == AArch64::GPRRegBankID) {
2807 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {})
2809 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2811 AArch64::GPR64RegClass, NewI->getOperand(0));
2812 I.eraseFromParent();
2818 unsigned LaneIdx =
Offset / 64;
2820 DstReg, DstRB,
LLT::scalar(64), SrcReg, LaneIdx, MIB);
2823 I.eraseFromParent();
2827 I.setDesc(
TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2833 "unexpected G_EXTRACT types");
2840 .addReg(DstReg, 0, AArch64::sub_32);
2841 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
2842 AArch64::GPR32RegClass,
MRI);
2843 I.getOperand(0).setReg(DstReg);
2848 case TargetOpcode::G_INSERT: {
2849 LLT SrcTy =
MRI.getType(
I.getOperand(2).getReg());
2850 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2857 I.setDesc(
TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2858 unsigned LSB =
I.getOperand(3).getImm();
2859 unsigned Width =
MRI.getType(
I.getOperand(2).getReg()).getSizeInBits();
2860 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2865 "unexpected G_INSERT types");
2871 TII.get(AArch64::SUBREG_TO_REG))
2874 .
addUse(
I.getOperand(2).getReg())
2875 .
addImm(AArch64::sub_32);
2876 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
2877 AArch64::GPR32RegClass,
MRI);
2878 I.getOperand(2).setReg(SrcReg);
2882 case TargetOpcode::G_FRAME_INDEX: {
2889 I.setDesc(
TII.get(AArch64::ADDXri));
2898 case TargetOpcode::G_GLOBAL_VALUE: {
2901 if (
I.getOperand(1).isSymbol()) {
2902 OpFlags =
I.getOperand(1).getTargetFlags();
2906 GV =
I.getOperand(1).getGlobal();
2909 if (
TM.useEmulatedTLS())
2911 return selectTLSGlobalValue(
I,
MRI);
2913 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2918 ? AArch64::LOADgotAUTH
2919 : AArch64::LOADgot));
2920 I.getOperand(1).setTargetFlags(OpFlags);
2922 !
TM.isPositionIndependent()) {
2924 materializeLargeCMVal(
I, GV, OpFlags);
2925 I.eraseFromParent();
2928 I.setDesc(
TII.get(AArch64::ADR));
2929 I.getOperand(1).setTargetFlags(OpFlags);
2931 I.setDesc(
TII.get(AArch64::MOVaddr));
2934 MIB.addGlobalAddress(GV,
I.getOperand(1).getOffset(),
2940 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2941 return selectPtrAuthGlobalValue(
I,
MRI);
2943 case TargetOpcode::G_ZEXTLOAD:
2944 case TargetOpcode::G_LOAD:
2945 case TargetOpcode::G_STORE: {
2947 bool IsZExtLoad =
I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2960 if (Order != AtomicOrdering::NotAtomic &&
2961 Order != AtomicOrdering::Unordered &&
2962 Order != AtomicOrdering::Monotonic) {
2963 assert(!isa<GZExtLoad>(LdSt));
2964 assert(MemSizeInBytes <= 8 &&
2965 "128-bit atomics should already be custom-legalized");
2967 if (isa<GLoad>(LdSt)) {
2968 static constexpr unsigned LDAPROpcodes[] = {
2969 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2970 static constexpr unsigned LDAROpcodes[] = {
2971 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2973 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2976 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2978 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2979 AArch64::STLRW, AArch64::STLRX};
2981 if (
MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2983 Register NewVal =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2984 MIB.
buildInstr(TargetOpcode::COPY, {NewVal}, {})
2985 .addReg(
I.getOperand(0).getReg(), 0, AArch64::sub_32);
2986 I.getOperand(0).setReg(NewVal);
2988 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2999 "Load/Store pointer operand isn't a GPR");
3000 assert(
MRI.getType(PtrReg).isPointer() &&
3001 "Load/Store pointer operand isn't a pointer");
3006 LLT ValTy =
MRI.getType(ValReg);
3010 if (isa<GStore>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
3013 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3019 .addReg(ValReg, 0,
SubReg)
3021 RBI.constrainGenericRegister(Copy, *RC,
MRI);
3023 }
else if (isa<GLoad>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
3026 if (RB.
getID() == AArch64::FPRRegBankID) {
3029 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3036 MRI.setRegBank(NewDst, RB);
3039 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3043 auto SubRegRC = getRegClassForTypeOnBank(
MRI.getType(OldDst), RB);
3044 RBI.constrainGenericRegister(OldDst, *SubRegRC,
MRI);
3052 auto SelectLoadStoreAddressingMode = [&]() ->
MachineInstr * {
3053 bool IsStore = isa<GStore>(
I);
3054 const unsigned NewOpc =
3056 if (NewOpc ==
I.getOpcode())
3060 selectAddrModeIndexed(
I.getOperand(1), MemSizeInBytes);
3063 I.setDesc(
TII.get(NewOpc));
3069 auto NewInst = MIB.
buildInstr(NewOpc, {}, {},
I.getFlags());
3070 Register CurValReg =
I.getOperand(0).getReg();
3071 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3072 NewInst.cloneMemRefs(
I);
3073 for (
auto &Fn : *AddrModeFns)
3075 I.eraseFromParent();
3084 if (Opcode == TargetOpcode::G_STORE) {
3087 if (CVal && CVal->Value == 0) {
3089 case AArch64::STRWui:
3090 case AArch64::STRHHui:
3091 case AArch64::STRBBui:
3092 LoadStore->getOperand(0).setReg(AArch64::WZR);
3094 case AArch64::STRXui:
3095 LoadStore->getOperand(0).setReg(AArch64::XZR);
3101 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3102 ValTy ==
LLT::scalar(64) && MemSizeInBits == 32)) {
3105 if (
MRI.getType(
LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3109 Register LdReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3114 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3117 .
addImm(AArch64::sub_32);
3119 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3125 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3126 case TargetOpcode::G_INDEXED_SEXTLOAD:
3127 return selectIndexedExtLoad(
I,
MRI);
3128 case TargetOpcode::G_INDEXED_LOAD:
3129 return selectIndexedLoad(
I,
MRI);
3130 case TargetOpcode::G_INDEXED_STORE:
3131 return selectIndexedStore(cast<GIndexedStore>(
I),
MRI);
3133 case TargetOpcode::G_LSHR:
3134 case TargetOpcode::G_ASHR:
3135 if (
MRI.getType(
I.getOperand(0).getReg()).isVector())
3136 return selectVectorAshrLshr(
I,
MRI);
3138 case TargetOpcode::G_SHL:
3139 if (Opcode == TargetOpcode::G_SHL &&
3140 MRI.getType(
I.getOperand(0).getReg()).isVector())
3141 return selectVectorSHL(
I,
MRI);
3148 Register SrcReg =
I.getOperand(1).getReg();
3149 Register ShiftReg =
I.getOperand(2).getReg();
3150 const LLT ShiftTy =
MRI.getType(ShiftReg);
3151 const LLT SrcTy =
MRI.getType(SrcReg);
3156 auto Trunc = MIB.
buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3157 .addReg(ShiftReg, 0, AArch64::sub_32);
3158 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3159 I.getOperand(2).setReg(Trunc.getReg(0));
3163 case TargetOpcode::G_OR: {
3170 const Register DefReg =
I.getOperand(0).getReg();
3174 if (NewOpc ==
I.getOpcode())
3177 I.setDesc(
TII.get(NewOpc));
3185 case TargetOpcode::G_PTR_ADD: {
3186 emitADD(
I.getOperand(0).getReg(),
I.getOperand(1),
I.getOperand(2), MIB);
3187 I.eraseFromParent();
3191 case TargetOpcode::G_SADDE:
3192 case TargetOpcode::G_UADDE:
3193 case TargetOpcode::G_SSUBE:
3194 case TargetOpcode::G_USUBE:
3195 case TargetOpcode::G_SADDO:
3196 case TargetOpcode::G_UADDO:
3197 case TargetOpcode::G_SSUBO:
3198 case TargetOpcode::G_USUBO:
3199 return selectOverflowOp(
I,
MRI);
3201 case TargetOpcode::G_PTRMASK: {
3202 Register MaskReg =
I.getOperand(2).getReg();
3209 I.setDesc(
TII.get(AArch64::ANDXri));
3210 I.getOperand(2).ChangeToImmediate(
3215 case TargetOpcode::G_PTRTOINT:
3216 case TargetOpcode::G_TRUNC: {
3217 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3218 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3220 const Register DstReg =
I.getOperand(0).getReg();
3221 const Register SrcReg =
I.getOperand(1).getReg();
3228 dbgs() <<
"G_TRUNC/G_PTRTOINT input/output on different banks\n");
3232 if (DstRB.
getID() == AArch64::GPRRegBankID) {
3241 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC,
MRI) ||
3242 !RBI.constrainGenericRegister(DstReg, *DstRC,
MRI)) {
3243 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_TRUNC/G_PTRTOINT\n");
3247 if (DstRC == SrcRC) {
3249 }
else if (Opcode == TargetOpcode::G_TRUNC && DstTy ==
LLT::scalar(32) &&
3253 }
else if (DstRC == &AArch64::GPR32RegClass &&
3254 SrcRC == &AArch64::GPR64RegClass) {
3255 I.getOperand(1).setSubReg(AArch64::sub_32);
3258 dbgs() <<
"Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3262 I.setDesc(
TII.get(TargetOpcode::COPY));
3264 }
else if (DstRB.
getID() == AArch64::FPRRegBankID) {
3267 I.setDesc(
TII.get(AArch64::XTNv4i16));
3277 I.eraseFromParent();
3282 if (Opcode == TargetOpcode::G_PTRTOINT) {
3283 assert(DstTy.
isVector() &&
"Expected an FPR ptrtoint to be a vector");
3284 I.setDesc(
TII.get(TargetOpcode::COPY));
3292 case TargetOpcode::G_ANYEXT: {
3293 if (selectUSMovFromExtend(
I,
MRI))
3296 const Register DstReg =
I.getOperand(0).getReg();
3297 const Register SrcReg =
I.getOperand(1).getReg();
3300 if (RBDst.
getID() != AArch64::GPRRegBankID) {
3302 <<
", expected: GPR\n");
3307 if (RBSrc.
getID() != AArch64::GPRRegBankID) {
3309 <<
", expected: GPR\n");
3313 const unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
3316 LLVM_DEBUG(
dbgs() <<
"G_ANYEXT operand has no size, not a gvreg?\n");
3320 if (DstSize != 64 && DstSize > 32) {
3322 <<
", expected: 32 or 64\n");
3328 Register ExtSrc =
MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3333 .
addImm(AArch64::sub_32);
3334 I.getOperand(1).setReg(ExtSrc);
3339 case TargetOpcode::G_ZEXT:
3340 case TargetOpcode::G_SEXT_INREG:
3341 case TargetOpcode::G_SEXT: {
3342 if (selectUSMovFromExtend(
I,
MRI))
3345 unsigned Opcode =
I.getOpcode();
3346 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3347 const Register DefReg =
I.getOperand(0).getReg();
3348 Register SrcReg =
I.getOperand(1).getReg();
3349 const LLT DstTy =
MRI.getType(DefReg);
3350 const LLT SrcTy =
MRI.getType(SrcReg);
3356 if (Opcode == TargetOpcode::G_SEXT_INREG)
3357 SrcSize =
I.getOperand(2).getImm();
3363 AArch64::GPRRegBankID &&
3364 "Unexpected ext regbank");
3377 RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::GPRRegBankID;
3378 if (LoadMI && IsGPR) {
3380 unsigned BytesLoaded =
MemOp->getSize().getValue();
3387 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3389 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3390 const Register ZReg = AArch64::WZR;
3391 MIB.
buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3394 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3397 .
addImm(AArch64::sub_32);
3399 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3401 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_ZEXT destination\n");
3405 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3411 I.eraseFromParent();
3416 if (DstSize == 64) {
3417 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3419 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3425 SrcReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG,
3426 {&AArch64::GPR64RegClass}, {})
3433 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3437 }
else if (DstSize <= 32) {
3438 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3447 I.eraseFromParent();
3451 case TargetOpcode::G_FREEZE:
3454 case TargetOpcode::G_INTTOPTR:
3459 case TargetOpcode::G_BITCAST:
3467 case TargetOpcode::G_SELECT: {
3468 auto &Sel = cast<GSelect>(
I);
3469 const Register CondReg = Sel.getCondReg();
3471 const Register FReg = Sel.getFalseReg();
3473 if (tryOptSelect(Sel))
3478 Register DeadVReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3479 auto TstMI = MIB.
buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3482 if (!emitSelect(Sel.getReg(0), TReg, FReg,
AArch64CC::NE, MIB))
3484 Sel.eraseFromParent();
3487 case TargetOpcode::G_ICMP: {
3497 auto &PredOp =
I.getOperand(1);
3498 emitIntegerCompare(
I.getOperand(2),
I.getOperand(3), PredOp, MIB);
3502 emitCSINC(
I.getOperand(0).getReg(), AArch64::WZR,
3503 AArch64::WZR, InvCC, MIB);
3504 I.eraseFromParent();
3508 case TargetOpcode::G_FCMP: {
3511 if (!emitFPCompare(
I.getOperand(2).getReg(),
I.getOperand(3).getReg(), MIB,
3513 !emitCSetForFCmp(
I.getOperand(0).getReg(), Pred, MIB))
3515 I.eraseFromParent();
3518 case TargetOpcode::G_VASTART:
3519 return STI.isTargetDarwin() ? selectVaStartDarwin(
I, MF,
MRI)
3520 : selectVaStartAAPCS(
I, MF,
MRI);
3521 case TargetOpcode::G_INTRINSIC:
3522 return selectIntrinsic(
I,
MRI);
3523 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3524 return selectIntrinsicWithSideEffects(
I,
MRI);
3525 case TargetOpcode::G_IMPLICIT_DEF: {
3526 I.setDesc(
TII.get(TargetOpcode::IMPLICIT_DEF));
3527 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3528 const Register DstReg =
I.getOperand(0).getReg();
3531 RBI.constrainGenericRegister(DstReg, *DstRC,
MRI);
3534 case TargetOpcode::G_BLOCK_ADDR: {
3535 Function *BAFn =
I.getOperand(1).getBlockAddress()->getFunction();
3536 if (std::optional<uint16_t> BADisc =
3537 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3538 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3539 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3547 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
3548 AArch64::GPR64RegClass,
MRI);
3549 I.eraseFromParent();
3553 materializeLargeCMVal(
I,
I.getOperand(1).getBlockAddress(), 0);
3554 I.eraseFromParent();
3557 I.setDesc(
TII.get(AArch64::MOVaddrBA));
3558 auto MovMI =
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(AArch64::MOVaddrBA),
3559 I.getOperand(0).getReg())
3563 I.getOperand(1).getBlockAddress(), 0,
3565 I.eraseFromParent();
3569 case AArch64::G_DUP: {
3575 if (RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
3576 AArch64::GPRRegBankID)
3578 LLT VecTy =
MRI.getType(
I.getOperand(0).getReg());
3580 I.setDesc(
TII.get(AArch64::DUPv8i8gpr));
3582 I.setDesc(
TII.get(AArch64::DUPv16i8gpr));
3584 I.setDesc(
TII.get(AArch64::DUPv4i16gpr));
3586 I.setDesc(
TII.get(AArch64::DUPv8i16gpr));
3591 case TargetOpcode::G_BUILD_VECTOR:
3592 return selectBuildVector(
I,
MRI);
3593 case TargetOpcode::G_MERGE_VALUES:
3595 case TargetOpcode::G_UNMERGE_VALUES:
3597 case TargetOpcode::G_SHUFFLE_VECTOR:
3598 return selectShuffleVector(
I,
MRI);
3599 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3600 return selectExtractElt(
I,
MRI);
3601 case TargetOpcode::G_CONCAT_VECTORS:
3602 return selectConcatVectors(
I,
MRI);
3603 case TargetOpcode::G_JUMP_TABLE:
3604 return selectJumpTable(
I,
MRI);
3605 case TargetOpcode::G_MEMCPY:
3606 case TargetOpcode::G_MEMCPY_INLINE:
3607 case TargetOpcode::G_MEMMOVE:
3608 case TargetOpcode::G_MEMSET:
3609 assert(STI.hasMOPS() &&
"Shouldn't get here without +mops feature");
3610 return selectMOPS(
I,
MRI);
3616bool AArch64InstructionSelector::selectAndRestoreState(
MachineInstr &
I) {
3623bool AArch64InstructionSelector::selectMOPS(
MachineInstr &GI,
3627 case TargetOpcode::G_MEMCPY:
3628 case TargetOpcode::G_MEMCPY_INLINE:
3629 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3631 case TargetOpcode::G_MEMMOVE:
3632 Mopcode = AArch64::MOPSMemoryMovePseudo;
3634 case TargetOpcode::G_MEMSET:
3636 Mopcode = AArch64::MOPSMemorySetPseudo;
3645 const Register DstPtrCopy =
MRI.cloneVirtualRegister(DstPtr.getReg());
3646 const Register SrcValCopy =
MRI.cloneVirtualRegister(SrcOrVal.getReg());
3649 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3650 const auto &SrcValRegClass =
3651 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3654 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass,
MRI);
3655 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass,
MRI);
3656 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass,
MRI);
3666 Register DefDstPtr =
MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3667 Register DefSize =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3669 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSize},
3670 {DstPtrCopy, SizeCopy, SrcValCopy});
3672 Register DefSrcPtr =
MRI.createVirtualRegister(&SrcValRegClass);
3673 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3674 {DstPtrCopy, SrcValCopy, SizeCopy});
3683 assert(
I.getOpcode() == TargetOpcode::G_BRJT &&
"Expected G_BRJT");
3684 Register JTAddr =
I.getOperand(0).getReg();
3685 unsigned JTI =
I.getOperand(1).getIndex();
3694 if (STI.isTargetMachO()) {
3699 assert(STI.isTargetELF() &&
3700 "jump table hardening only supported on MachO/ELF");
3708 I.eraseFromParent();
3712 Register TargetReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3713 Register ScratchReg =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3715 auto JumpTableInst = MIB.
buildInstr(AArch64::JumpTableDest32,
3716 {TargetReg, ScratchReg}, {JTAddr,
Index})
3717 .addJumpTableIndex(JTI);
3719 MIB.
buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3720 {
static_cast<int64_t
>(JTI)});
3722 MIB.
buildInstr(AArch64::BR, {}, {TargetReg});
3723 I.eraseFromParent();
3727bool AArch64InstructionSelector::selectJumpTable(
MachineInstr &
I,
3729 assert(
I.getOpcode() == TargetOpcode::G_JUMP_TABLE &&
"Expected jump table");
3730 assert(
I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!");
3732 Register DstReg =
I.getOperand(0).getReg();
3733 unsigned JTI =
I.getOperand(1).getIndex();
3736 MIB.
buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3739 I.eraseFromParent();
3743bool AArch64InstructionSelector::selectTLSGlobalValue(
3745 if (!STI.isTargetMachO())
3750 const auto &GlobalOp =
I.getOperand(1);
3751 assert(GlobalOp.getOffset() == 0 &&
3752 "Shouldn't have an offset on TLS globals!");
3756 MIB.
buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3759 auto Load = MIB.
buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3760 {LoadGOT.getReg(0)})
3771 assert(Opcode == AArch64::BLR);
3772 Opcode = AArch64::BLRAAZ;
3781 RBI.constrainGenericRegister(
I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3783 I.eraseFromParent();
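// emitScalarToVector: place a scalar FPR value into an IMPLICIT_DEF vector by
// an INSERT_SUBREG on the bsub/hsub/ssub/dsub sub-register matching the
// element size.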
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

  auto BuildFn = [&](unsigned SubregIndex) {
        .addImm(SubregIndex);

    return BuildFn(AArch64::bsub);
    return BuildFn(AArch64::hsub);
    return BuildFn(AArch64::ssub);
    return BuildFn(AArch64::dsub);
3817AArch64InstructionSelector::emitNarrowVector(
Register DstReg,
Register SrcReg,
3820 LLT DstTy =
MRI.getType(DstReg);
3822 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg,
MRI,
TRI));
3823 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3830 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
3836 .addReg(SrcReg, 0,
SubReg);
3837 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
3841bool AArch64InstructionSelector::selectMergeValues(
3843 assert(
I.getOpcode() == TargetOpcode::G_MERGE_VALUES &&
"unexpected opcode");
3844 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3845 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3849 if (
I.getNumOperands() != 3)
3856 Register DstReg =
I.getOperand(0).getReg();
3857 Register Src1Reg =
I.getOperand(1).getReg();
3858 Register Src2Reg =
I.getOperand(2).getReg();
3859 auto Tmp = MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3860 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3865 Src2Reg, 1, RB, MIB);
3870 I.eraseFromParent();
3874 if (RB.
getID() != AArch64::GPRRegBankID)
3880 auto *DstRC = &AArch64::GPR64RegClass;
3881 Register SubToRegDef =
MRI.createVirtualRegister(DstRC);
3883 TII.get(TargetOpcode::SUBREG_TO_REG))
3886 .
addUse(
I.getOperand(1).getReg())
3887 .
addImm(AArch64::sub_32);
3888 Register SubToRegDef2 =
MRI.createVirtualRegister(DstRC);
3891 TII.get(TargetOpcode::SUBREG_TO_REG))
3894 .
addUse(
I.getOperand(2).getReg())
3895 .
addImm(AArch64::sub_32);
3897 *
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::BFMXri))
3898 .
addDef(
I.getOperand(0).getReg())
3906 I.eraseFromParent();
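// getLaneCopyOpcode: choose the DUPi8/16/32/64 lane-copy opcode and the
// matching bsub/hsub/ssub/dsub sub-register index for a given element size.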
                             const unsigned EltSize) {
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3954 LLVM_DEBUG(
dbgs() <<
"Could not determine destination register class.\n");
3959 const LLT &VecTy =
MRI.getType(VecReg);
3961 getRegClassForTypeOnBank(VecTy, VecRB,
true);
3963 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
3970 DstReg =
MRI.createVirtualRegister(DstRC);
3973 auto Copy = MIRBuilder.
buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3974 .addReg(VecReg, 0, ExtractSubReg);
3975 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
3984 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3985 if (!ScalarToVector)
3991 MIRBuilder.
buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3995 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
3999bool AArch64InstructionSelector::selectExtractElt(
4001 assert(
I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4002 "unexpected opcode!");
4003 Register DstReg =
I.getOperand(0).getReg();
4004 const LLT NarrowTy =
MRI.getType(DstReg);
4005 const Register SrcReg =
I.getOperand(1).getReg();
4006 const LLT WideTy =
MRI.getType(SrcReg);
4009 "source register size too small!");
4010 assert(!NarrowTy.
isVector() &&
"cannot extract vector into vector!");
4014 assert(LaneIdxOp.
isReg() &&
"Lane index operand was not a register?");
4016 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
4025 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4029 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4034 I.eraseFromParent();
4038bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4040 unsigned NumElts =
I.getNumOperands() - 1;
4041 Register SrcReg =
I.getOperand(NumElts).getReg();
4042 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4043 const LLT SrcTy =
MRI.getType(SrcReg);
4045 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
4047 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
4054 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
4058 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg,
OpIdx, MIB);
4062 I.eraseFromParent();
4066 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4068  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4069         "unexpected opcode");
4072  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4073          AArch64::FPRRegBankID ||
4074      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4075          AArch64::FPRRegBankID) {
4076    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4077                         "currently unsupported.\n");
4083  unsigned NumElts = I.getNumOperands() - 1;
4084  Register SrcReg = I.getOperand(NumElts).getReg();
4085  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4086  const LLT WideTy = MRI.getType(SrcReg);
4089         "can only unmerge from vector or s128 types!");
4091         "source register size too small!");
4094    return selectSplitVectorUnmerge(I, MRI);
4098  unsigned CopyOpc = 0;
4099  unsigned ExtractSubReg = 0;
4110  unsigned NumInsertRegs = NumElts - 1;
4122                                *RBI.getRegBank(SrcReg, MRI, TRI));
4126  assert(Found && "expected to find last operand's subeg idx");
4127  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4128    Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4130        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4134    Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4137            TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4154  Register CopyTo = I.getOperand(0).getReg();
4155  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4156                       .addReg(InsertRegs[0], 0, ExtractSubReg);
4160  unsigned LaneIdx = 1;
4161  for (Register InsReg : InsertRegs) {
4162    Register CopyTo = I.getOperand(LaneIdx).getReg();
4175        MRI.getRegClassOrNull(I.getOperand(1).getReg());
4181    RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4182  I.eraseFromParent();
4186 bool AArch64InstructionSelector::selectConcatVectors(
4188  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4189         "Unexpected opcode");
4190  Register Dst = I.getOperand(0).getReg();
4191  Register Op1 = I.getOperand(1).getReg();
4192  Register Op2 = I.getOperand(2).getReg();
4193  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4196  I.eraseFromParent();
4201 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4210 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4218    RC = &AArch64::FPR128RegClass;
4219    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4222    RC = &AArch64::FPR64RegClass;
4223    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4226    RC = &AArch64::FPR32RegClass;
4227    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4230    RC = &AArch64::FPR16RegClass;
4231    Opc = AArch64::LDRHui;
4234    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4240  auto &MF = MIRBuilder.getMF();
4241  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4242  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4244    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4247    MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4251        .addConstantPoolIndex(
4267 static std::pair<unsigned, unsigned>
4269  unsigned Opc, SubregIdx;
4270  if (RB.getID() == AArch64::GPRRegBankID) {
4272      Opc = AArch64::INSvi8gpr;
4273      SubregIdx = AArch64::bsub;
4274    } else if (EltSize == 16) {
4275      Opc = AArch64::INSvi16gpr;
4276      SubregIdx = AArch64::ssub;
4277    } else if (EltSize == 32) {
4278      Opc = AArch64::INSvi32gpr;
4279      SubregIdx = AArch64::ssub;
4280    } else if (EltSize == 64) {
4281      Opc = AArch64::INSvi64gpr;
4282      SubregIdx = AArch64::dsub;
4288      Opc = AArch64::INSvi8lane;
4289      SubregIdx = AArch64::bsub;
4290    } else if (EltSize == 16) {
4291      Opc = AArch64::INSvi16lane;
4292      SubregIdx = AArch64::hsub;
4293    } else if (EltSize == 32) {
4294      Opc = AArch64::INSvi32lane;
4295      SubregIdx = AArch64::ssub;
4296    } else if (EltSize == 64) {
4297      Opc = AArch64::INSvi64lane;
4298      SubregIdx = AArch64::dsub;
4303  return std::make_pair(Opc, SubregIdx);
4307    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4309    const ComplexRendererFns &RenderFns) const {
4310  assert(Opcode && "Expected an opcode?");
4312         "Function should only be used to produce selected instructions!");
4313  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4315    for (auto &Fn : *RenderFns)
4322    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4326  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4327  auto Ty = MRI.getType(LHS.getReg());
4330  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4331  bool Is32Bit = Size == 32;
4334  if (auto Fns = selectArithImmed(RHS))
4335    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4339  if (auto Fns = selectNegArithImmed(RHS))
4340    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4344  if (auto Fns = selectArithExtendedRegister(RHS))
4345    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4349  if (auto Fns = selectShiftedRegister(RHS))
4350    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4352  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4360 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4361 {{AArch64::ADDXri, AArch64::ADDWri},
4362 {AArch64::ADDXrs, AArch64::ADDWrs},
4363 {AArch64::ADDXrr, AArch64::ADDWrr},
4364 {AArch64::SUBXri, AArch64::SUBWri},
4365 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4366 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4373 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4374 {{AArch64::ADDSXri, AArch64::ADDSWri},
4375 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4376 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4377 {AArch64::SUBSXri, AArch64::SUBSWri},
4378 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4379 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4386 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4387 {{AArch64::SUBSXri, AArch64::SUBSWri},
4388 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4389 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4390 {AArch64::ADDSXri, AArch64::ADDSWri},
4391 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4392 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
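// --- Illustrative sketch (not part of the original source) ------------------
// The three OpcTable arrays above feed emitAddSub(), which indexes them as
// [AddressingForm][Is32Bit]: row 0 = reg+imm12, row 1 = reg+shifted-reg,
// row 2 = reg+reg, row 3 = negated imm12 (so an ADD of a negative constant
// becomes a SUB and vice versa), row 4 = reg+extended-reg. The toy lookup
// below mirrors that indexing with plain integers; the enum names are
// hypothetical and exist only for this example.
#include <array>

enum AddrForm { Imm12 = 0, ShiftedReg = 1, RegReg = 2, NegImm12 = 3, ExtReg = 4 };

inline unsigned pickAddSubOpcode(
    const std::array<std::array<unsigned, 2>, 5> &Table, AddrForm Form,
    bool Is32Bit) {
  // Column 1 holds the 32-bit (W-register) opcode, column 0 the 64-bit one.
  return Table[Form][Is32Bit];
}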
4399  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4401  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4402  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4403  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4410  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4412  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4413  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4414  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4421  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4422  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4423  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4429  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4433  bool Is32Bit = (RegSize == 32);
4434  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4435                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
4436                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4440    int64_t Imm = ValAndVReg->Value.getSExtValue();
4443      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4450  if (auto Fns = selectLogicalShiftedRegister(RHS))
4451    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4452  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4455 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4458  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4465  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4467  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4469  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4470  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4473 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4477  LLT Ty = MRI.getType(Dst);
4479         "Expected a 32-bit scalar register?");
4481  const Register ZReg = AArch64::WZR;
4486    return emitCSINC(Dst, ZReg, ZReg, InvCC1,
4492  emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
4493  emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
4494  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
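// --- Illustrative note (not part of the original source) --------------------
// A boolean FCMP result is materialized with CSINC (the CSET alias): CSINC
// Wd, Wn, Wm, cc yields Wn when cc holds and Wm+1 otherwise, so "CSINC wd,
// wzr, wzr, inverted(cc)" produces 1 exactly when cc holds. Predicates that
// need two condition checks get one such CSET per condition ORed together, as
// the ORRWrr above shows. A plain-integer model of the CSINC semantics:
#include <cstdint>

inline uint32_t csincModel(uint32_t N, uint32_t M, bool CondHolds) {
  // CSINC Wd, Wn, Wm, cc  ==>  Wd = cc ? Wn : Wm + 1
  return CondHolds ? N : M + 1;
}
// CSET Wd, cc is CSINC Wd, WZR, WZR, inverted(cc): yields 1 when cc holds.
inline uint32_t csetModel(bool CondHolds) {
  return csincModel(0, 0, /*CondHolds=*/!CondHolds);
}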
4499 MachineInstr *AArch64InstructionSelector::emitFPCompare(
4501    std::optional<CmpInst::Predicate> Pred) const {
4503  LLT Ty = MRI.getType(LHS);
4507  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4518  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4522      ShouldUseImm = true;
4526  unsigned CmpOpcTbl[2][3] = {
4527      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4528      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4530      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4542 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4551  const LLT Op1Ty = MRI.getType(Op1);
4552  const LLT Op2Ty = MRI.getType(Op2);
4554  if (Op1Ty != Op2Ty) {
4555    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4558  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4561    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4577      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4579      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4580  if (!WidenedOp1 || !WidenedOp2) {
4581    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4586  unsigned InsertOpc, InsSubRegIdx;
4587  std::tie(InsertOpc, InsSubRegIdx) =
4591    Dst = MRI.createVirtualRegister(DstRC);
4611  if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4612    Size = TRI.getRegSizeInBits(*RC);
4614    Size = MRI.getType(Dst).getSizeInBits();
4616  assert(Size <= 64 && "Expected 64 bits or less only!");
4617  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4618  unsigned Opc = OpcTable[Size == 64];
4627  unsigned Opcode = I.getOpcode();
4631  bool NeedsNegatedCarry =
4632      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4642  if (SrcMI == I.getPrevNode()) {
4643    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4644      bool ProducesNegatedCarry = CarrySrcMI->isSub();
4645      if (NeedsNegatedCarry == ProducesNegatedCarry &&
4646          CarrySrcMI->isUnsigned() &&
4647          CarrySrcMI->getCarryOutReg() == CarryReg &&
4648          selectAndRestoreState(*SrcMI))
4653  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4655  if (NeedsNegatedCarry) {
4658    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4662  auto Fns = select12BitValueWithLeftShift(1);
4663  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
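// --- Illustrative note (not part of the original source) --------------------
// Before an ADCS/SBCS can consume a carry produced by generic code, the 0/1
// carry value has to be turned back into the NZCV C flag, which is what the
// two emitInstr calls above do. On AArch64, SUBS sets C when the subtraction
// does not borrow, so "SUBS dead, WZR, Carry" gives C = (Carry == 0) (the
// negated form a subtract chain wants) and "SUBS dead, Carry, #1" gives
// C = (Carry >= 1). A plain-integer model of those two flag computations
// (helper names are ours):
#include <cstdint>

inline bool carryFlagForAddChain(uint32_t CarryBit) {
  // SUBS dead, Carry, #1  -> C set iff no borrow, i.e. Carry >= 1.
  return CarryBit >= 1;
}
inline bool carryFlagForSubChain(uint32_t CarryBit) {
  // SUBS dead, WZR, Carry -> C set iff 0 >= Carry (unsigned), i.e. Carry == 0.
  return CarryBit == 0;
}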
4666 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4668  auto &CarryMI = cast<GAddSubCarryOut>(I);
4670  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4672    emitCarryIn(I, CarryInMI->getCarryInReg());
4676  auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4677                                CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4679  Register CarryOutReg = CarryMI.getCarryOutReg();
4682  if (!MRI.use_nodbg_empty(CarryOutReg)) {
4688    emitCSINC(CarryOutReg, ZReg, ZReg,
4689              getInvertedCondCode(OpAndCC.second), MIB);
4692  I.eraseFromParent();
4696 std::pair<MachineInstr *, AArch64CC::CondCode>
4697 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4704  case TargetOpcode::G_SADDO:
4705    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4706  case TargetOpcode::G_UADDO:
4707    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4708  case TargetOpcode::G_SSUBO:
4709    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4710  case TargetOpcode::G_USUBO:
4711    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4712  case TargetOpcode::G_SADDE:
4713    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4714  case TargetOpcode::G_UADDE:
4715    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4716  case TargetOpcode::G_SSUBE:
4717    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4718  case TargetOpcode::G_USUBE:
4719    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
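// --- Illustrative note (not part of the original source) --------------------
// The switch above pairs each generic overflow opcode with one flag-setting
// instruction and the condition that reads the overflow out of NZCV: signed
// add/sub use VS (signed overflow), unsigned add uses HS (carry set), and
// unsigned sub uses LO (carry clear, i.e. a borrow). The mapping can be
// summarized by a small function; the pair layout here is purely for
// illustration.
#include <utility>

enum class CarryCond { VS, HS, LO };

inline std::pair<const char *, CarryCond>
overflowLowering(bool IsSigned, bool IsSub, bool UsesCarryIn) {
  const char *Inst = UsesCarryIn ? (IsSub ? "SBCS" : "ADCS")
                                 : (IsSub ? "SUBS" : "ADDS");
  CarryCond CC =
      IsSigned ? CarryCond::VS : (IsSub ? CarryCond::LO : CarryCond::HS);
  return {Inst, CC};
}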
4739                                unsigned Depth = 0) {
4740  if (!MRI.hasOneNonDBGUse(Val))
4744  if (isa<GAnyCmp>(ValDef)) {
4746    MustBeFirst = false;
4752  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4753    bool IsOR = Opcode == TargetOpcode::G_OR;
4765    if (MustBeFirstL && MustBeFirstR)
4771      if (!CanNegateL && !CanNegateR)
4775      CanNegate = WillNegate && CanNegateL && CanNegateR;
4778      MustBeFirst = !CanNegate;
4780      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4783      MustBeFirst = MustBeFirstL || MustBeFirstR;
4790 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4795    LLT OpTy = MRI.getType(LHS);
4797    std::optional<ValueAndVReg> C;
4801    if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4802      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4803    else if (C->Value.ule(31))
4804      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4806      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4812      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4813      CCmpOpc = AArch64::FCCMPHrr;
4816      CCmpOpc = AArch64::FCCMPSrr;
4819      CCmpOpc = AArch64::FCCMPDrr;
4829  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4830    CCmp.addImm(C->Value.getZExtValue());
4831  else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4832    CCmp.addImm(C->Value.abs().getZExtValue());
4840 MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4847  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4853    if (isa<GICmp>(Cmp)) {
4864      ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4875    auto Dst = MRI.cloneVirtualRegister(LHS);
4876    if (isa<GICmp>(Cmp))
4877      return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4878    return emitFPCompare(Cmp->getOperand(2).getReg(),
4879                         Cmp->getOperand(3).getReg(), MIB);
4884  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4886  bool IsOR = Opcode == TargetOpcode::G_OR;
4892  assert(ValidL && "Valid conjunction/disjunction tree");
4899  assert(ValidR && "Valid conjunction/disjunction tree");
4904  assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4913  bool NegateAfterAll;
4914  if (Opcode == TargetOpcode::G_OR) {
4917      assert(CanNegateR && "at least one side must be negatable");
4918      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4922      NegateAfterR = true;
4925      NegateR = CanNegateR;
4926      NegateAfterR = !CanNegateR;
4929    NegateAfterAll = !Negate;
4931    assert(Opcode == TargetOpcode::G_AND &&
4932           "Valid conjunction/disjunction tree");
4933    assert(!Negate && "Valid conjunction/disjunction tree");
4937    NegateAfterR = false;
4938    NegateAfterAll = false;
4954 MachineInstr *AArch64InstructionSelector::emitConjunction(
4956  bool DummyCanNegate;
4957  bool DummyMustBeFirst;
4964 bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4976 bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5000  if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5002    for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5005      if (UI.getOpcode() != TargetOpcode::G_SELECT)
5011  unsigned CondOpc = CondDef->getOpcode();
5012  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5013    if (tryOptSelectConjunction(I, *CondDef))
5019  if (CondOpc == TargetOpcode::G_ICMP) {
5048  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5049             I.getOperand(3).getReg(), CondCode, MIB);
5050  I.eraseFromParent();
5054 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5058         "Unexpected MachineOperand");
5083    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5102    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5114      LHSDef->getOpcode() == TargetOpcode::G_AND) {
5117    if (!ValAndVReg || ValAndVReg->Value != 0)
5127 bool AArch64InstructionSelector::selectShuffleVector(
5129  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5130  Register Src1Reg = I.getOperand(1).getReg();
5131  const LLT Src1Ty = MRI.getType(Src1Reg);
5132  Register Src2Reg = I.getOperand(2).getReg();
5133  const LLT Src2Ty = MRI.getType(Src2Reg);
5144    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5151  for (int Val : Mask) {
5154    Val = Val < 0 ? 0 : Val;
5155    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5173        emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5180      IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5184        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5190            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5191    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5192    I.eraseFromParent();
5200  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5203  I.eraseFromParent();
5207 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5217    DstReg = MRI.createVirtualRegister(DstRC);
5219  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5222  if (RB.getID() == AArch64::FPRRegBankID) {
5223    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5226                   .addUse(InsSub->getOperand(0).getReg())
5238 bool AArch64InstructionSelector::selectUSMovFromExtend(
5240  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5241      MI.getOpcode() != TargetOpcode::G_ZEXT &&
5242      MI.getOpcode() != TargetOpcode::G_ANYEXT)
5244  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5245  const Register DefReg = MI.getOperand(0).getReg();
5246  const LLT DstTy = MRI.getType(DefReg);
5249  if (DstSize != 32 && DstSize != 64)
5253      MI.getOperand(1).getReg(), MRI);
5259  const LLT VecTy = MRI.getType(Src0);
5264    const MachineInstr *ScalarToVector = emitScalarToVector(
5265        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5266    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5272    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5274    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5276    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5278    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5280    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5289  if (DstSize == 64 && !IsSigned) {
5290    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5291    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5292    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5295              .addImm(AArch64::sub_32);
5296    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5298    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5301  MI.eraseFromParent();
5305 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5308  if (DstSize == 128) {
5309    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5311    Op = AArch64::MOVIv16b_ns;
5313    Op = AArch64::MOVIv8b_ns;
5320    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5327 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5332  if (DstSize == 128) {
5333    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5335    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5337    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5357 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5362  if (DstSize == 128) {
5363    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5365    Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5367    Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5393 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5397  if (DstSize == 128) {
5398    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5400    Op = AArch64::MOVIv2d_ns;
5402    Op = AArch64::MOVID;
5408    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5415 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5420  if (DstSize == 128) {
5421    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5423    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5425    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5445 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5449  bool IsWide = false;
5450  if (DstSize == 128) {
5451    if (Bits.getHiBits(64) != Bits.getLoBits(64))
5453    Op = AArch64::FMOVv4f32_ns;
5456    Op = AArch64::FMOVv2f32_ns;
5465    Op = AArch64::FMOVv2f64_ns;
5469    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
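// --- Illustrative note (not part of the original source) --------------------
// Every tryAdvSIMDModImm* helper above first checks that a 128-bit request is
// really a 64-bit pattern repeated twice (Bits.getHiBits(64) ==
// Bits.getLoBits(64)), because the MOVI/MVNI/FMOV "modified immediate"
// encodings describe at most 64 distinct bits and the hardware replicates
// them across the vector. Two standalone models of that kind of splat test
// on plain integers (helper names are ours):
#include <cstdint>

// A 128-bit constant, given as two 64-bit halves, is only a candidate for a
// vector modified-immediate when both halves agree.
inline bool isRepeated64BitPattern(uint64_t Hi, uint64_t Lo) { return Hi == Lo; }

// Roughly what the byte-sized MOVI form additionally requires: every byte of
// the 64-bit half must be the same value.
inline bool isByteSplat(uint64_t V) {
  return V == (V & 0xff) * 0x0101010101010101ULL;
}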
5474 bool AArch64InstructionSelector::selectIndexedExtLoad(
5476  auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5478  Register WriteBack = ExtLd.getWritebackReg();
5481  LLT Ty = MRI.getType(Dst);
5483  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5484  bool IsPre = ExtLd.isPre();
5485  bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5486  unsigned InsertIntoSubReg = 0;
5491  bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5492  if ((IsSExt && IsFPR) || Ty.isVector())
5500  if (MemSizeBits == 8) {
5503      Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5505      Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5506    NewLdDstTy = IsDst64 ? s64 : s32;
5508    Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5509    InsertIntoSubReg = AArch64::bsub;
5512    Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5513    InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5516  } else if (MemSizeBits == 16) {
5519      Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5521      Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5522    NewLdDstTy = IsDst64 ? s64 : s32;
5524    Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5525    InsertIntoSubReg = AArch64::hsub;
5528    Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5529    InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5532  } else if (MemSizeBits == 32) {
5534    Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5537      Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5538      InsertIntoSubReg = AArch64::ssub;
5541      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5542      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5554          .addImm(Cst->getSExtValue());
5559  if (InsertIntoSubReg) {
5561    auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5564                        .addImm(InsertIntoSubReg);
5565    RBI.constrainGenericRegister(
5567        *getRegClassForTypeOnBank(MRI.getType(Dst),
5568                                  *RBI.getRegBank(Dst, MRI, TRI)),
5574  MI.eraseFromParent();
5579 bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5581  auto &Ld = cast<GIndexedLoad>(MI);
5583  Register WriteBack = Ld.getWritebackReg();
5586  assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5587         "Unexpected type for indexed load");
5588  unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5590  if (MemSize < MRI.getType(Dst).getSizeInBytes())
5591    return selectIndexedExtLoad(MI, MRI);
5595    static constexpr unsigned GPROpcodes[] = {
5596        AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5598    static constexpr unsigned FPROpcodes[] = {
5599        AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5601    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5606    static constexpr unsigned GPROpcodes[] = {
5607        AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5609    static constexpr unsigned FPROpcodes[] = {
5610        AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5611        AArch64::LDRDpost, AArch64::LDRQpost};
5612    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5624  MI.eraseFromParent();
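// --- Illustrative note (not part of the original source) --------------------
// The GPROpcodes/FPROpcodes arrays above are indexed by log2 of the access
// size in bytes (0 -> byte, 1 -> half, 2 -> word, ...), with the register
// bank deciding which table is used and the pre/post-increment flavour
// deciding which block of tables applies. The index computation itself is
// just a log2, modeled here without any LLVM dependencies:
#include <cstdint>

inline unsigned memSizeToOpcodeIndex(unsigned SizeInBytes) {
  unsigned Idx = 0;
  while ((1u << Idx) < SizeInBytes)
    ++Idx;                       // 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3, 16 -> 4
  return Idx;                    // used as GPROpcodes[Idx] / FPROpcodes[Idx]
}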
5628 bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5634  LLT ValTy = MRI.getType(Val);
5639    static constexpr unsigned GPROpcodes[] = {
5640        AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5642    static constexpr unsigned FPROpcodes[] = {
5643        AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5646    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5651    static constexpr unsigned GPROpcodes[] = {
5652        AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5654    static constexpr unsigned FPROpcodes[] = {
5655        AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5656        AArch64::STRDpost, AArch64::STRQpost};
5658    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5669  Str.cloneMemRefs(I);
5671  I.eraseFromParent();
5679  LLT DstTy = MRI.getType(Dst);
5682  if (DstSize == 128) {
5684        MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5689  if (DstSize == 64) {
5692            .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5695            .addReg(Mov.getReg(0), 0, AArch64::dsub);
5696    RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5702  APInt SplatValueAsInt =
5703      isa<ConstantFP>(SplatValue)
5704          ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5705          : SplatValue->getUniqueInteger();
5733  if (auto *NewOp = TryMOVIWithBits(DefBits))
5737  auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5741    APInt NegBits(DstSize, 0);
5742    unsigned NumElts = DstSize / NumBits;
5743    for (unsigned i = 0; i < NumElts; i++)
5744      NegBits |= Neg << (NumBits * i);
5745    NegBits = DefBits ^ NegBits;
5749    if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5750      Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5752      return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5757  if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5758      (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5759      (STI.hasFullFP16() &&
5760       (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5766  LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5770  auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5771  RBI.constrainGenericRegister(
5772      Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
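// --- Illustrative note (not part of the original source) --------------------
// TryWithFNeg is a rescue path for splat constants that are not directly
// encodable as a MOVI/MVNI/FMOV immediate: flip the sign bit of every
// NumBits-wide lane and retry; if the flipped pattern is encodable, emit that
// MOVI and an FNEG afterwards to restore the original bits. The lane-wise
// sign flip is an XOR with a repeated sign-bit mask, modeled here on one
// 64-bit half (helper name is ours):
#include <cstdint>

inline uint64_t flipLaneSignBits(uint64_t Bits, unsigned LaneBits) {
  uint64_t Mask = 0;
  for (unsigned Shift = 0; Shift < 64; Shift += LaneBits)
    Mask |= (1ULL << (LaneBits - 1)) << Shift;   // sign bit of each lane
  return Bits ^ Mask;
}
// Example: flipLaneSignBits(Bits, 32) toggles bits 31 and 63, which is the
// per-lane change that an FNEGv4f32 would undo after the MOVI.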
5776 bool AArch64InstructionSelector::tryOptConstantBuildVec(
5778  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5780  assert(DstSize <= 128 && "Unexpected build_vec type!");
5786  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5792          const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5793    else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5794                                  I.getOperand(Idx).getReg(), MRI)))
5796          const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5801  if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5803  I.eraseFromParent();
5807 bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5813  Register Dst = I.getOperand(0).getReg();
5814  Register EltReg = I.getOperand(1).getReg();
5815  LLT EltTy = MRI.getType(EltReg);
5823    return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5831      getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5836  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5840  I.eraseFromParent();
5842  return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5845 bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5847  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5850  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5851  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5854  if (tryOptConstantBuildVec(I, DstTy, MRI))
5856  if (tryOptBuildVecToSubregToReg(I, MRI))
5859  if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5866                                     I.getOperand(1).getReg(), MIB);
5876  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5879    Register OpReg = I.getOperand(i).getReg();
5881    if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5882      PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5889  if (DstSize < 128) {
5892        getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5895    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5903    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5904      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5910    Register DstReg = I.getOperand(0).getReg();
5912    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5915    RBI.constrainGenericRegister(DstReg, *RC, MRI);
5933  if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5935        getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5936    RBI.constrainGenericRegister(DstReg, *RC, MRI);
5940  I.eraseFromParent();
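// --- Illustrative note (not part of the original source) --------------------
// Once the constant-pool and SUBREG_TO_REG shortcuts above fail,
// selectBuildVector falls back to: move element 0 into a 128-bit vector
// register, insert each remaining *defined* element one lane at a time, and
// copy a dsub/ssub subregister out at the end if the destination is narrower
// than 128 bits. A toy model of the insert loop on a 4 x 32-bit vector, with
// undef lanes represented as std::nullopt and skipped exactly as the loop
// skips G_IMPLICIT_DEF operands:
#include <array>
#include <cstdint>
#include <optional>

inline std::array<uint32_t, 4>
buildVectorModel(const std::array<std::optional<uint32_t>, 4> &Ops) {
  std::array<uint32_t, 4> Vec{};          // starts as an implicit-def vector
  for (unsigned Lane = 0; Lane < 4; ++Lane)
    if (Ops[Lane])
      Vec[Lane] = *Ops[Lane];             // models emitLaneInsert(..., Lane, ...)
  return Vec;
}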
5944 bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5947  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5949  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5951  LLT Ty = MRI.getType(I.getOperand(0).getReg());
5954         "Destination must be 64 bits or 128 bits?");
5955  unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5956  auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5957  assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5959  Load.cloneMemRefs(I);
5961  Register SelectedLoadDst = Load->getOperand(0).getReg();
5962  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5963    auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5964                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
5973 bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5975  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5977  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5979  LLT Ty = MRI.getType(I.getOperand(0).getReg());
5982  auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5984  std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5985                 [](auto MO) { return MO.getReg(); });
5989          return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6004                  .addImm(LaneNo->getZExtValue())
6006  Load.cloneMemRefs(I);
6008  Register SelectedLoadDst = Load->getOperand(0).getReg();
6009  unsigned SubReg = AArch64::qsub0;
6010  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6011    auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6012                              {Narrow ? DstOp(&AArch64::FPR128RegClass)
6015                   .addReg(SelectedLoadDst, 0, SubReg + Idx);
6020        !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6026 void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6030  LLT Ty = MRI.getType(I.getOperand(1).getReg());
6034  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6035                 Regs.begin(), [](auto MO) { return MO.getReg(); });
6044 bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6047  LLT Ty = MRI.getType(I.getOperand(1).getReg());
6051  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6052                 Regs.begin(), [](auto MO) { return MO.getReg(); });
6056          return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6069          .addImm(LaneNo->getZExtValue())
6076 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6079  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6090  case Intrinsic::aarch64_ldxp:
6091  case Intrinsic::aarch64_ldaxp: {
6093        IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6094        {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6100  case Intrinsic::aarch64_neon_ld1x2: {
6101    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6104      Opc = AArch64::LD1Twov8b;
6106      Opc = AArch64::LD1Twov16b;
6108      Opc = AArch64::LD1Twov4h;
6110      Opc = AArch64::LD1Twov8h;
6112      Opc = AArch64::LD1Twov2s;
6114      Opc = AArch64::LD1Twov4s;
6116      Opc = AArch64::LD1Twov2d;
6117    else if (Ty == S64 || Ty == P0)
6118      Opc = AArch64::LD1Twov1d;
6121    selectVectorLoadIntrinsic(Opc, 2, I);
6124  case Intrinsic::aarch64_neon_ld1x3: {
6125    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6128      Opc = AArch64::LD1Threev8b;
6130      Opc = AArch64::LD1Threev16b;
6132      Opc = AArch64::LD1Threev4h;
6134      Opc = AArch64::LD1Threev8h;
6136      Opc = AArch64::LD1Threev2s;
6138      Opc = AArch64::LD1Threev4s;
6140      Opc = AArch64::LD1Threev2d;
6141    else if (Ty == S64 || Ty == P0)
6142      Opc = AArch64::LD1Threev1d;
6145    selectVectorLoadIntrinsic(Opc, 3, I);
6148  case Intrinsic::aarch64_neon_ld1x4: {
6149    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6152      Opc = AArch64::LD1Fourv8b;
6154      Opc = AArch64::LD1Fourv16b;
6156      Opc = AArch64::LD1Fourv4h;
6158      Opc = AArch64::LD1Fourv8h;
6160      Opc = AArch64::LD1Fourv2s;
6162      Opc = AArch64::LD1Fourv4s;
6164      Opc = AArch64::LD1Fourv2d;
6165    else if (Ty == S64 || Ty == P0)
6166      Opc = AArch64::LD1Fourv1d;
6169    selectVectorLoadIntrinsic(Opc, 4, I);
6172  case Intrinsic::aarch64_neon_ld2: {
6173    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6176      Opc = AArch64::LD2Twov8b;
6178      Opc = AArch64::LD2Twov16b;
6180      Opc = AArch64::LD2Twov4h;
6182      Opc = AArch64::LD2Twov8h;
6184      Opc = AArch64::LD2Twov2s;
6186      Opc = AArch64::LD2Twov4s;
6188      Opc = AArch64::LD2Twov2d;
6189    else if (Ty == S64 || Ty == P0)
6190      Opc = AArch64::LD1Twov1d;
6193    selectVectorLoadIntrinsic(Opc, 2, I);
6196  case Intrinsic::aarch64_neon_ld2lane: {
6197    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6200      Opc = AArch64::LD2i8;
6202      Opc = AArch64::LD2i16;
6204      Opc = AArch64::LD2i32;
6207      Opc = AArch64::LD2i64;
6210    if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6214  case Intrinsic::aarch64_neon_ld2r: {
6215    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6218      Opc = AArch64::LD2Rv8b;
6220      Opc = AArch64::LD2Rv16b;
6222      Opc = AArch64::LD2Rv4h;
6224      Opc = AArch64::LD2Rv8h;
6226      Opc = AArch64::LD2Rv2s;
6228      Opc = AArch64::LD2Rv4s;
6230      Opc = AArch64::LD2Rv2d;
6231    else if (Ty == S64 || Ty == P0)
6232      Opc = AArch64::LD2Rv1d;
6235    selectVectorLoadIntrinsic(Opc, 2, I);
6238  case Intrinsic::aarch64_neon_ld3: {
6239    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6242      Opc = AArch64::LD3Threev8b;
6244      Opc = AArch64::LD3Threev16b;
6246      Opc = AArch64::LD3Threev4h;
6248      Opc = AArch64::LD3Threev8h;
6250      Opc = AArch64::LD3Threev2s;
6252      Opc = AArch64::LD3Threev4s;
6254      Opc = AArch64::LD3Threev2d;
6255    else if (Ty == S64 || Ty == P0)
6256      Opc = AArch64::LD1Threev1d;
6259    selectVectorLoadIntrinsic(Opc, 3, I);
6262  case Intrinsic::aarch64_neon_ld3lane: {
6263    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6266      Opc = AArch64::LD3i8;
6268      Opc = AArch64::LD3i16;
6270      Opc = AArch64::LD3i32;
6273      Opc = AArch64::LD3i64;
6276    if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6280  case Intrinsic::aarch64_neon_ld3r: {
6281    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6284      Opc = AArch64::LD3Rv8b;
6286      Opc = AArch64::LD3Rv16b;
6288      Opc = AArch64::LD3Rv4h;
6290      Opc = AArch64::LD3Rv8h;
6292      Opc = AArch64::LD3Rv2s;
6294      Opc = AArch64::LD3Rv4s;
6296      Opc = AArch64::LD3Rv2d;
6297    else if (Ty == S64 || Ty == P0)
6298      Opc = AArch64::LD3Rv1d;
6301    selectVectorLoadIntrinsic(Opc, 3, I);
6304  case Intrinsic::aarch64_neon_ld4: {
6305    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6308      Opc = AArch64::LD4Fourv8b;
6310      Opc = AArch64::LD4Fourv16b;
6312      Opc = AArch64::LD4Fourv4h;
6314      Opc = AArch64::LD4Fourv8h;
6316      Opc = AArch64::LD4Fourv2s;
6318      Opc = AArch64::LD4Fourv4s;
6320      Opc = AArch64::LD4Fourv2d;
6321    else if (Ty == S64 || Ty == P0)
6322      Opc = AArch64::LD1Fourv1d;
6325    selectVectorLoadIntrinsic(Opc, 4, I);
6328  case Intrinsic::aarch64_neon_ld4lane: {
6329    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6332      Opc = AArch64::LD4i8;
6334      Opc = AArch64::LD4i16;
6336      Opc = AArch64::LD4i32;
6339      Opc = AArch64::LD4i64;
6342    if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6346  case Intrinsic::aarch64_neon_ld4r: {
6347    LLT Ty = MRI.getType(I.getOperand(0).getReg());
6350      Opc = AArch64::LD4Rv8b;
6352      Opc = AArch64::LD4Rv16b;
6354      Opc = AArch64::LD4Rv4h;
6356      Opc = AArch64::LD4Rv8h;
6358      Opc = AArch64::LD4Rv2s;
6360      Opc = AArch64::LD4Rv4s;
6362      Opc = AArch64::LD4Rv2d;
6363    else if (Ty == S64 || Ty == P0)
6364      Opc = AArch64::LD4Rv1d;
6367    selectVectorLoadIntrinsic(Opc, 4, I);
6370  case Intrinsic::aarch64_neon_st1x2: {
6371    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6374      Opc = AArch64::ST1Twov8b;
6376      Opc = AArch64::ST1Twov16b;
6378      Opc = AArch64::ST1Twov4h;
6380      Opc = AArch64::ST1Twov8h;
6382      Opc = AArch64::ST1Twov2s;
6384      Opc = AArch64::ST1Twov4s;
6386      Opc = AArch64::ST1Twov2d;
6387    else if (Ty == S64 || Ty == P0)
6388      Opc = AArch64::ST1Twov1d;
6391    selectVectorStoreIntrinsic(I, 2, Opc);
6394  case Intrinsic::aarch64_neon_st1x3: {
6395    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6398      Opc = AArch64::ST1Threev8b;
6400      Opc = AArch64::ST1Threev16b;
6402      Opc = AArch64::ST1Threev4h;
6404      Opc = AArch64::ST1Threev8h;
6406      Opc = AArch64::ST1Threev2s;
6408      Opc = AArch64::ST1Threev4s;
6410      Opc = AArch64::ST1Threev2d;
6411    else if (Ty == S64 || Ty == P0)
6412      Opc = AArch64::ST1Threev1d;
6415    selectVectorStoreIntrinsic(I, 3, Opc);
6418  case Intrinsic::aarch64_neon_st1x4: {
6419    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6422      Opc = AArch64::ST1Fourv8b;
6424      Opc = AArch64::ST1Fourv16b;
6426      Opc = AArch64::ST1Fourv4h;
6428      Opc = AArch64::ST1Fourv8h;
6430      Opc = AArch64::ST1Fourv2s;
6432      Opc = AArch64::ST1Fourv4s;
6434      Opc = AArch64::ST1Fourv2d;
6435    else if (Ty == S64 || Ty == P0)
6436      Opc = AArch64::ST1Fourv1d;
6439    selectVectorStoreIntrinsic(I, 4, Opc);
6442  case Intrinsic::aarch64_neon_st2: {
6443    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6446      Opc = AArch64::ST2Twov8b;
6448      Opc = AArch64::ST2Twov16b;
6450      Opc = AArch64::ST2Twov4h;
6452      Opc = AArch64::ST2Twov8h;
6454      Opc = AArch64::ST2Twov2s;
6456      Opc = AArch64::ST2Twov4s;
6458      Opc = AArch64::ST2Twov2d;
6459    else if (Ty == S64 || Ty == P0)
6460      Opc = AArch64::ST1Twov1d;
6463    selectVectorStoreIntrinsic(I, 2, Opc);
6466  case Intrinsic::aarch64_neon_st3: {
6467    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6470      Opc = AArch64::ST3Threev8b;
6472      Opc = AArch64::ST3Threev16b;
6474      Opc = AArch64::ST3Threev4h;
6476      Opc = AArch64::ST3Threev8h;
6478      Opc = AArch64::ST3Threev2s;
6480      Opc = AArch64::ST3Threev4s;
6482      Opc = AArch64::ST3Threev2d;
6483    else if (Ty == S64 || Ty == P0)
6484      Opc = AArch64::ST1Threev1d;
6487    selectVectorStoreIntrinsic(I, 3, Opc);
6490  case Intrinsic::aarch64_neon_st4: {
6491    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6494      Opc = AArch64::ST4Fourv8b;
6496      Opc = AArch64::ST4Fourv16b;
6498      Opc = AArch64::ST4Fourv4h;
6500      Opc = AArch64::ST4Fourv8h;
6502      Opc = AArch64::ST4Fourv2s;
6504      Opc = AArch64::ST4Fourv4s;
6506      Opc = AArch64::ST4Fourv2d;
6507    else if (Ty == S64 || Ty == P0)
6508      Opc = AArch64::ST1Fourv1d;
6511    selectVectorStoreIntrinsic(I, 4, Opc);
6514  case Intrinsic::aarch64_neon_st2lane: {
6515    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6518      Opc = AArch64::ST2i8;
6520      Opc = AArch64::ST2i16;
6522      Opc = AArch64::ST2i32;
6525      Opc = AArch64::ST2i64;
6528    if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6532  case Intrinsic::aarch64_neon_st3lane: {
6533    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6536      Opc = AArch64::ST3i8;
6538      Opc = AArch64::ST3i16;
6540      Opc = AArch64::ST3i32;
6543      Opc = AArch64::ST3i64;
6546    if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6550  case Intrinsic::aarch64_neon_st4lane: {
6551    LLT Ty = MRI.getType(I.getOperand(1).getReg());
6554      Opc = AArch64::ST4i8;
6556      Opc = AArch64::ST4i16;
6558      Opc = AArch64::ST4i32;
6561      Opc = AArch64::ST4i64;
6564    if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6568  case Intrinsic::aarch64_mops_memset_tag: {
6581    Register DstDef = I.getOperand(0).getReg();
6583    Register DstUse = I.getOperand(2).getReg();
6584    Register ValUse = I.getOperand(3).getReg();
6585    Register SizeUse = I.getOperand(4).getReg();
6592    auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6593                                 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6600    I.eraseFromParent();
6604 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6606  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6611  case Intrinsic::aarch64_crypto_sha1h: {
6612    Register DstReg = I.getOperand(0).getReg();
6613    Register SrcReg = I.getOperand(2).getReg();
6616    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6617        MRI.getType(SrcReg).getSizeInBits() != 32)
6622    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6623      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6627      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6628                                   AArch64::GPR32RegClass, MRI);
6631    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6632      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6635    auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6639    if (DstReg != I.getOperand(0).getReg()) {
6643      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6644                                   AArch64::GPR32RegClass, MRI);
6647    I.eraseFromParent();
6650  case Intrinsic::ptrauth_resign: {
6651    Register DstReg = I.getOperand(0).getReg();
6652    Register ValReg = I.getOperand(2).getReg();
6653    uint64_t AUTKey = I.getOperand(3).getImm();
6654    Register AUTDisc = I.getOperand(4).getReg();
6655    uint64_t PACKey = I.getOperand(5).getImm();
6656    Register PACDisc = I.getOperand(6).getReg();
6660    std::tie(AUTConstDiscC, AUTAddrDisc) =
6665    std::tie(PACConstDiscC, PACAddrDisc) =
6668    MIB.buildCopy({AArch64::X16}, {ValReg});
6669    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6680    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6681    I.eraseFromParent();
6684  case Intrinsic::ptrauth_auth: {
6685    Register DstReg = I.getOperand(0).getReg();
6686    Register ValReg = I.getOperand(2).getReg();
6687    uint64_t AUTKey = I.getOperand(3).getImm();
6688    Register AUTDisc = I.getOperand(4).getReg();
6692    std::tie(AUTConstDiscC, AUTAddrDisc) =
6695    if (STI.isX16X17Safer()) {
6696      MIB.buildCopy({AArch64::X16}, {ValReg});
6697      MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6706          MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6717    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6718    I.eraseFromParent();
6721  case Intrinsic::frameaddress:
6722  case Intrinsic::returnaddress: {
6726    unsigned Depth = I.getOperand(2).getImm();
6727    Register DstReg = I.getOperand(0).getReg();
6728    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6730    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6731      if (!MFReturnAddr) {
6736            MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6739      if (STI.hasPAuth()) {
6740        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6747      I.eraseFromParent();
6754      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6756      MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6758      FrameAddr = NextFrame;
6761    if (IntrinID == Intrinsic::frameaddress)
6766      if (STI.hasPAuth()) {
6767        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6768        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6769        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6778    I.eraseFromParent();
6781  case Intrinsic::aarch64_neon_tbl2:
6782    SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6784  case Intrinsic::aarch64_neon_tbl3:
6785    SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6788  case Intrinsic::aarch64_neon_tbl4:
6789    SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6791  case Intrinsic::aarch64_neon_tbx2:
6792    SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6794  case Intrinsic::aarch64_neon_tbx3:
6795    SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6797  case Intrinsic::aarch64_neon_tbx4:
6798    SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6800  case Intrinsic::swift_async_context_addr:
6809    I.eraseFromParent();
6844 bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6846  Register DefReg = I.getOperand(0).getReg();
6849  Register AddrDisc = I.getOperand(3).getReg();
6850  uint64_t Disc = I.getOperand(4).getImm();
6858  if (!isUInt<16>(Disc))
6860        "constant discriminator in ptrauth global out of range [0, 0xffff]");
6863  if (!STI.isTargetELF() && !STI.isTargetMachO())
6873  if (!MRI.hasOneDef(OffsetReg))
6876    if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6902  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6905         "unsupported non-GOT op flags on ptrauth global reference");
6907         "unsupported non-GOT reference to weak ptrauth global");
6910  bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6917    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6918    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6919    MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6922        .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6926    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6927    I.eraseFromParent();
6939        "unsupported non-zero offset in weak ptrauth global reference");
6944  MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6945      .addGlobalAddress(GV, Offset)
6948  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6950  I.eraseFromParent();
6954 void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6956                                              unsigned NumVec, unsigned Opc1,
6957                                              unsigned Opc2, bool isExt) {
6958  Register DstReg = I.getOperand(0).getReg();
6963  for (unsigned i = 0; i < NumVec; i++)
6964    Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6967  Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6975  I.eraseFromParent();
6979 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6981  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6982    return std::nullopt;
6983  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6988 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6990  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6991    return std::nullopt;
6997 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6999  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7000    return std::nullopt;
7001  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7006 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7008  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7009    return std::nullopt;
7020 AArch64InstructionSelector::select12BitValueWithLeftShift(
7023  if (Immed >> 12 == 0) {
7025  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7027    Immed = Immed >> 12;
7029    return std::nullopt;
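// --- Illustrative note (not part of the original source) --------------------
// select12BitValueWithLeftShift accepts exactly the AArch64 arithmetic
// immediate form: a 12-bit unsigned value, optionally shifted left by 12, so
// constants up to 0xFFF or multiples of 0x1000 up to 0xFFF000 qualify.
// A standalone checker for that property (struct/function names are ours):
#include <cstdint>
#include <optional>

struct ArithImm { uint16_t Imm12; bool ShiftedBy12; };

inline std::optional<ArithImm> encodeArithImm(uint64_t Value) {
  if ((Value >> 12) == 0)
    return ArithImm{static_cast<uint16_t>(Value), false};
  if ((Value & 0xfff) == 0 && (Value >> 24) == 0)
    return ArithImm{static_cast<uint16_t>(Value >> 12), true};
  return std::nullopt;   // needs a MOVZ/MOVK sequence instead
}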
7042 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7049  if (MaybeImmed == std::nullopt)
7050    return std::nullopt;
7051  return select12BitValueWithLeftShift(*MaybeImmed);
7057 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7061    return std::nullopt;
7063  if (MaybeImmed == std::nullopt)
7064    return std::nullopt;
7071    return std::nullopt;
7076  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7079    Immed = ~Immed + 1ULL;
7081  if (Immed & 0xFFFFFFFFFF000000ULL)
7082    return std::nullopt;
7084  Immed &= 0xFFFFFFULL;
7085  return select12BitValueWithLeftShift(Immed);
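// --- Illustrative note (not part of the original source) --------------------
// selectNegArithImmed is what lets an ADD of a negative constant be selected
// as a SUB of the positive one (and vice versa, via the negated-immediate row
// of the emitAddSub table): the constant is two's-complement negated in the
// operand width and must then still fit the ordinary 12-bit-with-optional-
// shift form. A self-contained model of that check (names are ours):
#include <cstdint>

inline bool fitsNegatedArithImm(uint64_t Value, bool Is32Bit) {
  if (Value == 0)
    return false;                              // negating zero buys nothing
  uint64_t Neg = ~Value + 1ULL;                // two's-complement negate
  if (Is32Bit)
    Neg &= 0xFFFFFFFFULL;                      // negate within 32 bits
  if (Neg & 0xFFFFFFFFFF000000ULL)             // must fit in 24 bits overall
    return false;
  Neg &= 0xFFFFFFULL;
  return (Neg >> 12) == 0 || ((Neg & 0xfff) == 0 && (Neg >> 24) == 0);
}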
7102 std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7104  if (MI.getOpcode() == AArch64::G_SHL) {
7108            MI.getOperand(2).getReg(), MRI)) {
7109      const APInt ShiftVal = ValAndVeg->Value;
7112      return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7115  return std::nullopt;
7123 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7125    bool IsAddrOperand) const {
7129  if (MRI.hasOneNonDBGUse(DefReg) ||
7130      MI.getParent()->getParent()->getFunction().hasOptSize())
7133  if (IsAddrOperand) {
7135    if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7139    if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7146      if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7156  return all_of(MRI.use_nodbg_instructions(DefReg),
7172 AArch64InstructionSelector::selectExtendedSHL(
7174    unsigned SizeInBytes, bool WantsExt) const {
7175  assert(Base.isReg() && "Expected base to be a register operand");
7176  assert(Offset.isReg() && "Expected offset to be a register operand");
7181  unsigned OffsetOpc = OffsetInst->getOpcode();
7182  bool LookedThroughZExt = false;
7183  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7185    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7186      return std::nullopt;
7190    LookedThroughZExt = true;
7192    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7193      return std::nullopt;
7196  int64_t LegalShiftVal = Log2_32(SizeInBytes);
7197  if (LegalShiftVal == 0)
7198    return std::nullopt;
7199  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7200    return std::nullopt;
7211    if (OffsetOpc == TargetOpcode::G_SHL)
7212      return std::nullopt;
7218      return std::nullopt;
7223  int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7227  if (OffsetOpc == TargetOpcode::G_MUL) {
7228    if (!llvm::has_single_bit<uint32_t>(ImmVal))
7229      return std::nullopt;
7235    if ((ImmVal & 0x7) != ImmVal)
7236      return std::nullopt;
7240  if (ImmVal != LegalShiftVal)
7241    return std::nullopt;
7243  unsigned SignExtend = 0;
7247    if (!LookedThroughZExt) {
7249      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7251        return std::nullopt;
7256        return std::nullopt;
7262    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7272    MIB.addImm(SignExtend);
7286 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7289    return std::nullopt;
7306  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7307    return std::nullopt;
7313  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7327 AArch64InstructionSelector::selectAddrModeRegisterOffset(
7333  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7334    return std::nullopt;
7340    return std::nullopt;
7360 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7361                                               unsigned SizeInBytes) const {
7364    return std::nullopt;
7368    return std::nullopt;
7386  unsigned Scale = Log2_32(SizeInBytes);
7387  int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7391  if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7392      ImmOff < (0x1000 << Scale))
7393    return std::nullopt;
7398    if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7402    if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7408    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7409           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7414    return std::nullopt;
7418  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7424  return selectAddrModeRegisterOffset(Root);
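// --- Illustrative note (not part of the original source) --------------------
// selectAddrModeXRO deliberately refuses the register-offset form when a
// constant offset would already fit the scaled unsigned-12-bit immediate
// addressing mode (offset >= 0, a multiple of the access size, and below
// 0x1000 << log2(size)), since "ldr x0, [base, #imm]" needs no extra
// materialized register. A standalone predicate expressing that first test:
#include <cstdint>

inline bool fitsScaledImmediateAddressing(int64_t Offset, unsigned SizeInBytes) {
  unsigned Scale = 0;
  while ((1u << Scale) < SizeInBytes)
    ++Scale;                                       // log2(SizeInBytes)
  return Offset >= 0 && Offset % SizeInBytes == 0 &&
         Offset < (int64_t(0x1000) << Scale);
}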
7434 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7435                                               unsigned SizeInBytes) const {
7440  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7441    return std::nullopt;
7462  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7471  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7472    return std::nullopt;
7476      getExtendTypeForInst(*OffsetInst, MRI, true);
7478    return std::nullopt;
7483                                    AArch64::GPR32RegClass, MIB);
7490    MIB.addImm(SignExtend);
7501 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7502                                                    unsigned Size) const {
7507    return std::nullopt;
7509  if (!isBaseWithConstantOffset(Root, MRI))
7510    return std::nullopt;
7515  if (!OffImm.isReg())
7516    return std::nullopt;
7518  if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7519    return std::nullopt;
7523    return std::nullopt;
7526  if (RHSC >= -256 && RHSC < 256) {
7533  return std::nullopt;
7537 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7540  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7541    return std::nullopt;
7544    return std::nullopt;
7549    return std::nullopt;
7553    return std::nullopt;
7557    return std::nullopt;
7559  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7564        MIB.addGlobalAddress(GV, Offset,
7574 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7575                                                   unsigned Size) const {
7580    return std::nullopt;
7583  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7596      !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7597        STI.isTargetDarwin())) {
7598    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7603  if (isBaseWithConstantOffset(Root, MRI)) {
7611    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7612      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7627  if (selectAddrModeUnscaled(Root, Size))
7628    return std::nullopt;
7639  switch (MI.getOpcode()) {
7642  case TargetOpcode::G_SHL:
7644  case TargetOpcode::G_LSHR:
7646  case TargetOpcode::G_ASHR:
7648  case TargetOpcode::G_ROTR:
7656 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7657                                                   bool AllowROR) const {
7659    return std::nullopt;
7668    return std::nullopt;
7670    return std::nullopt;
7671  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7672    return std::nullopt;
7678    return std::nullopt;
7685  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7686  unsigned Val = *Immed & (NumBits - 1);
7695  unsigned Opc = MI.getOpcode();
7698  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7700    if (Opc == TargetOpcode::G_SEXT)
7701      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7703      Size = MI.getOperand(2).getImm();
7704    assert(Size != 64 && "Extend from 64 bits?");
7717  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7718    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7719    assert(Size != 64 && "Extend from 64 bits?");
7734  if (Opc != TargetOpcode::G_AND)
7753 Register AArch64InstructionSelector::moveScalarRegClass(
7756  auto Ty = MRI.getType(Reg);
7765  return Copy.getReg(0);
7771 AArch64InstructionSelector::selectArithExtendedRegister(
7774    return std::nullopt;
7783    return std::nullopt;
7785  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7786    return std::nullopt;
7789  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7794      return std::nullopt;
7795    ShiftVal = *MaybeShiftVal;
7797      return std::nullopt;
7802      return std::nullopt;
7803    Ext = getExtendTypeForInst(*ExtDef, MRI);
7805      return std::nullopt;
7809    Ext = getExtendTypeForInst(*RootDef, MRI);
7811      return std::nullopt;
7820    if (isDef32(*ExtInst))
7821      return std::nullopt;
7828  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7832  MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7837 AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7839    return std::nullopt;
7844  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7845         STI.isLittleEndian())
7849    return std::nullopt;
7851  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7857  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7862        LaneIdx->Value.getSExtValue() == 1) {
7868  return std::nullopt;
7875  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7876         "Expected G_CONSTANT");
7877  std::optional<int64_t> CstVal =
7879  assert(CstVal && "Expected constant value");
7883 void AArch64InstructionSelector::renderLogicalImm32(
7885  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7886         "Expected G_CONSTANT");
7887  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7892 void AArch64InstructionSelector::renderLogicalImm64(
7894  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7895         "Expected G_CONSTANT");
7896  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7904  assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7905         "Expected G_UBSANTRAP");
7906  MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7912  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7913         "Expected G_FCONSTANT");
7921  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7922         "Expected G_FCONSTANT");
7930  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7931         "Expected G_FCONSTANT");
7936 void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7938  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7939         "Expected G_FCONSTANT");
7947 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7949  if (!MI.mayLoadOrStore())
7952         "Expected load/store to have only one mem op!");
7953  return (*MI.memoperands_begin())->getSize() == NumBytes;
7956 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7958  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7965  switch (MI.getOpcode()) {
7968  case TargetOpcode::COPY:
7969  case TargetOpcode::G_BITCAST:
7970  case TargetOpcode::G_TRUNC:
7971  case TargetOpcode::G_PHI:
7981  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7984  assert(DstRB && "Expected PHI dst to have regbank assigned");
7995    auto *OpDef = MRI.getVRegDef(OpReg);
7996    const LLT &Ty = MRI.getType(OpReg);
8002    if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8006    MRI.setRegBank(Copy.getReg(0), *DstRB);
8007    MO.setReg(Copy.getReg(0));
8016  for (auto &BB : MF) {
8017    for (auto &MI : BB) {
8018      if (MI.getOpcode() == TargetOpcode::G_PHI)
8023  for (auto *MI : Phis) {
8045    bool HasGPROp = false, HasFPROp = false;
8049      const LLT &Ty = MRI.getType(MO.getReg());
8059      if (RB->getID() == AArch64::GPRRegBankID)
8065      if (HasGPROp && HasFPROp)
8075  return new AArch64InstructionSelector(TM, Subtarget, RBI);
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares the targeting of the RegisterBankInfo class for AArch64.
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file contains constants used for implementing Dwarf debug support.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static StringRef getName(Value *V)
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
unsigned getVarArgsFPRSize() const
bool hasELFSignedGOT() const
int getVarArgsFPRIndex() const
int getVarArgsStackIndex() const
int getVarArgsGPRIndex() const
unsigned getVarArgsGPRSize() const
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
bool isEquality() const
Determine if this is an equals/not equals predicate.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
bool isIntPredicate() const
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the sign bit is set.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
TypeSize getValue() const
Describe properties that are true of each instruction in the target description file.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreatePredicate(unsigned Pred)
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
SpecificConstantMatch m_SpecificICst(APInt RequestedValue)
Matches a constant equal to RequestedValue.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.