80#define DEBUG_TYPE "aarch64-mi-peephole-opt"
94 using OpcodePair = std::pair<unsigned, unsigned>;
96 using SplitAndOpcFunc =
97 std::function<std::optional<OpcodePair>(
T,
unsigned,
T &,
T &)>;
99 std::function<void(
MachineInstr &, OpcodePair,
unsigned,
unsigned,
116 template <
typename T>
118 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
123 template <
typename T>
124 bool visitADDSUB(
unsigned PosOpc,
unsigned NegOpc,
MachineInstr &
MI);
125 template <
typename T>
126 bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs,
MachineInstr &
MI);
129 enum class SplitStrategy {
133 template <
typename T>
135 SplitStrategy Strategy,
unsigned OtherOpc = 0);
147 return "AArch64 MI Peephole Optimization pass";
157char AArch64MIPeepholeOpt::ID = 0;
162 "AArch64 MI Peephole Optimization",
false,
false)
166 T UImm =
static_cast<T>(Imm);
167 assert(UImm && (UImm != ~
static_cast<T>(0)) &&
"Invalid immediate!");
198 assert(Imm && (Imm != ~
static_cast<T>(0)) &&
"Invalid immediate!");
205 unsigned LowestGapBitUnset =
209 assert(LowestGapBitUnset <
sizeof(
T) * CHAR_BIT &&
"Undefined behaviour!");
210 T NewImm1 = (
static_cast<T>(1) << LowestGapBitUnset) -
226 SplitStrategy Strategy,
238 return splitTwoPartImm<T>(
240 [
Opc, Strategy, OtherOpc](
T Imm,
unsigned RegSize,
T &Imm0,
241 T &Imm1) -> std::optional<OpcodePair> {
250 if (Insn.
size() == 1)
253 bool SplitSucc =
false;
255 case SplitStrategy::Intersect:
256 SplitSucc = splitBitmaskImm(Imm,
RegSize, Imm0, Imm1);
258 case SplitStrategy::Disjoint:
263 return std::make_pair(
Opc, !OtherOpc ?
Opc : OtherOpc);
285 if (
MI.getOperand(3).getImm() != 0)
288 if (
MI.getOperand(1).getReg() != AArch64::WZR)
305 if (SrcMI->
getOpcode() == TargetOpcode::COPY &&
312 if (RC != &AArch64::FPR32RegClass &&
313 ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
314 RC != &AArch64::ZPRRegClass) ||
319 CpySrc =
MRI->createVirtualRegister(&AArch64::FPR32RegClass);
321 TII->get(TargetOpcode::COPY), CpySrc)
331 else if (SrcMI->
getOpcode() <= TargetOpcode::GENERIC_OP_END)
336 MRI->replaceRegWith(DefReg, SrcReg);
337 MRI->clearKillFlags(SrcReg);
339 MI.eraseFromParent();
346 if (
MI.getOperand(1).getReg() !=
MI.getOperand(2).getReg())
350 MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
352 MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
360 MI.eraseFromParent();
372 if (!
MI.isRegTiedToDefOperand(1))
391 if ((SrcMI->
getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
392 !AArch64::GPR64allRegClass.hasSubClassEq(RC))
398 TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
400 .
add(
MI.getOperand(2))
401 .
add(
MI.getOperand(3));
404 MI.eraseFromParent();
413 if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
414 (Imm & ~
static_cast<T>(0xffffff)) != 0)
420 if (Insn.
size() == 1)
424 Imm0 = (Imm >> 12) & 0xfff;
430bool AArch64MIPeepholeOpt::visitADDSUB(
448 if (
MI.getOperand(1).getReg() == AArch64::XZR ||
449 MI.getOperand(1).getReg() == AArch64::WZR)
452 return splitTwoPartImm<T>(
454 [PosOpc, NegOpc](
T Imm,
unsigned RegSize,
T &Imm0,
455 T &Imm1) -> std::optional<OpcodePair> {
457 return std::make_pair(PosOpc, PosOpc);
459 return std::make_pair(NegOpc, NegOpc);
479bool AArch64MIPeepholeOpt::visitADDSSUBS(
484 if (
MI.getOperand(1).getReg() == AArch64::XZR ||
485 MI.getOperand(1).getReg() == AArch64::WZR)
488 return splitTwoPartImm<T>(
492 T &Imm1) -> std::optional<OpcodePair> {
504 if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
532 if (L && !
L->isLoopInvariant(
MI))
536 MovMI =
MRI->getUniqueVRegDef(
MI.getOperand(2).getReg());
541 SubregToRegMI =
nullptr;
542 if (MovMI->
getOpcode() == TargetOpcode::SUBREG_TO_REG) {
543 SubregToRegMI = MovMI;
549 if (MovMI->
getOpcode() != AArch64::MOVi32imm &&
550 MovMI->
getOpcode() != AArch64::MOVi64imm)
565bool AArch64MIPeepholeOpt::splitTwoPartImm(
567 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
570 "Invalid RegSize for legal immediate peephole optimization");
574 if (!checkMovImmInstr(
MI, MovMI, SubregToRegMI))
586 if (
auto R = SplitAndOpc(Imm,
RegSize, Imm0, Imm1))
599 TII->getRegClass(
TII->get(Opcode.first), 0,
TRI, *MF);
601 TII->getRegClass(
TII->get(Opcode.first), 1,
TRI, *MF);
603 (Opcode.first == Opcode.second)
605 :
TII->getRegClass(
TII->get(Opcode.second), 0,
TRI, *MF);
607 (Opcode.first == Opcode.second)
608 ? FirstInstrOperandRC
609 :
TII->getRegClass(
TII->get(Opcode.second), 1,
TRI, *MF);
614 Register NewTmpReg =
MRI->createVirtualRegister(FirstInstrDstRC);
618 ?
MRI->createVirtualRegister(SecondInstrDstRC)
622 MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
623 MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
624 if (DstReg != NewDstReg)
625 MRI->constrainRegClass(NewDstReg,
MRI->getRegClass(DstReg));
628 BuildInstr(
MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
632 if (DstReg != NewDstReg) {
633 MRI->replaceRegWith(DstReg, NewDstReg);
634 MI.getOperand(0).setReg(DstReg);
638 MI.eraseFromParent();
663 if (!SrcMI || SrcMI->
getOpcode() != TargetOpcode::COPY)
670 &AArch64::FPR128RegClass) {
680 .
add(
MI.getOperand(1))
681 .
add(
MI.getOperand(2))
687 MI.eraseFromParent();
695 if (!
MI->getOperand(0).isReg() || !
MI->getOperand(0).isDef())
698 if (RC != &AArch64::FPR64RegClass)
700 return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
712 if (Low64MI->
getOpcode() != AArch64::INSERT_SUBREG)
732 if (!High64MI || High64MI->
getOpcode() != AArch64::INSERT_SUBREG)
735 if (High64MI && High64MI->
getOpcode() == TargetOpcode::COPY)
737 if (!High64MI || (High64MI->
getOpcode() != AArch64::MOVID &&
738 High64MI->
getOpcode() != AArch64::MOVIv2d_ns))
746 MRI->constrainRegClass(NewDef,
MRI->getRegClass(OldDef));
747 MRI->replaceRegWith(OldDef, NewDef);
748 MI.eraseFromParent();
763 MRI->clearKillFlags(OldDef);
764 MRI->clearKillFlags(NewDef);
765 MRI->constrainRegClass(NewDef,
MRI->getRegClass(OldDef));
766 MRI->replaceRegWith(OldDef, NewDef);
767 MI.eraseFromParent();
775 int64_t Immr =
MI.getOperand(2).getImm();
776 int64_t Imms =
MI.getOperand(3).getImm();
778 bool IsLSR = Imms == 31 && Immr <= Imms;
779 bool IsLSL = Immr == Imms + 33;
780 if (!IsLSR && !IsLSL)
788 TII->getRegClass(
TII->get(
MI.getOpcode()), 0,
TRI, *
MI.getMF());
790 TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
791 assert(DstRC32 &&
"Destination register class of UBFMXri doesn't have a "
792 "sub_32 subregister class");
795 TII->getRegClass(
TII->get(
MI.getOpcode()), 1,
TRI, *
MI.getMF());
797 TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
798 assert(SrcRC32 &&
"Source register class of UBFMXri doesn't have a sub_32 "
799 "subregister class");
801 Register DstReg64 =
MI.getOperand(0).getReg();
802 Register DstReg32 =
MRI->createVirtualRegister(DstRC32);
803 Register SrcReg64 =
MI.getOperand(1).getReg();
804 Register SrcReg32 =
MRI->createVirtualRegister(SrcRC32);
808 .
addReg(SrcReg64, 0, AArch64::sub_32);
815 TII->get(AArch64::SUBREG_TO_REG), DstReg64)
819 MI.eraseFromParent();
827 Register InputReg =
MI.getOperand(1).getReg();
828 if (
MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
829 !
MRI->hasOneNonDBGUse(InputReg))
846 if (SrcMI->
getOpcode() != AArch64::SBFMXri ||
849 return AArch64::NoRegister;
854 if (SrcMI->
getOpcode() != AArch64::SUBREG_TO_REG ||
857 return AArch64::NoRegister;
859 if (!Orr || Orr->
getOpcode() != AArch64::ORRWrr ||
862 return AArch64::NoRegister;
864 if (!Cpy || Cpy->
getOpcode() != AArch64::COPY ||
866 return AArch64::NoRegister;
871 Register SrcReg = getSXTWSrcReg(SrcMI);
873 SrcReg = getUXTWSrcReg(SrcMI);
877 MRI->constrainRegClass(SrcReg,
MRI->getRegClass(InputReg));
879 MI.getOperand(1).setReg(SrcReg);
881 for (
auto *DeadMI : DeadInstrs) {
883 DeadMI->eraseFromParent();
895 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
898 assert(
MRI->isSSA() &&
"Expected to be run on SSA form!");
900 bool Changed =
false;
904 switch (
MI.getOpcode()) {
907 case AArch64::INSERT_SUBREG:
908 Changed |= visitINSERT(
MI);
910 case AArch64::ANDWrr:
911 Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri,
MI,
912 SplitStrategy::Intersect);
914 case AArch64::ANDXrr:
915 Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri,
MI,
916 SplitStrategy::Intersect);
918 case AArch64::ANDSWrr:
919 Changed |= trySplitLogicalImm<uint32_t>(
920 AArch64::ANDWri,
MI, SplitStrategy::Intersect, AArch64::ANDSWri);
922 case AArch64::ANDSXrr:
923 Changed |= trySplitLogicalImm<uint64_t>(
924 AArch64::ANDXri,
MI, SplitStrategy::Intersect, AArch64::ANDSXri);
926 case AArch64::EORWrr:
927 Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri,
MI,
928 SplitStrategy::Disjoint);
930 case AArch64::EORXrr:
931 Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri,
MI,
932 SplitStrategy::Disjoint);
934 case AArch64::ORRWrr:
935 Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri,
MI,
936 SplitStrategy::Disjoint);
938 case AArch64::ORRXrr:
939 Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri,
MI,
940 SplitStrategy::Disjoint);
942 case AArch64::ORRWrs:
943 Changed |= visitORR(
MI);
945 case AArch64::ADDWrr:
946 Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri,
MI);
948 case AArch64::SUBWrr:
949 Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri,
MI);
951 case AArch64::ADDXrr:
952 Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri,
MI);
954 case AArch64::SUBXrr:
955 Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri,
MI);
957 case AArch64::ADDSWrr:
959 visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
960 {AArch64::SUBWri, AArch64::SUBSWri},
MI);
962 case AArch64::SUBSWrr:
964 visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
965 {AArch64::ADDWri, AArch64::ADDSWri},
MI);
967 case AArch64::ADDSXrr:
969 visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
970 {AArch64::SUBXri, AArch64::SUBSXri},
MI);
972 case AArch64::SUBSXrr:
974 visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
975 {AArch64::ADDXri, AArch64::ADDSXri},
MI);
977 case AArch64::CSELWr:
978 case AArch64::CSELXr:
979 Changed |= visitCSEL(
MI);
981 case AArch64::INSvi64gpr:
982 Changed |= visitINSviGPR(
MI, AArch64::INSvi64lane);
984 case AArch64::INSvi32gpr:
985 Changed |= visitINSviGPR(
MI, AArch64::INSvi32lane);
987 case AArch64::INSvi16gpr:
988 Changed |= visitINSviGPR(
MI, AArch64::INSvi16lane);
990 case AArch64::INSvi8gpr:
991 Changed |= visitINSviGPR(
MI, AArch64::INSvi8lane);
993 case AArch64::INSvi64lane:
994 Changed |= visitINSvi64lane(
MI);
996 case AArch64::FMOVDr:
997 Changed |= visitFMOVDr(
MI);
999 case AArch64::UBFMXri:
1000 Changed |= visitUBFMXri(
MI);
1003 Changed |= visitCopy(
MI);
1013 return new AArch64MIPeepholeOpt();
unsigned const MachineRegisterInfo * MRI
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc)
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI, MachineRegisterInfo *MRI)
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function; (2) modify terminator instructions in any way.
FunctionPass class - This class is used to implement most global optimizations.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64MIPeepholeOptPass()
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.