46#define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
52 cl::desc(
"Always modify dest registers regardless of color"),
59 cl::desc(
"Ignore balance information, always return "
60 "(1: Even, 2: Odd)."),
68 switch (
MI->getOpcode()) {
69 case AArch64::FMULSrr:
70 case AArch64::FNMULSrr:
71 case AArch64::FMULDrr:
72 case AArch64::FNMULDrr:
81 switch (
MI->getOpcode()) {
82 case AArch64::FMSUBSrrr:
83 case AArch64::FMADDSrrr:
84 case AArch64::FNMSUBSrrr:
85 case AArch64::FNMADDSrrr:
86 case AArch64::FMSUBDrrr:
87 case AArch64::FMADDDrrr:
88 case AArch64::FNMSUBDrrr:
89 case AArch64::FNMADDDrrr:
101enum class Color { Even, Odd };
103static const char *ColorNames[2] = {
"Even",
"Odd" };
109 MachineRegisterInfo *MRI;
110 const TargetRegisterInfo *TRI;
111 RegisterClassInfo RCI;
115 explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {}
117 bool runOnMachineFunction(MachineFunction &
F)
override;
119 MachineFunctionProperties getRequiredProperties()
const override {
120 return MachineFunctionProperties().setNoVRegs();
123 StringRef getPassName()
const override {
124 return "A57 FP Anti-dependency breaker";
127 void getAnalysisUsage(AnalysisUsage &AU)
const override {
134 bool colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &
MBB,
136 bool colorChain(Chain *
G, Color
C, MachineBasicBlock &
MBB);
137 int scavengeRegister(Chain *
G, Color
C, MachineBasicBlock &
MBB);
138 void scanInstruction(MachineInstr *
MI,
unsigned Idx,
139 std::map<unsigned, Chain*> &Active,
140 std::vector<std::unique_ptr<Chain>> &AllChains);
141 void maybeKillChain(MachineOperand &MO,
unsigned Idx,
142 std::map<unsigned, Chain*> &RegChains);
144 Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
148char AArch64A57FPLoadBalancing::ID = 0;
151 "AArch64 A57 FP Load-Balancing",
false,
false)
213 "Chain: broken invariant. A Chain can only be killed after its last "
234 "Chain: broken invariant. A Chain can only be killed after its last "
264 unsigned OtherEnd =
Other.KillInst ?
306 if (skipFunction(
F.getFunction()))
309 if (!
F.getSubtarget<AArch64Subtarget>().balanceFPOps())
315 MRI = &
F.getRegInfo();
316 TRI =
F.getRegInfo().getTargetRegisterInfo();
319 for (
auto &
MBB :
F) {
326bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &
MBB) {
329 <<
" - scanning instructions...\n");
336 std::map<unsigned, Chain*> ActiveChains;
337 std::vector<std::unique_ptr<Chain>> AllChains;
340 scanInstruction(&
MI, Idx++, ActiveChains, AllChains);
343 <<
" chains created.\n");
352 EquivalenceClasses<Chain*>
EC;
353 for (
auto &
I : AllChains)
356 for (
auto &
I : AllChains)
357 for (
auto &J : AllChains)
358 if (
I != J &&
I->rangeOverlapsWith(*J))
359 EC.unionSets(
I.get(), J.get());
360 LLVM_DEBUG(
dbgs() <<
"Created " <<
EC.getNumClasses() <<
" disjoint sets.\n");
366 std::vector<std::vector<Chain*> >
V;
367 for (
const auto &
E : EC) {
370 std::vector<Chain *> Cs(
EC.member_begin(*
E),
EC.member_end());
371 if (Cs.empty())
continue;
372 V.push_back(std::move(Cs));
378 [](
const std::vector<Chain *> &
A,
const std::vector<Chain *> &
B) {
379 return A.front()->startsBefore(
B.front());
396 Changed |= colorChainSet(std::move(
I),
MBB, Parity);
401Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
402 std::vector<Chain*> &L) {
414 const unsigned SizeFuzz = 1;
415 unsigned MinSize =
L.front()->size() - SizeFuzz;
416 for (
auto I =
L.begin(),
E =
L.end();
I !=
E; ++
I) {
417 if ((*I)->size() <= MinSize) {
424 if ((*I)->getPreferredColor() == PreferredColor) {
432 Chain *Ch =
L.front();
437bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
438 MachineBasicBlock &
MBB,
441 LLVM_DEBUG(
dbgs() <<
"colorChainSet(): #sets=" << GV.size() <<
"\n");
452 llvm::sort(GV, [](
const Chain *G1,
const Chain *G2) {
453 if (G1->size() != G2->size())
454 return G1->size() > G2->size();
455 if (G1->requiresFixup() != G2->requiresFixup())
456 return G1->requiresFixup() > G2->requiresFixup();
458 assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
459 "Starts before not total order!");
460 return G1->startsBefore(G2);
463 Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
464 while (Chain *
G = getAndEraseNext(PreferredColor, GV)) {
466 Color
C = PreferredColor;
469 C =
G->getPreferredColor();
472 <<
", Color=" << ColorNames[(
int)
C] <<
"\n");
477 if (
G->requiresFixup() &&
C !=
G->getPreferredColor()) {
478 C =
G->getPreferredColor();
480 <<
" - not worthwhile changing; "
482 << ColorNames[(
int)
C] <<
"\n");
487 Parity += (
C == Color::Even) ?
G->size() : -
G->size();
488 PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
494int AArch64A57FPLoadBalancing::scavengeRegister(Chain *
G, Color
C,
495 MachineBasicBlock &
MBB) {
498 LiveRegUnits Units(*
TRI);
499 Units.addLiveOuts(
MBB);
502 while (
I != ChainEnd) {
504 Units.stepBackward(*
I);
509 assert(ChainBegin != ChainEnd &&
"Chain should contain instructions");
512 Units.accumulate(*
I);
513 }
while (
I != ChainBegin);
516 unsigned RegClassID = ChainBegin->getDesc().operands()[0].RegClass;
517 auto Ord = RCI.
getOrder(
TRI->getRegClass(RegClassID));
518 for (
auto Reg : Ord) {
519 if (!Units.available(
Reg))
521 if (
C == getColor(
Reg))
528bool AArch64A57FPLoadBalancing::colorChain(Chain *
G, Color
C,
529 MachineBasicBlock &
MBB) {
532 << ColorNames[(
int)
C] <<
")\n");
536 int Reg = scavengeRegister(
G,
C,
MBB);
543 std::map<unsigned, unsigned> Substs;
544 for (MachineInstr &
I : *
G) {
545 if (!
G->contains(
I) && (&
I !=
G->getKill() ||
G->isKillImmutable()))
550 std::vector<unsigned> ToErase;
551 for (
auto &U :
I.operands()) {
552 if (
U.isReg() &&
U.isUse() && Substs.find(
U.getReg()) != Substs.end()) {
554 U.setReg(Substs[OrigReg]);
558 ToErase.push_back(OrigReg);
559 }
else if (
U.isRegMask()) {
560 for (
auto J : Substs) {
561 if (
U.clobbersPhysReg(J.first))
562 ToErase.push_back(J.first);
567 for (
auto J : ToErase)
571 if (&
I !=
G->getKill()) {
572 MachineOperand &MO =
I.getOperand(0);
575 if (
G->requiresFixup() && &
I ==
G->getLast())
586 assert(Substs.size() == 0 &&
"No substitutions should be left active!");
598void AArch64A57FPLoadBalancing::scanInstruction(
599 MachineInstr *
MI,
unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
600 std::vector<std::unique_ptr<Chain>> &AllChains) {
605 for (
auto &
I :
MI->uses())
606 maybeKillChain(
I, Idx, ActiveChains);
607 for (
auto &
I :
MI->defs())
608 maybeKillChain(
I, Idx, ActiveChains);
612 Register DestReg =
MI->getOperand(0).getReg();
617 auto G = std::make_unique<Chain>(
MI, Idx, getColor(DestReg));
618 ActiveChains[DestReg] =
G.get();
619 AllChains.push_back(std::move(
G));
625 Register DestReg =
MI->getOperand(0).getReg();
626 Register AccumReg =
MI->getOperand(3).getReg();
628 maybeKillChain(
MI->getOperand(1), Idx, ActiveChains);
629 maybeKillChain(
MI->getOperand(2), Idx, ActiveChains);
630 if (DestReg != AccumReg)
631 maybeKillChain(
MI->getOperand(0), Idx, ActiveChains);
633 if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
642 if (
MI->getOperand(3).isKill()) {
644 LLVM_DEBUG(
dbgs() <<
"Instruction was successfully added to chain.\n");
645 ActiveChains[AccumReg]->add(
MI, Idx, getColor(DestReg));
647 if (DestReg != AccumReg) {
648 ActiveChains[DestReg] = ActiveChains[AccumReg];
649 ActiveChains.erase(AccumReg);
655 dbgs() <<
"Cannot add to chain because accumulator operand wasn't "
656 <<
"marked <kill>!\n");
657 maybeKillChain(
MI->getOperand(3), Idx, ActiveChains);
662 auto G = std::make_unique<Chain>(
MI, Idx, getColor(DestReg));
663 ActiveChains[DestReg] =
G.get();
664 AllChains.push_back(std::move(
G));
670 for (
auto &
I :
MI->uses())
671 maybeKillChain(
I, Idx, ActiveChains);
672 for (
auto &
I :
MI->defs())
673 maybeKillChain(
I, Idx, ActiveChains);
678void AArch64A57FPLoadBalancing::
679maybeKillChain(MachineOperand &MO,
unsigned Idx,
680 std::map<unsigned, Chain*> &ActiveChains) {
688 if (MO.
isKill() && ActiveChains.find(MO.
getReg()) != ActiveChains.end()) {
693 ActiveChains.erase(MO.
getReg());
697 for (
auto I = ActiveChains.begin(),
E = ActiveChains.end();
702 I->second->setKill(
MI, Idx,
true);
703 ActiveChains.erase(
I++);
711Color AArch64A57FPLoadBalancing::getColor(
unsigned Reg) {
712 if ((
TRI->getEncodingValue(
Reg) % 2) == 0)
720 return new AArch64A57FPLoadBalancing();
static bool isMul(MachineInstr *MI)
static cl::opt< unsigned > OverrideBalance("aarch64-a57-fp-load-balancing-override", cl::desc("Ignore balance information, always return " "(1: Even, 2: Odd)."), cl::init(0), cl::Hidden)
static cl::opt< bool > TransformAll("aarch64-a57-fp-load-balancing-force-all", cl::desc("Always modify dest registers regardless of color"), cl::init(false), cl::Hidden)
static bool isMla(MachineInstr *MI)
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file declares the machine register scavenger class.
MachineInstr * StartInst
The important (marker) instructions.
MachineBasicBlock::iterator begin() const
bool isKillImmutable() const
Can the Kill instruction (assuming one exists) be modified?
void add(MachineInstr *MI, unsigned Idx, Color C)
Add a new instruction into the chain.
bool contains(MachineInstr &MI)
Return true if MI is a member of the chain.
Color LastColor
The "color" of LastInst.
bool requiresFixup() const
Return true if the group will require a fixup MOV at the end.
MachineInstr * getLast() const
Return the last instruction in the chain.
bool KillIsImmutable
True if KillInst cannot be modified.
bool rangeOverlapsWith(const Chain &Other) const
Return true if this chain (StartInst..KillInst) overlaps with Other.
MachineInstr * getStart() const
Return the first instruction in the chain.
unsigned size() const
Return the number of instructions in the chain.
MachineBasicBlock::iterator end() const
Return an instruction that can be used as an iterator for the end of the chain.
void setKill(MachineInstr *MI, unsigned Idx, bool Immutable)
Inform the chain that its last active register (the dest register of LastInst) is killed by MI with n...
MachineInstr * getKill() const
Return the "kill" instruction (as set with setKill()) or NULL.
unsigned StartInstIdx
The index, from the start of the basic block, that each marker appears.
Color getPreferredColor()
Return the preferred color of this chain.
std::string str() const
Return a simple string representation of the chain.
std::set< MachineInstr * > Insts
All instructions in the chain.
Chain(MachineInstr *MI, unsigned Idx, Color C)
bool startsBefore(const Chain *Other) const
Return true if this chain starts before Other.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Representation of each machine instruction.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAArch64A57FPLoadBalancing()
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.