LLVM 22.0.0git
AArch64A57FPLoadBalancing.cpp
Go to the documentation of this file.
1//===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// For best-case performance on Cortex-A57, we should try to use a balanced
9// mix of odd and even D-registers when performing a critical sequence of
10// independent, non-quadword FP/ASIMD floating-point multiply or
11// multiply-accumulate operations.
12//
13// This pass attempts to detect situations where the register allocation may
14// adversely affect this load balancing and to change the registers used so as
15// to better utilize the CPU.
16//
17// Ideally we'd just take each multiply or multiply-accumulate in turn and
18// allocate it alternating even or odd registers. However, multiply-accumulates
19// are most efficiently performed in the same functional unit as their
20// accumulation operand. Therefore this pass tries to find maximal sequences
21// ("Chains") of multiply-accumulates linked via their accumulation operand,
22// and assign them all the same "color" (oddness/evenness).
23//
24// This optimization affects S-register and D-register floating point
25// multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
26// FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
27// not affected.
28//===----------------------------------------------------------------------===//
29
30#include "AArch64.h"
31#include "AArch64InstrInfo.h"
32#include "AArch64Subtarget.h"
42#include "llvm/Support/Debug.h"
44using namespace llvm;
45
46#define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
47
48// Enforce the algorithm to use the scavenged register even when the original
49// destination register is the correct color. Used for testing.
50static cl::opt<bool>
51TransformAll("aarch64-a57-fp-load-balancing-force-all",
52 cl::desc("Always modify dest registers regardless of color"),
53 cl::init(false), cl::Hidden);
54
55// Never use the balance information obtained from chains - return a specific
56// color always. Used for testing.
58OverrideBalance("aarch64-a57-fp-load-balancing-override",
59 cl::desc("Ignore balance information, always return "
60 "(1: Even, 2: Odd)."),
62
63//===----------------------------------------------------------------------===//
64// Helper functions
65
66// Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
67static bool isMul(MachineInstr *MI) {
68 switch (MI->getOpcode()) {
69 case AArch64::FMULSrr:
70 case AArch64::FNMULSrr:
71 case AArch64::FMULDrr:
72 case AArch64::FNMULDrr:
73 return true;
74 default:
75 return false;
76 }
77}
78
79// Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
80static bool isMla(MachineInstr *MI) {
81 switch (MI->getOpcode()) {
82 case AArch64::FMSUBSrrr:
83 case AArch64::FMADDSrrr:
84 case AArch64::FNMSUBSrrr:
85 case AArch64::FNMADDSrrr:
86 case AArch64::FMSUBDrrr:
87 case AArch64::FMADDDrrr:
88 case AArch64::FNMSUBDrrr:
89 case AArch64::FNMADDDrrr:
90 return true;
91 default:
92 return false;
93 }
94}
95
96//===----------------------------------------------------------------------===//
97
98namespace {
99/// A "color", which is either even or odd. Yes, these aren't really colors
100/// but the algorithm is conceptually doing two-color graph coloring.
101enum class Color { Even, Odd };
102#ifndef NDEBUG
103static const char *ColorNames[2] = { "Even", "Odd" };
104#endif
105
106class Chain;
107
108class AArch64A57FPLoadBalancing : public MachineFunctionPass {
109 MachineRegisterInfo *MRI;
110 const TargetRegisterInfo *TRI;
111 RegisterClassInfo RCI;
112
113public:
114 static char ID;
115 explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {}
116
117 bool runOnMachineFunction(MachineFunction &F) override;
118
119 MachineFunctionProperties getRequiredProperties() const override {
120 return MachineFunctionProperties().setNoVRegs();
121 }
122
123 StringRef getPassName() const override {
124 return "A57 FP Anti-dependency breaker";
125 }
126
127 void getAnalysisUsage(AnalysisUsage &AU) const override {
128 AU.setPreservesCFG();
130 }
131
132private:
133 bool runOnBasicBlock(MachineBasicBlock &MBB);
134 bool colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &MBB,
135 int &Balance);
136 bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB);
137 int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
138 void scanInstruction(MachineInstr *MI, unsigned Idx,
139 std::map<unsigned, Chain*> &Active,
140 std::vector<std::unique_ptr<Chain>> &AllChains);
141 void maybeKillChain(MachineOperand &MO, unsigned Idx,
142 std::map<unsigned, Chain*> &RegChains);
143 Color getColor(unsigned Register);
144 Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
145};
146}
147
148char AArch64A57FPLoadBalancing::ID = 0;
149
150INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
151 "AArch64 A57 FP Load-Balancing", false, false)
152INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
153 "AArch64 A57 FP Load-Balancing", false, false)
154
155namespace {
156/// A Chain is a sequence of instructions that are linked together by
157/// an accumulation operand. For example:
158///
159/// fmul def d0, ?
160/// fmla def d1, ?, ?, killed d0
161/// fmla def d2, ?, ?, killed d1
162///
163/// There may be other instructions interleaved in the sequence that
164/// do not belong to the chain. These other instructions must not use
165/// the "chain" register at any point.
166///
167/// We currently only support chains where the "chain" operand is killed
168/// at each link in the chain for simplicity.
169/// A chain has three important instructions - Start, Last and Kill.
170/// * The start instruction is the first instruction in the chain.
171/// * Last is the final instruction in the chain.
172/// * Kill may or may not be defined. If defined, Kill is the instruction
173/// where the outgoing value of the Last instruction is killed.
174/// This information is important as if we know the outgoing value is
175/// killed with no intervening uses, we can safely change its register.
176///
177/// Without a kill instruction, we must assume the outgoing value escapes
178/// beyond our model and either must not change its register or must
179/// create a fixup FMOV to keep the old register value consistent.
180///
181class Chain {
182public:
183 /// The important (marker) instructions.
185 /// The index, from the start of the basic block, that each marker
186 /// appears. These are stored so we can do quick interval tests.
188 /// All instructions in the chain.
189 std::set<MachineInstr*> Insts;
190 /// True if KillInst cannot be modified. If this is true,
191 /// we cannot change LastInst's outgoing register.
192 /// This will be true for tied values and regmasks.
194 /// The "color" of LastInst. This will be the preferred chain color,
195 /// as changing intermediate nodes is easy but changing the last
196 /// instruction can be more tricky.
198
199 Chain(MachineInstr *MI, unsigned Idx, Color C)
200 : StartInst(MI), LastInst(MI), KillInst(nullptr),
201 StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0),
202 LastColor(C) {
203 Insts.insert(MI);
204 }
205
206 /// Add a new instruction into the chain. The instruction's dest operand
207 /// has the given color.
208 void add(MachineInstr *MI, unsigned Idx, Color C) {
209 LastInst = MI;
210 LastInstIdx = Idx;
211 LastColor = C;
213 "Chain: broken invariant. A Chain can only be killed after its last "
214 "def");
215
216 Insts.insert(MI);
217 }
218
219 /// Return true if MI is a member of the chain.
220 bool contains(MachineInstr &MI) { return Insts.count(&MI) > 0; }
221
222 /// Return the number of instructions in the chain.
223 unsigned size() const {
224 return Insts.size();
225 }
226
227 /// Inform the chain that its last active register (the dest register of
228 /// LastInst) is killed by MI with no intervening uses or defs.
229 void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) {
230 KillInst = MI;
231 KillInstIdx = Idx;
232 KillIsImmutable = Immutable;
234 "Chain: broken invariant. A Chain can only be killed after its last "
235 "def");
236 }
237
238 /// Return the first instruction in the chain.
239 MachineInstr *getStart() const { return StartInst; }
240 /// Return the last instruction in the chain.
241 MachineInstr *getLast() const { return LastInst; }
242 /// Return the "kill" instruction (as set with setKill()) or NULL.
243 MachineInstr *getKill() const { return KillInst; }
244 /// Return an instruction that can be used as an iterator for the end
245 /// of the chain. This is the maximum of KillInst (if set) and LastInst.
250
251 /// Can the Kill instruction (assuming one exists) be modified?
252 bool isKillImmutable() const { return KillIsImmutable; }
253
254 /// Return the preferred color of this chain.
256 if (OverrideBalance != 0)
257 return OverrideBalance == 1 ? Color::Even : Color::Odd;
258 return LastColor;
259 }
260
261 /// Return true if this chain (StartInst..KillInst) overlaps with Other.
262 bool rangeOverlapsWith(const Chain &Other) const {
263 unsigned End = KillInst ? KillInstIdx : LastInstIdx;
264 unsigned OtherEnd = Other.KillInst ?
265 Other.KillInstIdx : Other.LastInstIdx;
266
267 return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End;
268 }
269
270 /// Return true if this chain starts before Other.
271 bool startsBefore(const Chain *Other) const {
272 return StartInstIdx < Other->StartInstIdx;
273 }
274
275 /// Return true if the group will require a fixup MOV at the end.
276 bool requiresFixup() const {
277 return (getKill() && isKillImmutable()) || !getKill();
278 }
279
280 /// Return a simple string representation of the chain.
281 std::string str() const {
282 std::string S;
283 raw_string_ostream OS(S);
284
285 OS << "{";
286 StartInst->print(OS, /* SkipOpers= */true);
287 OS << " -> ";
288 LastInst->print(OS, /* SkipOpers= */true);
289 if (KillInst) {
290 OS << " (kill @ ";
291 KillInst->print(OS, /* SkipOpers= */true);
292 OS << ")";
293 }
294 OS << "}";
295
296 return OS.str();
297 }
298
299};
300
301} // end anonymous namespace
302
303//===----------------------------------------------------------------------===//
304
305bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
306 if (skipFunction(F.getFunction()))
307 return false;
308
309 if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
310 return false;
311
312 bool Changed = false;
313 LLVM_DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
314
315 MRI = &F.getRegInfo();
316 TRI = F.getRegInfo().getTargetRegisterInfo();
318
319 for (auto &MBB : F) {
321 }
322
323 return Changed;
324}
325
326bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
327 bool Changed = false;
328 LLVM_DEBUG(dbgs() << "Running on MBB: " << MBB
329 << " - scanning instructions...\n");
330
331 // First, scan the basic block producing a set of chains.
332
333 // The currently "active" chains - chains that can be added to and haven't
334 // been killed yet. This is keyed by register - all chains can only have one
335 // "link" register between each inst in the chain.
336 std::map<unsigned, Chain*> ActiveChains;
337 std::vector<std::unique_ptr<Chain>> AllChains;
338 unsigned Idx = 0;
339 for (auto &MI : MBB)
340 scanInstruction(&MI, Idx++, ActiveChains, AllChains);
341
342 LLVM_DEBUG(dbgs() << "Scan complete, " << AllChains.size()
343 << " chains created.\n");
344
345 // Group the chains into disjoint sets based on their liveness range. This is
346 // a poor-man's version of graph coloring. Ideally we'd create an interference
347 // graph and perform full-on graph coloring on that, but;
348 // (a) That's rather heavyweight for only two colors.
349 // (b) We expect multiple disjoint interference regions - in practice the live
350 // range of chains is quite small and they are clustered between loads
351 // and stores.
352 EquivalenceClasses<Chain*> EC;
353 for (auto &I : AllChains)
354 EC.insert(I.get());
355
356 for (auto &I : AllChains)
357 for (auto &J : AllChains)
358 if (I != J && I->rangeOverlapsWith(*J))
359 EC.unionSets(I.get(), J.get());
360 LLVM_DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n");
361
362 // Now we assume that every member of an equivalence class interferes
363 // with every other member of that class, and with no members of other classes.
364
365 // Convert the EquivalenceClasses to a simpler set of sets.
366 std::vector<std::vector<Chain*> > V;
367 for (const auto &E : EC) {
368 if (!E->isLeader())
369 continue;
370 std::vector<Chain *> Cs(EC.member_begin(*E), EC.member_end());
371 if (Cs.empty()) continue;
372 V.push_back(std::move(Cs));
373 }
374
375 // Now we have a set of sets, order them by start address so
376 // we can iterate over them sequentially.
377 llvm::sort(V,
378 [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) {
379 return A.front()->startsBefore(B.front());
380 });
381
382 // As we only have two colors, we can track the global (BB-level) balance of
383 // odds versus evens. We aim to keep this near zero to keep both execution
384 // units fed.
385 // Positive means we're even-heavy, negative we're odd-heavy.
386 //
387 // FIXME: If chains have interdependencies, for example:
388 // mul r0, r1, r2
389 // mul r3, r0, r1
390 // We do not model this and may color each one differently, assuming we'll
391 // get ILP when we obviously can't. This hasn't been seen to be a problem
392 // in practice so far, so we simplify the algorithm by ignoring it.
393 int Parity = 0;
394
395 for (auto &I : V)
396 Changed |= colorChainSet(std::move(I), MBB, Parity);
397
398 return Changed;
399}
400
401Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
402 std::vector<Chain*> &L) {
403 if (L.empty())
404 return nullptr;
405
406 // We try and get the best candidate from L to color next, given that our
407 // preferred color is "PreferredColor". L is ordered from larger to smaller
408 // chains. It is beneficial to color the large chains before the small chains,
409 // but if we can't find a chain of the maximum length with the preferred color,
410 // we fuzz the size and look for slightly smaller chains before giving up and
411 // returning a chain that must be recolored.
412
413 // FIXME: Does this need to be configurable?
414 const unsigned SizeFuzz = 1;
415 unsigned MinSize = L.front()->size() - SizeFuzz;
416 for (auto I = L.begin(), E = L.end(); I != E; ++I) {
417 if ((*I)->size() <= MinSize) {
418 // We've gone past the size limit. Return the previous item.
419 Chain *Ch = *--I;
420 L.erase(I);
421 return Ch;
422 }
423
424 if ((*I)->getPreferredColor() == PreferredColor) {
425 Chain *Ch = *I;
426 L.erase(I);
427 return Ch;
428 }
429 }
430
431 // Bailout case - just return the first item.
432 Chain *Ch = L.front();
433 L.erase(L.begin());
434 return Ch;
435}
436
437bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
438 MachineBasicBlock &MBB,
439 int &Parity) {
440 bool Changed = false;
441 LLVM_DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n");
442
443 // Sort by descending size order so that we allocate the most important
444 // sets first.
445 // Tie-break equivalent sizes by sorting chains requiring fixups before
446 // those without fixups. The logic here is that we should look at the
447 // chains that we cannot change before we look at those we can,
448 // so the parity counter is updated and we know what color we should
449 // change them to!
450 // Final tie-break with instruction order so pass output is stable (i.e. not
451 // dependent on malloc'd pointer values).
452 llvm::sort(GV, [](const Chain *G1, const Chain *G2) {
453 if (G1->size() != G2->size())
454 return G1->size() > G2->size();
455 if (G1->requiresFixup() != G2->requiresFixup())
456 return G1->requiresFixup() > G2->requiresFixup();
457 // Make sure startsBefore() produces a stable final order.
458 assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
459 "Starts before not total order!");
460 return G1->startsBefore(G2);
461 });
462
463 Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
464 while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
465 // Start off by assuming we'll color to our own preferred color.
466 Color C = PreferredColor;
467 if (Parity == 0)
468 // But if we really don't care, use the chain's preferred color.
469 C = G->getPreferredColor();
470
471 LLVM_DEBUG(dbgs() << " - Parity=" << Parity
472 << ", Color=" << ColorNames[(int)C] << "\n");
473
474 // If we'll need a fixup FMOV, don't bother. Testing has shown that this
475 // happens infrequently and when it does it has at least a 50% chance of
476 // slowing code down instead of speeding it up.
477 if (G->requiresFixup() && C != G->getPreferredColor()) {
478 C = G->getPreferredColor();
479 LLVM_DEBUG(dbgs() << " - " << G->str()
480 << " - not worthwhile changing; "
481 "color remains "
482 << ColorNames[(int)C] << "\n");
483 }
484
485 Changed |= colorChain(G, C, MBB);
486
487 Parity += (C == Color::Even) ? G->size() : -G->size();
488 PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
489 }
490
491 return Changed;
492}
493
494int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
495 MachineBasicBlock &MBB) {
496 // Can we find an appropriate register that is available throughout the life
497 // of the chain? Simulate liveness backwards until the end of the chain.
498 LiveRegUnits Units(*TRI);
499 Units.addLiveOuts(MBB);
501 MachineBasicBlock::iterator ChainEnd = G->end();
502 while (I != ChainEnd) {
503 --I;
504 Units.stepBackward(*I);
505 }
506
507 // Check which register units are alive throughout the chain.
508 MachineBasicBlock::iterator ChainBegin = G->begin();
509 assert(ChainBegin != ChainEnd && "Chain should contain instructions");
510 do {
511 --I;
512 Units.accumulate(*I);
513 } while (I != ChainBegin);
514
515 // Make sure we allocate in-order, to get the cheapest registers first.
516 unsigned RegClassID = ChainBegin->getDesc().operands()[0].RegClass;
517 auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
518 for (auto Reg : Ord) {
519 if (!Units.available(Reg))
520 continue;
521 if (C == getColor(Reg))
522 return Reg;
523 }
524
525 return -1;
526}
527
528bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
529 MachineBasicBlock &MBB) {
530 bool Changed = false;
531 LLVM_DEBUG(dbgs() << " - colorChain(" << G->str() << ", "
532 << ColorNames[(int)C] << ")\n");
533
534 // Try and obtain a free register of the right class. Without a register
535 // to play with we cannot continue.
536 int Reg = scavengeRegister(G, C, MBB);
537 if (Reg == -1) {
538 LLVM_DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
539 return false;
540 }
541 LLVM_DEBUG(dbgs() << " - Scavenged register: " << printReg(Reg, TRI) << "\n");
542
543 std::map<unsigned, unsigned> Substs;
544 for (MachineInstr &I : *G) {
545 if (!G->contains(I) && (&I != G->getKill() || G->isKillImmutable()))
546 continue;
547
548 // I is a member of G, or I is a mutable instruction that kills G.
549
550 std::vector<unsigned> ToErase;
551 for (auto &U : I.operands()) {
552 if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
553 Register OrigReg = U.getReg();
554 U.setReg(Substs[OrigReg]);
555 if (U.isKill())
556 // Don't erase straight away, because there may be other operands
557 // that also reference this substitution!
558 ToErase.push_back(OrigReg);
559 } else if (U.isRegMask()) {
560 for (auto J : Substs) {
561 if (U.clobbersPhysReg(J.first))
562 ToErase.push_back(J.first);
563 }
564 }
565 }
566 // Now it's safe to remove the substs identified earlier.
567 for (auto J : ToErase)
568 Substs.erase(J);
569
570 // Only change the def if this isn't the last instruction.
571 if (&I != G->getKill()) {
572 MachineOperand &MO = I.getOperand(0);
573
574 bool Change = TransformAll || getColor(MO.getReg()) != C;
575 if (G->requiresFixup() && &I == G->getLast())
576 Change = false;
577
578 if (Change) {
579 Substs[MO.getReg()] = Reg;
580 MO.setReg(Reg);
581
582 Changed = true;
583 }
584 }
585 }
586 assert(Substs.size() == 0 && "No substitutions should be left active!");
587
588 if (G->getKill()) {
589 LLVM_DEBUG(dbgs() << " - Kill instruction seen.\n");
590 } else {
591 // We didn't have a kill instruction, but we didn't seem to need to change
592 // the destination register anyway.
593 LLVM_DEBUG(dbgs() << " - Destination register not changed.\n");
594 }
595 return Changed;
596}
597
598void AArch64A57FPLoadBalancing::scanInstruction(
599 MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
600 std::vector<std::unique_ptr<Chain>> &AllChains) {
601 // Inspect "MI", updating ActiveChains and AllChains.
602
603 if (isMul(MI)) {
604
605 for (auto &I : MI->uses())
606 maybeKillChain(I, Idx, ActiveChains);
607 for (auto &I : MI->defs())
608 maybeKillChain(I, Idx, ActiveChains);
609
610 // Create a new chain. Multiplies don't require forwarding so can go on any
611 // unit.
612 Register DestReg = MI->getOperand(0).getReg();
613
614 LLVM_DEBUG(dbgs() << "New chain started for register "
615 << printReg(DestReg, TRI) << " at " << *MI);
616
617 auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
618 ActiveChains[DestReg] = G.get();
619 AllChains.push_back(std::move(G));
620
621 } else if (isMla(MI)) {
622
623 // It is beneficial to keep MLAs on the same functional unit as their
624 // accumulator operand.
625 Register DestReg = MI->getOperand(0).getReg();
626 Register AccumReg = MI->getOperand(3).getReg();
627
628 maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
629 maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
630 if (DestReg != AccumReg)
631 maybeKillChain(MI->getOperand(0), Idx, ActiveChains);
632
633 if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
634 LLVM_DEBUG(dbgs() << "Chain found for accumulator register "
635 << printReg(AccumReg, TRI) << " in MI " << *MI);
636
637 // For simplicity we only chain together sequences of MULs/MLAs where the
638 // accumulator register is killed on each instruction. This means we don't
639 // need to track other uses of the registers we want to rewrite.
640 //
641 // FIXME: We could extend to handle the non-kill cases for more coverage.
642 if (MI->getOperand(3).isKill()) {
643 // Add to chain.
644 LLVM_DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
645 ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg));
646 // Handle cases where the destination is not the same as the accumulator.
647 if (DestReg != AccumReg) {
648 ActiveChains[DestReg] = ActiveChains[AccumReg];
649 ActiveChains.erase(AccumReg);
650 }
651 return;
652 }
653
655 dbgs() << "Cannot add to chain because accumulator operand wasn't "
656 << "marked <kill>!\n");
657 maybeKillChain(MI->getOperand(3), Idx, ActiveChains);
658 }
659
660 LLVM_DEBUG(dbgs() << "Creating new chain for dest register "
661 << printReg(DestReg, TRI) << "\n");
662 auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
663 ActiveChains[DestReg] = G.get();
664 AllChains.push_back(std::move(G));
665
666 } else {
667
668 // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
669 // lists.
670 for (auto &I : MI->uses())
671 maybeKillChain(I, Idx, ActiveChains);
672 for (auto &I : MI->defs())
673 maybeKillChain(I, Idx, ActiveChains);
674
675 }
676}
677
678void AArch64A57FPLoadBalancing::
679maybeKillChain(MachineOperand &MO, unsigned Idx,
680 std::map<unsigned, Chain*> &ActiveChains) {
681 // Given an operand and the set of active chains (keyed by register),
682 // determine if a chain should be ended and remove from ActiveChains.
683 MachineInstr *MI = MO.getParent();
684
685 if (MO.isReg()) {
686
687 // If this is a KILL of a current chain, record it.
688 if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
689 LLVM_DEBUG(dbgs() << "Kill seen for chain " << printReg(MO.getReg(), TRI)
690 << "\n");
691 ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
692 }
693 ActiveChains.erase(MO.getReg());
694
695 } else if (MO.isRegMask()) {
696
697 for (auto I = ActiveChains.begin(), E = ActiveChains.end();
698 I != E;) {
699 if (MO.clobbersPhysReg(I->first)) {
700 LLVM_DEBUG(dbgs() << "Kill (regmask) seen for chain "
701 << printReg(I->first, TRI) << "\n");
702 I->second->setKill(MI, Idx, /*Immutable=*/true);
703 ActiveChains.erase(I++);
704 } else
705 ++I;
706 }
707
708 }
709}
710
711Color AArch64A57FPLoadBalancing::getColor(unsigned Reg) {
712 if ((TRI->getEncodingValue(Reg) % 2) == 0)
713 return Color::Even;
714 else
715 return Color::Odd;
716}
717
718// Factory function used by AArch64TargetMachine to add the pass to the passmanager.
720 return new AArch64A57FPLoadBalancing();
721}
static bool isMul(MachineInstr *MI)
static cl::opt< unsigned > OverrideBalance("aarch64-a57-fp-load-balancing-override", cl::desc("Ignore balance information, always return " "(1: Even, 2: Odd)."), cl::init(0), cl::Hidden)
static cl::opt< bool > TransformAll("aarch64-a57-fp-load-balancing-force-all", cl::desc("Always modify dest registers regardless of color"), cl::init(false), cl::Hidden)
static bool isMla(MachineInstr *MI)
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file declares the machine register scavenger class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
MachineInstr * StartInst
The important (marker) instructions.
MachineBasicBlock::iterator begin() const
bool isKillImmutable() const
Can the Kill instruction (assuming one exists) be modified?
void add(MachineInstr *MI, unsigned Idx, Color C)
Add a new instruction into the chain.
bool contains(MachineInstr &MI)
Return true if MI is a member of the chain.
Color LastColor
The "color" of LastInst.
bool requiresFixup() const
Return true if the group will require a fixup MOV at the end.
MachineInstr * getLast() const
Return the last instruction in the chain.
bool KillIsImmutable
True if KillInst cannot be modified.
bool rangeOverlapsWith(const Chain &Other) const
Return true if this chain (StartInst..KillInst) overlaps with Other.
MachineInstr * getStart() const
Return the first instruction in the chain.
unsigned size() const
Return the number of instructions in the chain.
MachineBasicBlock::iterator end() const
Return an instruction that can be used as an iterator for the end of the chain.
void setKill(MachineInstr *MI, unsigned Idx, bool Immutable)
Inform the chain that its last active register (the dest register of LastInst) is killed by MI with n...
MachineInstr * getKill() const
Return the "kill" instruction (as set with setKill()) or NULL.
unsigned StartInstIdx
The index, from the start of the basic block, that each marker appears.
Color getPreferredColor()
Return the preferred color of this chain.
std::string str() const
Return a simple string representation of the chain.
std::set< MachineInstr * > Insts
All instructions in the chain.
Chain(MachineInstr *MI, unsigned Idx, Color C)
bool startsBefore(const Chain *Other) const
Return true if this chain starts before Other.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Representation of each machine instruction.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAArch64A57FPLoadBalancing()
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1652
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.