LLVM 22.0.0git
RegAllocPBQP.cpp
Go to the documentation of this file.
1//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
10// register allocator for LLVM. This allocator works by constructing a PBQP
11// problem representing the register allocation problem under consideration,
12// solving this using a PBQP solver, and mapping the solution back to a
13// register assignment. If any variables are selected for spilling then spill
14// code is inserted and the process repeated.
15//
16// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
17// for register allocation. For more information on PBQP for register
18// allocation, see the following papers:
19//
20// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
21// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
22// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
23//
24// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
25// architectures. In Proceedings of the Joint Conference on Languages,
26// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
27// NY, USA, 139-148.
28//
29//===----------------------------------------------------------------------===//
30
32#include "RegisterCoalescer.h"
33#include "llvm/ADT/ArrayRef.h"
34#include "llvm/ADT/BitVector.h"
35#include "llvm/ADT/DenseMap.h"
36#include "llvm/ADT/DenseSet.h"
37#include "llvm/ADT/STLExtras.h"
40#include "llvm/ADT/StringRef.h"
64#include "llvm/Config/llvm-config.h"
65#include "llvm/IR/Function.h"
66#include "llvm/IR/Module.h"
67#include "llvm/Pass.h"
70#include "llvm/Support/Debug.h"
74#include <algorithm>
75#include <cassert>
76#include <cstddef>
77#include <limits>
78#include <map>
79#include <memory>
80#include <queue>
81#include <set>
82#include <sstream>
83#include <string>
84#include <system_error>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "regalloc"
92
94RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
96
97static cl::opt<bool>
98PBQPCoalescing("pbqp-coalescing",
99 cl::desc("Attempt coalescing during PBQP register allocation."),
100 cl::init(false), cl::Hidden);
101
102#ifndef NDEBUG
103static cl::opt<bool>
104PBQPDumpGraphs("pbqp-dump-graphs",
105 cl::desc("Dump graphs for each function/round in the compilation unit."),
106 cl::init(false), cl::Hidden);
107#endif
108
109namespace {
110
111///
112/// PBQP based allocators solve the register allocation problem by mapping
113/// register allocation problems to Partitioned Boolean Quadratic
114/// Programming problems.
115class RegAllocPBQP : public MachineFunctionPass {
116public:
117 static char ID;
118
119 /// Construct a PBQP register allocator.
120 RegAllocPBQP(char *cPassID = nullptr)
121 : MachineFunctionPass(ID), customPassID(cPassID) {
126 }
127
128 /// Return the pass name.
129 StringRef getPassName() const override { return "PBQP Register Allocator"; }
130
131 /// PBQP analysis usage.
132 void getAnalysisUsage(AnalysisUsage &au) const override;
133
134 /// Perform register allocation
135 bool runOnMachineFunction(MachineFunction &MF) override;
136
138 return MachineFunctionProperties().setNoPHIs();
139 }
140
142 return MachineFunctionProperties().setIsSSA();
143 }
144
145private:
146 using RegSet = std::set<Register>;
147
148 char *customPassID;
149
150 RegSet VRegsToAlloc, EmptyIntervalVRegs;
151
152 /// Inst which is a def of an original reg and whose defs are already all
153 /// dead after remat is saved in DeadRemats. The deletion of such inst is
154 /// postponed till all the allocations are done, so its remat expr is
155 /// always available for the remat of all the siblings of the original reg.
157
158 /// Finds the initial set of vreg intervals to allocate.
159 void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
160
161 /// Constructs an initial graph.
162 void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
163
164 /// Spill the given VReg.
165 void spillVReg(Register VReg, SmallVectorImpl<Register> &NewIntervals,
167 Spiller &VRegSpiller);
168
169 /// Given a solved PBQP problem maps this solution back to a register
170 /// assignment.
171 bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
172 const PBQP::Solution &Solution,
173 VirtRegMap &VRM,
174 Spiller &VRegSpiller);
175
176 /// Postprocessing before final spilling. Sets basic block "live in"
177 /// variables.
178 void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
179 VirtRegMap &VRM) const;
180
181 void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
182};
183
184char RegAllocPBQP::ID = 0;
185
186/// Set spill costs for each node in the PBQP reg-alloc graph.
187class SpillCosts : public PBQPRAConstraint {
188public:
189 void apply(PBQPRAGraph &G) override {
190 LiveIntervals &LIS = G.getMetadata().LIS;
191
192 // A minimum spill costs, so that register constraints can be set
193 // without normalization in the [0.0:MinSpillCost( interval.
194 const PBQP::PBQPNum MinSpillCost = 10.0;
195
196 for (auto NId : G.nodeIds()) {
197 PBQP::PBQPNum SpillCost =
198 LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight();
199 if (SpillCost == 0.0)
200 SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
201 else
202 SpillCost += MinSpillCost;
203 PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
204 NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
205 G.setNodeCosts(NId, std::move(NodeCosts));
206 }
207 }
208};
209
210/// Add interference edges between overlapping vregs.
211class Interference : public PBQPRAConstraint {
212private:
213 using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
214 using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
215 using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
216 using DisjointAllowedRegsCache = DenseSet<IKey>;
217 using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
218 using IEdgeCache = DenseSet<IEdgeKey>;
219
220 bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
222 const DisjointAllowedRegsCache &D) const {
223 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
224 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
225
226 if (NRegs == MRegs)
227 return false;
228
229 if (NRegs < MRegs)
230 return D.contains(IKey(NRegs, MRegs));
231
232 return D.contains(IKey(MRegs, NRegs));
233 }
234
235 void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
237 DisjointAllowedRegsCache &D) {
238 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
239 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
240
241 assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
242
243 if (NRegs < MRegs)
244 D.insert(IKey(NRegs, MRegs));
245 else
246 D.insert(IKey(MRegs, NRegs));
247 }
248
249 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
250 // for the fast interference graph construction algorithm. The last is there
251 // to save us from looking up node ids via the VRegToNode map in the graph
252 // metadata.
253 using IntervalInfo =
254 std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
255
256 static SlotIndex getStartPoint(const IntervalInfo &I) {
257 return std::get<0>(I)->segments[std::get<1>(I)].start;
258 }
259
260 static SlotIndex getEndPoint(const IntervalInfo &I) {
261 return std::get<0>(I)->segments[std::get<1>(I)].end;
262 }
263
264 static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
265 return std::get<2>(I);
266 }
267
268 static bool lowestStartPoint(const IntervalInfo &I1,
269 const IntervalInfo &I2) {
270 // Condition reversed because priority queue has the *highest* element at
271 // the front, rather than the lowest.
272 return getStartPoint(I1) > getStartPoint(I2);
273 }
274
275 static bool lowestEndPoint(const IntervalInfo &I1,
276 const IntervalInfo &I2) {
277 SlotIndex E1 = getEndPoint(I1);
278 SlotIndex E2 = getEndPoint(I2);
279
280 if (E1 < E2)
281 return true;
282
283 if (E1 > E2)
284 return false;
285
286 // If two intervals end at the same point, we need a way to break the tie or
287 // the set will assume they're actually equal and refuse to insert a
288 // "duplicate". Just compare the vregs - fast and guaranteed unique.
289 return std::get<0>(I1)->reg() < std::get<0>(I2)->reg();
290 }
291
292 static bool isAtLastSegment(const IntervalInfo &I) {
293 return std::get<1>(I) == std::get<0>(I)->size() - 1;
294 }
295
296 static IntervalInfo nextSegment(const IntervalInfo &I) {
297 return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
298 }
299
300public:
301 void apply(PBQPRAGraph &G) override {
302 // The following is loosely based on the linear scan algorithm introduced in
303 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
304 // isn't linear, because the size of the active set isn't bound by the
305 // number of registers, but rather the size of the largest clique in the
306 // graph. Still, we expect this to be better than N^2.
307 LiveIntervals &LIS = G.getMetadata().LIS;
308
309 // Interferenc matrices are incredibly regular - they're only a function of
310 // the allowed sets, so we cache them to avoid the overhead of constructing
311 // and uniquing them.
312 IMatrixCache C;
313
314 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
315 // cache locally edges we have already seen.
316 IEdgeCache EC;
317
318 // Cache known disjoint allowed registers pairs
319 DisjointAllowedRegsCache D;
320
321 using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
322 using IntervalQueue =
323 std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
324 decltype(&lowestStartPoint)>;
325 IntervalSet Active(lowestEndPoint);
326 IntervalQueue Inactive(lowestStartPoint);
327
328 // Start by building the inactive set.
329 for (auto NId : G.nodeIds()) {
330 Register VReg = G.getNodeMetadata(NId).getVReg();
331 LiveInterval &LI = LIS.getInterval(VReg);
332 assert(!LI.empty() && "PBQP graph contains node for empty interval");
333 Inactive.push(std::make_tuple(&LI, 0, NId));
334 }
335
336 while (!Inactive.empty()) {
337 // Tentatively grab the "next" interval - this choice may be overriden
338 // below.
339 IntervalInfo Cur = Inactive.top();
340
341 // Retire any active intervals that end before Cur starts.
342 IntervalSet::iterator RetireItr = Active.begin();
343 while (RetireItr != Active.end() &&
344 (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
345 // If this interval has subsequent segments, add the next one to the
346 // inactive list.
347 if (!isAtLastSegment(*RetireItr))
348 Inactive.push(nextSegment(*RetireItr));
349
350 ++RetireItr;
351 }
352 Active.erase(Active.begin(), RetireItr);
353
354 // One of the newly retired segments may actually start before the
355 // Cur segment, so re-grab the front of the inactive list.
356 Cur = Inactive.top();
357 Inactive.pop();
358
359 // At this point we know that Cur overlaps all active intervals. Add the
360 // interference edges.
361 PBQP::GraphBase::NodeId NId = getNodeId(Cur);
362 for (const auto &A : Active) {
363 PBQP::GraphBase::NodeId MId = getNodeId(A);
364
365 // Do not add an edge when the nodes' allowed registers do not
366 // intersect: there is obviously no interference.
367 if (haveDisjointAllowedRegs(G, NId, MId, D))
368 continue;
369
370 // Check that we haven't already added this edge
371 IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
372 if (EC.count(EK))
373 continue;
374
375 // This is a new edge - add it to the graph.
376 if (!createInterferenceEdge(G, NId, MId, C))
377 setDisjointAllowedRegs(G, NId, MId, D);
378 else
379 EC.insert(EK);
380 }
381
382 // Finally, add Cur to the Active set.
383 Active.insert(Cur);
384 }
385 }
386
387private:
388 // Create an Interference edge and add it to the graph, unless it is
389 // a null matrix, meaning the nodes' allowed registers do not have any
390 // interference. This case occurs frequently between integer and floating
391 // point registers for example.
392 // return true iff both nodes interferes.
393 bool createInterferenceEdge(PBQPRAGraph &G,
395 IMatrixCache &C) {
396 const TargetRegisterInfo &TRI =
397 *G.getMetadata().MF.getSubtarget().getRegisterInfo();
398 const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
399 const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
400
401 // Try looking the edge costs up in the IMatrixCache first.
402 IKey K(&NRegs, &MRegs);
403 IMatrixCache::iterator I = C.find(K);
404 if (I != C.end()) {
405 G.addEdgeBypassingCostAllocator(NId, MId, I->second);
406 return true;
407 }
408
409 PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
410 bool NodesInterfere = false;
411 for (unsigned I = 0; I != NRegs.size(); ++I) {
412 MCRegister PRegN = NRegs[I];
413 for (unsigned J = 0; J != MRegs.size(); ++J) {
414 MCRegister PRegM = MRegs[J];
415 if (TRI.regsOverlap(PRegN, PRegM)) {
416 M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
417 NodesInterfere = true;
418 }
419 }
420 }
421
422 if (!NodesInterfere)
423 return false;
424
425 PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
426 C[K] = G.getEdgeCostsPtr(EId);
427
428 return true;
429 }
430};
431
432class Coalescing : public PBQPRAConstraint {
433public:
434 void apply(PBQPRAGraph &G) override {
435 MachineFunction &MF = G.getMetadata().MF;
436 MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
438
439 // Scan the machine function and add a coalescing cost whenever CoalescerPair
440 // gives the Ok.
441 for (const auto &MBB : MF) {
442 for (const auto &MI : MBB) {
443 // Skip not-coalescable or already coalesced copies.
444 if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
445 continue;
446
447 Register DstReg = CP.getDstReg();
448 Register SrcReg = CP.getSrcReg();
449
451
452 if (CP.isPhys()) {
453 if (!MF.getRegInfo().isAllocatable(DstReg))
454 continue;
455
456 PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
457
458 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
459 G.getNodeMetadata(NId).getAllowedRegs();
460
461 unsigned PRegOpt = 0;
462 while (PRegOpt < Allowed.size() && Allowed[PRegOpt].id() != DstReg)
463 ++PRegOpt;
464
465 if (PRegOpt < Allowed.size()) {
466 PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
467 NewCosts[PRegOpt + 1] -= CBenefit;
468 G.setNodeCosts(NId, std::move(NewCosts));
469 }
470 } else {
471 PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
472 PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
473 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
474 &G.getNodeMetadata(N1Id).getAllowedRegs();
475 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
476 &G.getNodeMetadata(N2Id).getAllowedRegs();
477
478 PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
479 if (EId == G.invalidEdgeId()) {
480 PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
481 Allowed2->size() + 1, 0);
482 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
483 G.addEdge(N1Id, N2Id, std::move(Costs));
484 } else {
485 if (G.getEdgeNode1Id(EId) == N2Id) {
486 std::swap(N1Id, N2Id);
487 std::swap(Allowed1, Allowed2);
488 }
489 PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
490 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
491 G.updateEdgeCosts(EId, std::move(Costs));
492 }
493 }
494 }
495 }
496 }
497
498private:
499 void addVirtRegCoalesce(
500 PBQPRAGraph::RawMatrix &CostMat,
501 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
502 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
503 PBQP::PBQPNum Benefit) {
504 assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
505 assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
506 for (unsigned I = 0; I != Allowed1.size(); ++I) {
507 MCRegister PReg1 = Allowed1[I];
508 for (unsigned J = 0; J != Allowed2.size(); ++J) {
509 MCRegister PReg2 = Allowed2[J];
510 if (PReg1 == PReg2)
511 CostMat[I + 1][J + 1] -= Benefit;
512 }
513 }
514 }
515};
516
517/// PBQP-specific implementation of weight normalization.
518class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo {
519 float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override {
520 // All intervals have a spill weight that is mostly proportional to the
521 // number of uses, with uses in loops having a bigger weight.
522 return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1);
523 }
524
525public:
526 PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
527 const MachineLoopInfo &Loops,
528 const MachineBlockFrequencyInfo &MBFI)
529 : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {}
530};
531} // end anonymous namespace
532
533// Out-of-line destructor/anchor for PBQPRAConstraint.
535
536void PBQPRAConstraint::anchor() {}
537
538void PBQPRAConstraintList::anchor() {}
539
540void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
541 au.setPreservesCFG();
548 //au.addRequiredID(SplitCriticalEdgesID);
549 if (customPassID)
550 au.addRequiredID(*customPassID);
562}
563
564void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
565 LiveIntervals &LIS) {
566 const MachineRegisterInfo &MRI = MF.getRegInfo();
567
568 // Iterate over all live ranges.
569 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
571 if (MRI.reg_nodbg_empty(Reg))
572 continue;
573 VRegsToAlloc.insert(Reg);
574 }
575}
576
578 const TargetRegisterInfo &TRI,
579 const MachineFunction &MF) {
580 const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
581 for (unsigned i = 0; CSR[i] != 0; ++i)
582 if (TRI.regsOverlap(Reg, CSR[i]))
583 return true;
584 return false;
585}
586
587void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
588 Spiller &VRegSpiller) {
589 MachineFunction &MF = G.getMetadata().MF;
590
591 LiveIntervals &LIS = G.getMetadata().LIS;
592 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
593 const TargetRegisterInfo &TRI =
594 *G.getMetadata().MF.getSubtarget().getRegisterInfo();
595
596 std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
597
598 std::map<Register, std::vector<MCRegister>> VRegAllowedMap;
599
600 while (!Worklist.empty()) {
601 Register VReg = Worklist.back();
602 Worklist.pop_back();
603
604 LiveInterval &VRegLI = LIS.getInterval(VReg);
605
606 // If this is an empty interval move it to the EmptyIntervalVRegs set then
607 // continue.
608 if (VRegLI.empty()) {
609 EmptyIntervalVRegs.insert(VRegLI.reg());
610 VRegsToAlloc.erase(VRegLI.reg());
611 continue;
612 }
613
614 const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
615
616 // Record any overlaps with regmask operands.
617 BitVector RegMaskOverlaps;
618 LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
619
620 // Compute an initial allowed set for the current vreg.
621 std::vector<MCRegister> VRegAllowed;
622 ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
623 for (MCPhysReg R : RawPRegOrder) {
624 MCRegister PReg(R);
625 if (MRI.isReserved(PReg))
626 continue;
627
628 // vregLI crosses a regmask operand that clobbers preg.
629 if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
630 continue;
631
632 // vregLI overlaps fixed regunit interference.
633 bool Interference = false;
634 for (MCRegUnit Unit : TRI.regunits(PReg)) {
635 if (VRegLI.overlaps(LIS.getRegUnit(Unit))) {
636 Interference = true;
637 break;
638 }
639 }
640 if (Interference)
641 continue;
642
643 // preg is usable for this virtual register.
644 VRegAllowed.push_back(PReg);
645 }
646
647 // Check for vregs that have no allowed registers. These should be
648 // pre-spilled and the new vregs added to the worklist.
649 if (VRegAllowed.empty()) {
651 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
652 llvm::append_range(Worklist, NewVRegs);
653 continue;
654 }
655
656 VRegAllowedMap[VReg.id()] = std::move(VRegAllowed);
657 }
658
659 for (auto &KV : VRegAllowedMap) {
660 auto VReg = KV.first;
661
662 // Move empty intervals to the EmptyIntervalVReg set.
663 if (LIS.getInterval(VReg).empty()) {
664 EmptyIntervalVRegs.insert(VReg);
665 VRegsToAlloc.erase(VReg);
666 continue;
667 }
668
669 auto &VRegAllowed = KV.second;
670
671 PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
672
673 // Tweak cost of callee saved registers, as using then force spilling and
674 // restoring them. This would only happen in the prologue / epilogue though.
675 for (unsigned i = 0; i != VRegAllowed.size(); ++i)
676 if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
677 NodeCosts[1 + i] += 1.0;
678
679 PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
680 G.getNodeMetadata(NId).setVReg(VReg);
681 G.getNodeMetadata(NId).setAllowedRegs(
682 G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
683 G.getMetadata().setNodeIdForVReg(VReg, NId);
684 }
685}
686
687void RegAllocPBQP::spillVReg(Register VReg,
688 SmallVectorImpl<Register> &NewIntervals,
690 VirtRegMap &VRM, Spiller &VRegSpiller) {
691 VRegsToAlloc.erase(VReg);
692 LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
693 nullptr, &DeadRemats);
694 VRegSpiller.spill(LRE);
695
697 (void)TRI;
698 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
699 << LRE.getParent().weight() << ", New vregs: ");
700
701 // Copy any newly inserted live intervals into the list of regs to
702 // allocate.
703 for (const Register &R : LRE) {
704 const LiveInterval &LI = LIS.getInterval(R);
705 assert(!LI.empty() && "Empty spill range.");
706 LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " ");
707 VRegsToAlloc.insert(LI.reg());
708 }
709
710 LLVM_DEBUG(dbgs() << ")\n");
711}
712
713bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
714 const PBQP::Solution &Solution,
715 VirtRegMap &VRM,
716 Spiller &VRegSpiller) {
717 MachineFunction &MF = G.getMetadata().MF;
718 LiveIntervals &LIS = G.getMetadata().LIS;
720 (void)TRI;
721
722 // Set to true if we have any spills
723 bool AnotherRoundNeeded = false;
724
725 // Clear the existing allocation.
726 VRM.clearAllVirt();
727
728 // Iterate over the nodes mapping the PBQP solution to a register
729 // assignment.
730 for (auto NId : G.nodeIds()) {
731 Register VReg = G.getNodeMetadata(NId).getVReg();
732 unsigned AllocOpt = Solution.getSelection(NId);
733
734 if (AllocOpt != PBQP::RegAlloc::getSpillOptionIdx()) {
735 MCRegister PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1];
736 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
737 << TRI.getName(PReg) << "\n");
738 assert(PReg != 0 && "Invalid preg selected.");
739 VRM.assignVirt2Phys(VReg, PReg);
740 } else {
741 // Spill VReg. If this introduces new intervals we'll need another round
742 // of allocation.
744 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
745 AnotherRoundNeeded |= !NewVRegs.empty();
746 }
747 }
748
749 return !AnotherRoundNeeded;
750}
751
752void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
753 LiveIntervals &LIS,
754 VirtRegMap &VRM) const {
756
757 // First allocate registers for the empty intervals.
758 for (const Register &R : EmptyIntervalVRegs) {
759 LiveInterval &LI = LIS.getInterval(R);
760
761 Register PReg = MRI.getSimpleHint(LI.reg());
762
763 if (PReg == 0) {
764 const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg());
765 const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
766 for (MCRegister CandidateReg : RawPRegOrder) {
767 if (!VRM.getRegInfo().isReserved(CandidateReg)) {
768 PReg = CandidateReg;
769 break;
770 }
771 }
772 assert(PReg &&
773 "No un-reserved physical registers in this register class");
774 }
775
776 VRM.assignVirt2Phys(LI.reg(), PReg);
777 }
778}
779
780void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
781 VRegSpiller.postOptimization();
782 /// Remove dead defs because of rematerialization.
783 for (auto *DeadInst : DeadRemats) {
784 LIS.RemoveMachineInstrFromMaps(*DeadInst);
785 DeadInst->eraseFromParent();
786 }
787 DeadRemats.clear();
788}
789
790bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
791 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
793 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
794
795 auto &LiveStks = getAnalysis<LiveStacksWrapperLegacy>().getLS();
796 auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
797
798 VirtRegMap &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
799
800 PBQPVirtRegAuxInfo VRAI(
801 MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
802 VRAI.calculateSpillWeightsAndHints();
803
804 // FIXME: we create DefaultVRAI here to match existing behavior pre-passing
805 // the VRAI through the spiller to the live range editor. However, it probably
806 // makes more sense to pass the PBQP VRAI. The existing behavior had
807 // LiveRangeEdit make its own VirtRegAuxInfo object.
808 VirtRegAuxInfo DefaultVRAI(
809 MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
810 std::unique_ptr<Spiller> VRegSpiller(
811 createInlineSpiller({LIS, LiveStks, MDT, MBFI}, MF, VRM, DefaultVRAI));
812
814
815 LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
816
817 // Allocator main loop:
818 //
819 // * Map current regalloc problem to a PBQP problem
820 // * Solve the PBQP problem
821 // * Map the solution back to a register allocation
822 // * Spill if necessary
823 //
824 // This process is continued till no more spills are generated.
825
826 // Find the vreg intervals in need of allocation.
827 findVRegIntervalsToAlloc(MF, LIS);
828
829#ifndef NDEBUG
830 const Function &F = MF.getFunction();
831 std::string FullyQualifiedName =
832 F.getParent()->getModuleIdentifier() + "." + F.getName().str();
833#endif
834
835 // If there are non-empty intervals allocate them using pbqp.
836 if (!VRegsToAlloc.empty()) {
837 const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
838 std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
839 std::make_unique<PBQPRAConstraintList>();
840 ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>());
841 ConstraintsRoot->addConstraint(std::make_unique<Interference>());
842 if (PBQPCoalescing)
843 ConstraintsRoot->addConstraint(std::make_unique<Coalescing>());
844 ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
845
846 bool PBQPAllocComplete = false;
847 unsigned Round = 0;
848
849 while (!PBQPAllocComplete) {
850 LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
851 (void) Round;
852
854 initializeGraph(G, VRM, *VRegSpiller);
855 ConstraintsRoot->apply(G);
856
857#ifndef NDEBUG
858 if (PBQPDumpGraphs) {
859 std::ostringstream RS;
860 RS << Round;
861 std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
862 ".pbqpgraph";
863 std::error_code EC;
864 raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_TextWithCRLF);
865 LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
866 << GraphFileName << "\"\n");
867 G.dump(OS);
868 }
869#endif
870
872 PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
873 ++Round;
874 }
875 }
876
877 // Finalise allocation, allocate empty ranges.
878 finalizeAlloc(MF, LIS, VRM);
879 postOptimization(*VRegSpiller, LIS);
880 VRegsToAlloc.clear();
881 EmptyIntervalVRegs.clear();
882
883 LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
884
885 return true;
886}
887
888/// Create Printable object for node and register info.
891 return Printable([NId, &G](raw_ostream &OS) {
892 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
893 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
894 Register VReg = G.getNodeMetadata(NId).getVReg();
895 const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
896 OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
897 });
898}
899
900#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
902 for (auto NId : nodeIds()) {
903 const Vector &Costs = getNodeCosts(NId);
904 assert(Costs.getLength() != 0 && "Empty vector in graph.");
905 OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
906 }
907 OS << '\n';
908
909 for (auto EId : edgeIds()) {
910 NodeId N1Id = getEdgeNode1Id(EId);
911 NodeId N2Id = getEdgeNode2Id(EId);
912 assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
913 const Matrix &M = getEdgeCosts(EId);
914 assert(M.getRows() != 0 && "No rows in matrix.");
915 assert(M.getCols() != 0 && "No cols in matrix.");
916 OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
917 OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
918 OS << M << '\n';
919 }
920}
921
923 dump(dbgs());
924}
925#endif
926
928 OS << "graph {\n";
929 for (auto NId : nodeIds()) {
930 OS << " node" << NId << " [ label=\""
931 << PrintNodeInfo(NId, *this) << "\\n"
932 << getNodeCosts(NId) << "\" ]\n";
933 }
934
935 OS << " edge [ len=" << nodeIds().size() << " ]\n";
936 for (auto EId : edgeIds()) {
937 OS << " node" << getEdgeNode1Id(EId)
938 << " -- node" << getEdgeNode2Id(EId)
939 << " [ label=\"";
940 const Matrix &EdgeCosts = getEdgeCosts(EId);
941 for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
942 OS << EdgeCosts.getRowAsVector(i) << "\\n";
943 }
944 OS << "\" ]\n";
945 }
946 OS << "}\n";
947}
948
950 return new RegAllocPBQP(customPassID);
951}
952
955}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
Hexagon Hardware Loops
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Register const TargetRegisterInfo * TRI
Branch Coalescing
static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, const PBQP::RegAlloc::PBQPRAGraph &G)
Create Printable object for node and register info.
static cl::opt< bool > PBQPDumpGraphs("pbqp-dump-graphs", cl::desc("Dump graphs for each function/round in the compilation unit."), cl::init(false), cl::Hidden)
static cl::opt< bool > PBQPCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden)
static bool isACalleeSavedRegister(MCRegister Reg, const TargetRegisterInfo &TRI, const MachineFunction &MF)
static RegisterRegAlloc RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:284
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool test(unsigned Idx) const
Definition: BitVector.h:461
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition: BitVector.h:156
A helper class for register coalescers.
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:74
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:690
float weight() const
Definition: LiveInterval.h:722
Register reg() const
Definition: LiveInterval.h:721
LLVM_ABI bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
bool empty() const
Definition: LiveInterval.h:384
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:450
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
double getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual MachineFunctionProperties getClearedProperties() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI void freezeReservedRegs()
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Abstract base for classes implementing PBQP register allocation constraints (e.g.
virtual ~PBQPRAConstraint()=0
virtual void apply(PBQPRAGraph &G)=0
typename SolverT::GraphMetadata GraphMetadata
Definition: Graph.h:59
typename SolverT::Matrix Matrix
Definition: Graph.h:54
NodeId getEdgeNode2Id(EdgeId EId) const
Get the second node connected to this edge.
Definition: Graph.h:552
NodeId getEdgeNode1Id(EdgeId EId) const
Get the first node connected to this edge.
Definition: Graph.h:545
const Matrix & getEdgeCosts(EdgeId EId) const
Get an edge's cost matrix.
Definition: Graph.h:530
typename SolverT::Vector Vector
Definition: Graph.h:53
typename SolverT::RawMatrix RawMatrix
Definition: Graph.h:52
typename SolverT::RawVector RawVector
Definition: Graph.h:51
const Vector & getNodeCosts(NodeId NId) const
Get a node's cost vector.
Definition: Graph.h:488
Holds a vector of the allowed physical regs for a vreg.
Definition: RegAllocPBQP.h:94
void printDot(raw_ostream &OS) const
Print a representation of this graph in DOT format.
void dump() const
Dump this graph to dbgs().
Represents a solution to a PBQP problem.
Definition: Solution.h:26
unsigned getSelection(GraphBase::NodeId nodeId) const
Get a node's selection.
Definition: Solution.h:45
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
Simple wrapper around std::function<void(raw_ostream&)>.
Definition: Printable.h:38
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:67
constexpr unsigned id() const
Definition: Register.h:95
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:66
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Spiller interface.
Definition: Spiller.h:33
virtual void postOptimization()
Definition: Spiller.h:49
virtual void spill(LiveRangeEdit &LRE, AllocationOrder *Order=nullptr)=0
spill - Spill the LRE.getParent() live interval.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
ArrayRef< MCPhysReg > getRawAllocationOrder(const MachineFunction &MF, bool Rev=false) const
Returns the preferred order for allocating registers from this register class in MF.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::unique_ptr< PBQPRAConstraint > getCustomPBQPConstraints() const
Return PBQPConstraint(s) for the target.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Calculate auxiliary information for a virtual register such as its spill weight and allocation hint.
virtual float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr)
Weight normalization function.
void clearAllVirt()
clears all virtual to physical register mappings
Definition: VirtRegMap.h:125
MachineRegisterInfo & getRegInfo() const
Definition: VirtRegMap.h:80
LLVM_ABI void assignVirt2Phys(Register virtReg, MCRegister physReg)
creates a mapping for the specified virtual register to the specified physical register
Definition: VirtRegMap.cpp:86
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:461
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Solution solve(PBQPRAGraph &G)
Definition: RegAllocPBQP.h:520
unsigned getSpillOptionIdx()
Spill option index.
Definition: RegAllocPBQP.h:48
float PBQPNum
Definition: Math.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition: FileSystem.h:771
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
LLVM_ABI void initializeLiveIntervalsWrapperPassPass(PassRegistry &)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI FunctionPass * createDefaultPBQPRegisterAllocator()
PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean Quadratic Prograaming (PBQ...
FunctionPass * createPBQPRegisterAllocator(char *customPassID=nullptr)
Create a PBQP register allocator instance.
LLVM_ABI void initializeSlotIndexesWrapperPassPass(PassRegistry &)
Spiller * createInlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix=nullptr)
Create and return a spiller that will insert spill code directly instead of deferring though VirtRegM...
LLVM_ABI void initializeLiveStacksWrapperLegacyPass(PassRegistry &)
LLVM_ABI void initializeVirtRegMapWrapperLegacyPass(PassRegistry &)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858