LLVM 22.0.0git
MIRCanonicalizerPass.cpp
Go to the documentation of this file.
1//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The purpose of this pass is to employ a canonical code transformation so
10// that code compiled with slightly different IR passes can be diffed more
11// effectively than otherwise. This is done by renaming vregs in a given
12// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13// move defs closer to their use inorder to reduce diffs caused by slightly
14// different schedules.
15//
16// Basic Usage:
17//
18// llc -o - -run-pass mir-canonicalizer example.mir
19//
20// Reorders instructions canonically.
21// Renames virtual register operands canonically.
22// Strips certain MIR artifacts (optionally).
23//
24//===----------------------------------------------------------------------===//
25
26#include "MIRVRegNamerUtils.h"
28#include "llvm/ADT/STLExtras.h"
32#include "llvm/Pass.h"
33#include "llvm/Support/Debug.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "mir-canonicalizer"
39
41 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
42 cl::value_desc("N"),
43 cl::desc("Function number to canonicalize."));
44
45namespace {
46
47class MIRCanonicalizer : public MachineFunctionPass {
48public:
49 static char ID;
50 MIRCanonicalizer() : MachineFunctionPass(ID) {}
51
52 StringRef getPassName() const override {
53 return "Rename register operands in a canonical ordering.";
54 }
55
56 void getAnalysisUsage(AnalysisUsage &AU) const override {
57 AU.setPreservesCFG();
59 }
60
61 bool runOnMachineFunction(MachineFunction &MF) override;
62};
63
64} // end anonymous namespace
65
66char MIRCanonicalizer::ID;
67
68char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
69
70INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
71 "Rename Register Operands Canonically", false, false)
72
73INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
75
77 if (MF.empty())
78 return {};
80 std::vector<MachineBasicBlock *> RPOList;
81 append_range(RPOList, RPOT);
82
83 return RPOList;
84}
85
86static bool
87rescheduleLexographically(std::vector<MachineInstr *> instructions,
89 std::function<MachineBasicBlock::iterator()> getPos) {
90
91 bool Changed = false;
92 using StringInstrPair = std::pair<std::string, MachineInstr *>;
93 std::vector<StringInstrPair> StringInstrMap;
94
95 for (auto *II : instructions) {
96 std::string S;
98 II->print(OS);
99 OS.flush();
100
101 // Trim the assignment, or start from the beginning in the case of a store.
102 const size_t i = S.find('=');
103 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
104 }
105
106 llvm::sort(StringInstrMap, llvm::less_first());
107
108 for (auto &II : StringInstrMap) {
109
110 LLVM_DEBUG({
111 dbgs() << "Splicing ";
112 II.second->dump();
113 dbgs() << " right before: ";
114 getPos()->dump();
115 });
116
117 Changed = true;
118 MBB->splice(getPos(), MBB, II.second);
119 }
120
121 return Changed;
122}
123
124static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
126
127 bool Changed = false;
128
129 // Calculates the distance of MI from the beginning of its parent BB.
130 auto getInstrIdx = [](const MachineInstr &MI) {
131 unsigned i = 0;
132 for (const auto &CurMI : *MI.getParent()) {
133 if (&CurMI == &MI)
134 return i;
135 i++;
136 }
137 return ~0U;
138 };
139
140 // Pre-Populate vector of instructions to reschedule so that we don't
141 // clobber the iterator.
142 std::vector<MachineInstr *> Instructions;
143 for (auto &MI : *MBB) {
144 Instructions.push_back(&MI);
145 }
146
147 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
148 std::map<unsigned, MachineInstr *> MultiUserLookup;
149 unsigned UseToBringDefCloserToCount = 0;
150 std::vector<MachineInstr *> PseudoIdempotentInstructions;
151 std::vector<MCRegister> PhysRegDefs;
152 for (auto *II : Instructions) {
153 for (unsigned i = 1; i < II->getNumOperands(); i++) {
154 MachineOperand &MO = II->getOperand(i);
155 if (!MO.isReg())
156 continue;
157
158 if (MO.getReg().isVirtual())
159 continue;
160
161 if (!MO.isDef())
162 continue;
163
164 PhysRegDefs.push_back(MO.getReg());
165 }
166 }
167
168 for (auto *II : Instructions) {
169 if (II->getNumOperands() == 0)
170 continue;
171 if (II->mayLoadOrStore())
172 continue;
173
174 MachineOperand &MO = II->getOperand(0);
175 if (!MO.isReg() || !MO.getReg().isVirtual())
176 continue;
177 if (!MO.isDef())
178 continue;
179
180 bool IsPseudoIdempotent = true;
181 for (unsigned i = 1; i < II->getNumOperands(); i++) {
182
183 if (II->getOperand(i).isImm()) {
184 continue;
185 }
186
187 if (II->getOperand(i).isReg()) {
188 if (!II->getOperand(i).getReg().isVirtual())
189 if (!llvm::is_contained(PhysRegDefs,
190 II->getOperand(i).getReg().asMCReg())) {
191 continue;
192 }
193 }
194
195 IsPseudoIdempotent = false;
196 break;
197 }
198
199 if (IsPseudoIdempotent) {
200 PseudoIdempotentInstructions.push_back(II);
201 continue;
202 }
203
204 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
205
206 MachineInstr *Def = II;
207 unsigned Distance = ~0U;
208 MachineInstr *UseToBringDefCloserTo = nullptr;
210 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
211 MachineInstr *UseInst = UO.getParent();
212
213 const unsigned DefLoc = getInstrIdx(*Def);
214 const unsigned UseLoc = getInstrIdx(*UseInst);
215 const unsigned Delta = (UseLoc - DefLoc);
216
217 if (UseInst->getParent() != Def->getParent())
218 continue;
219 if (DefLoc >= UseLoc)
220 continue;
221
222 if (Delta < Distance) {
223 Distance = Delta;
224 UseToBringDefCloserTo = UseInst;
225 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
226 }
227 }
228
229 const auto BBE = MBB->instr_end();
232
233 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
234
235 if (DefI != BBE && UseI != BBE)
236 break;
237
238 if (&*BBI == Def) {
239 DefI = BBI;
240 continue;
241 }
242
243 if (&*BBI == UseToBringDefCloserTo) {
244 UseI = BBI;
245 continue;
246 }
247 }
248
249 if (DefI == BBE || UseI == BBE)
250 continue;
251
252 LLVM_DEBUG({
253 dbgs() << "Splicing ";
254 DefI->dump();
255 dbgs() << " right before: ";
256 UseI->dump();
257 });
258
259 MultiUsers[UseToBringDefCloserTo].push_back(Def);
260 Changed = true;
261 MBB->splice(UseI, MBB, DefI);
262 }
263
264 // Sort the defs for users of multiple defs lexographically.
265 for (const auto &E : MultiUserLookup) {
266
267 auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
268 return &MI == E.second;
269 });
270
271 if (UseI == MBB->instr_end())
272 continue;
273
275 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.");
276 Changed |= rescheduleLexographically(
277 MultiUsers[E.second], MBB,
278 [&]() -> MachineBasicBlock::iterator { return UseI; });
279 }
280
281 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
282 LLVM_DEBUG(dbgs() << "Rescheduling Idempotent Instructions Lexographically.");
283 Changed |= rescheduleLexographically(
284 PseudoIdempotentInstructions, MBB,
285 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
286
287 return Changed;
288}
289
291 bool Changed = false;
293
294 std::vector<MachineInstr *> Copies;
295 for (MachineInstr &MI : MBB->instrs()) {
296 if (MI.isCopy())
297 Copies.push_back(&MI);
298 }
299
300 for (MachineInstr *MI : Copies) {
301
302 if (!MI->getOperand(0).isReg())
303 continue;
304 if (!MI->getOperand(1).isReg())
305 continue;
306
307 const Register Dst = MI->getOperand(0).getReg();
308 const Register Src = MI->getOperand(1).getReg();
309
310 if (!Dst.isVirtual())
311 continue;
312 if (!Src.isVirtual())
313 continue;
314 // Not folding COPY instructions if regbankselect has not set the RCs.
315 // Why are we only considering Register Classes? Because the verifier
316 // sometimes gets upset if the register classes don't match even if the
317 // types do. A future patch might add COPY folding for matching types in
318 // pre-registerbankselect code.
319 if (!MRI.getRegClassOrNull(Dst))
320 continue;
321 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
322 continue;
323
324 std::vector<MachineOperand *> Uses;
325 for (MachineOperand &MO : MRI.use_operands(Dst))
326 Uses.push_back(&MO);
327 for (auto *MO : Uses)
328 MO->setReg(Src);
329
330 Changed = true;
331 MI->eraseFromParent();
332 }
333
334 return Changed;
335}
336
338 bool Changed = false;
339
340 for (auto &MI : *MBB) {
341 for (auto &MO : MI.operands()) {
342 if (!MO.isReg())
343 continue;
344 if (!MO.isDef() && MO.isKill()) {
345 Changed = true;
346 MO.setIsKill(false);
347 }
348
349 if (MO.isDef() && MO.isDead()) {
350 Changed = true;
351 MO.setIsDead(false);
352 }
353 }
354 }
355
356 return Changed;
357}
358
360 unsigned BasicBlockNum, VRegRenamer &Renamer) {
361 LLVM_DEBUG({
362 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
363 dbgs() << "\n\n================================================\n\n";
364 });
365
366 bool Changed = false;
367
368 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n");
369
370 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
371 MBB->dump(););
372 Changed |= propagateLocalCopies(MBB);
373 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
374
375 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
376 unsigned IdempotentInstCount = 0;
377 Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
378 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
379
380 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
381
382 // TODO: Consider dropping this. Dropping kill defs is probably not
383 // semantically sound.
384 Changed |= doDefKillClear(MBB);
385
386 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
387 dbgs() << "\n");
389 dbgs() << "\n\n================================================\n\n");
390 return Changed;
391}
392
393bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
394
395 static unsigned functionNum = 0;
396 if (CanonicalizeFunctionNumber != ~0U) {
397 if (CanonicalizeFunctionNumber != functionNum++)
398 return false;
399 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
400 << "\n";);
401 }
402
403 // we need a valid vreg to create a vreg type for skipping all those
404 // stray vreg numbers so reach alignment/canonical vreg values.
405 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
406
408 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
409 dbgs() << "\n\n================================================\n\n";
410 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
411 for (auto MBB
412 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
413 << "\n\n================================================\n\n";);
414
415 unsigned BBNum = 0;
416 bool Changed = false;
418 VRegRenamer Renamer(MRI);
419 for (auto *MBB : RPOList)
420 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
421
422 return Changed;
423}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
Expand Atomic instructions
IRTranslator LLVM IR MI
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
static bool rescheduleLexographically(std::vector< MachineInstr * > instructions, MachineBasicBlock *MBB, std::function< MachineBasicBlock::iterator()> getPos)
static cl::opt< unsigned > CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("Function number to canonicalize."))
static bool doDefKillClear(MachineBasicBlock *MBB)
static bool propagateLocalCopies(MachineBasicBlock *MBB)
mir canonicalizer
mir Rename Register Operands Canonically
static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, MachineBasicBlock *MBB)
mir Rename Register Operands static false std::vector< MachineBasicBlock * > GetRPOList(MachineFunction &MF)
mir Rename Register Operands
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Remove Loads Into Fake Uses
SI Lower i1 Copies
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270
instr_iterator instr_begin()
LLVM_ABI void dump() const
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
Definition: MachineInstr.h:72
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void dump() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
VRegRenamer - This class is used for renaming vregs in a machine basic block according to semantics o...
bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum)
Same as the above, but sets a BBNum depending on BB traversal that will be used as prefix for the vre...
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
LLVM_ABI char & MIRCanonicalizerID
MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs according to the semantics of the in...
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:851
Function object to check whether the first component of a container supported by std::get (like std::...
Definition: STLExtras.h:1472