LLVM 22.0.0git
MIRSampleProfile.cpp
Go to the documentation of this file.
1//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the implementation of the MIRSampleProfile loader, mainly
10// for flow sensitive SampleFDO.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
25#include "llvm/CodeGen/Passes.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/PseudoProbe.h"
30#include "llvm/Support/Debug.h"
35#include <optional>
36
37using namespace llvm;
38using namespace sampleprof;
39using namespace llvm::sampleprofutil;
41
42#define DEBUG_TYPE "fs-profile-loader"
43
45 "show-fs-branchprob", cl::Hidden, cl::init(false),
46 cl::desc("Print setting flow sensitive branch probabilities"));
48 "fs-profile-debug-prob-diff-threshold", cl::init(10),
50 "Only show debug message if the branch probability is greater than "
51 "this value (in percentage)."));
52
54 "fs-profile-debug-bw-threshold", cl::init(10000),
55 cl::desc("Only show debug message if the source branch weight is greater "
56 " than this value."));
57
58static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
59 cl::init(false),
60 cl::desc("View BFI before MIR loader"));
61static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
62 cl::init(false),
63 cl::desc("View BFI after MIR loader"));
64
65namespace llvm {
67}
69
71 "Load MIR Sample Profile",
72 /* cfg = */ false, /* is_analysis = */ false)
79 /* cfg = */ false, /* is_analysis = */ false)
80
82
84llvm::createMIRProfileLoaderPass(std::string File, std::string RemappingFile,
86 IntrusiveRefCntPtr<vfs::FileSystem> FS) {
87 return new MIRProfileLoaderPass(File, RemappingFile, P, std::move(FS));
88}
89
90namespace llvm {
91
92// Internal option used to control BFI display only after MBP pass.
93// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
94// -view-block-layout-with-bfi={none | fraction | integer | count}
96
97// Command line option to specify the name of the function for CFG dump
98// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
100
101std::optional<PseudoProbe> extractProbe(const MachineInstr &MI) {
102 if (MI.isPseudoProbe()) {
103 PseudoProbe Probe;
104 Probe.Id = MI.getOperand(1).getImm();
105 Probe.Type = MI.getOperand(2).getImm();
106 Probe.Attr = MI.getOperand(3).getImm();
107 Probe.Factor = 1;
108 DILocation *DebugLoc = MI.getDebugLoc();
109 Probe.Discriminator = DebugLoc ? DebugLoc->getDiscriminator() : 0;
110 return Probe;
111 }
112
113 // Ignore callsite probes since they do not have FS discriminators.
114 return std::nullopt;
115}
116
117namespace afdo_detail {
118template <> struct IRTraits<MachineBasicBlock> {
134 static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
137 }
139 return BB->predecessors();
140 }
142 return BB->successors();
143 }
144};
145} // namespace afdo_detail
146
148 : public SampleProfileLoaderBaseImpl<MachineFunction> {
149public:
153 DT = MDT;
154 PDT = MPDT;
155 LI = MLI;
156 BFI = MBFI;
157 ORE = MORE;
158 }
160 P = Pass;
163 assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
164 }
165
169 std::move(FS)) {}
170
173 bool doInitialization(Module &M);
174 bool isValid() const { return ProfileIsValid; }
175
176protected:
178
179 /// Hold the information of the basic block frequency.
181
182 /// PassNum is the sequence number of this pass invocation, starting from 1.
184
185 // LowBit in the FS discriminator used by this instance. Note the number is
186 // 0-based. The base discriminator uses bit 0 to bit 11.
187 unsigned LowBit;
188 // HighBit in the FS discriminator used by this instance. Note the number
189 // is 0-based.
190 unsigned HighBit;
191
192 bool ProfileIsValid = true;
195 return getProbeWeight(MI);
196 if (ImprovedFSDiscriminator && MI.isMetaInstruction())
197 return std::error_code();
198 return getInstWeightImpl(MI);
199 }
200};
201
202template <>
204 MachineFunction &F) {}
205
207 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
208 for (auto &BI : F) {
209 MachineBasicBlock *BB = &BI;
210 if (BB->succ_size() < 2)
211 continue;
212 const MachineBasicBlock *EC = EquivalenceClass[BB];
213 uint64_t BBWeight = BlockWeights[EC];
214 uint64_t SumEdgeWeight = 0;
215 for (MachineBasicBlock *Succ : BB->successors()) {
216 Edge E = std::make_pair(BB, Succ);
217 SumEdgeWeight += EdgeWeights[E];
218 }
219
220 if (BBWeight != SumEdgeWeight) {
221 LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight="
222 << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
223 << "\n");
224 BBWeight = SumEdgeWeight;
225 }
226 if (BBWeight == 0) {
227 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
228 continue;
229 }
230
231#ifndef NDEBUG
232 uint64_t BBWeightOrig = BBWeight;
233#endif
234 uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
235 uint32_t Factor = 1;
236 if (BBWeight > MaxWeight) {
237 Factor = BBWeight / MaxWeight + 1;
238 BBWeight /= Factor;
239 LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
240 }
241
243 SE = BB->succ_end();
244 SI != SE; ++SI) {
245 MachineBasicBlock *Succ = *SI;
246 Edge E = std::make_pair(BB, Succ);
247 uint64_t EdgeWeight = EdgeWeights[E];
248 EdgeWeight /= Factor;
249
250 assert(BBWeight >= EdgeWeight &&
251 "BBweight is larger than EdgeWeight -- should not happen.\n");
252
253 BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
254 BranchProbability NewProb(EdgeWeight, BBWeight);
255 if (OldProb == NewProb)
256 continue;
257 BB->setSuccProbability(SI, NewProb);
258#ifndef NDEBUG
259 if (!ShowFSBranchProb)
260 continue;
261 bool Show = false;
263 if (OldProb > NewProb)
264 Diff = OldProb - NewProb;
265 else
266 Diff = NewProb - OldProb;
268 Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);
269
270 auto DIL = BB->findBranchDebugLoc();
271 auto SuccDIL = Succ->findBranchDebugLoc();
272 if (Show) {
273 dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
274 << Succ->getNumber() << "): ";
275 if (DIL)
276 dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
277 << DIL->getColumn();
278 if (SuccDIL)
279 dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
280 << ":" << SuccDIL->getColumn();
281 dbgs() << " W=" << BBWeightOrig << " " << OldProb << " --> " << NewProb
282 << "\n";
283 }
284#endif
285 }
286 }
287}
288
290 auto &Ctx = M.getContext();
291
293 Filename, Ctx, *FS, P, RemappingFilename);
294 if (std::error_code EC = ReaderOrErr.getError()) {
295 std::string Msg = "Could not open profile: " + EC.message();
296 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
297 return false;
298 }
299
300 Reader = std::move(ReaderOrErr.get());
301 Reader->setModule(&M);
303
304 // Load pseudo probe descriptors for probe-based function samples.
305 if (Reader->profileIsProbeBased()) {
306 ProbeManager = std::make_unique<PseudoProbeManager>(M);
307 if (!ProbeManager->moduleIsProbed(M)) {
308 return false;
309 }
310 }
311
312 return true;
313}
314
316 // Do not load non-FS profiles. A line or probe can get a zero-valued
317 // discriminator at certain pass which could result in accidentally loading
318 // the corresponding base counter in the non-FS profile, while a non-zero
319 // discriminator would end up getting zero samples. This could in turn undo
320 // the sample distribution effort done by previous BFI maintenance and the
321 // probe distribution factor work for pseudo probes.
322 if (!Reader->profileIsFS())
323 return false;
324
325 Function &Func = MF.getFunction();
326 clearFunctionData(false);
327 Samples = Reader->getSamplesFor(Func);
328 if (!Samples || Samples->empty())
329 return false;
330
332 if (!ProbeManager->profileIsValid(MF.getFunction(), *Samples))
333 return false;
334 } else {
335 if (getFunctionLoc(MF) == 0)
336 return false;
337 }
338
339 DenseSet<GlobalValue::GUID> InlinedGUIDs;
340 bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
341
342 // Set the new BPI, BFI.
343 setBranchProbs(MF);
344
345 return Changed;
346}
347
348} // namespace llvm
349
351 std::string FileName, std::string RemappingFileName, FSDiscriminatorPass P,
353 : MachineFunctionPass(ID), ProfileFileName(FileName), P(P) {
354 LowBit = getFSPassBitBegin(P);
355 HighBit = getFSPassBitEnd(P);
356
357 auto VFS = FS ? std::move(FS) : vfs::getRealFileSystem();
358 MIRSampleLoader = std::make_unique<MIRProfileLoader>(
359 FileName, RemappingFileName, std::move(VFS));
360 assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
361}
362
363bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
364 if (!MIRSampleLoader->isValid())
365 return false;
366
367 LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
368 << MF.getFunction().getName() << "\n");
369 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
370 auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
371 auto *MPDT =
372 &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
373
374 MF.RenumberBlocks();
375 MDT->updateBlockNumbers();
376 MPDT->updateBlockNumbers();
377
378 MIRSampleLoader->setInitVals(
379 MDT, MPDT, &getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI,
380 &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
381
383 (ViewBlockFreqFuncName.empty() ||
385 MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
386 }
387
388 bool Changed = MIRSampleLoader->runOnFunction(MF);
389 if (Changed)
390 MBFI->calculate(MF, *MBFI->getMBPI(),
391 *&getAnalysis<MachineLoopInfoWrapperPass>().getLI());
392
394 (ViewBlockFreqFuncName.empty() ||
396 MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
397 }
398
399 return Changed;
400}
401
402bool MIRProfileLoaderPass::doInitialization(Module &M) {
403 LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
404 << "\n");
405
406 MIRSampleLoader->setFSPass(P);
407 return MIRSampleLoader->doInitialization(M);
408}
409
410void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
411 AU.setPreservesAll();
418}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
Load MIR Sample Profile
static cl::opt< bool > ShowFSBranchProb("show-fs-branchprob", cl::Hidden, cl::init(false), cl::desc("Print setting flow sensitive branch probabilities"))
static cl::opt< bool > ViewBFIAfter("fs-viewbfi-after", cl::Hidden, cl::init(false), cl::desc("View BFI after MIR loader"))
static cl::opt< unsigned > FSProfileDebugBWThreshold("fs-profile-debug-bw-threshold", cl::init(10000), cl::desc("Only show debug message if the source branch weight is greater " " than this value."))
static cl::opt< unsigned > FSProfileDebugProbDiffThreshold("fs-profile-debug-prob-diff-threshold", cl::init(10), cl::desc("Only show debug message if the branch probability is greater than " "this value (in percentage)."))
#define DEBUG_TYPE
static cl::opt< bool > ViewBFIBefore("fs-viewbfi-before", cl::Hidden, cl::init(false), cl::desc("View BFI before MIR loader"))
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Defines the virtual file system interface vfs::FileSystem.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
Debug location.
A debug info location.
Definition: DebugLoc.h:124
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Diagnostic information for the sample profiler.
Represents either an error or a value T.
Definition: ErrorOr.h:56
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
Class to represent profile counts.
Definition: Function.h:297
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
MIRProfileLoaderPass(std::string FileName="", std::string RemappingFileName="", FSDiscriminatorPass P=FSDiscriminatorPass::Pass1, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
FS bits will only use the '1' bits in the Mask.
MIRProfileLoader(StringRef Name, StringRef RemapName, IntrusiveRefCntPtr< vfs::FileSystem > FS)
void setBranchProbs(MachineFunction &F)
ErrorOr< uint64_t > getInstWeight(const MachineInstr &MI) override
bool runOnFunction(MachineFunction &F)
MachineBlockFrequencyInfo * BFI
Hold the information of the basic block frequency.
FSDiscriminatorPass P
PassNum is the sequence number this pass is called, start from 1.
bool doInitialization(Module &M)
void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT, MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI, MachineOptimizationRemarkEmitter *MORE)
void setFSPass(FSDiscriminatorPass Pass)
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
unsigned succ_size() const
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI void view(const Twine &Name, bool isSimple=true) const
Pop up a ghostview window with the current block frequency propagation rendered using dot.
LLVM_ABI const MachineBranchProbabilityInfo * getMBPI() const
LLVM_ABI void calculate(const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI, const MachineLoopInfo &MLI)
calculate - compute block frequency info for the given function.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
Representation of each machine instruction.
Definition: MachineInstr.h:72
Diagnostic information for optimization analysis remarks.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:99
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
void computeDominanceAndLoopInfo(FunctionT &F)
IntrusiveRefCntPtr< vfs::FileSystem > FS
VirtualFileSystem to load profile files from.
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
OptRemarkEmitterT * ORE
Optimization Remark Emitter used to emit diagnostic remarks.
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
std::string Filename
Name of the profile file to load.
virtual ErrorOr< uint64_t > getProbeWeight(const InstructionT &Inst)
std::string RemappingFilename
Name of the profile remapping file to load.
FunctionSamples * Samples
Samples collected for the body of this function.
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
A range adaptor for a pair of iterators.
static LLVM_ABI bool ProfileIsProbeBased
Definition: SampleProf.h:1198
static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static unsigned getFSPassBitBegin(sampleprof::FSDiscriminatorPass P)
Definition: Discriminator.h:94
LLVM_ABI char & MIRProfileLoaderPassID
This pass reads flow sensitive profile.
static unsigned getFSPassBitEnd(sampleprof::FSDiscriminatorPass P)
Definition: Discriminator.h:87
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
cl::opt< bool > ImprovedFSDiscriminator("improved-fs-discriminator", cl::Hidden, cl::init(false), cl::desc("New FS discriminators encoding (incompatible with the original " "encoding)"))
LLVM_ABI FunctionPass * createMIRProfileLoaderPass(std::string File, std::string RemappingFile, sampleprof::FSDiscriminatorPass P, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Read Flow Sensitive Profile.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1886
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
#define MORE()
Definition: regcomp.c:246
uint32_t Discriminator
Definition: PseudoProbe.h:122
static PredRangeT getPredecessors(MachineBasicBlock *BB)
static SuccRangeT getSuccessors(MachineBasicBlock *BB)
static const MachineBasicBlock * getEntryBB(const MachineFunction *F)
static Function & getFunction(MachineFunction &F)