LLVM 22.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except
15// for the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of an edge on the spanning tree can be
17// derived from those edges not on the spanning tree. Knuth proves this method
18// instruments the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked
34// at the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44// classes of PGOEdge and BBInfo, respectively. They contain extra data
45// structures used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/StringSet.h"
59#include "llvm/ADT/Twine.h"
60#include "llvm/ADT/iterator.h"
64#include "llvm/Analysis/CFG.h"
69#include "llvm/IR/Attributes.h"
70#include "llvm/IR/BasicBlock.h"
71#include "llvm/IR/CFG.h"
72#include "llvm/IR/Comdat.h"
73#include "llvm/IR/Constant.h"
74#include "llvm/IR/Constants.h"
76#include "llvm/IR/Dominators.h"
78#include "llvm/IR/Function.h"
79#include "llvm/IR/GlobalAlias.h"
80#include "llvm/IR/GlobalValue.h"
82#include "llvm/IR/IRBuilder.h"
83#include "llvm/IR/InstVisitor.h"
84#include "llvm/IR/InstrTypes.h"
85#include "llvm/IR/Instruction.h"
88#include "llvm/IR/Intrinsics.h"
89#include "llvm/IR/LLVMContext.h"
90#include "llvm/IR/MDBuilder.h"
91#include "llvm/IR/Module.h"
92#include "llvm/IR/PassManager.h"
95#include "llvm/IR/Type.h"
96#include "llvm/IR/Value.h"
100#include "llvm/Support/CRC.h"
101#include "llvm/Support/Casting.h"
105#include "llvm/Support/Debug.h"
106#include "llvm/Support/Error.h"
118#include <algorithm>
119#include <cassert>
120#include <cstdint>
121#include <memory>
122#include <numeric>
123#include <optional>
124#include <stack>
125#include <string>
126#include <unordered_map>
127#include <utility>
128#include <vector>
129
130using namespace llvm;
133
134#define DEBUG_TYPE "pgo-instrumentation"
135
136STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
137STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
138STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
139STATISTIC(NumOfPGOEdge, "Number of edges.");
140STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
141STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
142STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
143STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
144STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
145STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
146STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
147STATISTIC(NumOfCSPGOSelectInsts,
148 "Number of select instruction instrumented in CSPGO.");
149STATISTIC(NumOfCSPGOMemIntrinsics,
150 "Number of mem intrinsics instrumented in CSPGO.");
151STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
152STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
153STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
154STATISTIC(NumOfCSPGOFunc,
155 "Number of functions having valid profile counts in CSPGO.");
156STATISTIC(NumOfCSPGOMismatch,
157 "Number of functions having mismatch profile in CSPGO.");
158STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
159STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
160
161// Command line option to specify the file to read profile from. This is
162// mainly used for testing.
164 "pgo-test-profile-file", cl::init(""), cl::Hidden,
165 cl::value_desc("filename"),
166 cl::desc("Specify the path of profile data file. This is "
167 "mainly for test purpose."));
169 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
170 cl::value_desc("filename"),
171 cl::desc("Specify the path of profile remapping file. This is mainly for "
172 "test purpose."));
173
174// Command line option to disable value profiling. The default is false:
175// i.e. value profiling is enabled by default. This is for debug purpose.
176static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
178 cl::desc("Disable Value Profiling"));
179
180// Command line option to set the maximum number of VP annotations to write to
181// the metadata for a single indirect call callsite.
183 "icp-max-annotations", cl::init(3), cl::Hidden,
184 cl::desc("Max number of annotations for a single indirect "
185 "call callsite"));
186
187// Command line option to set the maximum number of value annotations
188// to write to the metadata for a single memop intrinsic.
// NOTE(review): the line that opens this option's definition is not visible in
// this listing (the embedded numbering skips 189).
// The two adjacent string literals below are concatenated by the compiler, so
// the first one must end with a space; without it the help text read
// "...a single memopintrinsic".
190 "memop-max-annotations", cl::init(4), cl::Hidden,
191 cl::desc("Max number of precise value annotations for a single memop "
192 "intrinsic"));
193
194// Command line option to control appending FunctionHash to the name of a COMDAT
195// function. This is to avoid the hash mismatch caused by the preinliner.
197 "do-comdat-renaming", cl::init(false), cl::Hidden,
198 cl::desc("Append function hash to the name of COMDAT function to avoid "
199 "function hash mismatch due to the preinliner"));
200
201namespace llvm {
202// Command line option to enable/disable the warning about missing profile
203// information.
204cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
206 cl::desc("Use this option to turn on/off "
207 "warnings about missing profile data for "
208 "functions."));
209
210// Command line option to enable/disable the warning about a hash mismatch in
211// the profile data.
213 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
214 cl::desc("Use this option to turn off/on "
215 "warnings about profile cfg mismatch."));
216
217// Command line option to enable/disable the warning about a hash mismatch in
218// the profile data for Comdat functions, which often turns out to be false
219// positive due to the pre-instrumentation inline.
221 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
222 cl::desc("The option is used to turn on/off "
223 "warnings about hash mismatch for comdat "
224 "or weak functions."));
225} // namespace llvm
226
227// Command line option to enable/disable select instruction instrumentation.
228static cl::opt<bool>
229 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
230 cl::desc("Use this option to turn on/off SELECT "
231 "instruction instrumentation. "));
232
233// Command line option to turn on CFG dot or text dump of raw profile counts
235 "pgo-view-raw-counts", cl::Hidden,
236 cl::desc("A boolean option to show CFG dag or text "
237 "with raw profile counts from "
238 "profile data. See also option "
239 "-pgo-view-counts. To limit graph "
240 "display to only one function, use "
241 "filtering option -view-bfi-func-name."),
242 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
243 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
244 clEnumValN(PGOVCT_Text, "text", "show in text.")));
245
246// Command line option to enable/disable memop intrinsic call.size profiling.
247static cl::opt<bool>
248 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
249 cl::desc("Use this option to turn on/off "
250 "memory intrinsic size profiling."));
251
252// Emit branch probability as optimization remarks.
253static cl::opt<bool>
254 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
255 cl::desc("When this option is on, the annotated "
256 "branch probability will be emitted as "
257 "optimization remarks: -{Rpass|"
258 "pass-remarks}=pgo-instrumentation"));
259
261 "pgo-instrument-entry", cl::init(false), cl::Hidden,
262 cl::desc("Force to instrument function entry basicblock."));
263
264static cl::opt<bool>
265 PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
267 cl::desc("Force to instrument loop entries."));
268
270 "pgo-function-entry-coverage", cl::Hidden,
271 cl::desc(
272 "Use this option to enable function entry coverage instrumentation."));
273
275 "pgo-block-coverage",
276 cl::desc("Use this option to enable basic block coverage instrumentation"));
277
278static cl::opt<bool>
279 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
280 cl::desc("Create a dot file of CFGs with block "
281 "coverage inference information"));
282
284 "pgo-temporal-instrumentation",
285 cl::desc("Use this option to enable temporal instrumentation"));
286
287static cl::opt<bool>
288 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
289 cl::desc("Fix function entry count in profile use."));
290
292 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
293 cl::desc("Print out the non-match BFI count if a hot raw profile count "
294 "becomes non-hot, or a cold raw profile count becomes hot. "
295 "The print is enabled under -Rpass-analysis=pgo, or "
296 "internal option -pass-remarks-analysis=pgo."));
297
299 "pgo-verify-bfi", cl::init(false), cl::Hidden,
300 cl::desc("Print out mismatched BFI counts after setting profile metadata "
301 "The print is enabled under -Rpass-analysis=pgo, or "
302 "internal option -pass-remarks-analysis=pgo."));
303
305 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
306 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
307 "mismatched BFI if the difference percentage is greater than "
308 "this value (in percentage)."));
309
311 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
312 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
313 "profile count value is below."));
314
316 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
317 cl::value_desc("function name"),
318 cl::desc("Trace the hash of the function with this name."));
319
321 "pgo-function-size-threshold", cl::Hidden,
322 cl::desc("Do not instrument functions smaller than this threshold."));
323
325 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
326 cl::desc("Do not instrument functions with the number of critical edges "
327 " greater than this threshold."));
328
330 "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden,
331 cl::desc("For cold function instrumentation, skip instrumenting functions "
332 "whose entry count is above the given value."));
333
335 "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden,
336 cl::desc("For cold function instrumentation, treat count unknown(e.g. "
337 "unprofiled) functions as cold."));
338
340 "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
341 cl::desc("Enable cold function only instrumentation."));
342
344 "ctx-prof-skip-callsite-instr", cl::Hidden,
345 cl::desc("Do not instrument callsites to functions in this list. Intended "
346 "for testing."));
347
349
350namespace llvm {
351// Command line option to turn on CFG dot dump after profile annotation.
352// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
354
355// Command line option to specify the name of the function for CFG dump
356// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
358
359// Command line option to enable vtable value profiling. Defined in
360// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
365} // namespace llvm
366
367namespace {
// Bundles the per-function inputs (module, function, comdat membership, the
// BPI/BFI/LoopInfo analysis pointers, which default to nullptr, and the
// instrumentation type) used to instrument one function. The work itself is
// done by instrument(), which is only declared here.
// NOTE(review): the embedded listing numbers skip 371 and 397, so one member
// declaration (the constructor's initializer list sets a `TLI` member) and the
// first line of the constructor's parameter list are not visible in this
// excerpt.
368class FunctionInstrumenter final {
369 Module &M;
370 Function &F;
372 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
373 BranchProbabilityInfo *const BPI;
374 BlockFrequencyInfo *const BFI;
375 LoopInfo *const LI;
376
377 const PGOInstrumentationType InstrumentationType;
378
379 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
380 // Ctx profiling implicitly captures indirect call cases, but not other
381 // values. Supporting other values is relatively straight-forward - just
382 // another counter range within the context.
 // Value profiling is off when -disable-vp is set or when instrumenting for
 // contextual profiling (CTXPROF).
383 bool isValueProfilingDisabled() const {
384 return DisableValueProfiling ||
385 InstrumentationType == PGOInstrumentationType::CTXPROF;
386 }
387
 // The entry block is instrumented when PGOInstrumentEntry is set or when
 // instrumenting for contextual profiling (CTXPROF).
388 bool shouldInstrumentEntryBB() const {
389 return PGOInstrumentEntry ||
390 InstrumentationType == PGOInstrumentationType::CTXPROF;
391 }
392
 // Loop entries are instrumented only when -pgo-instrument-loop-entries is on.
393 bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
394
395public:
 // Stores the given references/pointers; no work is done at construction.
 // The instrumentation type defaults to PGOInstrumentationType::FDO.
396 FunctionInstrumenter(
398 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
399 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
400 LoopInfo *LI = nullptr,
401 PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
402 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
403 LI(LI), InstrumentationType(InstrumentationType) {}
404
 // Instruments function F; defined out of line.
405 void instrument();
406};
407} // namespace
408
409// Return a string describing the branch condition that can be
410// used in static branch probability heuristics:
411static std::string getBranchCondString(Instruction *TI) {
412 BranchInst *BI = dyn_cast<BranchInst>(TI);
413 if (!BI || !BI->isConditional())
414 return std::string();
415
416 Value *Cond = BI->getCondition();
417 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
418 if (!CI)
419 return std::string();
420
421 std::string result;
422 raw_string_ostream OS(result);
423 OS << CI->getPredicate() << "_";
424 CI->getOperand(0)->getType()->print(OS, true);
425
426 Value *RHS = CI->getOperand(1);
427 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
428 if (CV) {
429 if (CV->isZero())
430 OS << "_Zero";
431 else if (CV->isOne())
432 OS << "_One";
433 else if (CV->isMinusOne())
434 OS << "_MinusOne";
435 else
436 OS << "_Const";
437 }
438 return result;
439}
440
// Table of description strings for the value-profiling kinds. The
// VALUE_PROF_KIND macro defined below keeps only the Descr field of each
// (Enumerator, Value, Descr) entry.
// NOTE(review): the line that would expand the macro into the initializer list
// is not visible in this listing (the embedded numbering skips 443) --
// presumably an #include of the kind list; confirm against the real file.
441static const char *ValueProfKindDescr[] = {
442#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
444};
445
446// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
447// aware this is an ir_level profile so it can set the version flag.
// The variable holds INSTR_PROF_RAW_VERSION with VARIANT_MASK_* bits OR-ed in
// to describe the instrumentation flavor, and the created global is returned.
// NOTE(review): the embedded listing numbers skip 449, 459, 461, 463, 466, 468
// and 476. The line carrying the function name and first parameter, the
// conditions guarding several of the mask updates below, and the argument of
// the GPU-target visibility call are therefore not visible in this excerpt.
// Do not read the unguarded `ProfileVersion |= ...` lines as unconditional.
448static GlobalVariable *
450 PGOInstrumentationType InstrumentationType) {
451 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
452 Type *IntTy64 = Type::getInt64Ty(M.getContext());
 // Base value: raw profile version tagged as an IR-level profile.
453 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
 // Context-sensitive FDO adds the CSIR flag.
454 if (InstrumentationType == PGOInstrumentationType::CSFDO)
455 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
 // Same condition as FunctionInstrumenter::shouldInstrumentEntryBB().
456 if (PGOInstrumentEntry ||
457 InstrumentationType == PGOInstrumentationType::CTXPROF)
458 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
 // (guard on listing line 459 not visible)
460 ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
 // (guard on listing line 461 not visible)
462 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
 // (guard on listing line 463 not visible)
464 ProfileVersion |=
465 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
 // (guard on listing line 466 not visible)
467 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
 // (guard on listing line 468 not visible)
469 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
 // A constant (third argument `true`) 64-bit global with weak-any linkage,
 // initialized to the computed version word.
470 auto IRLevelVersionVariable = new GlobalVariable(
471 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
472 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
473 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
 // GPU profiling targets override the visibility; the chosen value is on a
 // listing line (476) that is not visible here.
474 if (isGPUProfTarget(M))
475 IRLevelVersionVariable->setVisibility(
477
 // On targets that support COMDAT, switch to external linkage and put the
 // variable into a comdat named after the variable itself.
478 Triple TT(M.getTargetTriple());
479 if (TT.supportsCOMDAT()) {
480 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
481 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
482 }
483 return IRLevelVersionVariable;
484}
485
486namespace {
487
488/// The select instruction visitor plays three roles specified
489/// by the mode. In \c VM_counting mode, it simply counts the number of
490/// select instructions. In \c VM_instrument mode, it inserts code to count
491/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
492/// it reads the profile data and annotates the select instruction with
/// metadata.
493enum VisitMode { VM_counting, VM_instrument, VM_annotate };
// Forward declaration: SelectInstVisitor stores a pointer to a PGOUseFunc.
494class PGOUseFunc;
495
496/// Instruction Visitor class to visit select instructions.
497struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
498 Function &F;
499 unsigned NSIs = 0; // Number of select instructions instrumented.
500 VisitMode Mode = VM_counting; // Visiting mode.
501 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
502 unsigned TotalNumCtrs = 0; // Total number of counters
503 GlobalValue *FuncNameVar = nullptr;
504 uint64_t FuncHash = 0;
505 PGOUseFunc *UseFunc = nullptr;
506 bool HasSingleByteCoverage;
507
508 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
509 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
510
511 void countSelects() {
512 NSIs = 0;
513 Mode = VM_counting;
514 visit(F);
515 }
516
517 // Visit the IR stream and instrument all select instructions. \p
518 // Ind is a pointer to the counter index variable; \p TotalNC
519 // is the total number of counters; \p FNV is the pointer to the
520 // PGO function name var; \p FHash is the function hash.
521 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV,
522 uint64_t FHash) {
523 Mode = VM_instrument;
524 CurCtrIdx = Ind;
525 TotalNumCtrs = TotalNC;
526 FuncHash = FHash;
527 FuncNameVar = FNV;
528 visit(F);
529 }
530
531 // Visit the IR stream and annotate all select instructions.
532 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
533 Mode = VM_annotate;
534 UseFunc = UF;
535 CurCtrIdx = Ind;
536 visit(F);
537 }
538
539 void instrumentOneSelectInst(SelectInst &SI);
540 void annotateOneSelectInst(SelectInst &SI);
541
542 // Visit \p SI instruction and perform tasks according to visit mode.
543 void visitSelectInst(SelectInst &SI);
544
545 // Return the number of select instructions. This needs be called after
546 // countSelects().
547 unsigned getNumOfSelectInsts() const { return NSIs; }
548};
549
550/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
551/// based instrumentation.
552/// Note that the CFG can be a multi-graph. So there might be multiple edges
553/// with the same SrcBB and DestBB.
554struct PGOEdge {
555 BasicBlock *SrcBB;
556 BasicBlock *DestBB;
557 uint64_t Weight;
558 bool InMST = false;
559 bool Removed = false;
560 bool IsCritical = false;
561
562 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
563 : SrcBB(Src), DestBB(Dest), Weight(W) {}
564
565 /// Return the information string of an edge.
566 std::string infoString() const {
567 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
568 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
569 .str();
570 }
571};
572
573/// This class stores the auxiliary information for each BB in the MST.
// NOTE(review): the embedded listing numbering skips 576, so the declaration
// of the `Index` member (initialized by the constructor and printed by
// infoString()) is not visible in this excerpt.
574struct PGOBBInfo {
 // Starts out pointing at the object itself (see the constructor).
 // Presumably the representative pointer of a union-find structure used while
 // building the MST -- confirm against CFGMST.h.
575 PGOBBInfo *Group;
577 uint32_t Rank = 0;
578
 // \p IX becomes the Index of this BB; each object starts as its own Group.
579 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
580
581 /// Return the information string of this object.
582 std::string infoString() const {
583 return (Twine("Index=") + Twine(Index)).str();
584 }
585};
586
587// Per-function PGO state: the CFG spanning tree (MST), the CFG hash, the
// value-profiling sites, the select-instruction visitor and the PGO function
// names. The CFG it describes can be a multi-graph. According to the file
// header, this class holds the code shared by the instrumentation-generation
// and profile-use passes.
// NOTE(review): the embedded listing numbers skip 598, 615 and 669, so the
// declarations of the `VPC` and `MST` members (both initialized by the
// constructor) and the condition guarding the IPVK_VTableTarget assignment are
// not visible in this excerpt.
588template <class Edge, class BBInfo> class FuncPGOInstrumentation {
589private:
590 Function &F;
591
592 // Whether this is context-sensitive instrumentation.
593 bool IsCS;
594
595 // A map that stores the Comdat group in function F.
596 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
597
599
 // Computes FunctionHash; called from the constructor.
600 void computeCFGHash();
 // Called from the constructor when ComdatMembers is non-empty.
601 void renameComdatFunction();
602
603public:
604 const TargetLibraryInfo &TLI;
 // One candidate list per value-profiling kind (sized IPVK_Last + 1).
605 std::vector<std::vector<VPCandidateInfo>> ValueSites;
606 SelectInstVisitor SIVisitor;
 // Result of getIRPGOFuncName(F); renameComdatFunction() may append the hash.
607 std::string FuncName;
 // Result of getPGOFuncName(F).
608 std::string DeprecatedFuncName;
 // NOTE(review): only assigned when the constructor is called with
 // CreateGlobalVar == true; there is no default initializer, so it is
 // otherwise left uninitialized.
609 GlobalVariable *FuncNameVar;
610
611 // CFG hash value for this function.
612 uint64_t FunctionHash = 0;
613
614 // The Minimum Spanning Tree of function CFG.
616
 // Engaged only when single-byte coverage was requested (see constructBCI).
617 const std::optional<BlockCoverageInference> BCI;
618
 // Returns a BlockCoverageInference for \p Func when \p HasSingleByteCoverage
 // is set, and an empty optional otherwise.
619 static std::optional<BlockCoverageInference>
620 constructBCI(Function &Func, bool HasSingleByteCoverage,
621 bool InstrumentFuncEntry) {
622 if (HasSingleByteCoverage)
623 return BlockCoverageInference(Func, InstrumentFuncEntry);
624 return {};
625 }
626
627 // Collect all the BBs that will be instrumented, and store them in
628 // InstrumentBBs.
629 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
630
631 // Give an edge, find the BB that will be instrumented.
632 // Return nullptr if there is no BB to be instrumented.
633 BasicBlock *getInstrBB(Edge *E);
634
635 // Return the auxiliary BB information.
636 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
637
638 // Return the auxiliary BB information if available.
639 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
640
641 // Dump edges and BB information.
642 void dumpInfo(StringRef Str = "") const {
643 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
644 " Hash: " + Twine(FunctionHash) + "\t" + Str);
645 }
646
 // Builds the MST and (optionally) the block-coverage inference, counts
 // selects, collects value-profiling sites, updates the statistics counters,
 // computes the names and CFG hash, optionally renames a comdat function, and
 // creates the function-name variable when \p CreateGlobalVar is set.
647 FuncPGOInstrumentation(
648 Function &Func, TargetLibraryInfo &TLI,
649 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
650 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
651 BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
652 bool IsCS = false, bool InstrumentFuncEntry = true,
653 bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
654 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
655 TLI(TLI), ValueSites(IPVK_Last + 1),
656 SIVisitor(Func, HasSingleByteCoverage),
657 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
658 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
659 if (BCI && PGOViewBlockCoverageGraph)
660 BCI->viewBlockCoverageGraph();
661 // This should be done before CFG hash computation.
662 SIVisitor.countSelects();
663 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
 // The non-CS and CS variants update separate statistics; indirect-call
 // (and vtable) sites are collected only in the non-CS case.
664 if (!IsCS) {
665 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
666 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
667 NumOfPGOBB += MST.bbInfoSize();
668 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
 // (guard on listing line 669 not visible)
670 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
671 } else {
672 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
673 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
674 NumOfCSPGOBB += MST.bbInfoSize();
675 }
676
677 FuncName = getIRPGOFuncName(F);
678 DeprecatedFuncName = getPGOFuncName(F);
679 computeCFGHash();
680 if (!ComdatMembers.empty())
681 renameComdatFunction();
682 LLVM_DEBUG(dumpInfo("after CFGMST"));
683
 // Count live edges, and separately those that are not on the spanning tree
 // (the ones that will need a counter).
684 for (const auto &E : MST.allEdges()) {
685 if (E->Removed)
686 continue;
687 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
688 if (!E->InMST)
689 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
690 }
691
692 if (CreateGlobalVar)
693 FuncNameVar = createPGOFuncNameVar(F, FuncName);
694 }
695};
696
697} // end anonymous namespace
698
699// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
700// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
701// of selects, indirect calls, mem ops and edges.
// NOTE(review): the code below shifts the second CRC left by 28 (not 32) and
// ADDS it to the first, so the two CRCs overlap in bits 28-31, and the top four
// bits of the result are then cleared. The "lower/higher 32 bits" wording above
// is therefore only approximate.
// NOTE(review): the embedded listing numbers skip 722 and 741, so the statement
// that fills `Data` from `Num` inside the lambda, and the statement controlled
// by `if (IsCS)`, are not visible in this excerpt.
702template <class Edge, class BBInfo>
703void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
704 std::vector<uint8_t> Indexes;
705 JamCRC JC;
 // Serialize, least-significant byte first, the 32-bit Index of every
 // successor block that has BB info; successors without info are skipped.
706 for (auto &BB : F) {
707 for (BasicBlock *Succ : successors(&BB)) {
708 auto BI = findBBInfo(Succ);
709 if (BI == nullptr)
710 continue;
711 uint32_t Index = BI->Index;
712 for (int J = 0; J < 4; J++)
713 Indexes.push_back((uint8_t)(Index >> (J * 8)));
714 }
715 }
716 JC.update(Indexes);
717
718 JamCRC JCH;
719 // The higher 32 bits.
 // Feeds one 64-bit quantity into JCH as an 8-byte buffer.
720 auto updateJCH = [&JCH](uint64_t Num) {
721 uint8_t Data[8];
723 JCH.update(Data);
724 };
725 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
726 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
727 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
 // With block coverage inference the instrumented-blocks hash replaces the
 // edge count as the last ingredient.
728 if (BCI) {
729 updateJCH(BCI->getInstrumentedBlocksHash());
730 } else {
731 updateJCH((uint64_t)MST.numEdges());
732 }
733
734 // Hash format for context sensitive profile. Reserve 4 bits for other
735 // information.
736 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
737
738 // Reserve bit 60-63 for other information purpose.
739 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
 // (the statement controlled by this `if` is on listing line 741, which is not
 // visible; it is NOT the LLVM_DEBUG below)
740 if (IsCS)
742 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
743 << " CRC = " << JC.getCRC()
744 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
745 << ", Edges = " << MST.numEdges() << ", ICSites = "
746 << ValueSites[IPVK_IndirectCallTarget].size()
747 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
748 << ", High32 CRC = " << JCH.getCRC()
749 << ", Hash = " << FunctionHash << "\n";);
750
 // Unconditional (not debug-only) trace for functions whose name contains the
 // -pgo-trace-func-hash value; "-" disables it.
751 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
752 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
753 << " in building " << F.getParent()->getSourceFileName() << "\n";
754}
755
756// Check if we can safely rename this Comdat function.
757static bool canRenameComdat(
758 Function &F,
759 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
760 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
761 return false;
762
763 // FIXME: Current only handle those Comdat groups that only containing one
764 // function.
765 // (1) For a Comdat group containing multiple functions, we need to have a
766 // unique postfix based on the hashes for each function. There is a
767 // non-trivial code refactoring to do this efficiently.
768 // (2) Variables can not be renamed, so we can not rename Comdat function in a
769 // group including global vars.
770 Comdat *C = F.getComdat();
771 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
772 assert(!isa<GlobalAlias>(CM.second));
773 Function *FM = dyn_cast<Function>(CM.second);
774 if (FM != &F)
775 return false;
776 }
777 return true;
778}
779
780// Append the CFGHash to the Comdat function name.
// Does nothing unless canRenameComdat() allows it. Otherwise both the IR name
// of F and the cached FuncName get a ".<FunctionHash>" suffix, and F is placed
// in a comdat that carries the same suffix.
// NOTE(review): the embedded listing numbers skip 789, 797 and 799, so some
// statements are not visible in this excerpt. `OrigName` is unused in the
// visible code and is presumably consumed on one of those lines; the linkage
// change described in the comment before the `!F.hasComdat()` branch is not
// visible either.
781template <class Edge, class BBInfo>
782void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
783 if (!canRenameComdat(F, ComdatMembers))
784 return;
785 std::string OrigName = F.getName().str();
786 std::string NewFuncName =
787 Twine(F.getName() + "." + Twine(FunctionHash)).str();
788 F.setName(Twine(NewFuncName));
790 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
791 Comdat *NewComdat;
792 Module *M = F.getParent();
793 // For AvailableExternallyLinkage functions, change the linkage to
794 // LinkOnceODR and put them into comdat. This is because after renaming, there
795 // is no backup external copy available for the function.
796 if (!F.hasComdat()) {
 // The new comdat is named after the renamed function.
798 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
800 F.setComdat(NewComdat);
801 return;
802 }
803
804 // This function belongs to a single function Comdat group.
805 Comdat *OrigComdat = F.getComdat();
806 std::string NewComdatName =
807 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
808 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
 // Preserve the selection kind of the comdat being replaced.
809 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
810
 // Move every recorded member of the original group to the new comdat.
811 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
812 // Must be a function.
813 cast<Function>(CM.second)->setComdat(NewComdat);
814 }
815}
816
817/// Collect all the BBs that will be instruments and add them to
818/// `InstrumentBBs`.
819template <class Edge, class BBInfo>
820void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
821 std::vector<BasicBlock *> &InstrumentBBs) {
822 if (BCI) {
823 for (auto &BB : F)
824 if (BCI->shouldInstrumentBlock(BB))
825 InstrumentBBs.push_back(&BB);
826 return;
827 }
828
829 // Use a worklist as we will update the vector during the iteration.
830 std::vector<Edge *> EdgeList;
831 EdgeList.reserve(MST.numEdges());
832 for (const auto &E : MST.allEdges())
833 EdgeList.push_back(E.get());
834
835 for (auto &E : EdgeList) {
836 BasicBlock *InstrBB = getInstrBB(E);
837 if (InstrBB)
838 InstrumentBBs.push_back(InstrBB);
839 }
840}
841
842// Given a CFG edge E to be instrumented, find which BB to place the
// instrumented code in. The function will split the critical edge if necessary.
// Returns nullptr when the edge needs no counter (it is on the spanning tree or
// already removed), when the chosen block has no usable insertion point, or
// when a critical edge cannot be split.
// NOTE(review): the embedded listing numbering skips 879, so the opening of the
// debug statement inside the `!InstrBB` branch is not visible in this excerpt.
844template <class Edge, class BBInfo>
845BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
846 if (E->InMST || E->Removed)
847 return nullptr;
848
849 BasicBlock *SrcBB = E->SrcBB;
850 BasicBlock *DestBB = E->DestBB;
851 // For a fake edge, instrument the real BB.
852 if (SrcBB == nullptr)
853 return DestBB;
854 if (DestBB == nullptr)
855 return SrcBB;
856
 // Maps a block to itself if it can hold instrumentation, else to nullptr.
857 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
858 // There are basic blocks (such as catchswitch) cannot be instrumented.
859 // If the returned first insertion point is the end of BB, skip this BB.
860 if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
861 return nullptr;
862 return BB;
863 };
864
865 // Instrument the SrcBB if it has a single successor,
866 // otherwise, the DestBB if this is not a critical edge.
867 Instruction *TI = SrcBB->getTerminator();
868 if (TI->getNumSuccessors() <= 1)
869 return canInstrument(SrcBB);
870 if (!E->IsCritical)
871 return canInstrument(DestBB);
872
873 // Some IndirectBr critical edges cannot be split by the previous
874 // SplitIndirectBrCriticalEdges call. Bail out.
875 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
 // Never attempt to split an edge leaving an indirectbr; otherwise
 // SplitCriticalEdge's result (checked for null below) is the new block.
876 BasicBlock *InstrBB =
877 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
878 if (!InstrBB) {
880 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
881 return nullptr;
882 }
883 // For a critical edge, we have to split. Instrument the newly
884 // created BB.
885 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
886 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
887 << " --> " << getBBInfo(DestBB).Index << "\n");
888 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
889 MST.addEdge(SrcBB, InstrBB, 0);
890 // Second one: Add new edge of InstrBB->DestBB.
891 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
 // Of the two halves, only SrcBB->InstrBB stays off-tree; the original edge is
 // retired so it is not considered again.
892 NewEdge1.InMST = true;
893 E->Removed = true;
894
895 return canInstrument(InstrBB);
896}
897
898// When generating value profiling calls on Windows routines that make use of
899// handler funclets for exception processing, an operand bundle needs to be
900// attached to the called function. This routine will set \p OpBundles to contain the
901// funclet information, if any is needed, that should be placed on the generated
902// value profiling call for the value profile candidate call.
// Nothing is added when the candidate's annotated instruction is not a call.
// NOTE(review): the declarator lines (function name and parameter list) are not
// present in this listing; the names Cand, BlockColors and OpBundles below are
// its parameters -- confirm against the full file.
903static void
907 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
908 if (!OrigCall)
909 return;
910
911 if (!isa<IntrinsicInst>(OrigCall)) {
912 // The instrumentation call should belong to the same funclet as a
913 // non-intrinsic call, so just copy the operand bundle, if any exists.
914 std::optional<OperandBundleUse> ParentFunclet =
915 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
916 if (ParentFunclet)
917 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
918 } else {
919 // Intrinsics or other instructions do not get funclet information from the
920 // front-end. Need to use the BlockColors that was computed by the routine
921 // colorEHFunclets to determine whether a funclet is needed.
922 if (!BlockColors.empty()) {
923 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
924 assert(CV.size() == 1 && "non-unique color for block!");
// The block's single color is a BB; a "funclet" bundle is added only when the
// first non-PHI instruction of that BB is an EH pad.
925 BasicBlock::iterator EHPadIt = CV.front()->getFirstNonPHIIt();
926 if (EHPadIt->isEHPad())
927 OpBundles.emplace_back("funclet", &*EHPadIt);
928 }
929 }
930}
931
932// Visit all edges and instrument the edges not in the MST, and do value
933// profiling. Critical edges will be split.
// NOTE(review): several statement lines of this function (among them the
// openings of two `if` blocks and a few local declarations) are not present in
// this listing; the comments below describe only what is visible.
934void FunctionInstrumenter::instrument() {
935 if (!PGOBlockCoverage) {
936 // Split indirectbr critical edges here before computing the MST rather than
937 // later in getInstrBB() to avoid invalidating it.
938 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
939 }
940
941 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
942 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
943 F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
944 InstrumentationType == PGOInstrumentationType::CSFDO,
945 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
947
// For contextual profiling the function itself serves as the name operand;
// otherwise the name variable held by FuncInfo is used.
948 auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
949 auto *const CFGHash =
950 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
951 // Make sure that pointer to global is passed in with zero addrspace
952 // This is relevant during GPU profiling
953 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
954 Name, PointerType::get(M.getContext(), 0));
// A single cover intrinsic (one counter, index 0) is placed at the entry
// block's first insertion point, after which the function returns early.
956 auto &EntryBB = F.getEntryBlock();
957 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
958 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
959 // i32 <index>)
960 Builder.CreateIntrinsic(
961 Intrinsic::instrprof_cover,
962 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
963 return;
964 }
965
// One counter per instrumented BB plus one per counted select instruction.
966 std::vector<BasicBlock *> InstrumentBBs;
967 FuncInfo.getInstrumentBBs(InstrumentBBs);
968 unsigned NumCounters =
969 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
970
971 if (IsCtxProf) {
973
974 auto *CSIntrinsic =
975 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite);
976 // We want to count the instrumentable callsites, then instrument them. This
977 // is because the llvm.instrprof.callsite intrinsic has an argument (like
978 // the other instrprof intrinsics) capturing the total number of
979 // instrumented objects (counters, or callsites, in this case). In this
980 // case, we want that value so we can readily pass it to the compiler-rt
981 // APIs that may have to allocate memory based on the nr of callsites.
982 // The traversal logic is the same for both counting and instrumentation,
983 // just needs to be done in succession.
984 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
985 for (auto &BB : F)
986 for (auto &Instr : BB)
987 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
989 continue;
// Direct calls to functions whose names are listed in SkipCSInstr are skipped.
990 if (CS->getCalledFunction() &&
991 SkipCSInstr.contains(CS->getCalledFunction()->getName()))
992 continue;
993 Visitor(CS);
994 }
995 };
996 // First, count callsites.
997 uint32_t TotalNumCallsites = 0;
998 Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; });
999
1000 // Now instrument.
// The callsite intrinsic is inserted immediately before each visited call and
// receives the total, a running index, and the called operand.
1002 Visit([&](auto *CB) {
1003 IRBuilder<> Builder(CB);
1004 Builder.CreateCall(CSIntrinsic,
1005 {Name, CFGHash, Builder.getInt32(TotalNumCallsites),
1006 Builder.getInt32(CallsiteIndex++),
1007 CB->getCalledOperand()});
1008 });
1009 }
1010
// I is the index of the next counter to hand out.
1011 uint32_t I = 0;
// The timestamp takes 8 counter slots under block coverage and 1 otherwise.
// NOTE(review): presumably because coverage counters are a single byte each
// while the timestamp needs 8 bytes -- confirm.
1013 NumCounters += PGOBlockCoverage ? 8 : 1;
1014 auto &EntryBB = F.getEntryBlock();
1015 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
1016 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
1017 // i32 <index>)
1018 Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
1019 {NormalizedNamePtr, CFGHash,
1020 Builder.getInt32(NumCounters),
1021 Builder.getInt32(I)});
1022 I += PGOBlockCoverage ? 8 : 1;
1023 }
1024
// Emit one counter update per instrumented BB: a cover intrinsic under block
// coverage, an increment intrinsic otherwise.
1025 for (auto *InstrBB : InstrumentBBs) {
1026 IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
1027 assert(Builder.GetInsertPoint() != InstrBB->end() &&
1028 "Cannot get the Instrumentation point");
1029 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
1030 // i32 <index>)
1031 Builder.CreateIntrinsic(PGOBlockCoverage ? Intrinsic::instrprof_cover
1032 : Intrinsic::instrprof_increment,
1033 {NormalizedNamePtr, CFGHash,
1034 Builder.getInt32(NumCounters),
1035 Builder.getInt32(I++)});
1036 }
1037
1038 // Now instrument select instructions:
1039 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, Name,
1040 FuncInfo.FunctionHash);
// Every counter slot must have been handed out by now.
1041 assert(I == NumCounters);
1042
1043 if (isValueProfilingDisabled())
1044 return;
1045
1046 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1047
1048 // Intrinsic function calls do not have funclet operand bundles needed for
1049 // Windows exception handling attached to them. However, if value profiling is
1050 // inserted for one of these calls, then a funclet value will need to be set
1051 // on the instrumentation call based on the funclet coloring.
1053 if (F.hasPersonalityFn() &&
1054 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
1055 BlockColors = colorEHFunclets(F);
1056
1057 // For each VP Kind, walk the VP candidates and instrument each one.
1058 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at 0 for every value kind.
1059 unsigned SiteIndex = 0;
1060 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1061 continue;
1062
1063 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1064 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1065 << " site: CallSite Index = " << SiteIndex << "\n");
1066
1067 IRBuilder<> Builder(Cand.InsertPt);
1068 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1069 "Cannot get the Instrumentation point");
1070
// The profiled value is passed as an i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint.
1071 Value *ToProfile = nullptr;
1072 if (Cand.V->getType()->isIntegerTy())
1073 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1074 else if (Cand.V->getType()->isPointerTy())
1075 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1076 assert(ToProfile && "value profiling Value is of unexpected type");
1077
1078 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1079 Name, PointerType::get(M.getContext(), 0));
1080
1082 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1083 Builder.CreateCall(
1085 Intrinsic::instrprof_value_profile),
1086 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1087 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1088 OpBundles);
1089 }
1090 } // IPVK_First <= Kind <= IPVK_Last
1091}
1092
1093namespace {
1094
1095// This class represents a CFG edge in profile use compilation.
1096struct PGOUseEdge : public PGOEdge {
1097 using PGOEdge::PGOEdge;
1098
1099 std::optional<uint64_t> Count;
1100
1101 // Set edge count value
1102 void setEdgeCount(uint64_t Value) { Count = Value; }
1103
1104 // Return the information string for this object.
1105 std::string infoString() const {
1106 if (!Count)
1107 return PGOEdge::infoString();
1108 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1109 }
1110};
1111
// A small vector of PGOUseEdge pointers; used below for the per-BB in-edge and
// out-edge lists.
1112using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1113
1114// This class stores the auxiliary information for each BB.
1115struct PGOUseBBInfo : public PGOBBInfo {
1116 std::optional<uint64_t> Count;
1117 int32_t UnknownCountInEdge = 0;
1118 int32_t UnknownCountOutEdge = 0;
1119 DirectEdges InEdges;
1120 DirectEdges OutEdges;
1121
1122 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1123
1124 // Set the profile count value for this BB.
1125 void setBBInfoCount(uint64_t Value) { Count = Value; }
1126
1127 // Return the information string of this object.
1128 std::string infoString() const {
1129 if (!Count)
1130 return PGOBBInfo::infoString();
1131 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1132 }
1133
1134 // Add an OutEdge and update the edge count.
1135 void addOutEdge(PGOUseEdge *E) {
1136 OutEdges.push_back(E);
1137 UnknownCountOutEdge++;
1138 }
1139
1140 // Add an InEdge and update the edge count.
1141 void addInEdge(PGOUseEdge *E) {
1142 InEdges.push_back(E);
1143 UnknownCountInEdge++;
1144 }
1145};
1146
1147} // end anonymous namespace
1148
1149// Sum up the count values for all the edges.
// Removed edges and edges that have no count yet contribute nothing.
// NOTE(review): the declarator line of this function is not present in this
// listing; the name sumEdgeCount is taken from its call sites in
// PGOUseFunc::populateCounters -- confirm.
1151 uint64_t Total = 0;
1152 for (const auto &E : Edges) {
1153 if (E->Removed)
1154 continue;
1155 if (E->Count)
1156 Total += *E->Count;
1157 }
1158 return Total;
1159}
1160
1161namespace {
1162
// Per-function state for the profile-use side: reads the function's profile
// record, assigns counts to BBs and edges, and annotates the IR with them.
// NOTE(review): a few declaration lines (part of the constructor's parameter
// list, the second parameter of readCounters, and the BFI and VPC members used
// in the constructor's initializer list) are not present in this listing.
1163class PGOUseFunc {
1164public:
1165 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1166 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1168 LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1169 bool InstrumentFuncEntry, bool InstrumentLoopEntries,
1170 bool HasSingleByteCoverage)
1171 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1172 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1173 InstrumentFuncEntry, InstrumentLoopEntries,
1174 HasSingleByteCoverage),
1175 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1176
// Handle an error from reading this function's profile record: updates the
// missing/mismatch statistics and, unless suppressed, emits a warning
// diagnostic that includes \p MismatchedFuncSum.
1177 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1178
1179 /// Get the profile record, assign it to \p ProfileRecord, handle errors if
1180 /// necessary, and assign \p ProgramMaxCount. \returns true if there are no
1181 /// errors.
1182 bool getRecord(IndexedInstrProfReader *PGOReader);
1183
1184 // Read counts for the instrumented BB from profile.
1185 bool readCounters(bool &AllZeros,
1187
1188 // Populate the counts for all BBs.
1189 void populateCounters();
1190
1191 // Set block coverage based on profile coverage values.
1192 void populateCoverage();
1193
1194 // Set the branch weights based on the count values.
1195 void setBranchWeights();
1196
1197 // Annotate the value profile call sites for all value kinds.
1198 void annotateValueSites();
1199
1200 // Annotate the value profile call sites for one value kind.
1201 void annotateValueSites(uint32_t Kind);
1202
1203 // Annotate the irreducible loop header weights.
1204 void annotateIrrLoopHeaderWeights();
1205
1206 // The hotness of the function from the profile count.
1207 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1208
1209 // Return the function hotness from the profile.
1210 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1211
1212 // Return the function hash.
1213 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1214
1215 // Return the profile record for this function.
1216 NamedInstrProfRecord &getProfileRecord() { return ProfileRecord; }
1217
1218 // Return the auxiliary BB information.
1219 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1220 return FuncInfo.getBBInfo(BB);
1221 }
1222
1223 // Return the auxiliary BB information if available.
1224 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1225 return FuncInfo.findBBInfo(BB);
1226 }
1227
// Return the function this object works on.
1228 Function &getFunc() const { return F; }
1229
// Forward to FuncInfo.dumpInfo with the given string.
1230 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1231
// Return the maximum count value in the profile (see ProgramMaxCount).
1232 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1233
1234private:
1235 Function &F;
1236 Module *M;
1238 ProfileSummaryInfo *PSI;
1239
1240 // This member stores the shared information with class PGOGenFunc.
1241 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1242
1243 // The maximum count value in the profile. This is only used in PGO use
1244 // compilation. It has no initializer here and is not set by the constructor;
// getRecord() assigns it on success.
1245 uint64_t ProgramMaxCount;
1246
1247 // Position of counter that remains to be read.
1248 uint32_t CountPosition = 0;
1249
1250 // Total size of the profile count for this function.
1251 uint32_t ProfileCountSize = 0;
1252
1253 // ProfileRecord for this function.
1254 NamedInstrProfRecord ProfileRecord;
1255
1256 // Function hotness info derived from profile.
1257 FuncFreqAttr FreqAttr;
1258
1259 // Whether the context sensitive profile is used.
1260 bool IsCS;
1261
1263
1264 // Find the Instrumented BB and set the value. Return false on error.
1265 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1266
1267 // Set the edge counter value for the unknown edge -- there should be only
1268 // one unknown edge.
1269 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1270
1271 // Set the hot/cold inline hints based on the count values.
// A hot entry count takes precedence; otherwise a cold max count marks the
// function cold. In all other cases FreqAttr is left unchanged.
1272 // FIXME: This function should be removed once the functionality in
1273 // the inliner is implemented.
1274 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1275 if (PSI->isHotCount(EntryCount))
1276 FreqAttr = FFA_Hot;
1277 else if (PSI->isColdCount(MaxCount))
1278 FreqAttr = FFA_Cold;
1279 }
1280};
1281
1282} // end anonymous namespace
1283
1284/// Set up InEdges/OutEdges for all BBs in the MST.
/// Every edge that is not marked removed is added to its source block's
/// out-edges and to its destination block's in-edges.
// NOTE(review): the declarator line of this function is not present in this
// listing; the name setupBBInfoEdges is taken from its call site in
// PGOUseFunc::setInstrumentedCounts -- confirm.
1286 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1287 // This is not required when there is block coverage inference.
1288 if (FuncInfo.BCI)
1289 return;
1290 for (const auto &E : FuncInfo.MST.allEdges()) {
1291 if (E->Removed)
1292 continue;
1293 const BasicBlock *SrcBB = E->SrcBB;
1294 const BasicBlock *DestBB = E->DestBB;
1295 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1296 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1297 SrcInfo.addOutEdge(E.get());
1298 DestInfo.addInEdge(E.get());
1299 }
1300}
1301
1302// Visit all the edges and assign the count value for the instrumented
1303// edges and the BB. Return false on error.
1304bool PGOUseFunc::setInstrumentedCounts(
1305 const std::vector<uint64_t> &CountFromProfile) {
1306
1307 std::vector<BasicBlock *> InstrumentBBs;
1308 FuncInfo.getInstrumentBBs(InstrumentBBs);
1309
1310 setupBBInfoEdges(FuncInfo);
1311
1312 unsigned NumCounters =
1313 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1314 // The number of counters here should match the number of counters
1315 // in profile. Return if they mismatch.
1316 if (NumCounters != CountFromProfile.size()) {
1317 return false;
1318 }
1319 auto *FuncEntry = &*F.begin();
1320
1321 // Set the profile count to the Instrumented BBs.
1322 uint32_t I = 0;
1323 for (BasicBlock *InstrBB : InstrumentBBs) {
1324 uint64_t CountValue = CountFromProfile[I++];
1325 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1326 // If we reach here, we know that we have some nonzero count
1327 // values in this function. The entry count should not be 0.
1328 // Fix it if necessary.
1329 if (InstrBB == FuncEntry && CountValue == 0)
1330 CountValue = 1;
1331 Info.setBBInfoCount(CountValue);
1332 }
1333 ProfileCountSize = CountFromProfile.size();
1334 CountPosition = I;
1335
1336 // Set the edge count and update the count of unknown edges for BBs.
1337 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1338 E->setEdgeCount(Value);
1339 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1340 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1341 };
1342
1343 // Set the profile count the Instrumented edges. There are BBs that not in
1344 // MST but not instrumented. Need to set the edge count value so that we can
1345 // populate the profile counts later.
1346 for (const auto &E : FuncInfo.MST.allEdges()) {
1347 if (E->Removed || E->InMST)
1348 continue;
1349 const BasicBlock *SrcBB = E->SrcBB;
1350 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1351
1352 // If only one out-edge, the edge profile count should be the same as BB
1353 // profile count.
1354 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1355 setEdgeCount(E.get(), *SrcInfo.Count);
1356 else {
1357 const BasicBlock *DestBB = E->DestBB;
1358 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1359 // If only one in-edge, the edge profile count should be the same as BB
1360 // profile count.
1361 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1362 setEdgeCount(E.get(), *DestInfo.Count);
1363 }
1364 if (E->Count)
1365 continue;
1366 // E's count should have been set from profile. If not, this meenas E skips
1367 // the instrumentation. We set the count to 0.
1368 setEdgeCount(E.get(), 0);
1369 }
1370 return true;
1371}
1372
1373// Set the count value for the unknown edge. There should be one and only one
1374// unknown edge in Edges vector.
1375void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1376 for (auto &E : Edges) {
1377 if (E->Count)
1378 continue;
1379 E->setEdgeCount(Value);
1380
1381 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1382 getBBInfo(E->DestBB).UnknownCountInEdge--;
1383 return;
1384 }
1385 llvm_unreachable("Cannot find the unknown count edge");
1386}
1387
1388// Emit function metadata indicating PGO profile mismatch.
// The marker string is appended to the function's MD_annotation tuple; existing
// operands are kept, and nothing changes if the marker is already present.
// NOTE(review): the declarator line and the declaration of Names are not
// present in this listing; the function name is taken from its call site in
// PGOUseFunc::handleInstrProfError -- confirm.
1390 const char MetadataName[] = "instr_prof_hash_mismatch";
1392 // If this metadata already exists, ignore.
1393 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1394 if (Existing) {
1395 MDTuple *Tuple = cast<MDTuple>(Existing);
1396 for (const auto &N : Tuple->operands()) {
1397 if (N.equalsStr(MetadataName))
1398 return;
// Carry every existing annotation operand over to the new tuple.
1399 Names.push_back(N.get());
1400 }
1401 }
1402
1403 MDBuilder MDB(ctx);
1404 Names.push_back(MDB.createString(MetadataName));
1405 MDNode *MD = MDTuple::get(ctx, Names);
1406 F.setMetadata(LLVMContext::MD_annotation, MD);
1407}
1408
// Handle an InstrProfError raised while fetching this function's profile
// record: bump the matching statistic, tag the function with the hash-mismatch
// annotation where applicable, and emit a warning unless it is suppressed.
// \p MismatchedFuncSum is reported in the warning text as the discarded count.
// NOTE(review): parts of the SkipWarning expression in the mismatch branch are
// not present in this listing.
1409void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1410 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1411 auto &Ctx = M->getContext();
1412 auto Err = IPE.get();
1413 bool SkipWarning = false;
1414 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1415 << FuncInfo.FuncName << ": ");
1416 if (Err == instrprof_error::unknown_function) {
1417 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
// Missing-function warnings are emitted only when PGOWarnMissing is set.
1418 SkipWarning = !PGOWarnMissing;
1419 LLVM_DEBUG(dbgs() << "unknown function");
1420 } else if (Err == instrprof_error::hash_mismatch ||
1421 Err == instrprof_error::malformed) {
1422 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1423 SkipWarning =
1426 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1428 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1429 << " skip=" << SkipWarning << ")");
1430 // Emit function metadata indicating PGO profile mismatch.
1431 annotateFunctionWithHashMismatch(F, M->getContext());
1432 }
1433
1434 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1435 if (SkipWarning)
1436 return;
1437
// Warning text: error message, function name, hash, and the discarded count.
1438 std::string Msg =
1439 IPE.message() + std::string(" ") + F.getName().str() +
1440 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1441 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1442 std::string(" count discarded");
1443
1444 Ctx.diagnose(
1445 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1446 });
1447}
1448
1449bool PGOUseFunc::getRecord(IndexedInstrProfReader *PGOReader) {
1450 uint64_t MismatchedFuncSum = 0;
1451 auto Result = PGOReader->getInstrProfRecord(
1452 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1453 &MismatchedFuncSum);
1454 if (Error E = Result.takeError()) {
1455 handleInstrProfError(std::move(E), MismatchedFuncSum);
1456 return false;
1457 }
1458 ProfileRecord = std::move(Result.get());
1459 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1460 return true;
1461}
1462
1463// Take the counters from the already-fetched ProfileRecord and assign them to
1464// the instrumented BBs and the edges. Return true if the profile is
1465// successfully read, and false on errors.
// \p AllZeros is set to whether all counter values sum to zero. PseudoKind
// receives the record's pseudo-count kind; for a pseudo record the function
// returns true right away without assigning any counts.
// NOTE(review): the line declaring the second parameter (PseudoKind) is not
// present in this listing.
1466bool PGOUseFunc::readCounters(bool &AllZeros,
1468 auto &Ctx = M->getContext();
1469 PseudoKind = ProfileRecord.getCountPseudoKind();
1470 if (PseudoKind != InstrProfRecord::NotPseudo) {
1471 return true;
1472 }
1473 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1474
1475 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1476 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1477
1478 uint64_t ValueSum = 0;
1479 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1480 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1481 ValueSum += CountFromProfile[I];
1482 }
1483 AllZeros = (ValueSum == 0);
1484
1485 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1486
// The BB info keyed by nullptr starts with two unknown edges in each direction.
// NOTE(review): presumably this keeps the fake node from ever being treated as
// having zero or exactly one unknown edge during count population -- confirm.
1487 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1488 getBBInfo(nullptr).UnknownCountInEdge = 2;
1489
1490 if (!setInstrumentedCounts(CountFromProfile)) {
1491 LLVM_DEBUG(
1492 dbgs() << "Inconsistent number of counts, skipping this function");
1493 Ctx.diagnose(DiagnosticInfoPGOProfile(
1494 M->getName().data(),
1495 Twine("Inconsistent number of counts in ") + F.getName().str() +
1496 Twine(": the profile may be stale or there is a function name "
1497 "collision."),
1498 DS_Warning));
1499 return false;
1500 }
1501 return true;
1502}
1503
// Derive per-block coverage from the profile's coverage counters, infer the
// coverage of non-instrumented blocks, and annotate the function with an entry
// count and 0/1 branch weights that reflect it.
// NOTE(review): the declarations of Coverage, InverseDependencies and Weights,
// and the line guarding the final viewBlockCoverageGraph call, are not present
// in this listing.
1504void PGOUseFunc::populateCoverage() {
1505 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1506
1507 ArrayRef<uint64_t> CountsFromProfile = ProfileRecord.Counts;
// Instrumented blocks consume the profile values in function order; a nonzero
// value means the block was covered.
1509 unsigned Index = 0;
1510 for (auto &BB : F)
1511 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1512 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1513 assert(Index == CountsFromProfile.size());
1514
1515 // For each B in InverseDependencies[A], if A is covered then B is covered.
1517 InverseDependencies;
1518 for (auto &BB : F) {
1519 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1520 // If Dep is covered then BB is covered.
1521 InverseDependencies[Dep].insert(&BB);
1522 }
1523 }
1524
1525 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1526 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1527 for (auto &[BB, IsCovered] : Coverage)
1528 if (IsCovered)
1529 CoveredBlocksToProcess.push(BB);
1530
1531 while (!CoveredBlocksToProcess.empty()) {
1532 auto *CoveredBlock = CoveredBlocksToProcess.top();
1533 assert(Coverage[CoveredBlock]);
1534 CoveredBlocksToProcess.pop();
1535 for (auto *BB : InverseDependencies[CoveredBlock]) {
1536 // If CoveredBlock is covered then BB is covered.
1537 bool &Cov = Coverage[BB];
1538 if (Cov)
1539 continue;
1540 Cov = true;
1541 CoveredBlocksToProcess.push(BB);
1542 }
1543 }
1544
1545 // Annotate block coverage.
1546 MDBuilder MDB(F.getContext());
1547 // We set the entry count to 10000 if the entry block is covered so that BFI
1548 // can propagate a fraction of this count to the other covered blocks.
1549 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1550 for (auto &BB : F) {
1551 // For a block A and its successor B, we set the edge weight as follows:
1552 // If A is covered and B is covered, set weight=1.
1553 // If A is covered and B is uncovered, set weight=0.
1554 // If A is uncovered, set weight=1.
1555 // This setup will allow BFI to give nonzero profile counts to only covered
1556 // blocks.
1558 for (auto *Succ : successors(&BB))
1559 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
// Branch weights are attached only to terminators with at least two successors.
1560 if (Weights.size() >= 2)
1561 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1562 /*IsExpected=*/false);
1563 }
1564
// Recompute block frequencies from the weights just set and cross-check them
// against the inferred coverage.
1565 unsigned NumCorruptCoverage = 0;
1566 DominatorTree DT(F);
1567 LoopInfo LI(DT);
1568 BranchProbabilityInfo BPI(F, LI);
1569 BlockFrequencyInfo BFI(F, BPI, LI);
// Returns whether BFI gives BB a profile count of 0, or no value when BFI has
// no profile count for BB.
1570 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1571 if (auto C = BFI.getBlockProfileCount(&BB))
1572 return C == 0;
1573 return {};
1574 };
1575 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1576 for (auto &BB : F) {
1577 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1578 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1579 << "\n");
1580 // In some cases it is possible to find a covered block that has no covered
1581 // successors, e.g., when a block calls a function that may call exit(). In
1582 // those cases, BFI could find its successor to be covered while BCI could
1583 // find its successor to be dead.
1584 const bool &Cov = Coverage[&BB];
// Covered-and-dead, or uncovered-and-not-dead, counts as inconsistent; an
// unknown BFI answer is treated as "not dead".
1585 if (Cov == IsBlockDead(BB).value_or(false)) {
1586 LLVM_DEBUG(
1587 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1588 << ": BCI=" << (Cov ? "Covered" : "Dead") << " BFI="
1589 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1590 ++NumCorruptCoverage;
1591 }
1592 if (Cov)
1593 ++NumCoveredBlocks;
1594 }
// The inconsistency warning is emitted only when PGOVerifyBFI is set.
1595 if (PGOVerifyBFI && NumCorruptCoverage) {
1596 auto &Ctx = M->getContext();
1597 Ctx.diagnose(DiagnosticInfoPGOProfile(
1598 M->getName().data(),
1599 Twine("Found inconsistent block coverage for function ") + F.getName() +
1600 " in " + Twine(NumCorruptCoverage) + " blocks.",
1601 DS_Warning));
1602 }
1604 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1605}
1606
1607// Populate the counters from instrumented BBs to all BBs.
1608// At the end of this operation, all BBs should have a valid count value.
// Counts are propagated to a fixed point: a BB whose in-edges (or out-edges)
// are all known gets their sum, and a BB with a known count and exactly one
// unknown edge in a direction gives that edge the remainder.
// NOTE(review): one statement line between the entry-count fixup and the call
// to markFunctionAttributes is not present in this listing.
1609void PGOUseFunc::populateCounters() {
1610 bool Changes = true;
1611 unsigned NumPasses = 0;
1612 while (Changes) {
1613 NumPasses++;
1614 Changes = false;
1615
1616 // For efficient traversal, it's better to start from the end as most
1617 // of the instrumented edges are at the end.
1618 for (auto &BB : reverse(F)) {
1619 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1620 if (UseBBInfo == nullptr)
1621 continue;
// A BB without a count takes the sum of its out-edges if all of them are
// known, else the sum of its in-edges if all of those are known.
1622 if (!UseBBInfo->Count) {
1623 if (UseBBInfo->UnknownCountOutEdge == 0) {
1624 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1625 Changes = true;
1626 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1627 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1628 Changes = true;
1629 }
1630 }
1631 if (UseBBInfo->Count) {
1632 if (UseBBInfo->UnknownCountOutEdge == 1) {
1633 uint64_t Total = 0;
1634 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1635 // If one of the successor blocks can terminate early (no-return),
1636 // we can end up with a situation where the out-edge sum is larger, as
1637 // the source BB's count is collected by a post-dominated block.
// In that case the remaining edge gets 0 rather than a wrapped-around value.
1638 if (*UseBBInfo->Count > OutSum)
1639 Total = *UseBBInfo->Count - OutSum;
1640 setEdgeCount(UseBBInfo->OutEdges, Total);
1641 Changes = true;
1642 }
1643 if (UseBBInfo->UnknownCountInEdge == 1) {
1644 uint64_t Total = 0;
1645 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1646 if (*UseBBInfo->Count > InSum)
1647 Total = *UseBBInfo->Count - InSum;
1648 setEdgeCount(UseBBInfo->InEdges, Total);
1649 Changes = true;
1650 }
1651 }
1652 }
1653 }
1654
1655 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1656 (void)NumPasses;
1657#ifndef NDEBUG
1658 // Assert every BB has a valid counter.
1659 for (auto &BB : F) {
1660 auto BI = findBBInfo(&BB);
1661 if (BI == nullptr)
1662 continue;
1663 assert(BI->Count && "BB count is not valid");
1664 }
1665#endif
1666 // Now annotate select instructions. This may fixup impossible block counts.
1667 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
// Every counter from the profile must have been consumed by now.
1668 assert(CountPosition == ProfileCountSize);
1669
// The function max count is the largest count over the entry block and every
// BB that has info.
1670 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1671 uint64_t FuncMaxCount = FuncEntryCount;
1672 for (auto &BB : F) {
1673 auto BI = findBBInfo(&BB);
1674 if (BI == nullptr)
1675 continue;
1676 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1677 }
1678
1679 // Fix the obviously inconsistent entry count.
1680 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1681 FuncEntryCount = 1;
1683 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1684
1685 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1686}
1687
1688// Assign the scaled count values to the BB with multiple out edges.
1689void PGOUseFunc::setBranchWeights() {
1690 // Generate MD_prof metadata for every branch instruction.
1691 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1692 << " IsCS=" << IsCS << "\n");
1693 for (auto &BB : F) {
1694 Instruction *TI = BB.getTerminator();
1695 if (TI->getNumSuccessors() < 2)
1696 continue;
1697 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1698 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1699 isa<CallBrInst>(TI)))
1700 continue;
1701
1702 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1703 if (!*BBCountInfo.Count)
1704 continue;
1705
1706 // We have a non-zero Branch BB.
1707
1708 // SuccessorCount can be greater than OutEdgesCount, because
1709 // removed edges don't appear in OutEdges.
1710 unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1711 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1712 assert(OutEdgesCount <= SuccessorCount);
1713
1714 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1715 uint64_t MaxCount = 0;
1716 for (unsigned It = 0; It < OutEdgesCount; It++) {
1717 const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1718 const BasicBlock *SrcBB = E->SrcBB;
1719 const BasicBlock *DestBB = E->DestBB;
1720 if (DestBB == nullptr)
1721 continue;
1722 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1723 uint64_t EdgeCount = *E->Count;
1724 if (EdgeCount > MaxCount)
1725 MaxCount = EdgeCount;
1726 EdgeCounts[SuccNum] = EdgeCount;
1727 }
1728
1729 if (MaxCount)
1730 setProfMetadata(TI, EdgeCounts, MaxCount);
1731 else {
1732 // A zero MaxCount can come about when we have a BB with a positive
1733 // count, and whose successor blocks all have 0 count. This can happen
1734 // when there is no exit block and the code exits via a noreturn function.
1735 auto &Ctx = M->getContext();
1736 Ctx.diagnose(DiagnosticInfoPGOProfile(
1737 M->getName().data(),
1738 Twine("Profile in ") + F.getName().str() +
1739 Twine(" partially ignored") +
1740 Twine(", possibly due to the lack of a return path."),
1741 DS_Warning));
1742 }
1743 }
1744}
1745
// Returns true if at least one predecessor of BB ends in an indirectbr.
// NOTE(review): the declarator line of this function is not present in this
// listing; the name isIndirectBrTarget is taken from its call site in
// PGOUseFunc::annotateIrrLoopHeaderWeights -- confirm.
1747 for (BasicBlock *Pred : predecessors(BB)) {
1748 if (isa<IndirectBrInst>(Pred->getTerminator()))
1749 return true;
1750 }
1751 return false;
1752}
1753
1754void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1755 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1756 // Find irr loop headers
1757 for (auto &BB : F) {
1758 // As a heuristic also annotate indrectbr targets as they have a high chance
1759 // to become an irreducible loop header after the indirectbr tail
1760 // duplication.
1761 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1762 Instruction *TI = BB.getTerminator();
1763 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1764 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1765 }
1766 }
1767}
1768
// Instrument one select instruction: emit an instrprof_increment_step intrinsic
// at SI whose step is the select condition zero-extended to i64, using counter
// slot *CurCtrIdx, then advance *CurCtrIdx.
// NOTE(review): the line that begins the initializer of
// NormalizedFuncNameVarPtr is not present in this listing.
1769void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1770 Module *M = F.getParent();
1771 IRBuilder<> Builder(&SI);
1772 Type *Int64Ty = Builder.getInt64Ty();
// The counter advances by the condition value (0 or 1 after zero-extension).
1773 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1774 auto *NormalizedFuncNameVarPtr =
1776 FuncNameVar, PointerType::get(M->getContext(), 0));
1777 Builder.CreateIntrinsic(Intrinsic::instrprof_increment_step,
1778 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1779 Builder.getInt32(TotalNumCtrs),
1780 Builder.getInt32(*CurCtrIdx), Step});
1781 ++(*CurCtrIdx);
1782}
1783
1784void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1785 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1786 assert(*CurCtrIdx < CountFromProfile.size() &&
1787 "Out of bound access of counters");
1788 uint64_t SCounts[2];
1789 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1790 ++(*CurCtrIdx);
1791 uint64_t TotalCount = 0;
1792 auto BI = UseFunc->findBBInfo(SI.getParent());
1793 if (BI != nullptr) {
1794 TotalCount = *BI->Count;
1795
1796 // Fix the block count if it is impossible.
1797 if (TotalCount < SCounts[0])
1798 BI->Count = SCounts[0];
1799 }
1800 // False Count
1801 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1802 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1803 if (MaxCount)
1804 setProfMetadata(&SI, SCounts, MaxCount);
1805}
1806
1807void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1808 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1809 return;
1810 // FIXME: do not handle this yet.
1811 if (SI.getCondition()->getType()->isVectorTy())
1812 return;
1813
1814 switch (Mode) {
1815 case VM_counting:
1816 NSIs++;
1817 return;
1818 case VM_instrument:
1819 instrumentOneSelectInst(SI);
1820 return;
1821 case VM_annotate:
1822 annotateOneSelectInst(SI);
1823 return;
1824 }
1825
1826 llvm_unreachable("Unknown visiting mode");
1827}
1828
1830 if (ValueProfKind == IPVK_MemOPSize)
1832 if (ValueProfKind == llvm::IPVK_VTableTarget)
1834 return MaxNumAnnotations;
1835}
1836
1837// Traverse all valuesites and annotate the instructions for all value kind.
1838void PGOUseFunc::annotateValueSites() {
1840 return;
1841
1842 // Create the PGOFuncName meta data.
1843 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1844
1845 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1846 annotateValueSites(Kind);
1847}
1848
1849// Annotate the instructions for a specific value kind.
1850void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1851 assert(Kind <= IPVK_Last);
1852 unsigned ValueSiteIndex = 0;
1853
1854 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1855
1856 // Since there isn't a reliable or fast way for profile reader to tell if a
1857 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1858 // value profile collector over the function IR to find the instrumented sites
1859 // iff function profile records shows the number of instrumented vtable sites
1860 // is not zero. Function cfg already takes the number of instrumented
1861 // indirect call sites into account so it doesn't hash the number of
1862 // instrumented vtables; as a side effect it makes it easier to enable
1863 // profiling and profile use in two steps if needed.
1864 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1865 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1866 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1868 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1869 auto &ValueSites = FuncInfo.ValueSites[Kind];
1870 if (NumValueSites != ValueSites.size()) {
1871 auto &Ctx = M->getContext();
1872 Ctx.diagnose(DiagnosticInfoPGOProfile(
1873 M->getName().data(),
1874 Twine("Inconsistent number of value sites for ") +
1875 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1876 F.getName().str() +
1877 Twine("\", possibly due to the use of a stale profile."),
1878 DS_Warning));
1879 return;
1880 }
1881
1882 for (VPCandidateInfo &I : ValueSites) {
1883 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1884 << "): Index = " << ValueSiteIndex << " out of "
1885 << NumValueSites << "\n");
1887 *M, *I.AnnotatedInst, ProfileRecord,
1888 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1889 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1890 ValueSiteIndex++;
1891 }
1892}
1893
1894// Collect the set of members for each Comdat in module M and store
1895// in ComdatMembers.
1897 Module &M,
1898 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1899 if (!DoComdatRenaming)
1900 return;
1901 for (Function &F : M)
1902 if (Comdat *C = F.getComdat())
1903 ComdatMembers.insert(std::make_pair(C, &F));
1904 for (GlobalVariable &GV : M.globals())
1905 if (Comdat *C = GV.getComdat())
1906 ComdatMembers.insert(std::make_pair(C, &GV));
1907 for (GlobalAlias &GA : M.aliases())
1908 if (Comdat *C = GA.getComdat())
1909 ComdatMembers.insert(std::make_pair(C, &GA));
1910}
1911
1912// Return true if we should not find instrumentation data for this function
1913static bool skipPGOUse(const Function &F) {
1914 if (F.isDeclaration())
1915 return true;
1916 // If there are too many critical edges, PGO might cause
1917 // compiler time problem. Skip PGO if the number of
1918 // critical edges execeed the threshold.
1919 unsigned NumCriticalEdges = 0;
1920 for (auto &BB : F) {
1921 const Instruction *TI = BB.getTerminator();
1922 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1923 if (isCriticalEdge(TI, I))
1924 NumCriticalEdges++;
1925 }
1926 }
1927 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1928 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1929 << ", NumCriticalEdges=" << NumCriticalEdges
1930 << " exceed the threshold. Skip PGO.\n");
1931 return true;
1932 }
1933 return false;
1934}
1935
1936// Return true if we should not instrument this function
1937static bool skipPGOGen(const Function &F) {
1938 if (skipPGOUse(F))
1939 return true;
1940 if (F.hasFnAttribute(llvm::Attribute::Naked))
1941 return true;
1942 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1943 return true;
1944 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1945 return true;
1946 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1947 return true;
1949 if (auto EntryCount = F.getEntryCount())
1950 return EntryCount->getCount() > PGOColdInstrumentEntryThreshold;
1951 return !PGOTreatUnknownAsCold;
1952 }
1953 return false;
1954}
1955
1957 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1960 function_ref<LoopInfo *(Function &)> LookupLI,
1961 PGOInstrumentationType InstrumentationType) {
1962 // For the context-sensitve instrumentation, we should have a separated pass
1963 // (before LTO/ThinLTO linking) to create these variables.
1964 if (InstrumentationType == PGOInstrumentationType::FDO)
1965 createIRLevelProfileFlagVar(M, InstrumentationType);
1966
1967 Triple TT(M.getTargetTriple());
1968 LLVMContext &Ctx = M.getContext();
1969 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1971 M.getName().data(),
1972 Twine("VTable value profiling is presently not "
1973 "supported for non-ELF object formats"),
1974 DS_Warning));
1975 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1976 collectComdatMembers(M, ComdatMembers);
1977
1978 for (auto &F : M) {
1979 if (skipPGOGen(F))
1980 continue;
1981 TargetLibraryInfo &TLI = LookupTLI(F);
1982 BranchProbabilityInfo *BPI = LookupBPI(F);
1983 BlockFrequencyInfo *BFI = LookupBFI(F);
1984 LoopInfo *LI = LookupLI(F);
1985 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
1986 InstrumentationType);
1987 FI.instrument();
1988 }
1989 return true;
1990}
1991
1994 createProfileFileNameVar(M, CSInstrName);
1995 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1996 // will be retained.
1999 if (ProfileSampling)
2004 return PA;
2005}
2006
2009 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2010 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2012 };
2013 auto LookupBPI = [&FAM](Function &F) {
2015 };
2016 auto LookupBFI = [&FAM](Function &F) {
2018 };
2019 auto LookupLI = [&FAM](Function &F) {
2020 return &FAM.getResult<LoopAnalysis>(F);
2021 };
2022
2023 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
2024 InstrumentationType))
2025 return PreservedAnalyses::all();
2026
2027 return PreservedAnalyses::none();
2028}
2029
2030// Using the ratio b/w sums of profile count values and BFI count values to
2031// adjust the func entry count.
2032static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
2033 BranchProbabilityInfo &NBPI) {
2034 Function &F = Func.getFunc();
2035 BlockFrequencyInfo NBFI(F, NBPI, LI);
2036#ifndef NDEBUG
2037 auto BFIEntryCount = F.getEntryCount();
2038 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
2039 "Invalid BFI Entrycount");
2040#endif
2041 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
2042 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
2043 for (auto &BBI : F) {
2044 uint64_t CountValue = 0;
2045 uint64_t BFICountValue = 0;
2046 if (!Func.findBBInfo(&BBI))
2047 continue;
2048 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2049 CountValue = *Func.getBBInfo(&BBI).Count;
2050 BFICountValue = *BFICount;
2051 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
2052 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
2053 }
2054 if (SumCount.isZero())
2055 return;
2056
2057 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
2058 "Incorrect sum of BFI counts");
2059 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
2060 return;
2061 double Scale = (SumCount / SumBFICount).convertToDouble();
2062 if (Scale < 1.001 && Scale > 0.999)
2063 return;
2064
2065 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
2066 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
2067 if (NewEntryCount == 0)
2068 NewEntryCount = 1;
2069 if (NewEntryCount != FuncEntryCount) {
2070 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
2071 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
2072 << ", entry_count " << FuncEntryCount << " --> "
2073 << NewEntryCount << "\n");
2074 }
2075}
2076
2077// Compare the profile count values with BFI count values, and print out
2078// the non-matching ones.
2079static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2081 uint64_t HotCountThreshold,
2083 Function &F = Func.getFunc();
2084 BlockFrequencyInfo NBFI(F, NBPI, LI);
2085 // bool PrintFunc = false;
2086 bool HotBBOnly = PGOVerifyHotBFI;
2087 StringRef Msg;
2089
2090 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2091 for (auto &BBI : F) {
2092 PGOUseBBInfo *BBInfo = Func.findBBInfo(&BBI);
2093 if (!BBInfo)
2094 continue;
2095
2096 uint64_t CountValue = BBInfo->Count.value_or(CountValue);
2097 uint64_t BFICountValue = 0;
2098
2099 BBNum++;
2100 if (CountValue)
2101 NonZeroBBNum++;
2102 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2103 if (BFICount)
2104 BFICountValue = *BFICount;
2105
2106 if (HotBBOnly) {
2107 bool rawIsHot = CountValue >= HotCountThreshold;
2108 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2109 bool rawIsCold = CountValue <= ColdCountThreshold;
2110 bool ShowCount = false;
2111 if (rawIsHot && !BFIIsHot) {
2112 Msg = "raw-Hot to BFI-nonHot";
2113 ShowCount = true;
2114 } else if (rawIsCold && BFIIsHot) {
2115 Msg = "raw-Cold to BFI-Hot";
2116 ShowCount = true;
2117 }
2118 if (!ShowCount)
2119 continue;
2120 } else {
2121 if ((CountValue < PGOVerifyBFICutoff) &&
2122 (BFICountValue < PGOVerifyBFICutoff))
2123 continue;
2124 uint64_t Diff = (BFICountValue >= CountValue)
2125 ? BFICountValue - CountValue
2126 : CountValue - BFICountValue;
2127 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2128 continue;
2129 }
2130 BBMisMatchNum++;
2131
2132 ORE.emit([&]() {
2134 F.getSubprogram(), &BBI);
2135 Remark << "BB " << ore::NV("Block", BBI.getName())
2136 << " Count=" << ore::NV("Count", CountValue)
2137 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2138 if (!Msg.empty())
2139 Remark << " (" << Msg << ")";
2140 return Remark;
2141 });
2142 }
2143 if (BBMisMatchNum)
2144 ORE.emit([&]() {
2145 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2146 F.getSubprogram(), &F.getEntryBlock())
2147 << "In Func " << ore::NV("Function", F.getName())
2148 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2149 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2150 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2151 });
2152}
2153
2155 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2156 vfs::FileSystem &FS,
2157 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2160 function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
2161 bool IsCS) {
2162 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2163 auto &Ctx = M.getContext();
2164 // Read the counter array from file.
2165 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2166 ProfileRemappingFileName);
2167 if (Error E = ReaderOrErr.takeError()) {
2168 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2169 Ctx.diagnose(
2170 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2171 });
2172 return false;
2173 }
2174
2175 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2176 std::move(ReaderOrErr.get());
2177 if (!PGOReader) {
2178 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2179 StringRef("Cannot get PGOReader")));
2180 return false;
2181 }
2182 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2183 return false;
2184
2185 // TODO: might need to change the warning once the clang option is finalized.
2186 if (!PGOReader->isIRLevelProfile()) {
2187 Ctx.diagnose(DiagnosticInfoPGOProfile(
2188 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2189 return false;
2190 }
2191 if (PGOReader->functionEntryOnly()) {
2192 Ctx.diagnose(DiagnosticInfoPGOProfile(
2193 ProfileFileName.data(),
2194 "Function entry profiles are not yet supported for optimization"));
2195 return false;
2196 }
2197
2199 for (GlobalVariable &G : M.globals()) {
2200 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2201 continue;
2202
2203 // Create the PGOFuncName meta data.
2204 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2205 }
2206 }
2207
2208 // Add the profile summary (read from the header of the indexed summary) here
2209 // so that we can use it below when reading counters (which checks if the
2210 // function should be marked with a cold or inlinehint attribute).
2211 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2214 PSI->refresh();
2215
2216 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2217 collectComdatMembers(M, ComdatMembers);
2218 std::vector<Function *> HotFunctions;
2219 std::vector<Function *> ColdFunctions;
2220
2221 // If the profile marked as always instrument the entry BB, do the
2222 // same. Note this can be overwritten by the internal option in CFGMST.h
2223 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2225 InstrumentFuncEntry = PGOInstrumentEntry;
2226 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
2228 InstrumentLoopEntries = PGOInstrumentLoopEntries;
2229
2230 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2231 for (auto &F : M) {
2232 if (skipPGOUse(F))
2233 continue;
2234 TargetLibraryInfo &TLI = LookupTLI(F);
2235 BranchProbabilityInfo *BPI = LookupBPI(F);
2236 BlockFrequencyInfo *BFI = LookupBFI(F);
2237 LoopInfo *LI = LookupLI(F);
2238 if (!HasSingleByteCoverage) {
2239 // Split indirectbr critical edges here before computing the MST rather
2240 // than later in getInstrBB() to avoid invalidating it.
2241 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2242 BFI);
2243 }
2244 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2245 InstrumentFuncEntry, InstrumentLoopEntries,
2246 HasSingleByteCoverage);
2247 if (!Func.getRecord(PGOReader.get()))
2248 continue;
2249 if (HasSingleByteCoverage) {
2250 Func.populateCoverage();
2251 continue;
2252 }
2253 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
2254 // it means the profile for the function is unrepresentative and this
2255 // function is actually hot / warm. We will reset the function hot / cold
2256 // attribute and drop all the profile counters.
2258 bool AllZeros = false;
2259 if (!Func.readCounters(AllZeros, PseudoKind))
2260 continue;
2261 if (AllZeros) {
2262 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2263 if (Func.getProgramMaxCount() != 0)
2264 ColdFunctions.push_back(&F);
2265 continue;
2266 }
2267 if (PseudoKind != InstrProfRecord::NotPseudo) {
2268 // Clear function attribute cold.
2269 if (F.hasFnAttribute(Attribute::Cold))
2270 F.removeFnAttr(Attribute::Cold);
2271 // Set function attribute as hot.
2272 if (PseudoKind == InstrProfRecord::PseudoHot)
2273 F.addFnAttr(Attribute::Hot);
2274 continue;
2275 }
2276 Func.populateCounters();
2277 Func.setBranchWeights();
2278 Func.annotateValueSites();
2279 Func.annotateIrrLoopHeaderWeights();
2280 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2281 if (FreqAttr == PGOUseFunc::FFA_Cold)
2282 ColdFunctions.push_back(&F);
2283 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2284 HotFunctions.push_back(&F);
2285 if (PGOViewCounts != PGOVCT_None &&
2286 (ViewBlockFreqFuncName.empty() ||
2287 F.getName() == ViewBlockFreqFuncName)) {
2289 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2290 std::make_unique<BranchProbabilityInfo>(F, LI);
2291 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2292 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2294 NewBFI->view();
2295 else if (PGOViewCounts == PGOVCT_Text) {
2296 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2297 NewBFI->print(dbgs());
2298 }
2299 }
2301 (ViewBlockFreqFuncName.empty() ||
2302 F.getName() == ViewBlockFreqFuncName)) {
2304 if (ViewBlockFreqFuncName.empty())
2305 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2306 else
2307 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2308 else if (PGOViewRawCounts == PGOVCT_Text) {
2309 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2310 Func.dumpInfo();
2311 }
2312 }
2313
2316 BranchProbabilityInfo NBPI(F, LI);
2317
2318 // Fix func entry count.
2319 if (PGOFixEntryCount)
2320 fixFuncEntryCount(Func, LI, NBPI);
2321
2322 // Verify BlockFrequency information.
2323 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2324 if (PGOVerifyHotBFI) {
2325 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2327 }
2328 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2329 }
2330 }
2331
2332 // Set function hotness attribute from the profile.
2333 // We have to apply these attributes at the end because their presence
2334 // can affect the BranchProbabilityInfo of any callers, resulting in an
2335 // inconsistent MST between prof-gen and prof-use.
2336 for (auto &F : HotFunctions) {
2337 F->addFnAttr(Attribute::InlineHint);
2338 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2339 << "\n");
2340 }
2341 for (auto &F : ColdFunctions) {
2342 // Only set when there is no Attribute::Hot set by the user. For Hot
2343 // attribute, user's annotation has the precedence over the profile.
2344 if (F->hasFnAttribute(Attribute::Hot)) {
2345 auto &Ctx = M.getContext();
2346 std::string Msg = std::string("Function ") + F->getName().str() +
2347 std::string(" is annotated as a hot function but"
2348 " the profile is cold");
2349 Ctx.diagnose(
2350 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2351 continue;
2352 }
2353 F->addFnAttr(Attribute::Cold);
2354 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2355 << "\n");
2356 }
2357 return true;
2358}
2359
2361 std::string Filename, std::string RemappingFilename, bool IsCS,
2363 : ProfileFileName(std::move(Filename)),
2364 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2365 FS(std::move(VFS)) {
2366 if (!PGOTestProfileFile.empty())
2367 ProfileFileName = PGOTestProfileFile;
2368 if (!PGOTestProfileRemappingFile.empty())
2369 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2370 if (!FS)
2372}
2373
2376
2377 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2378 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2380 };
2381 auto LookupBPI = [&FAM](Function &F) {
2383 };
2384 auto LookupBFI = [&FAM](Function &F) {
2386 };
2387 auto LookupLI = [&FAM](Function &F) {
2388 return &FAM.getResult<LoopAnalysis>(F);
2389 };
2390
2391 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2392 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2393 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2394 IsCS))
2395 return PreservedAnalyses::all();
2396
2397 return PreservedAnalyses::none();
2398}
2399
2400static std::string getSimpleNodeName(const BasicBlock *Node) {
2401 if (!Node->getName().empty())
2402 return Node->getName().str();
2403
2404 std::string SimpleNodeName;
2405 raw_string_ostream OS(SimpleNodeName);
2406 Node->printAsOperand(OS, false);
2407 return SimpleNodeName;
2408}
2409
2411 uint64_t MaxCount) {
2412 auto Weights = downscaleWeights(EdgeCounts, MaxCount);
2413
2414 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2415 : Weights) {
2416 dbgs() << W << " ";
2417 } dbgs() << "\n";);
2418
2419 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2420
2421 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2423 std::string BrCondStr = getBranchCondString(TI);
2424 if (BrCondStr.empty())
2425 return;
2426
2427 uint64_t WSum =
2428 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2429 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2430 uint64_t TotalCount =
2431 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2432 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2433 uint64_t Scale = calculateCountScale(WSum);
2434 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2435 scaleBranchCount(WSum, Scale));
2436 std::string BranchProbStr;
2437 raw_string_ostream OS(BranchProbStr);
2438 OS << BP;
2439 OS << " (total count : " << TotalCount << ")";
2440 Function *F = TI->getParent()->getParent();
2442 ORE.emit([&]() {
2443 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2444 << BrCondStr << " is true with probability : " << BranchProbStr;
2445 });
2446 }
2447}
2448
2449namespace llvm {
2450
2452 MDBuilder MDB(M->getContext());
2453 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2454 MDB.createIrrLoopHeaderWeight(Count));
2455}
2456
2457template <> struct GraphTraits<PGOUseFunc *> {
2458 using NodeRef = const BasicBlock *;
2461
2462 static NodeRef getEntryNode(const PGOUseFunc *G) {
2463 return &G->getFunc().front();
2464 }
2465
2467 return succ_begin(N);
2468 }
2469
2470 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2471
2472 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2473 return nodes_iterator(G->getFunc().begin());
2474 }
2475
2476 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2477 return nodes_iterator(G->getFunc().end());
2478 }
2479};
2480
2481template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2482 explicit DOTGraphTraits(bool isSimple = false)
2484
2485 static std::string getGraphName(const PGOUseFunc *G) {
2486 return std::string(G->getFunc().getName());
2487 }
2488
2489 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2490 std::string Result;
2491 raw_string_ostream OS(Result);
2492
2493 OS << getSimpleNodeName(Node) << ":\\l";
2494 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2495 OS << "Count : ";
2496 if (BI && BI->Count)
2497 OS << *BI->Count << "\\l";
2498 else
2499 OS << "Unknown\\l";
2500
2501 if (!PGOInstrSelect)
2502 return Result;
2503
2504 for (const Instruction &I : *Node) {
2505 if (!isa<SelectInst>(&I))
2506 continue;
2507 // Display scaled counts for SELECT instruction:
2508 OS << "SELECT : { T = ";
2509 uint64_t TC, FC;
2510 bool HasProf = extractBranchWeights(I, TC, FC);
2511 if (!HasProf)
2512 OS << "Unknown, F = Unknown }\\l";
2513 else
2514 OS << TC << ", F = " << FC << " }\\l";
2515 }
2516 return Result;
2517 }
2518};
2519
2520} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
#define LLVM_ABI
Definition: Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
std::string Name
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of precise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
cl::list< std::string > CtxPGOSkipCallsiteInstrument("ctx-prof-skip-callsite-instr", cl::Hidden, cl::desc("Do not instrument callsites to functions in this list. Intended " "for testing."))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static cl::opt< uint64_t > PGOColdInstrumentEntryThreshold("pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, cl::desc("For cold function instrumentation, skip instrumenting functions " "whose entry count is above the given value."))
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static cl::opt< bool > PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), cl::Hidden, cl::desc("Force to instrument loop entries."))
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType)
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static cl::opt< bool > PGOTreatUnknownAsCold("pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, cl::desc("For cold function instrumentation, treat count unknown(e.g. " "unprofiled) functions as cold."))
static bool isIndirectBrTarget(BasicBlock *BB)
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is " "mainly for test purpose."))
static std::string getBranchCondString(Instruction *TI)
cl::opt< bool > PGOInstrumentColdFunctionOnly("pgo-instrument-cold-function-only", cl::init(false), cl::Hidden, cl::desc("Enable cold function only instrumentation."))
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isSimple(Instruction *I)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
StringSet - A set-like wrapper for the StringMap.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:1079
Class for arbitrary precision integers.
Definition: APInt.h:78
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:50
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
iterator begin() const
Definition: ArrayRef.h:135
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:406
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
A union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:40
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:304
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:341
size_t bbInfoSize() const
Definition: CFGMST.h:349
size_t numEdges() const
Definition: CFGMST.h:347
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:359
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:352
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:285
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:767
LLVM_ABI StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:48
SelectionKind getSelectionKind() const
Definition: Comdat.h:47
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2261
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:226
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:214
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:403
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
bool empty() const
Definition: DenseMap.h:119
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
Base class for error info classes.
Definition: Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:52
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
Class to represent profile counts.
Definition: Function.h:297
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:585
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:69
@ ProtectedVisibility
The GV is protected.
Definition: GlobalValue.h:70
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:53
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:57
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:54
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:56
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Expected< NamedInstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:585
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:190
static bool canInstrumentCallsite(const CallBase &CB)
instrprof_error get() const
Definition: InstrProf.h:465
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:257
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:53
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:570
LLVM_ABI MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:21
LLVM_ABI MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:343
Metadata node.
Definition: Metadata.h:1077
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1443
Tuple of metadata.
Definition: Metadata.h:1493
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1522
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:151
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:148
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:25
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Value * getOperand(unsigned i) const
Definition: User.h:232
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
int getNumOccurrences() const
Definition: CommandLine.h:400
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:34
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:201
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:475
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
LLVM_ABI std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:421
LLVM_ABI cl::opt< bool > DebugInfoCorrelate
LLVM_ABI void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1460
LLVM_ABI unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:80
LLVM_ABI std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:410
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:990
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
LLVM_ABI void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
Definition: InstrProf.cpp:1464
PGOInstrumentationType
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:376
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:34
LLVM_ABI std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
Definition: InstrProf.cpp:437
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
LLVM_ABI GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:524
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1334
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfUse.cpp:45
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:312
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:443
LLVM_ABI bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:96
LLVM_ABI bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1516
LLVM_ABI void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1539
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1886
@ DS_Warning
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
llvm::cl::opt< llvm::InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI SmallVector< uint32_t > downscaleWeights(ArrayRef< uint64_t > Weights, std::optional< uint64_t > KnownMaxCount=std::nullopt)
Downscale the given weights, preserving the ratio.
LLVM_ABI bool isGPUProfTarget(const Module &M)
Determines whether the module targets a GPU eligible for PGO instrumentation.
Definition: InstrProf.cpp:482
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
@ CallsiteIndex
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:244
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:267
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
std::vector< uint64_t > Counts
Definition: InstrProf.h:896
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:994
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1096
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1077