LLVM 22.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileLoader transformation. This pass
10// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12// profile information in the given profile.
13//
14// This pass generates branch weight annotations on the IR:
15//
16// - prof: Represents branch weights. This annotation is added to branches
17// to indicate the weights of each edge coming out of the branch.
18// The weight of each edge is the weight of the target block for
19// that edge. The weight of a block B is computed as the maximum
20// number of samples found in B.
21//
22//===----------------------------------------------------------------------===//
23
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/DenseSet.h"
28#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/ADT/StringRef.h"
34#include "llvm/ADT/Twine.h"
45#include "llvm/IR/BasicBlock.h"
46#include "llvm/IR/DebugLoc.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalValue.h"
50#include "llvm/IR/InstrTypes.h"
51#include "llvm/IR/Instruction.h"
54#include "llvm/IR/LLVMContext.h"
55#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/PassManager.h"
59#include "llvm/IR/PseudoProbe.h"
66#include "llvm/Support/Debug.h"
70#include "llvm/Transforms/IPO.h"
81#include <algorithm>
82#include <cassert>
83#include <cstdint>
84#include <functional>
85#include <limits>
86#include <map>
87#include <memory>
88#include <queue>
89#include <string>
90#include <system_error>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace sampleprof;
96using namespace llvm::sampleprofutil;
98#define DEBUG_TYPE "sample-profile"
99#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
100
101STATISTIC(NumCSInlined,
102 "Number of functions inlined with context sensitive profile");
103STATISTIC(NumCSNotInlined,
104 "Number of functions not inlined with context sensitive profile");
105STATISTIC(NumMismatchedProfile,
106 "Number of functions with CFG mismatched profile");
107STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
108STATISTIC(NumDuplicatedInlinesite,
109 "Number of inlined callsites with a partial distribution factor");
110
111STATISTIC(NumCSInlinedHitMinLimit,
112 "Number of functions with FDO inline stopped due to min size limit");
113STATISTIC(NumCSInlinedHitMaxLimit,
114 "Number of functions with FDO inline stopped due to max size limit");
116 NumCSInlinedHitGrowthLimit,
117 "Number of functions with FDO inline stopped due to growth size limit");
118
119// Command line option to specify the file to read samples from. This is
120// mainly used for debugging.
122 "sample-profile-file", cl::init(""), cl::value_desc("filename"),
123 cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
124
125// The named file contains a set of transformations that may have been applied
126// to the symbol names between the program from which the sample data was
127// collected and the current program's symbols.
129 "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
130 cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
131
133 "salvage-stale-profile", cl::Hidden, cl::init(false),
134 cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
135 "location for sample profile query."));
137 SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false),
138 cl::desc("Salvage unused profile by matching with new "
139 "functions on call graph."));
140
142 "report-profile-staleness", cl::Hidden, cl::init(false),
143 cl::desc("Compute and report stale profile statistical metrics."));
144
146 "persist-profile-staleness", cl::Hidden, cl::init(false),
147 cl::desc("Compute stale profile statistical metrics and write it into the "
148 "native object file(.llvm_stats section)."));
149
151 "profile-sample-accurate", cl::Hidden, cl::init(false),
152 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
153 "callsite and function as having 0 samples. Otherwise, treat "
154 "un-sampled callsites and functions conservatively as unknown. "));
155
157 "profile-sample-block-accurate", cl::Hidden, cl::init(false),
158 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
159 "branches and calls as having 0 samples. Otherwise, treat "
160 "them conservatively as unknown. "));
161
163 "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
164 cl::desc("For symbols in profile symbol list, regard their profiles to "
165 "be accurate. It may be overridden by profile-sample-accurate. "));
166
168 "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
169 cl::desc("Merge past inlinee's profile to outline version if sample "
170 "profile loader decided not to inline a call site. It will "
171 "only be enabled when top-down order of profile loading is "
172 "enabled. "));
173
175 "sample-profile-top-down-load", cl::Hidden, cl::init(true),
176 cl::desc("Do profile annotation and inlining for functions in top-down "
177 "order of call graph during sample profile loading. It only "
178 "works for new pass manager. "));
179
180static cl::opt<bool>
181 UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
182 cl::desc("Process functions in a top-down order "
183 "defined by the profiled call graph when "
184 "-sample-profile-top-down-load is on."));
185
187 "sample-profile-inline-size", cl::Hidden, cl::init(false),
188 cl::desc("Inline cold call sites in profile loader if it's beneficial "
189 "for code size."));
190
191// Since profiles are consumed by many passes, turning on this option has
192// side effects. For instance, pre-link SCC inliner would see merged profiles
193// and inline the hot functions (that are skipped in this pass).
195 "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
196 cl::desc(
197 "If true, artificially skip inline transformation in sample-loader "
198 "pass, and merge (or scale) profiles (as configured by "
199 "--sample-profile-merge-inlinee)."));
200
201namespace llvm {
203 SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
204 cl::desc("Sort profiled recursion by edge weights."));
205
207 "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
208 cl::desc("The size growth ratio limit for proirity-based sample profile "
209 "loader inlining."));
210
212 "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
213 cl::desc("The lower bound of size growth limit for "
214 "proirity-based sample profile loader inlining."));
215
217 "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
218 cl::desc("The upper bound of size growth limit for "
219 "proirity-based sample profile loader inlining."));
220
222 "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
223 cl::desc("Hot callsite threshold for proirity-based sample profile loader "
224 "inlining."));
225
227 "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
228 cl::desc("Threshold for inlining cold callsites"));
229} // namespace llvm
230
232 "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
233 cl::desc(
234 "Relative hotness percentage threshold for indirect "
235 "call promotion in proirity-based sample profile loader inlining."));
236
238 "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
239 cl::desc(
240 "Skip relative hotness check for ICP up to given number of targets."));
241
243 "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000),
244 cl::desc("A function is considered hot for staleness error check if its "
245 "total sample count is above the specified percentile"));
246
248 "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
249 cl::desc("Skip the check if the number of hot functions is smaller than "
250 "the specified number."));
251
253 "precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80),
254 cl::desc("Reject the profile if the mismatch percent is higher than the "
255 "given number."));
256
258 "sample-profile-prioritized-inline", cl::Hidden,
259 cl::desc("Use call site prioritized inlining for sample profile loader. "
260 "Currently only CSSPGO is supported."));
261
263 "sample-profile-use-preinliner", cl::Hidden,
264 cl::desc("Use the preinliner decisions stored in profile context."));
265
267 "sample-profile-recursive-inline", cl::Hidden,
268 cl::desc("Allow sample loader inliner to inline recursive calls."));
269
271 "sample-profile-remove-probe", cl::Hidden, cl::init(false),
272 cl::desc("Remove pseudo-probe after sample profile annotation."));
273
275 "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
276 cl::desc(
277 "Optimization remarks file containing inline remarks to be replayed "
278 "by inlining from sample profile loader."),
279 cl::Hidden);
280
282 "sample-profile-inline-replay-scope",
283 cl::init(ReplayInlinerSettings::Scope::Function),
284 cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
285 "Replay on functions that have remarks associated "
286 "with them (default)"),
287 clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
288 "Replay on the entire module")),
289 cl::desc("Whether inline replay should be applied to the entire "
290 "Module or just the Functions (default) that are present as "
291 "callers in remarks during sample profile inlining."),
292 cl::Hidden);
293
295 "sample-profile-inline-replay-fallback",
296 cl::init(ReplayInlinerSettings::Fallback::Original),
299 ReplayInlinerSettings::Fallback::Original, "Original",
300 "All decisions not in replay send to original advisor (default)"),
301 clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
302 "AlwaysInline", "All decisions not in replay are inlined"),
303 clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
304 "All decisions not in replay are not inlined")),
305 cl::desc("How sample profile inline replay treats sites that don't come "
306 "from the replay. Original: defers to original advisor, "
307 "AlwaysInline: inline all sites not in replay, NeverInline: "
308 "inline no sites not in replay"),
309 cl::Hidden);
310
312 "sample-profile-inline-replay-format",
313 cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
315 clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
316 clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
317 "<Line Number>:<Column Number>"),
318 clEnumValN(CallSiteFormat::Format::LineDiscriminator,
319 "LineDiscriminator", "<Line Number>.<Discriminator>"),
320 clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
321 "LineColumnDiscriminator",
322 "<Line Number>:<Column Number>.<Discriminator> (default)")),
323 cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
324
326 MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
327 cl::desc("Max number of promotions for a single indirect "
328 "call callsite in sample profile loader"));
329
331 "overwrite-existing-weights", cl::Hidden, cl::init(false),
332 cl::desc("Ignore existing branch weights on IR and always overwrite."));
333
335 "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
336 cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
337 "sample-profile inline pass name."));
338
339namespace llvm {
341}
342
343namespace {
344
345using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
346using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
347using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
348using EdgeWeightMap = DenseMap<Edge, uint64_t>;
349using BlockEdgeMap =
351
352class GUIDToFuncNameMapper {
353public:
354 GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
355 DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
356 : CurrentReader(Reader), CurrentModule(M),
357 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
358 if (!CurrentReader.useMD5())
359 return;
360
361 for (const auto &F : CurrentModule) {
362 StringRef OrigName = F.getName();
363 CurrentGUIDToFuncNameMap.insert(
364 {Function::getGUIDAssumingExternalLinkage(OrigName), OrigName});
365
366 // Local to global var promotion used by optimization like thinlto
367 // will rename the var and add suffix like ".llvm.xxx" to the
368 // original local name. In sample profile, the suffixes of function
369 // names are all stripped. Since it is possible that the mapper is
370 // built in post-thin-link phase and var promotion has been done,
371 // we need to add the substring of function name without the suffix
372 // into the GUIDToFuncNameMap.
374 if (CanonName != OrigName)
375 CurrentGUIDToFuncNameMap.insert(
376 {Function::getGUIDAssumingExternalLinkage(CanonName), CanonName});
377 }
378
379 // Update GUIDToFuncNameMap for each function including inlinees.
380 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
381 }
382
383 ~GUIDToFuncNameMapper() {
384 if (!CurrentReader.useMD5())
385 return;
386
387 CurrentGUIDToFuncNameMap.clear();
388
389 // Reset GUIDToFuncNameMap for of each function as they're no
390 // longer valid at this point.
391 SetGUIDToFuncNameMapForAll(nullptr);
392 }
393
394private:
395 void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
396 std::queue<FunctionSamples *> FSToUpdate;
397 for (auto &IFS : CurrentReader.getProfiles()) {
398 FSToUpdate.push(&IFS.second);
399 }
400
401 while (!FSToUpdate.empty()) {
402 FunctionSamples *FS = FSToUpdate.front();
403 FSToUpdate.pop();
404 FS->GUIDToFuncNameMap = Map;
405 for (const auto &ICS : FS->getCallsiteSamples()) {
406 const FunctionSamplesMap &FSMap = ICS.second;
407 for (const auto &IFS : FSMap) {
408 FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
409 FSToUpdate.push(&FS);
410 }
411 }
412 }
413 }
414
416 Module &CurrentModule;
417 DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
418};
419
420// Inline candidate used by iterative callsite prioritized inliner
421struct InlineCandidate {
422 CallBase *CallInstr;
423 const FunctionSamples *CalleeSamples;
424 // Prorated callsite count, which will be used to guide inlining. For example,
425 // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
426 // copies will get their own distribution factors and their prorated counts
427 // will be used to decide if they should be inlined independently.
428 uint64_t CallsiteCount;
429 // Call site distribution factor to prorate the profile samples for a
430 // duplicated callsite. Default value is 1.0.
431 float CallsiteDistribution;
432};
433
434// Inline candidate comparer using call site weight
435struct CandidateComparer {
436 bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
437 if (LHS.CallsiteCount != RHS.CallsiteCount)
438 return LHS.CallsiteCount < RHS.CallsiteCount;
439
440 const FunctionSamples *LCS = LHS.CalleeSamples;
441 const FunctionSamples *RCS = RHS.CalleeSamples;
442 // In inline replay mode, CalleeSamples may be null and the order doesn't
443 // matter.
444 if (!LCS || !RCS)
445 return LCS;
446
447 // Tie breaker using number of samples try to favor smaller functions first
448 if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
449 return LCS->getBodySamples().size() > RCS->getBodySamples().size();
450
451 // Tie breaker using GUID so we have stable/deterministic inlining order
452 return LCS->getGUID() < RCS->getGUID();
453 }
454};
455
456using CandidateQueue =
458 CandidateComparer>;
459
460/// Sample profile pass.
461///
462/// This pass reads profile data from the file specified by
463/// -sample-profile-file and annotates every affected function with the
464/// profile information found in that file.
465class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
466public:
467 SampleProfileLoader(
468 StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
470 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
471 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
472 std::function<const TargetLibraryInfo &(Function &)> GetTLI,
473 LazyCallGraph &CG, bool DisableSampleProfileInlining,
474 bool UseFlattenedProfile)
476 std::move(FS)),
477 GetAC(std::move(GetAssumptionCache)),
478 GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
479 CG(CG), LTOPhase(LTOPhase),
480 AnnotatedPassName(AnnotateSampleProfileInlinePhase
484 DisableSampleProfileInlining(DisableSampleProfileInlining),
485 UseFlattenedProfile(UseFlattenedProfile) {}
486
487 bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
488 bool runOnModule(Module &M, ModuleAnalysisManager *AM,
489 ProfileSummaryInfo *_PSI);
490
491protected:
493 bool emitAnnotations(Function &F);
495 const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
496 const FunctionSamples *
497 findFunctionSamples(const Instruction &I) const override;
498 std::vector<const FunctionSamples *>
499 findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
500 void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
501 DenseSet<GlobalValue::GUID> &InlinedGUIDs,
502 uint64_t Threshold);
503 // Attempt to promote indirect call and also inline the promoted call
504 bool tryPromoteAndInlineCandidate(
505 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
506 uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
507
508 bool inlineHotFunctions(Function &F,
509 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
510 std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
511 bool getExternalInlineAdvisorShouldInline(CallBase &CB);
512 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
513 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
514 bool
515 tryInlineCandidate(InlineCandidate &Candidate,
516 SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
517 bool
518 inlineHotFunctionsWithPriority(Function &F,
519 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
520 // Inline cold/small functions in addition to hot ones
521 bool shouldInlineColdCallee(CallBase &CallInst);
522 void emitOptimizationRemarksForInlineCandidates(
523 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
524 bool Hot);
525 void promoteMergeNotInlinedContextSamples(
527 const Function &F);
528 std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
529 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
530 void generateMDProfMetadata(Function &F);
531 bool rejectHighStalenessProfile(Module &M, ProfileSummaryInfo *PSI,
532 const SampleProfileMap &Profiles);
533 void removePseudoProbeInstsDiscriminator(Module &M);
534
535 /// Map from function name to Function *. Used to find the function from
536 /// the function name. If the function name contains suffix, additional
537 /// entry is added to map from the stripped name to the function if there
538 /// is one-to-one mapping.
540
541 /// Map from function name to profile name generated by call-graph based
542 /// profile fuzzy matching(--salvage-unused-profile).
544
545 std::function<AssumptionCache &(Function &)> GetAC;
546 std::function<TargetTransformInfo &(Function &)> GetTTI;
547 std::function<const TargetLibraryInfo &(Function &)> GetTLI;
548 LazyCallGraph &CG;
549
550 /// Profile tracker for different context.
551 std::unique_ptr<SampleContextTracker> ContextTracker;
552
553 /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
554 ///
555 /// We need to know the LTO phase because for example in ThinLTOPrelink
556 /// phase, in annotation, we should not promote indirect calls. Instead,
557 /// we will mark GUIDs that needs to be annotated to the function.
558 const ThinOrFullLTOPhase LTOPhase;
559 const std::string AnnotatedPassName;
560
561 /// Profle Symbol list tells whether a function name appears in the binary
562 /// used to generate the current profile.
563 std::shared_ptr<ProfileSymbolList> PSL;
564
565 // Information recorded when we declined to inline a call site
566 // because we have determined it is too cold is accumulated for
567 // each callee function. Initially this is just the entry count.
568 struct NotInlinedProfileInfo {
569 uint64_t entryCount;
570 };
572
573 // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
574 // all the function symbols defined or declared in current module.
575 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
576
577 // All the Names used in FunctionSamples including outline function
578 // names, inline instance names and call target names.
579 StringSet<> NamesInProfile;
580 // MD5 version of NamesInProfile. Either NamesInProfile or GUIDsInProfile is
581 // populated, depends on whether the profile uses MD5. Because the name table
582 // generally contains several magnitude more entries than the number of
583 // functions, we do not want to convert all names from one form to another.
584 llvm::DenseSet<uint64_t> GUIDsInProfile;
585
586 // For symbol in profile symbol list, whether to regard their profiles
587 // to be accurate. It is mainly decided by existance of profile symbol
588 // list and -profile-accurate-for-symsinlist flag, but it can be
589 // overriden by -profile-sample-accurate or profile-sample-accurate
590 // attribute.
591 bool ProfAccForSymsInList;
592
593 bool DisableSampleProfileInlining;
594
595 bool UseFlattenedProfile;
596
597 // External inline advisor used to replay inline decision from remarks.
598 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
599
600 // A helper to implement the sample profile matching algorithm.
601 std::unique_ptr<SampleProfileMatcher> MatchingManager;
602
603private:
604 const char *getAnnotatedRemarkPassName() const {
605 return AnnotatedPassName.c_str();
606 }
607};
608} // end anonymous namespace
609
610namespace llvm {
611template <>
613 return succ_empty(BB);
614}
615
616template <>
618 const std::vector<const BasicBlockT *> &BasicBlocks,
619 BlockEdgeMap &Successors, FlowFunction &Func) {
620 for (auto &Jump : Func.Jumps) {
621 const auto *BB = BasicBlocks[Jump.Source];
622 const auto *Succ = BasicBlocks[Jump.Target];
623 const Instruction *TI = BB->getTerminator();
624 // Check if a block ends with InvokeInst and mark non-taken branch unlikely.
625 // In that case block Succ should be a landing pad
626 const auto &Succs = Successors[BB];
627 if (Succs.size() == 2 && Succs.back() == Succ) {
628 if (isa<InvokeInst>(TI)) {
629 Jump.IsUnlikely = true;
630 }
631 }
632 const Instruction *SuccTI = Succ->getTerminator();
633 // Check if the target block contains UnreachableInst and mark it unlikely
634 if (SuccTI->getNumSuccessors() == 0) {
635 if (isa<UnreachableInst>(SuccTI)) {
636 Jump.IsUnlikely = true;
637 }
638 }
639 }
640}
641
642template <>
644 Function &F) {
645 DT.reset(new DominatorTree);
646 DT->recalculate(F);
647
648 PDT.reset(new PostDominatorTree(F));
649
650 LI.reset(new LoopInfo);
651 LI->analyze(*DT);
652}
653} // namespace llvm
654
655ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
657 return getProbeWeight(Inst);
658
659 const DebugLoc &DLoc = Inst.getDebugLoc();
660 if (!DLoc)
661 return std::error_code();
662
663 // Ignore all intrinsics, phinodes and branch instructions.
664 // Branch and phinodes instruction usually contains debug info from sources
665 // outside of the residing basic block, thus we ignore them during annotation.
666 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
667 return std::error_code();
668
669 // For non-CS profile, if a direct call/invoke instruction is inlined in
670 // profile (findCalleeFunctionSamples returns non-empty result), but not
671 // inlined here, it means that the inlined callsite has no sample, thus the
672 // call instruction should have 0 count.
673 // For CS profile, the callsite count of previously inlined callees is
674 // populated with the entry count of the callees.
676 if (const auto *CB = dyn_cast<CallBase>(&Inst))
677 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
678 return 0;
679
680 return getInstWeightImpl(Inst);
681}
682
683/// Get the FunctionSamples for a call instruction.
684///
685/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
686/// instance in which that call instruction is calling to. It contains
687/// all samples that resides in the inlined instance. We first find the
688/// inlined instance in which the call instruction is from, then we
689/// traverse its children to find the callsite with the matching
690/// location.
691///
692/// \param Inst Call/Invoke instruction to query.
693///
694/// \returns The FunctionSamples pointer to the inlined instance.
695const FunctionSamples *
696SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
697 const DILocation *DIL = Inst.getDebugLoc();
698 if (!DIL) {
699 return nullptr;
700 }
701
702 StringRef CalleeName;
703 if (Function *Callee = Inst.getCalledFunction())
704 CalleeName = Callee->getName();
705
707 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
708
709 const FunctionSamples *FS = findFunctionSamples(Inst);
710 if (FS == nullptr)
711 return nullptr;
712
713 return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
714 CalleeName, Reader->getRemapper(),
715 &FuncNameToProfNameMap);
716}
717
718/// Returns a vector of FunctionSamples that are the indirect call targets
719/// of \p Inst. The vector is sorted by the total number of samples. Stores
720/// the total call count of the indirect call in \p Sum.
721std::vector<const FunctionSamples *>
722SampleProfileLoader::findIndirectCallFunctionSamples(
723 const Instruction &Inst, uint64_t &Sum) const {
724 const DILocation *DIL = Inst.getDebugLoc();
725 std::vector<const FunctionSamples *> R;
726
727 if (!DIL) {
728 return R;
729 }
730
731 auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
732 assert(L && R && "Expect non-null FunctionSamples");
733 if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
734 return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
735 return L->getGUID() < R->getGUID();
736 };
737
739 auto CalleeSamples =
740 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
741 if (CalleeSamples.empty())
742 return R;
743
744 // For CSSPGO, we only use target context profile's entry count
745 // as that already includes both inlined callee and non-inlined ones..
746 Sum = 0;
747 for (const auto *const FS : CalleeSamples) {
748 Sum += FS->getHeadSamplesEstimate();
749 R.push_back(FS);
750 }
751 llvm::sort(R, FSCompare);
752 return R;
753 }
754
755 const FunctionSamples *FS = findFunctionSamples(Inst);
756 if (FS == nullptr)
757 return R;
758
760 Sum = 0;
761 if (auto T = FS->findCallTargetMapAt(CallSite))
762 for (const auto &T_C : *T)
763 Sum += T_C.second;
764 if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
765 if (M->empty())
766 return R;
767 for (const auto &NameFS : *M) {
768 Sum += NameFS.second.getHeadSamplesEstimate();
769 R.push_back(&NameFS.second);
770 }
771 llvm::sort(R, FSCompare);
772 }
773 return R;
774}
775
776const FunctionSamples *
777SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
779 std::optional<PseudoProbe> Probe = extractProbe(Inst);
780 if (!Probe)
781 return nullptr;
782 }
783
784 const DILocation *DIL = Inst.getDebugLoc();
785 if (!DIL)
786 return Samples;
787
788 auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
789 if (it.second) {
791 it.first->second = ContextTracker->getContextSamplesFor(DIL);
792 else
793 it.first->second = Samples->findFunctionSamples(
794 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
795 }
796 return it.first->second;
797}
798
799/// Check whether the indirect call promotion history of \p Inst allows
800/// the promotion for \p Candidate.
801/// If the profile count for the promotion candidate \p Candidate is
802/// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
803/// for \p Inst. If we already have at least MaxNumPromotions
804/// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
805/// cannot promote for \p Inst anymore.
806static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
807 uint64_t TotalCount = 0;
808 auto ValueData = getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget,
809 MaxNumPromotions, TotalCount, true);
810 // No valid value profile so no promoted targets have been recorded
811 // before. Ok to do ICP.
812 if (ValueData.empty())
813 return true;
814
815 unsigned NumPromoted = 0;
816 for (const auto &V : ValueData) {
817 if (V.Count != NOMORE_ICP_MAGICNUM)
818 continue;
819
820 // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
821 // metadata, it means the candidate has been promoted for this
822 // indirect call.
823 if (V.Value == Function::getGUIDAssumingExternalLinkage(Candidate))
824 return false;
825 NumPromoted++;
826 // If already have MaxNumPromotions promotion, don't do it anymore.
827 if (NumPromoted == MaxNumPromotions)
828 return false;
829 }
830 return true;
831}
832
833/// Update indirect call target profile metadata for \p Inst.
834/// Usually \p Sum is the sum of counts of all the targets for \p Inst.
835/// If it is 0, it means updateIDTMetaData is used to mark a
836/// certain target to be promoted already. If it is not zero,
837/// we expect to use it to update the total count in the value profile.
838static void
840 const SmallVectorImpl<InstrProfValueData> &CallTargets,
841 uint64_t Sum) {
842 // Bail out early if MaxNumPromotions is zero.
843 // This prevents allocating an array of zero length below.
844 //
845 // Note `updateIDTMetaData` is called in two places so check
846 // `MaxNumPromotions` inside it.
847 if (MaxNumPromotions == 0)
848 return;
849 // OldSum is the existing total count in the value profile data.
850 uint64_t OldSum = 0;
851 auto ValueData = getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget,
852 MaxNumPromotions, OldSum, true);
853
854 DenseMap<uint64_t, uint64_t> ValueCountMap;
855 if (Sum == 0) {
856 assert((CallTargets.size() == 1 &&
857 CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
858 "If sum is 0, assume only one element in CallTargets "
859 "with count being NOMORE_ICP_MAGICNUM");
860 // Initialize ValueCountMap with existing value profile data.
861 for (const auto &V : ValueData)
862 ValueCountMap[V.Value] = V.Count;
863 auto Pair =
864 ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
865 // If the target already exists in value profile, decrease the total
866 // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
867 if (!Pair.second) {
868 OldSum -= Pair.first->second;
869 Pair.first->second = NOMORE_ICP_MAGICNUM;
870 }
871 Sum = OldSum;
872 } else {
873 // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
874 // counts in the value profile.
875 for (const auto &V : ValueData) {
876 if (V.Count == NOMORE_ICP_MAGICNUM)
877 ValueCountMap[V.Value] = V.Count;
878 }
879
880 for (const auto &Data : CallTargets) {
881 auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
882 if (Pair.second)
883 continue;
884 // The target represented by Data.Value has already been promoted.
885 // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
886 // Sum by Data.Count.
887 assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
888 Sum -= Data.Count;
889 }
890 }
891
893 for (const auto &ValueCount : ValueCountMap) {
894 NewCallTargets.emplace_back(
895 InstrProfValueData{ValueCount.first, ValueCount.second});
896 }
897
898 llvm::sort(NewCallTargets,
899 [](const InstrProfValueData &L, const InstrProfValueData &R) {
900 return std::tie(L.Count, L.Value) > std::tie(R.Count, R.Value);
901 });
902
903 uint32_t MaxMDCount =
904 std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
905 annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
906 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
907}
908
909/// Attempt to promote indirect call and also inline the promoted call.
910///
911/// \param F Caller function.
912/// \param Candidate ICP and inline candidate.
913/// \param SumOrigin Original sum of target counts for indirect call before
914/// promoting given candidate.
915/// \param Sum Prorated sum of remaining target counts for indirect call
916/// after promoting given candidate.
917/// \param InlinedCallSite Output vector for new call sites exposed after
918/// inlining.
919bool SampleProfileLoader::tryPromoteAndInlineCandidate(
920 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
921 SmallVector<CallBase *, 8> *InlinedCallSite) {
922 // Bail out early if sample-loader inliner is disabled.
923 if (DisableSampleProfileInlining)
924 return false;
925
926 // Bail out early if MaxNumPromotions is zero.
927 // This prevents allocating an array of zero length in callees below.
928 if (MaxNumPromotions == 0)
929 return false;
930 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
931 auto R = SymbolMap.find(CalleeFunctionName);
932 if (R == SymbolMap.end() || !R->second)
933 return false;
934
935 auto &CI = *Candidate.CallInstr;
936 if (!doesHistoryAllowICP(CI, R->second->getName()))
937 return false;
938
939 const char *Reason = "Callee function not available";
940 // R->getValue() != &F is to prevent promoting a recursive call.
941 // If it is a recursive call, we do not inline it as it could bloat
942 // the code exponentially. There is way to better handle this, e.g.
943 // clone the caller first, and inline the cloned caller if it is
944 // recursive. As llvm does not inline recursive calls, we will
945 // simply ignore it instead of handling it explicitly.
946 if (!R->second->isDeclaration() && R->second->getSubprogram() &&
947 R->second->hasFnAttribute("use-sample-profile") &&
948 R->second != &F && isLegalToPromote(CI, R->second, &Reason)) {
949 // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
950 // in the value profile metadata so the target won't be promoted again.
951 SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
952 Function::getGUIDAssumingExternalLinkage(R->second->getName()),
954 updateIDTMetaData(CI, SortedCallTargets, 0);
955
956 auto *DI = &pgo::promoteIndirectCall(
957 CI, R->second, Candidate.CallsiteCount, Sum, false, ORE);
958 if (DI) {
959 Sum -= Candidate.CallsiteCount;
960 // Do not prorate the indirect callsite distribution since the original
961 // distribution will be used to scale down non-promoted profile target
962 // counts later. By doing this we lose track of the real callsite count
963 // for the leftover indirect callsite as a trade off for accurate call
964 // target counts.
965 // TODO: Ideally we would have two separate factors, one for call site
966 // counts and one is used to prorate call target counts.
967 // Do not update the promoted direct callsite distribution at this
968 // point since the original distribution combined with the callee profile
969 // will be used to prorate callsites from the callee if inlined. Once not
970 // inlined, the direct callsite distribution should be prorated so that
971 // the it will reflect the real callsite counts.
972 Candidate.CallInstr = DI;
973 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
974 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
975 if (!Inlined) {
976 // Prorate the direct callsite distribution so that it reflects real
977 // callsite counts.
979 *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
980 }
981 return Inlined;
982 }
983 }
984 } else {
985 LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
987 Candidate.CallInstr->getName())<< " because "
988 << Reason << "\n");
989 }
990 return false;
991}
992
993bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
995 return false;
996
998 if (Callee == nullptr)
999 return false;
1000
1002 GetAC, GetTLI);
1003
1004 if (Cost.isNever())
1005 return false;
1006
1007 if (Cost.isAlways())
1008 return true;
1009
1010 return Cost.getCost() <= SampleColdCallSiteThreshold;
1011}
1012
1013void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1014 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1015 bool Hot) {
1016 for (auto *I : Candidates) {
1017 Function *CalledFunction = I->getCalledFunction();
1018 if (CalledFunction) {
1019 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1020 "InlineAttempt", I->getDebugLoc(),
1021 I->getParent())
1022 << "previous inlining reattempted for "
1023 << (Hot ? "hotness: '" : "size: '")
1024 << ore::NV("Callee", CalledFunction) << "' into '"
1025 << ore::NV("Caller", &F) << "'");
1026 }
1027 }
1028}
1029
1030void SampleProfileLoader::findExternalInlineCandidate(
1031 CallBase *CB, const FunctionSamples *Samples,
1032 DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {
1033
1034 // If ExternalInlineAdvisor(ReplayInlineAdvisor) wants to inline an external
1035 // function make sure it's imported
1036 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1037 // Samples may not exist for replayed function, if so
1038 // just add the direct GUID and move on
1039 if (!Samples) {
1040 InlinedGUIDs.insert(Function::getGUIDAssumingExternalLinkage(
1041 CB->getCalledFunction()->getName()));
1042 return;
1043 }
1044 // Otherwise, drop the threshold to import everything that we can
1045 Threshold = 0;
1046 }
1047
1048 // In some rare cases, call instruction could be changed after being pushed
1049 // into inline candidate queue, this is because earlier inlining may expose
1050 // constant propagation which can change indirect call to direct call. When
1051 // this happens, we may fail to find matching function samples for the
1052 // candidate later, even if a match was found when the candidate was enqueued.
1053 if (!Samples)
1054 return;
1055
1056 // For AutoFDO profile, retrieve candidate profiles by walking over
1057 // the nested inlinee profiles.
1059 // Set threshold to zero to honor pre-inliner decision.
1061 Threshold = 0;
1062 Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1063 return;
1064 }
1065
1066 ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
1067 std::queue<ContextTrieNode *> CalleeList;
1068 CalleeList.push(Caller);
1069 while (!CalleeList.empty()) {
1070 ContextTrieNode *Node = CalleeList.front();
1071 CalleeList.pop();
1072 FunctionSamples *CalleeSample = Node->getFunctionSamples();
1073 // For CSSPGO profile, retrieve candidate profile by walking over the
1074 // trie built for context profile. Note that also take call targets
1075 // even if callee doesn't have a corresponding context profile.
1076 if (!CalleeSample)
1077 continue;
1078
1079 // If pre-inliner decision is used, honor that for importing as well.
1080 bool PreInline =
1083 if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
1084 continue;
1085
1086 Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
1087 // Add to the import list only when it's defined out of module.
1088 if (!Func || Func->isDeclaration())
1089 InlinedGUIDs.insert(CalleeSample->getGUID());
1090
1091 // Import hot CallTargets, which may not be available in IR because full
1092 // profile annotation cannot be done until backend compilation in ThinLTO.
1093 for (const auto &BS : CalleeSample->getBodySamples())
1094 for (const auto &TS : BS.second.getCallTargets())
1095 if (TS.second > Threshold) {
1096 const Function *Callee = SymbolMap.lookup(TS.first);
1097 if (!Callee || Callee->isDeclaration())
1098 InlinedGUIDs.insert(TS.first.getHashCode());
1099 }
1100
1101 // Import hot child context profile associted with callees. Note that this
1102 // may have some overlap with the call target loop above, but doing this
1103 // based child context profile again effectively allow us to use the max of
1104 // entry count and call target count to determine importing.
1105 for (auto &Child : Node->getAllChildContext()) {
1106 ContextTrieNode *CalleeNode = &Child.second;
1107 CalleeList.push(CalleeNode);
1108 }
1109 }
1110}
1111
1112/// Iteratively inline hot callsites of a function.
1113///
1114/// Iteratively traverse all callsites of the function \p F, so as to
1115/// find out callsites with corresponding inline instances.
1116///
1117/// For such callsites,
1118/// - If it is hot enough, inline the callsites and adds callsites of the callee
1119/// into the caller. If the call is an indirect call, first promote
1120/// it to direct call. Each indirect call is limited with a single target.
1121///
1122/// - If a callsite is not inlined, merge the its profile to the outline
1123/// version (if --sample-profile-merge-inlinee is true), or scale the
1124/// counters of standalone function based on the profile of inlined
1125/// instances (if --sample-profile-merge-inlinee is false).
1126///
1127/// Later passes may consume the updated profiles.
1128///
1129/// \param F function to perform iterative inlining.
1130/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1131/// inlined in the profiled binary.
1132///
1133/// \returns True if there is any inline happened.
1134bool SampleProfileLoader::inlineHotFunctions(
1135 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1136 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1137 // Profile symbol list is ignored when profile-sample-accurate is on.
1138 assert((!ProfAccForSymsInList ||
1140 !F.hasFnAttribute("profile-sample-accurate"))) &&
1141 "ProfAccForSymsInList should be false when profile-sample-accurate "
1142 "is enabled");
1143
1144 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1145 bool Changed = false;
1146 bool LocalChanged = true;
1147 while (LocalChanged) {
1148 LocalChanged = false;
1150 for (auto &BB : F) {
1151 bool Hot = false;
1152 SmallVector<CallBase *, 10> AllCandidates;
1153 SmallVector<CallBase *, 10> ColdCandidates;
1154 for (auto &I : BB) {
1155 const FunctionSamples *FS = nullptr;
1156 if (auto *CB = dyn_cast<CallBase>(&I)) {
1157 if (!isa<IntrinsicInst>(I)) {
1158 if ((FS = findCalleeFunctionSamples(*CB))) {
1159 assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1160 "GUIDToFuncNameMap has to be populated");
1161 AllCandidates.push_back(CB);
1162 if (FS->getHeadSamplesEstimate() > 0 ||
1164 LocalNotInlinedCallSites.insert({CB, FS});
1165 if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1166 Hot = true;
1167 else if (shouldInlineColdCallee(*CB))
1168 ColdCandidates.push_back(CB);
1169 } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1170 AllCandidates.push_back(CB);
1171 }
1172 }
1173 }
1174 }
1175 if (Hot || ExternalInlineAdvisor) {
1176 CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1177 emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1178 } else {
1179 CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1180 emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1181 }
1182 }
1183 for (CallBase *I : CIS) {
1184 Function *CalledFunction = I->getCalledFunction();
1185 InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1186 0 /* dummy count */,
1187 1.0 /* dummy distribution factor */};
1188 // Do not inline recursive calls.
1189 if (CalledFunction == &F)
1190 continue;
1191 if (I->isIndirectCall()) {
1192 uint64_t Sum;
1193 for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1194 uint64_t SumOrigin = Sum;
1195 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1196 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1197 PSI->getOrCompHotCountThreshold());
1198 continue;
1199 }
1200 if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1201 continue;
1202
1203 Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
1204 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1205 LocalNotInlinedCallSites.erase(I);
1206 LocalChanged = true;
1207 }
1208 }
1209 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1210 !CalledFunction->isDeclaration()) {
1211 if (tryInlineCandidate(Candidate)) {
1212 LocalNotInlinedCallSites.erase(I);
1213 LocalChanged = true;
1214 }
1215 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1216 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1217 InlinedGUIDs,
1218 PSI->getOrCompHotCountThreshold());
1219 }
1220 }
1221 Changed |= LocalChanged;
1222 }
1223
1224 // For CS profile, profile for not inlined context will be merged when
1225 // base profile is being retrieved.
1227 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1228 return Changed;
1229}
1230
1231bool SampleProfileLoader::tryInlineCandidate(
1232 InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1233 // Do not attempt to inline a candidate if
1234 // --disable-sample-loader-inlining is true.
1235 if (DisableSampleProfileInlining)
1236 return false;
1237
1238 CallBase &CB = *Candidate.CallInstr;
1239 Function *CalledFunction = CB.getCalledFunction();
1240 assert(CalledFunction && "Expect a callee with definition");
1241 DebugLoc DLoc = CB.getDebugLoc();
1242 BasicBlock *BB = CB.getParent();
1243
1244 InlineCost Cost = shouldInlineCandidate(Candidate);
1245 if (Cost.isNever()) {
1246 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1247 "InlineFail", DLoc, BB)
1248 << "incompatible inlining");
1249 return false;
1250 }
1251
1252 if (!Cost)
1253 return false;
1254
1255 InlineFunctionInfo IFI(GetAC);
1256 IFI.UpdateProfile = false;
1257 InlineResult IR = InlineFunction(CB, IFI,
1258 /*MergeAttributes=*/true);
1259 if (!IR.isSuccess())
1260 return false;
1261
1262 // The call to InlineFunction erases I, so we can't pass it here.
1263 emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
1264 Cost, true, getAnnotatedRemarkPassName());
1265
1266 // Now populate the list of newly exposed call sites.
1267 if (InlinedCallSites) {
1268 InlinedCallSites->clear();
1269 llvm::append_range(*InlinedCallSites, IFI.InlinedCallSites);
1270 }
1271
1273 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1274 ++NumCSInlined;
1275
1276 // Prorate inlined probes for a duplicated inlining callsite which probably
1277 // has a distribution less than 100%. Samples for an inlinee should be
1278 // distributed among the copies of the original callsite based on each
1279 // callsite's distribution factor for counts accuracy. Note that an inlined
1280 // probe may come with its own distribution factor if it has been duplicated
1281 // in the inlinee body. The two factor are multiplied to reflect the
1282 // aggregation of duplication.
1283 if (Candidate.CallsiteDistribution < 1) {
1284 for (auto &I : IFI.InlinedCallSites) {
1285 if (std::optional<PseudoProbe> Probe = extractProbe(*I))
1286 setProbeDistributionFactor(*I, Probe->Factor *
1287 Candidate.CallsiteDistribution);
1288 }
1289 NumDuplicatedInlinesite++;
1290 }
1291
1292 return true;
1293}
1294
1295bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1296 CallBase *CB) {
1297 assert(CB && "Expect non-null call instruction");
1298
1299 if (isa<IntrinsicInst>(CB))
1300 return false;
1301
1302 // Find the callee's profile. For indirect call, find hottest target profile.
1303 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1304 // If ExternalInlineAdvisor wants to inline this site, do so even
1305 // if Samples are not present.
1306 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1307 return false;
1308
1309 float Factor = 1.0;
1310 if (std::optional<PseudoProbe> Probe = extractProbe(*CB))
1311 Factor = Probe->Factor;
1312
1313 uint64_t CallsiteCount =
1314 CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0;
1315 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1316 return true;
1317}
1318
1319std::optional<InlineCost>
1320SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1321 std::unique_ptr<InlineAdvice> Advice = nullptr;
1322 if (ExternalInlineAdvisor) {
1323 Advice = ExternalInlineAdvisor->getAdvice(CB);
1324 if (Advice) {
1325 if (!Advice->isInliningRecommended()) {
1326 Advice->recordUnattemptedInlining();
1327 return InlineCost::getNever("not previously inlined");
1328 }
1329 Advice->recordInlining();
1330 return InlineCost::getAlways("previously inlined");
1331 }
1332 }
1333
1334 return {};
1335}
1336
1337bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1338 std::optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1339 return Cost ? !!*Cost : false;
1340}
1341
1343SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1344 if (std::optional<InlineCost> ReplayCost =
1345 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1346 return *ReplayCost;
1347 // Adjust threshold based on call site hotness, only do this for callsite
1348 // prioritized inliner because otherwise cost-benefit check is done earlier.
1349 int SampleThreshold = SampleColdCallSiteThreshold;
1351 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1352 SampleThreshold = SampleHotCallSiteThreshold;
1353 else if (!ProfileSizeInline)
1354 return InlineCost::getNever("cold callsite");
1355 }
1356
1357 Function *Callee = Candidate.CallInstr->getCalledFunction();
1358 assert(Callee && "Expect a definition for inline candidate of direct call");
1359
1360 InlineParams Params = getInlineParams();
1361 // We will ignore the threshold from inline cost, so always get full cost.
1362 Params.ComputeFullInlineCost = true;
1364 // Checks if there is anything in the reachable portion of the callee at
1365 // this callsite that makes this inlining potentially illegal. Need to
1366 // set ComputeFullInlineCost, otherwise getInlineCost may return early
1367 // when cost exceeds threshold without checking all IRs in the callee.
1368 // The acutal cost does not matter because we only checks isNever() to
1369 // see if it is legal to inline the callsite.
1370 InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1371 GetTTI(*Callee), GetAC, GetTLI);
1372
1373 // Honor always inline and never inline from call analyzer
1374 if (Cost.isNever() || Cost.isAlways())
1375 return Cost;
1376
1377 // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1378 // decisions based on hotness as well as accurate function byte sizes for
1379 // given context using function/inlinee sizes from previous build. It
1380 // stores the decision in profile, and also adjust/merge context profile
1381 // aiming at better context-sensitive post-inline profile quality, assuming
1382 // all inline decision estimates are going to be honored by compiler. Here
1383 // we replay that inline decision under `sample-profile-use-preinliner`.
1384 // Note that we don't need to handle negative decision from preinliner as
1385 // context profile for not inlined calls are merged by preinliner already.
1386 if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1387 // Once two node are merged due to promotion, we're losing some context
1388 // so the original context-sensitive preinliner decision should be ignored
1389 // for SyntheticContext.
1390 SampleContext &Context = Candidate.CalleeSamples->getContext();
1391 if (!Context.hasState(SyntheticContext) &&
1392 Context.hasAttribute(ContextShouldBeInlined))
1393 return InlineCost::getAlways("preinliner");
1394 }
1395
1396 // For old FDO inliner, we inline the call site if it is below hot threshold,
1397 // even if the function is hot based on sample profile data. This is to
1398 // prevent huge functions from being inlined.
1401 }
1402
1403 // Otherwise only use the cost from call analyzer, but overwite threshold with
1404 // Sample PGO threshold.
1405 return InlineCost::get(Cost.getCost(), SampleThreshold);
1406}
1407
1408bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1409 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1410 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1411 // Profile symbol list is ignored when profile-sample-accurate is on.
1412 assert((!ProfAccForSymsInList ||
1414 !F.hasFnAttribute("profile-sample-accurate"))) &&
1415 "ProfAccForSymsInList should be false when profile-sample-accurate "
1416 "is enabled");
1417
1418 // Populating worklist with initial call sites from root inliner, along
1419 // with call site weights.
1420 CandidateQueue CQueue;
1421 InlineCandidate NewCandidate;
1422 for (auto &BB : F) {
1423 for (auto &I : BB) {
1424 auto *CB = dyn_cast<CallBase>(&I);
1425 if (!CB)
1426 continue;
1427 if (getInlineCandidate(&NewCandidate, CB))
1428 CQueue.push(NewCandidate);
1429 }
1430 }
1431
1432 // Cap the size growth from profile guided inlining. This is needed even
1433 // though cost of each inline candidate already accounts for callee size,
1434 // because with top-down inlining, we can grow inliner size significantly
1435 // with large number of smaller inlinees each pass the cost check.
1437 "Max inline size limit should not be smaller than min inline size "
1438 "limit.");
1439 unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1440 SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
1441 SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
1442 if (ExternalInlineAdvisor)
1443 SizeLimit = std::numeric_limits<unsigned>::max();
1444
1445 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1446
1447 // Perform iterative BFS call site prioritized inlining
1448 bool Changed = false;
1449 while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1450 InlineCandidate Candidate = CQueue.top();
1451 CQueue.pop();
1452 CallBase *I = Candidate.CallInstr;
1453 Function *CalledFunction = I->getCalledFunction();
1454
1455 if (CalledFunction == &F)
1456 continue;
1457 if (I->isIndirectCall()) {
1458 uint64_t Sum = 0;
1459 auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
1460 uint64_t SumOrigin = Sum;
1461 Sum *= Candidate.CallsiteDistribution;
1462 unsigned ICPCount = 0;
1463 for (const auto *FS : CalleeSamples) {
1464 // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1465 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1466 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1467 PSI->getOrCompHotCountThreshold());
1468 continue;
1469 }
1470 uint64_t EntryCountDistributed =
1471 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1472 // In addition to regular inline cost check, we also need to make sure
1473 // ICP isn't introducing excessive speculative checks even if individual
1474 // target looks beneficial to promote and inline. That means we should
1475 // only do ICP when there's a small number dominant targets.
1476 if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1477 EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1478 break;
1479 // TODO: Fix CallAnalyzer to handle all indirect calls.
1480 // For indirect call, we don't run CallAnalyzer to get InlineCost
1481 // before actual inlining. This is because we could see two different
1482 // types from the same definition, which makes CallAnalyzer choke as
1483 // it's expecting matching parameter type on both caller and callee
1484 // side. See example from PR18962 for the triggering cases (the bug was
1485 // fixed, but we generate different types).
1486 if (!PSI->isHotCount(EntryCountDistributed))
1487 break;
1488 SmallVector<CallBase *, 8> InlinedCallSites;
1489 // Attach function profile for promoted indirect callee, and update
1490 // call site count for the promoted inline candidate too.
1491 Candidate = {I, FS, EntryCountDistributed,
1492 Candidate.CallsiteDistribution};
1493 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1494 &InlinedCallSites)) {
1495 for (auto *CB : InlinedCallSites) {
1496 if (getInlineCandidate(&NewCandidate, CB))
1497 CQueue.emplace(NewCandidate);
1498 }
1499 ICPCount++;
1500 Changed = true;
1501 } else if (!ContextTracker) {
1502 LocalNotInlinedCallSites.insert({I, FS});
1503 }
1504 }
1505 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1506 !CalledFunction->isDeclaration()) {
1507 SmallVector<CallBase *, 8> InlinedCallSites;
1508 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1509 for (auto *CB : InlinedCallSites) {
1510 if (getInlineCandidate(&NewCandidate, CB))
1511 CQueue.emplace(NewCandidate);
1512 }
1513 Changed = true;
1514 } else if (!ContextTracker) {
1515 LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});
1516 }
1517 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1518 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1519 InlinedGUIDs,
1520 PSI->getOrCompHotCountThreshold());
1521 }
1522 }
1523
1524 if (!CQueue.empty()) {
1525 if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1526 ++NumCSInlinedHitMaxLimit;
1527 else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1528 ++NumCSInlinedHitMinLimit;
1529 else
1530 ++NumCSInlinedHitGrowthLimit;
1531 }
1532
1533 // For CS profile, profile for not inlined context will be merged when
1534 // base profile is being retrieved.
1536 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1537 return Changed;
1538}
1539
1540void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1542 const Function &F) {
1543 // Accumulate not inlined callsite information into notInlinedSamples
1544 for (const auto &Pair : NonInlinedCallSites) {
1545 CallBase *I = Pair.first;
1546 Function *Callee = I->getCalledFunction();
1547 if (!Callee || Callee->isDeclaration())
1548 continue;
1549
1550 ORE->emit(
1551 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
1552 I->getDebugLoc(), I->getParent())
1553 << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
1554 << "' into '" << ore::NV("Caller", &F) << "'");
1555
1556 ++NumCSNotInlined;
1557 const FunctionSamples *FS = Pair.second;
1558 if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
1559 continue;
1560 }
1561
1562 // Do not merge a context that is already duplicated into the base profile.
1563 if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
1564 continue;
1565
1566 if (ProfileMergeInlinee) {
1567 // A function call can be replicated by optimizations like callsite
1568 // splitting or jump threading and the replicates end up sharing the
1569 // sample nested callee profile instead of slicing the original
1570 // inlinee's profile. We want to do merge exactly once by filtering out
1571 // callee profiles with a non-zero head sample count.
1572 if (FS->getHeadSamples() == 0) {
1573 // Use entry samples as head samples during the merge, as inlinees
1574 // don't have head samples.
1575 const_cast<FunctionSamples *>(FS)->addHeadSamples(
1576 FS->getHeadSamplesEstimate());
1577
1578 // Note that we have to do the merge right after processing function.
1579 // This allows OutlineFS's profile to be used for annotation during
1580 // top-down processing of functions' annotation.
1581 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
1582 // If outlined function does not exist in the profile, add it to a
1583 // separate map so that it does not rehash the original profile.
1584 if (!OutlineFS)
1585 OutlineFS = &OutlineFunctionSamples[
1587 OutlineFS->merge(*FS, 1);
1588 // Set outlined profile to be synthetic to not bias the inliner.
1589 OutlineFS->setContextSynthetic();
1590 }
1591 } else {
1592 auto pair =
1593 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1594 pair.first->second.entryCount += FS->getHeadSamplesEstimate();
1595 }
1596 }
1597}
1598
1599/// Returns the sorted CallTargetMap \p M by count in descending order.
1603 for (const auto &I : SampleRecord::sortCallTargets(M)) {
1604 R.emplace_back(
1605 InstrProfValueData{I.first.getHashCode(), I.second});
1606 }
1607 return R;
1608}
1609
1610// Generate MD_prof metadata for every branch instruction using the
1611// edge weights computed during propagation.
1612void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1613 // Generate MD_prof metadata for every branch instruction using the
1614 // edge weights computed during propagation.
1615 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1616 LLVMContext &Ctx = F.getContext();
1617 MDBuilder MDB(Ctx);
1618 for (auto &BI : F) {
1619 BasicBlock *BB = &BI;
1620
1621 if (BlockWeights[BB]) {
1622 for (auto &I : *BB) {
1623 if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1624 continue;
1625 if (!cast<CallBase>(I).getCalledFunction()) {
1626 const DebugLoc &DLoc = I.getDebugLoc();
1627 if (!DLoc)
1628 continue;
1629 const DILocation *DIL = DLoc;
1630 const FunctionSamples *FS = findFunctionSamples(I);
1631 if (!FS)
1632 continue;
1635 FS->findCallTargetMapAt(CallSite);
1636 if (!T || T.get().empty())
1637 continue;
1639 // Prorate the callsite counts based on the pre-ICP distribution
1640 // factor to reflect what is already done to the callsite before
1641 // ICP, such as calliste cloning.
1642 if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
1643 if (Probe->Factor < 1)
1644 T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1645 }
1646 }
1647 SmallVector<InstrProfValueData, 2> SortedCallTargets =
1649 uint64_t Sum = 0;
1650 for (const auto &C : T.get())
1651 Sum += C.second;
1652 // With CSSPGO all indirect call targets are counted torwards the
1653 // original indirect call site in the profile, including both
1654 // inlined and non-inlined targets.
1656 if (const FunctionSamplesMap *M =
1657 FS->findFunctionSamplesMapAt(CallSite)) {
1658 for (const auto &NameFS : *M)
1659 Sum += NameFS.second.getHeadSamplesEstimate();
1660 }
1661 }
1662 if (Sum)
1663 updateIDTMetaData(I, SortedCallTargets, Sum);
1664 else if (OverwriteExistingWeights)
1665 I.setMetadata(LLVMContext::MD_prof, nullptr);
1666 } else if (!isa<IntrinsicInst>(&I)) {
1667 setBranchWeights(I, {static_cast<uint32_t>(BlockWeights[BB])},
1668 /*IsExpected=*/false);
1669 }
1670 }
1672 // Set profile metadata (possibly annotated by LTO prelink) to zero or
1673 // clear it for cold code.
1674 for (auto &I : *BB) {
1675 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1676 if (cast<CallBase>(I).isIndirectCall()) {
1677 I.setMetadata(LLVMContext::MD_prof, nullptr);
1678 } else {
1679 setBranchWeights(I, {uint32_t(0)}, /*IsExpected=*/false);
1680 }
1681 }
1682 }
1683 }
1684
1685 Instruction *TI = BB->getTerminator();
1686 if (TI->getNumSuccessors() == 1)
1687 continue;
1688 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1689 !isa<IndirectBrInst>(TI))
1690 continue;
1691
1692 DebugLoc BranchLoc = TI->getDebugLoc();
1693 LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1694 << ((BranchLoc) ? Twine(BranchLoc.getLine())
1695 : Twine("<UNKNOWN LOCATION>"))
1696 << ".\n");
1698 uint32_t MaxWeight = 0;
1699 Instruction *MaxDestInst;
1700 // Since profi treats multiple edges (multiway branches) as a single edge,
1701 // we need to distribute the computed weight among the branches. We do
1702 // this by evenly splitting the edge weight among destinations.
1704 std::vector<uint64_t> EdgeIndex;
1706 EdgeIndex.resize(TI->getNumSuccessors());
1707 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1708 const BasicBlock *Succ = TI->getSuccessor(I);
1709 EdgeIndex[I] = EdgeMultiplicity[Succ];
1710 EdgeMultiplicity[Succ]++;
1711 }
1712 }
1713 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1714 BasicBlock *Succ = TI->getSuccessor(I);
1715 Edge E = std::make_pair(BB, Succ);
1716 uint64_t Weight = EdgeWeights[E];
1717 LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1718 // Use uint32_t saturated arithmetic to adjust the incoming weights,
1719 // if needed. Sample counts in profiles are 64-bit unsigned values,
1720 // but internally branch weights are expressed as 32-bit values.
1721 if (Weight > std::numeric_limits<uint32_t>::max()) {
1722 LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)\n");
1723 Weight = std::numeric_limits<uint32_t>::max();
1724 }
1725 if (!SampleProfileUseProfi) {
1726 // Weight is added by one to avoid propagation errors introduced by
1727 // 0 weights.
1728 Weights.push_back(static_cast<uint32_t>(
1729 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1730 : Weight + 1));
1731 } else {
1732 // Profi creates proper weights that do not require "+1" adjustments but
1733 // we evenly split the weight among branches with the same destination.
1734 uint64_t W = Weight / EdgeMultiplicity[Succ];
1735 // Rounding up, if needed, so that first branches are hotter.
1736 if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1737 W++;
1738 Weights.push_back(static_cast<uint32_t>(W));
1739 }
1740 if (Weight != 0) {
1741 if (Weight > MaxWeight) {
1742 MaxWeight = Weight;
1743 MaxDestInst = &*Succ->getFirstNonPHIOrDbgOrLifetime();
1744 }
1745 }
1746 }
1747
1748 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1749
1750 uint64_t TempWeight;
1751 // Only set weights if there is at least one non-zero weight.
1752 // In any other case, let the analyzer set weights.
1753 // Do not set weights if the weights are present unless under
1754 // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1755 // twice. If the first annotation already set the weights, the second pass
1756 // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1757 // weight should have their existing metadata (possibly annotated by LTO
1758 // prelink) cleared.
1759 if (MaxWeight > 0 &&
1760 (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1761 LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1762 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
1763 ORE->emit([&]() {
1764 return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1765 << "most popular destination for conditional branches at "
1766 << ore::NV("CondBranchesLoc", BranchLoc);
1767 });
1768 } else {
1770 TI->setMetadata(LLVMContext::MD_prof, nullptr);
1771 LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1772 } else {
1773 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1774 }
1775 }
1776 }
1777}
1778
1779/// Once all the branch weights are computed, we emit the MD_prof
1780/// metadata on BB using the computed values for each of its branches.
1781///
1782/// \param F The function to query.
1783///
1784/// \returns true if \p F was modified. Returns false, otherwise.
1785bool SampleProfileLoader::emitAnnotations(Function &F) {
1786 bool Changed = false;
1787
1789 LLVM_DEBUG({
1790 if (!ProbeManager->getDesc(F))
1791 dbgs() << "Probe descriptor missing for Function " << F.getName()
1792 << "\n";
1793 });
1794
1795 if (ProbeManager->profileIsValid(F, *Samples)) {
1796 ++NumMatchedProfile;
1797 } else {
1798 ++NumMismatchedProfile;
1799 LLVM_DEBUG(
1800 dbgs() << "Profile is invalid due to CFG mismatch for Function "
1801 << F.getName() << "\n");
1803 return false;
1804 }
1805 } else {
1806 if (getFunctionLoc(F) == 0)
1807 return false;
1808
1809 LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1810 << F.getName() << ": " << getFunctionLoc(F) << "\n");
1811 }
1812
1813 DenseSet<GlobalValue::GUID> InlinedGUIDs;
1815 Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1816 else
1817 Changed |= inlineHotFunctions(F, InlinedGUIDs);
1818
1819 Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1820
1821 if (Changed)
1822 generateMDProfMetadata(F);
1823
1824 emitCoverageRemarks(F);
1825 return Changed;
1826}
1827
1828std::unique_ptr<ProfiledCallGraph>
1829SampleProfileLoader::buildProfiledCallGraph(Module &M) {
1830 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
1832 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1833 else
1834 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1835
1836 // Add all functions into the profiled call graph even if they are not in
1837 // the profile. This makes sure functions missing from the profile still
1838 // gets a chance to be processed.
1839 for (Function &F : M) {
1841 continue;
1842 ProfiledCG->addProfiledFunction(
1844 }
1845
1846 return ProfiledCG;
1847}
1848
1849std::vector<Function *>
1850SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
1851 std::vector<Function *> FunctionOrderList;
1852 FunctionOrderList.reserve(M.size());
1853
1855 errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1856 "together with -sample-profile-top-down-load.\n";
1857
1858 if (!ProfileTopDownLoad) {
1859 if (ProfileMergeInlinee) {
1860 // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1861 // because the profile for a function may be used for the profile
1862 // annotation of its outline copy before the profile merging of its
1863 // non-inlined inline instances, and that is not the way how
1864 // ProfileMergeInlinee is supposed to work.
1865 ProfileMergeInlinee = false;
1866 }
1867
1868 for (Function &F : M)
1870 FunctionOrderList.push_back(&F);
1871 return FunctionOrderList;
1872 }
1873
1876 // Use profiled call edges to augment the top-down order. There are cases
1877 // that the top-down order computed based on the static call graph doesn't
1878 // reflect real execution order. For example
1879 //
1880 // 1. Incomplete static call graph due to unknown indirect call targets.
1881 // Adjusting the order by considering indirect call edges from the
1882 // profile can enable the inlining of indirect call targets by allowing
1883 // the caller processed before them.
1884 // 2. Mutual call edges in an SCC. The static processing order computed for
1885 // an SCC may not reflect the call contexts in the context-sensitive
1886 // profile, thus may cause potential inlining to be overlooked. The
1887 // function order in one SCC is being adjusted to a top-down order based
1888 // on the profile to favor more inlining. This is only a problem with CS
1889 // profile.
1890 // 3. Transitive indirect call edges due to inlining. When a callee function
1891 // (say B) is inlined into a caller function (say A) in LTO prelink,
1892 // every call edge originated from the callee B will be transferred to
1893 // the caller A. If any transferred edge (say A->C) is indirect, the
1894 // original profiled indirect edge B->C, even if considered, would not
1895 // enforce a top-down order from the caller A to the potential indirect
1896 // call target C in LTO postlink since the inlined callee B is gone from
1897 // the static call graph.
1898 // 4. #3 can happen even for direct call targets, due to functions defined
1899 // in header files. A header function (say A), when included into source
1900 // files, is defined multiple times but only one definition survives due
1901 // to ODR. Therefore, the LTO prelink inlining done on those dropped
1902 // definitions can be useless based on a local file scope. More
1903 // importantly, the inlinee (say B), once fully inlined to a
1904 // to-be-dropped A, will have no profile to consume when its outlined
1905 // version is compiled. This can lead to a profile-less prelink
1906 // compilation for the outlined version of B which may be called from
1907 // external modules. while this isn't easy to fix, we rely on the
1908 // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1909 // the A can be inlined in its local scope in prelink, it may not exist
1910 // in the merged IR in postlink, and we'll need the profiled call edges
1911 // to enforce a top-down order for the rest of the functions.
1912 //
1913 // Considering those cases, a profiled call graph completely independent of
1914 // the static call graph is constructed based on profile data, where
1915 // function objects are not even needed to handle case #3 and case 4.
1916 //
1917 // Note that static callgraph edges are completely ignored since they
1918 // can be conflicting with profiled edges for cyclic SCCs and may result in
1919 // an SCC order incompatible with profile-defined one. Using strictly
1920 // profile order ensures a maximum inlining experience. On the other hand,
1921 // static call edges are not so important when they don't correspond to a
1922 // context in the profile.
1923
1924 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1925 scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
1926 while (!CGI.isAtEnd()) {
1927 auto Range = *CGI;
1928 if (SortProfiledSCC) {
1929 // Sort nodes in one SCC based on callsite hotness.
1931 Range = *SI;
1932 }
1933 for (auto *Node : Range) {
1934 Function *F = SymbolMap.lookup(Node->Name);
1935 if (F && !skipProfileForFunction(*F))
1936 FunctionOrderList.push_back(F);
1937 }
1938 ++CGI;
1939 }
1940 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1941 } else
1942 buildTopDownFuncOrder(CG, FunctionOrderList);
1943
1944 LLVM_DEBUG({
1945 dbgs() << "Function processing order:\n";
1946 for (auto F : FunctionOrderList) {
1947 dbgs() << F->getName() << "\n";
1948 }
1949 });
1950
1951 return FunctionOrderList;
1952}
1953
1954bool SampleProfileLoader::doInitialization(Module &M,
1956 auto &Ctx = M.getContext();
1957
1958 auto ReaderOrErr = SampleProfileReader::create(
1959 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1960 if (std::error_code EC = ReaderOrErr.getError()) {
1961 std::string Msg = "Could not open profile: " + EC.message();
1962 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1963 return false;
1964 }
1965 Reader = std::move(ReaderOrErr.get());
1966 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1967 // set module before reading the profile so reader may be able to only
1968 // read the function profiles which are used by the current module.
1969 Reader->setModule(&M);
1970 if (std::error_code EC = Reader->read()) {
1971 std::string Msg = "profile reading failed: " + EC.message();
1972 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1973 return false;
1974 }
1975
1976 PSL = Reader->getProfileSymbolList();
1977
1979 DisableSampleProfileInlining = DisableSampleLoaderInlining;
1980
1981 if (UseFlattenedProfile)
1982 ProfileConverter::flattenProfile(Reader->getProfiles(),
1983 Reader->profileIsCS());
1984
1985 // While profile-sample-accurate is on, ignore symbol list.
1986 ProfAccForSymsInList =
1988 if (ProfAccForSymsInList) {
1989 NamesInProfile.clear();
1990 GUIDsInProfile.clear();
1991 if (auto NameTable = Reader->getNameTable()) {
1993 for (auto Name : *NameTable)
1994 GUIDsInProfile.insert(Name.getHashCode());
1995 } else {
1996 for (auto Name : *NameTable)
1997 NamesInProfile.insert(Name.stringRef());
1998 }
1999 }
2000 CoverageTracker.setProfAccForSymsInList(true);
2001 }
2002
2003 if (FAM && !ProfileInlineReplayFile.empty()) {
2004 ExternalInlineAdvisor = getReplayInlineAdvisor(
2005 M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
2010 /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2011 }
2012
2013 // Apply tweaks if context-sensitive or probe-based profile is available.
2014 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2015 Reader->profileIsProbeBased()) {
2019 SampleProfileUseProfi = true;
2022 // Enable priority-base inliner and size inline by default for CSSPGO.
2024 ProfileSizeInline = true;
2027 // For CSSPGO, we also allow recursive inline to best use context profile.
2029 AllowRecursiveInline = true;
2030
2031 if (Reader->profileIsPreInlined()) {
2033 UsePreInlinerDecision = true;
2034 }
2035
2036 // Enable stale profile matching by default for probe-based profile.
2037 // Currently the matching relies on if the checksum mismatch is detected,
2038 // which is currently only available for pseudo-probe mode. Removing the
2039 // checksum check could cause regressions for some cases, so further tuning
2040 // might be needed if we want to enable it for all cases.
2041 if (Reader->profileIsProbeBased()) {
2043 SalvageStaleProfile = true;
2045 SalvageUnusedProfile = true;
2046 }
2047
2048 if (!Reader->profileIsCS()) {
2049 // Non-CS profile should be fine without a function size budget for the
2050 // inliner since the contexts in the profile are either all from inlining
2051 // in the prevoius build or pre-computed by the preinliner with a size
2052 // cap, thus they are bounded.
2053 if (!ProfileInlineLimitMin.getNumOccurrences())
2054 ProfileInlineLimitMin = std::numeric_limits<unsigned>::max();
2055 if (!ProfileInlineLimitMax.getNumOccurrences())
2056 ProfileInlineLimitMax = std::numeric_limits<unsigned>::max();
2057 }
2058 }
2059
2060 if (Reader->profileIsCS()) {
2061 // Tracker for profiles under different context
2062 ContextTracker = std::make_unique<SampleContextTracker>(
2063 Reader->getProfiles(), &GUIDToFuncNameMap);
2064 }
2065
2066 // Load pseudo probe descriptors for probe-based function samples.
2067 if (Reader->profileIsProbeBased()) {
2068 ProbeManager = std::make_unique<PseudoProbeManager>(M);
2069 if (!ProbeManager->moduleIsProbed(M)) {
2070 const char *Msg =
2071 "Pseudo-probe-based profile requires SampleProfileProbePass";
2072 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2073 DS_Warning));
2074 return false;
2075 }
2076 }
2077
2080 MatchingManager = std::make_unique<SampleProfileMatcher>(
2081 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2082 FuncNameToProfNameMap);
2083 }
2084
2085 return true;
2086}
2087
2088// Note that this is a module-level check. Even if one module is errored out,
2089// the entire build will be errored out. However, the user could make big
2090// changes to functions in single module but those changes might not be
2091// performance significant to the whole binary. Therefore, to avoid those false
2092// positives, we select a reasonable big set of hot functions that are supposed
2093// to be globally performance significant, only compute and check the mismatch
2094// within those functions. The function selection is based on two criteria:
2095// 1) The function is hot enough, which is tuned by a hotness-based
2096// flag(HotFuncCutoffForStalenessError). 2) The num of function is large enough
2097// which is tuned by the MinfuncsForStalenessError flag.
2098bool SampleProfileLoader::rejectHighStalenessProfile(
2099 Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
2101 "Only support for probe-based profile");
2102 uint64_t TotalHotFunc = 0;
2103 uint64_t NumMismatchedFunc = 0;
2104 for (const auto &I : Profiles) {
2105 const auto &FS = I.second;
2106 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
2107 if (!FuncDesc)
2108 continue;
2109
2110 // Use a hotness-based threshold to control the function selection.
2112 FS.getTotalSamples()))
2113 continue;
2114
2115 TotalHotFunc++;
2116 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2117 NumMismatchedFunc++;
2118 }
2119 // Make sure that the num of selected function is not too small to distinguish
2120 // from the user's benign changes.
2121 if (TotalHotFunc < MinfuncsForStalenessError)
2122 return false;
2123
2124 // Finally check the mismatch percentage against the threshold.
2125 if (NumMismatchedFunc * 100 >=
2126 TotalHotFunc * PrecentMismatchForStalenessError) {
2127 auto &Ctx = M.getContext();
2128 const char *Msg =
2129 "The input profile significantly mismatches current source code. "
2130 "Please recollect profile to avoid performance regression.";
2131 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
2132 return true;
2133 }
2134 return false;
2135}
2136
2137void SampleProfileLoader::removePseudoProbeInstsDiscriminator(Module &M) {
2138 for (auto &F : M) {
2139 std::vector<Instruction *> InstsToDel;
2140 for (auto &BB : F) {
2141 for (auto &I : BB) {
2142 if (isa<PseudoProbeInst>(&I))
2143 InstsToDel.push_back(&I);
2144 else if (isa<CallBase>(&I))
2145 if (const DILocation *DIL = I.getDebugLoc().get()) {
2146 // Restore dwarf discriminator for call.
2147 unsigned Discriminator = DIL->getDiscriminator();
2148 if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
2149 std::optional<uint32_t> DwarfDiscriminator =
2151 Discriminator);
2152 I.setDebugLoc(
2153 DIL->cloneWithDiscriminator(DwarfDiscriminator.value_or(0)));
2154 }
2155 }
2156 }
2157 }
2158 for (auto *I : InstsToDel)
2159 I->eraseFromParent();
2160 }
2161}
2162
2163bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
2164 ProfileSummaryInfo *_PSI) {
2165 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2166
2167 PSI = _PSI;
2168 if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
2169 M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2171 PSI->refresh();
2172 }
2173
2175 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2176 return false;
2177
2178 auto Remapper = Reader->getRemapper();
2179 // Populate the symbol map.
2180 for (const auto &N_F : M.getValueSymbolTable()) {
2181 StringRef OrigName = N_F.getKey();
2182 Function *F = dyn_cast<Function>(N_F.getValue());
2183 if (F == nullptr || OrigName.empty())
2184 continue;
2185 SymbolMap[FunctionId(OrigName)] = F;
2187 if (OrigName != NewName && !NewName.empty()) {
2188 auto r = SymbolMap.emplace(FunctionId(NewName), F);
2189 // Failiing to insert means there is already an entry in SymbolMap,
2190 // thus there are multiple functions that are mapped to the same
2191 // stripped name. In this case of name conflicting, set the value
2192 // to nullptr to avoid confusion.
2193 if (!r.second)
2194 r.first->second = nullptr;
2195 OrigName = NewName;
2196 }
2197 // Insert the remapped names into SymbolMap.
2198 if (Remapper) {
2199 if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2200 if (*MapName != OrigName && !MapName->empty())
2201 SymbolMap.emplace(FunctionId(*MapName), F);
2202 }
2203 }
2204 }
2205
2206 // Stale profile matching.
2209 MatchingManager->runOnModule();
2210 MatchingManager->clearMatchingData();
2211 }
2212 assert(SymbolMap.count(FunctionId()) == 0 &&
2213 "No empty StringRef should be added in SymbolMap");
2214 assert((SalvageUnusedProfile || FuncNameToProfNameMap.empty()) &&
2215 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2216 "not enabled");
2217
2218 bool retval = false;
2219 for (auto *F : buildFunctionOrder(M, CG)) {
2220 assert(!F->isDeclaration());
2221 clearFunctionData();
2222 retval |= runOnFunction(*F, AM);
2223 }
2224
2225 // Account for cold calls not inlined....
2227 for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2228 notInlinedCallInfo)
2229 updateProfileCallee(pair.first, pair.second.entryCount);
2230
2233 removePseudoProbeInstsDiscriminator(M);
2234 if (auto *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName))
2235 M.eraseNamedMetadata(FuncInfo);
2236 }
2237
2238 return retval;
2239}
2240
2241bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
2242 LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2243 DILocation2SampleMap.clear();
2244 // By default the entry count is initialized to -1, which will be treated
2245 // conservatively by getEntryCount as the same as unknown (None). This is
2246 // to avoid newly added code to be treated as cold. If we have samples
2247 // this will be overwritten in emitAnnotations.
2248 uint64_t initialEntryCount = -1;
2249
2250 ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2251 if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2252 // initialize all the function entry counts to 0. It means all the
2253 // functions without profile will be regarded as cold.
2254 initialEntryCount = 0;
2255 // profile-sample-accurate is a user assertion which has a higher precedence
2256 // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2257 ProfAccForSymsInList = false;
2258 }
2259 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2260
2261 // PSL -- profile symbol list include all the symbols in sampled binary.
2262 // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2263 // old functions without samples being cold, without having to worry
2264 // about new and hot functions being mistakenly treated as cold.
2265 if (ProfAccForSymsInList) {
2266 // Initialize the entry count to 0 for functions in the list.
2267 if (PSL->contains(F.getName()))
2268 initialEntryCount = 0;
2269
2270 // Function in the symbol list but without sample will be regarded as
2271 // cold. To minimize the potential negative performance impact it could
2272 // have, we want to be a little conservative here saying if a function
2273 // shows up in the profile, no matter as outline function, inline instance
2274 // or call targets, treat the function as not being cold. This will handle
2275 // the cases such as most callsites of a function are inlined in sampled
2276 // binary but not inlined in current build (because of source code drift,
2277 // imprecise debug information, or the callsites are all cold individually
2278 // but not cold accumulatively...), so the outline function showing up as
2279 // cold in sampled binary will actually not be cold after current build.
2282 GUIDsInProfile.count(
2283 Function::getGUIDAssumingExternalLinkage(CanonName))) ||
2284 (!FunctionSamples::UseMD5 && NamesInProfile.count(CanonName)))
2285 initialEntryCount = -1;
2286 }
2287
2288 // Initialize entry count when the function has no existing entry
2289 // count value.
2290 if (!F.getEntryCount())
2291 F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
2292 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2293 if (AM) {
2294 auto &FAM =
2296 .getManager();
2298 } else {
2299 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
2300 ORE = OwnedORE.get();
2301 }
2302
2304 Samples = ContextTracker->getBaseSamplesFor(F);
2305 else {
2306 Samples = Reader->getSamplesFor(F);
2307 // Try search in previously inlined functions that were split or duplicated
2308 // into base.
2309 if (!Samples) {
2311 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
2312 if (It != OutlineFunctionSamples.end()) {
2313 Samples = &It->second;
2314 } else if (auto Remapper = Reader->getRemapper()) {
2315 if (auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2316 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));
2317 if (It != OutlineFunctionSamples.end())
2318 Samples = &It->second;
2319 }
2320 }
2321 }
2322 }
2323
2324 if (Samples && !Samples->empty())
2325 return emitAnnotations(F);
2326 return false;
2327}
2329 std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
2330 IntrusiveRefCntPtr<vfs::FileSystem> FS, bool DisableSampleProfileInlining,
2331 bool UseFlattenedProfile)
2332 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2333 LTOPhase(LTOPhase), FS(std::move(FS)),
2334 DisableSampleProfileInlining(DisableSampleProfileInlining),
2335 UseFlattenedProfile(UseFlattenedProfile) {}
2336
2341
2342 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2344 };
2345 auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2347 };
2348 auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2350 };
2351
2352 if (!FS)
2355
2356 SampleProfileLoader SampleLoader(
2357 ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2358 ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2359 : ProfileRemappingFileName,
2360 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2361 DisableSampleProfileInlining, UseFlattenedProfile);
2362 if (!SampleLoader.doInitialization(M, &FAM))
2363 return PreservedAnalyses::all();
2364
2366 if (!SampleLoader.runOnModule(M, &AM, PSI))
2367 return PreservedAnalyses::all();
2368
2369 return PreservedAnalyses::none();
2370}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
LVReader * CurrentReader
Definition: LVReader.cpp:151
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Definition: Legalizer.cpp:80
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
std::pair< BasicBlock *, BasicBlock * > Edge
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< unsigned > MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader"))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
#define CSINLINE_DEBUG
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
Function::ProfileCount ProfileCount
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
#define DEBUG_TYPE
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1348
This class represents a function call, abstracting a target machine's calling convention.
Debug location.
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
A debug info location.
Definition: DebugLoc.h:124
LLVM_ABI unsigned getLine() const
Definition: DebugLoc.cpp:54
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:203
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:173
iterator end()
Definition: DenseMap.h:87
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Diagnostic information for the sample profiler.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
Represents either an error or a value T.
Definition: ErrorOr.h:56
Class to represent profile counts.
Definition: Function.h:297
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1915
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:316
Represents the cost of inlining a function.
Definition: InlineCost.h:91
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition: InlineCost.h:132
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition: InlineCost.h:127
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
Definition: InlineCost.h:121
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:251
InlineResult is basically true or false.
Definition: InlineCost.h:181
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:585
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1829
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:167
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:115
ValueT lookup(const KeyT &Key) const
Definition: MapVector.h:99
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Diagnostic information for optimization analysis remarks.
Diagnostic information for applied optimization remarks.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
Sample profile inference pass.
void computeDominanceAndLoopInfo(FunctionT &F)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:151
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:25
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
LLVM Value Representation.
Definition: Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
int getNumOccurrences() const
Definition: CommandLine.h:400
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
const ParentTy * getParent() const
Definition: ilist_node.h:34
This class represents a function that is read from a sample profile.
Definition: FunctionId.h:36
Representation of the samples collected for a function.
Definition: SampleProf.h:757
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
Definition: SampleProf.h:1053
static LLVM_ABI bool ProfileIsCS
Definition: SampleProf.h:1200
FunctionId getFunction() const
Return the function name.
Definition: SampleProf.h:1086
static LLVM_ABI bool ProfileIsProbeBased
Definition: SampleProf.h:1198
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1102
SampleContext & getContext() const
Definition: SampleProf.h:1204
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
Definition: SampleProf.h:1010
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
Definition: SampleProf.cpp:250
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
Definition: SampleProf.h:961
uint64_t getGUID() const
Return the GUID of the context's name.
Definition: SampleProf.h:1223
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
Definition: SampleProf.h:985
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1209
This class is a wrapper to associative container MapT<KeyT, ValueT> using the hash value of the origi...
Definition: HashKeyMap.h:53
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
Definition: SampleProf.h:1437
bool hasAttribute(ContextAttributeMask A)
Definition: SampleProf.h:620
This class provides operator overloads to the map container using MD5 as the key type,...
Definition: SampleProf.h:1325
Sample-based profile reader.
static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
Definition: SampleProf.h:344
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition: SampleProf.h:413
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition: SampleProf.h:422
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:49
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Definition: SCCIterator.h:113
Sort the nodes of a directed SCC in the decreasing order of the edge weights.
Definition: SCCIterator.h:253
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FS
Definition: X86.h:214
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:201
DenseMap< SymbolStringPtr, ExecutorSymbolDef > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
Definition: SampleProf.h:1311
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
Definition: SampleProf.h:747
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot wrt to hot cutoff threshold.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
LLVM_ABI cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
Definition: CFG.h:256
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:233
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
LLVM_ABI void setProbeDistributionFactor(Instruction &Inst, float Factor)
Definition: PseudoProbe.cpp:76
LLVM_ABI std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:77
cl::opt< bool > SampleProfileUseProfi
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1334
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
llvm::cl::opt< bool > UseIterativeBFIInference
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based in cost (default heuristic).
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
Definition: InstrProf.cpp:1402
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
LLVM_ABI cl::opt< int > SampleHotCallSiteThreshold
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
LLVM_ABI void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
LLVM_ABI cl::opt< int > SampleColdCallSiteThreshold
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1886
@ DS_Warning
static bool skipProfileForFunction(const Function &F)
LLVM_ABI cl::opt< bool > SortProfiledSCC
LLVM_ABI cl::opt< int > ProfileInlineLimitMax
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition: Metadata.h:58
LLVM_ABI cl::opt< int > ProfileInlineGrowthLimit
constexpr const char * PseudoProbeDescMetadataName
Definition: PseudoProbe.h:26
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
Used in the streaming interface as the general argument type.
A wrapper of binary function with basic blocks and jumps.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:59
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:207
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:240
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition: InlineCost.h:234
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)
Definition: PseudoProbe.h:81