LLVM 22.0.0git
CtxProfAnalysis.cpp
Go to the documentation of this file.
1//===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the contextual profile analysis, which maintains contextual
10// profiling info through IPO passes.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/Analysis/CFG.h"
18#include "llvm/IR/Analysis.h"
19#include "llvm/IR/Dominators.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/PassManager.h"
26#include "llvm/Support/Path.h"
27#include <deque>
28#include <memory>
29
30#define DEBUG_TYPE "ctx_prof"
31
32using namespace llvm;
34 UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
35 cl::desc("Use the specified contextual profile file"));
36
38 "ctx-profile-printer-level",
41 "everything", "print everything - most verbose"),
43 "just the yaml representation of the profile")),
44 cl::desc("Verbosity level of the contextual profile printer pass."));
45
47 "ctx-profile-force-is-specialized", cl::init(false),
48 cl::desc("Treat the given module as-if it were containing the "
49 "post-thinlink module containing the root"));
50
51const char *AssignGUIDPass::GUIDMetadataName = "guid";
52
53namespace llvm {
55 friend class ProfileAnnotator;
56 class BBInfo;
57 struct EdgeInfo {
58 BBInfo *const Src;
59 BBInfo *const Dest;
60 std::optional<uint64_t> Count;
61
62 explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
63 };
64
65 class BBInfo {
66 std::optional<uint64_t> Count;
67 // OutEdges is dimensioned to match the number of terminator operands.
68 // Entries in the vector match the index in the terminator operand list. In
69 // some cases - see `shouldExcludeEdge` and its implementation - an entry
70 // will be nullptr.
71 // InEdges doesn't have the above constraint.
74 size_t UnknownCountOutEdges = 0;
75 size_t UnknownCountInEdges = 0;
76
77 // Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
78 // because all the edge counters must be known.
79 // Return std::nullopt if there were no edges to sum. The user can decide
80 // how to interpret that.
81 std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
82 bool AssumeAllKnown) const {
83 std::optional<uint64_t> Sum;
84 for (const auto *E : Edges) {
85 // `Edges` may be `OutEdges`, case in which `E` could be nullptr.
86 if (E) {
87 if (!Sum.has_value())
88 Sum = 0;
89 *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
90 }
91 }
92 return Sum;
93 }
94
95 bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
96 assert(!Count.has_value());
97 Count = getEdgeSum(Edges, true);
98 return Count.has_value();
99 }
100
101 void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
102 uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
103 uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
104 EdgeInfo *E = nullptr;
105 for (auto *I : Edges)
106 if (I && !I->Count.has_value()) {
107 E = I;
108#ifdef NDEBUG
109 break;
110#else
111 assert((!E || E == I) &&
112 "Expected exactly one edge to have an unknown count, "
113 "found a second one");
114 continue;
115#endif
116 }
117 assert(E && "Expected exactly one edge to have an unknown count");
118 assert(!E->Count.has_value());
119 E->Count = EdgeVal;
120 assert(E->Src->UnknownCountOutEdges > 0);
121 assert(E->Dest->UnknownCountInEdges > 0);
122 --E->Src->UnknownCountOutEdges;
123 --E->Dest->UnknownCountInEdges;
124 }
125
126 public:
127 BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
128 : Count(Count) {
129 // For in edges, we just want to pre-allocate enough space, since we know
130 // it at this stage. For out edges, we will insert edges at the indices
131 // corresponding to positions in this BB's terminator instruction, so we
132 // construct a default (nullptr values)-initialized vector. A nullptr edge
133 // corresponds to those that are excluded (see shouldExcludeEdge).
134 InEdges.reserve(NumInEdges);
135 OutEdges.resize(NumOutEdges);
136 }
137
138 bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
139 if (!UnknownCountOutEdges) {
140 return computeCountFrom(OutEdges);
141 }
142 return false;
143 }
144
145 bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
146 if (!UnknownCountInEdges) {
147 return computeCountFrom(InEdges);
148 }
149 return false;
150 }
151
152 void addInEdge(EdgeInfo &Info) {
153 InEdges.push_back(&Info);
154 ++UnknownCountInEdges;
155 }
156
157 // For the out edges, we care about the position we place them in, which is
158 // the position in terminator instruction's list (at construction). Later,
159 // we build branch_weights metadata with edge frequency values matching
160 // these positions.
161 void addOutEdge(size_t Index, EdgeInfo &Info) {
162 OutEdges[Index] = &Info;
163 ++UnknownCountOutEdges;
164 }
165
166 bool hasCount() const { return Count.has_value(); }
167
168 uint64_t getCount() const { return *Count; }
169
170 bool trySetSingleUnknownInEdgeCount() {
171 if (UnknownCountInEdges == 1) {
172 setSingleUnknownEdgeCount(InEdges);
173 return true;
174 }
175 return false;
176 }
177
178 bool trySetSingleUnknownOutEdgeCount() {
179 if (UnknownCountOutEdges == 1) {
180 setSingleUnknownEdgeCount(OutEdges);
181 return true;
182 }
183 return false;
184 }
185 size_t getNumOutEdges() const { return OutEdges.size(); }
186
187 uint64_t getEdgeCount(size_t Index) const {
188 if (auto *E = OutEdges[Index])
189 return *E->Count;
190 return 0U;
191 }
192 };
193
194 const Function &F;
195 ArrayRef<uint64_t> Counters;
196 // To be accessed through getBBInfo() after construction.
197 std::map<const BasicBlock *, BBInfo> BBInfos;
198 std::vector<EdgeInfo> EdgeInfos;
199
200 // The only criteria for exclusion is faux suspend -> exit edges in presplit
201 // coroutines. The API serves for readability, currently.
202 bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
203 return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
204 }
205
206 BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
207
208 const BBInfo &getBBInfo(const BasicBlock &BB) const {
209 return BBInfos.find(&BB)->second;
210 }
211
212 // validation function after we propagate the counters: all BBs and edges'
213 // counters must have a value.
214 bool allCountersAreAssigned() const {
215 for (const auto &BBInfo : BBInfos)
216 if (!BBInfo.second.hasCount())
217 return false;
218 for (const auto &EdgeInfo : EdgeInfos)
219 if (!EdgeInfo.Count.has_value())
220 return false;
221 return true;
222 }
223
224 /// Check that all paths from the entry basic block that use edges with
225 /// non-zero counts arrive at a basic block with no successors (i.e. "exit")
226 bool allTakenPathsExit() const {
227 std::deque<const BasicBlock *> Worklist;
228 DenseSet<const BasicBlock *> Visited;
229 Worklist.push_back(&F.getEntryBlock());
230 bool HitExit = false;
231 while (!Worklist.empty()) {
232 const auto *BB = Worklist.front();
233 Worklist.pop_front();
234 if (!Visited.insert(BB).second)
235 continue;
236 if (succ_size(BB) == 0) {
238 return false;
239 HitExit = true;
240 continue;
241 }
242 if (succ_size(BB) == 1) {
243 Worklist.push_back(BB->getUniqueSuccessor());
244 continue;
245 }
246 const auto &BBInfo = getBBInfo(*BB);
247 bool HasAWayOut = false;
248 for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
249 const auto *Succ = BB->getTerminator()->getSuccessor(I);
250 if (!shouldExcludeEdge(*BB, *Succ)) {
251 if (BBInfo.getEdgeCount(I) > 0) {
252 HasAWayOut = true;
253 Worklist.push_back(Succ);
254 }
255 }
256 }
257 if (!HasAWayOut)
258 return false;
259 }
260 return HitExit;
261 }
262
263 bool allNonColdSelectsHaveProfile() const {
264 for (const auto &BB : F) {
265 if (getBBInfo(BB).getCount() > 0) {
266 for (const auto &I : BB) {
267 if (const auto *SI = dyn_cast<SelectInst>(&I)) {
268 if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation(
269 *const_cast<SelectInst *>(SI))) {
270 auto Index = Inst->getIndex()->getZExtValue();
271 assert(Index < Counters.size());
272 if (Counters[Index] == 0)
273 return false;
274 }
275 }
276 }
277 }
278 }
279 return true;
280 }
281
282 // This is an adaptation of PGOUseFunc::populateCounters.
283 // FIXME(mtrofin): look into factoring the code to share one implementation.
284 void propagateCounterValues() {
285 bool KeepGoing = true;
286 while (KeepGoing) {
287 KeepGoing = false;
288 for (const auto &BB : F) {
289 auto &Info = getBBInfo(BB);
290 if (!Info.hasCount())
291 KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
292 Info.tryTakeCountFromKnownInEdges(BB);
293 if (Info.hasCount()) {
294 KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();
295 KeepGoing |= Info.trySetSingleUnknownInEdgeCount();
296 }
297 }
298 }
299 assert(allCountersAreAssigned() &&
300 "[ctx-prof] Expected all counters have been assigned.");
301 assert(allTakenPathsExit() &&
302 "[ctx-prof] Encountered a BB with more than one successor, where "
303 "all outgoing edges have a 0 count. This occurs in non-exiting "
304 "functions (message pumps, usually) which are not supported in the "
305 "contextual profiling case");
306 assert(allNonColdSelectsHaveProfile() &&
307 "[ctx-prof] All non-cold select instructions were expected to have "
308 "a profile.");
309 }
310
311public:
313 : F(F), Counters(Counters) {
314 assert(!F.isDeclaration());
315 assert(!Counters.empty());
316 size_t NrEdges = 0;
317 for (const auto &BB : F) {
318 std::optional<uint64_t> Count;
320 const_cast<BasicBlock &>(BB))) {
321 auto Index = Ins->getIndex()->getZExtValue();
322 assert(Index < Counters.size() &&
323 "The index must be inside the counters vector by construction - "
324 "tripping this assertion indicates a bug in how the contextual "
325 "profile is managed by IPO transforms");
326 (void)Index;
327 Count = Counters[Ins->getIndex()->getZExtValue()];
328 } else if (isa<UnreachableInst>(BB.getTerminator())) {
329 // The program presumably didn't crash.
330 Count = 0;
331 }
332 auto [It, Ins] =
333 BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
334 (void)Ins;
335 assert(Ins && "We iterate through the function's BBs, no reason to "
336 "insert one more than once");
337 NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
338 return !shouldExcludeEdge(BB, *Succ);
339 });
340 }
341 // Pre-allocate the vector, we want references to its contents to be stable.
342 EdgeInfos.reserve(NrEdges);
343 for (const auto &BB : F) {
344 auto &Info = getBBInfo(BB);
345 for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
346 const auto *Succ = BB.getTerminator()->getSuccessor(I);
347 if (!shouldExcludeEdge(BB, *Succ)) {
348 auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
349 Info.addOutEdge(I, EI);
350 getBBInfo(*Succ).addInEdge(EI);
351 }
352 }
353 }
354 assert(EdgeInfos.capacity() == NrEdges &&
355 "The capacity of EdgeInfos should have stayed unchanged it was "
356 "populated, because we need pointers to its contents to be stable");
357 propagateCounterValues();
358 }
359
360 uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); }
361};
362
363} // namespace llvm
364
366 ArrayRef<uint64_t> RawCounters)
367 : PImpl(std::make_unique<ProfileAnnotatorImpl>(F, RawCounters)) {}
368
370
372 return PImpl->getBBCount(BB);
373}
374
376 uint64_t &TrueCount,
377 uint64_t &FalseCount) const {
378 const auto &BBInfo = PImpl->getBBInfo(*SI.getParent());
379 TrueCount = FalseCount = 0;
380 if (BBInfo.getCount() == 0)
381 return false;
382
384 if (!Step)
385 return false;
386 auto Index = Step->getIndex()->getZExtValue();
387 assert(Index < PImpl->Counters.size() &&
388 "The index of the step instruction must be inside the "
389 "counters vector by "
390 "construction - tripping this assertion indicates a bug in "
391 "how the contextual profile is managed by IPO transforms");
392 auto TotalCount = BBInfo.getCount();
393 TrueCount = PImpl->Counters[Index];
394 FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
395 return true;
396}
397
400 uint64_t &MaxCount) const {
401 Profile.clear();
402
403 if (succ_size(&BB) < 2)
404 return false;
405
406 auto *Term = BB.getTerminator();
407 Profile.resize(Term->getNumSuccessors());
408
409 const auto &BBInfo = PImpl->getBBInfo(BB);
410 MaxCount = 0;
411 for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
412 ++SuccIdx) {
413 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
414 if (EdgeCount > MaxCount)
415 MaxCount = EdgeCount;
416 Profile[SuccIdx] = EdgeCount;
417 }
418 return MaxCount > 0;
419}
420
422 for (auto &F : M.functions()) {
423 if (F.isDeclaration())
424 continue;
425 if (F.getMetadata(GUIDMetadataName))
426 continue;
427 const GlobalValue::GUID GUID = F.getGUID();
428 F.setMetadata(GUIDMetadataName,
429 MDNode::get(M.getContext(),
430 {ConstantAsMetadata::get(ConstantInt::get(
431 Type::getInt64Ty(M.getContext()), GUID))}));
432 }
434}
435
437 if (F.isDeclaration()) {
439 return F.getGUID();
440 }
441 auto *MD = F.getMetadata(GUIDMetadataName);
442 assert(MD && "guid not found for defined function");
443 return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
444 ->getValue()
445 ->stripPointerCasts())
446 ->getZExtValue();
447}
449
450CtxProfAnalysis::CtxProfAnalysis(std::optional<StringRef> Profile)
451 : Profile([&]() -> std::optional<StringRef> {
452 if (Profile)
453 return *Profile;
454 if (UseCtxProfile.getNumOccurrences())
455 return UseCtxProfile;
456 return std::nullopt;
457 }()) {}
458
461 if (!Profile)
462 return {};
464 if (auto EC = MB.getError()) {
465 M.getContext().emitError("could not open contextual profile file: " +
466 EC.message());
467 return {};
468 }
469 PGOCtxProfileReader Reader(MB.get()->getBuffer());
470 auto MaybeProfiles = Reader.loadProfiles();
471 if (!MaybeProfiles) {
472 M.getContext().emitError("contextual profile file is invalid: " +
473 toString(MaybeProfiles.takeError()));
474 return {};
475 }
476
477 // FIXME: We should drive this from ThinLTO, but for the time being, use the
478 // module name as indicator.
479 // We want to *only* keep the contextual profiles in modules that capture
480 // context trees. That allows us to compute specific PSIs, for example.
481 auto DetermineRootsInModule = [&M]() -> const DenseSet<GlobalValue::GUID> {
482 DenseSet<GlobalValue::GUID> ProfileRootsInModule;
483 auto ModName = M.getName();
484 auto Filename = sys::path::filename(ModName);
485 // Drop the file extension.
486 Filename = Filename.substr(0, Filename.find_last_of('.'));
487 // See if it parses
488 APInt Guid;
489 // getAsInteger returns true if there are more chars to read other than the
490 // integer. So the "false" test is what we want.
491 if (!Filename.getAsInteger(0, Guid))
492 ProfileRootsInModule.insert(Guid.getZExtValue());
493 return ProfileRootsInModule;
494 };
495 const auto ProfileRootsInModule = DetermineRootsInModule();
497
498 // the logic from here on allows for modules that contain - by design - more
499 // than one root. We currently don't support that, because the determination
500 // happens based on the module name matching the root guid, but the logic can
501 // avoid assuming that.
502 if (!ProfileRootsInModule.empty()) {
503 Result.IsInSpecializedModule = true;
504 // Trim first the roots that aren't in this module.
505 for (auto &[RootGuid, _] :
506 llvm::make_early_inc_range(MaybeProfiles->Contexts))
507 if (!ProfileRootsInModule.contains(RootGuid))
508 MaybeProfiles->Contexts.erase(RootGuid);
509 // we can also drop the flat profiles
510 MaybeProfiles->FlatProfiles.clear();
511 }
512
513 for (const auto &F : M) {
514 if (F.isDeclaration())
515 continue;
516 auto GUID = AssignGUIDPass::getGUID(F);
517 assert(GUID && "guid not found for defined function");
518 const auto &Entry = F.begin();
519 uint32_t MaxCounters = 0; // we expect at least a counter.
520 for (const auto &I : *Entry)
521 if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {
522 MaxCounters =
523 static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
524 break;
525 }
526 if (!MaxCounters)
527 continue;
528 uint32_t MaxCallsites = 0;
529 for (const auto &BB : F)
530 for (const auto &I : BB)
531 if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {
532 MaxCallsites =
533 static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
534 break;
535 }
536 auto [It, Ins] = Result.FuncInfo.insert(
537 {GUID, PGOContextualProfile::FunctionInfo(F.getName())});
538 (void)Ins;
539 assert(Ins);
540 It->second.NextCallsiteIndex = MaxCallsites;
541 It->second.NextCounterIndex = MaxCounters;
542 }
543 // If we made it this far, the Result is valid - which we mark by setting
544 // .Profiles.
545 Result.Profiles = std::move(*MaybeProfiles);
546 Result.initIndex();
547 return Result;
548}
549
551PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const {
552 if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end())
553 return It->first;
554 return 0;
555}
556
559
563 if (C.contexts().empty()) {
564 OS << "No contextual profile was provided.\n";
565 return PreservedAnalyses::all();
566 }
567
568 if (Mode == PrintMode::Everything) {
569 OS << "Function Info:\n";
570 for (const auto &[Guid, FuncInfo] : C.FuncInfo)
571 OS << Guid << " : " << FuncInfo.Name
572 << ". MaxCounterID: " << FuncInfo.NextCounterIndex
573 << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
574 }
575
576 if (Mode == PrintMode::Everything)
577 OS << "\nCurrent Profile:\n";
578 convertCtxProfToYaml(OS, C.profiles());
579 OS << "\n";
580 if (Mode == PrintMode::YAML)
581 return PreservedAnalyses::all();
582
583 OS << "\nFlat Profile:\n";
584 auto Flat = C.flatten();
585 for (const auto &[Guid, Counters] : Flat) {
586 OS << Guid << " : ";
587 for (auto V : Counters)
588 OS << V << " ";
589 OS << "\n";
590 }
591 return PreservedAnalyses::all();
592}
593
596 return nullptr;
597 for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
598 if (auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
599 return IPC;
600 assert(!isa<CallBase>(Prev) &&
601 "didn't expect to find another call, that's not the callsite "
602 "instrumentation, before an instrumentable callsite");
603 }
604 return nullptr;
605}
606
608 for (auto &I : BB)
609 if (auto *Incr = dyn_cast<InstrProfIncrementInst>(&I))
611 return Incr;
612 return nullptr;
613}
614
617 Instruction *Prev = &SI;
618 while ((Prev = Prev->getPrevNode()))
619 if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
620 return Step;
621 return nullptr;
622}
623
624template <class ProfTy>
625static void preorderVisitOneRoot(ProfTy &Profile,
626 function_ref<void(ProfTy &)> Visitor) {
627 std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
628 Visitor(Ctx);
629 for (auto &[_, SubCtxSet] : Ctx.callsites())
630 for (auto &[__, Subctx] : SubCtxSet)
631 Traverser(Subctx);
632 };
633 Traverser(Profile);
634}
635
636template <class ProfilesTy, class ProfTy>
637static void preorderVisit(ProfilesTy &Profiles,
638 function_ref<void(ProfTy &)> Visitor) {
639 for (auto &[_, P] : Profiles)
641}
642
643void PGOContextualProfile::initIndex() {
644 // Initialize the head of the index list for each function. We don't need it
645 // after this point.
646 DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
647 for (auto &[Guid, FI] : FuncInfo)
648 InsertionPoints[Guid] = &FI.Index;
650 Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {
651 auto InsertIt = InsertionPoints.find(Ctx.guid());
652 if (InsertIt == InsertionPoints.end())
653 return;
654 // Insert at the end of the list. Since we traverse in preorder, it
655 // means that when we iterate the list from the beginning, we'd
656 // encounter the contexts in the order we would have, should we have
657 // performed a full preorder traversal.
658 InsertIt->second->Next = &Ctx;
659 Ctx.Previous = InsertIt->second;
660 InsertIt->second = &Ctx;
661 });
662}
663
665 return ForceIsInSpecializedModule.getNumOccurrences() > 0
667 : IsInSpecializedModule;
668}
669
672 GlobalValue::GUID G = getDefinedFunctionGUID(F);
673 for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
674 Node = Node->Next)
675 V(*reinterpret_cast<PGOCtxProfContext *>(Node));
676}
677
679 if (!F)
681 const PGOCtxProfContext>(Profiles.Contexts, V);
683 GlobalValue::GUID G = getDefinedFunctionGUID(*F);
684 for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
685 Node = Node->Next)
686 V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
687}
688
691 auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
692 const SmallVectorImpl<uint64_t> &From,
693 uint64_t SamplingRate) {
694 if (Into.empty())
695 Into.resize(From.size());
696 assert(Into.size() == From.size() &&
697 "All contexts corresponding to a function should have the exact "
698 "same number of counters.");
699 for (size_t I = 0, E = Into.size(); I < E; ++I)
700 Into[I] += From[I] * SamplingRate;
701 };
702
703 for (const auto &[_, CtxRoot] : Profiles.Contexts) {
704 const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
706 CtxRoot, [&](const PGOCtxProfContext &Ctx) {
707 Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
708 });
709
710 for (const auto &[G, Unh] : CtxRoot.getUnhandled())
711 Accummulate(Flat[G], Unh, SamplingFactor);
712 }
713 // We don't sample "Flat" currently, so sampling rate is 1.
714 for (const auto &[G, FC] : Profiles.FlatProfiles)
715 Accummulate(Flat[G], FC, /*SamplingRate=*/1);
716 return Flat;
717}
718
722 for (const auto &[_, CtxRoot] : Profiles.Contexts) {
723 const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
725 CtxRoot, [&](const PGOCtxProfContext &Ctx) {
726 auto &Targets = Ret[Ctx.guid()];
727 for (const auto &[ID, SubctxSet] : Ctx.callsites())
728 for (const auto &Subctx : SubctxSet)
729 Targets[ID][Subctx.first] +=
730 Subctx.second.getEntrycount() * TotalRootEntryCount;
731 });
732 }
733 return Ret;
734}
735
737 CallBase &IC, Result &Profile,
738 SetVector<std::pair<CallBase *, Function *>> &Candidates) {
739 const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
740 if (!Instr)
741 return;
742 Module &M = *IC.getParent()->getModule();
743 const uint32_t CallID = Instr->getIndex()->getZExtValue();
744 Profile.visit(
745 [&](const PGOCtxProfContext &Ctx) {
746 const auto &Targets = Ctx.callsites().find(CallID);
747 if (Targets == Ctx.callsites().end())
748 return;
749 for (const auto &[Guid, _] : Targets->second)
750 if (auto Name = Profile.getFunctionName(Guid); !Name.empty())
751 if (auto *Target = M.getFunction(Name))
752 if (Target->hasFnAttribute(Attribute::AlwaysInline))
753 Candidates.insert({&IC, Target});
754 },
755 IC.getCaller());
756}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void preorderVisit(ProfilesTy &Profiles, function_ref< void(ProfTy &)> Visitor)
static void preorderVisitOneRoot(ProfTy &Profile, function_ref< void(ProfTy &)> Visitor)
static cl::opt< CtxProfAnalysisPrinterPass::PrintMode > PrintLevel("ctx-profile-printer-level", cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden, cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, "everything", "print everything - most verbose"), clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml", "just the yaml representation of the profile")), cl::desc("Verbosity level of the contextual profile printer pass."))
static cl::opt< bool > ForceIsInSpecializedModule("ctx-profile-force-is-specialized", cl::init(false), cl::desc("Treat the given module as-if it were containing the " "post-thinlink module containing the root"))
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
#define _
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
Load MIR Sample Profile
#define P(N)
Reader for contextual iFDO profile, which comes in bitstream format.
ModuleAnalysisManager MAM
This file contains some templates that are useful if you are working with the STL at all.
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
static LLVM_ABI uint64_t getGUID(const Function &F)
static LLVM_ABI const char * GUIDMetadataName
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Assign a GUID if one is not already assign, as a function metadata named GUIDMetadataName.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const Instruction & front() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI CtxProfAnalysisPrinterPass(raw_ostream &OS)
LLVM_ABI PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM)
static LLVM_ABI InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static LLVM_ABI InstrProfIncrementInstStep * getSelectInstrumentation(SelectInst &SI)
Get the step instrumentation associated with a select
static LLVM_ABI void collectIndirectCallPromotionList(CallBase &IC, Result &Profile, SetVector< std::pair< CallBase *, Function * > > &Candidates)
static LLVM_ABI InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
static LLVM_ABI AnalysisKey Key
PGOContextualProfile Result
LLVM_ABI CtxProfAnalysis(std::optional< StringRef > Profile=std::nullopt)
Implements a dense probed hash-table based set.
Definition DenseSet.h:261
Represents either an error or a value T.
Definition ErrorOr.h:56
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static bool isExternalLinkage(LinkageTypes Linkage)
This represents the llvm.instrprof.callsite intrinsic.
static bool canInstrumentCallsite(const CallBase &CB)
This represents the llvm.instrprof.increment.step intrinsic.
This represents the llvm.instrprof.increment intrinsic.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The instrumented contextual profile, produced by the CtxProfAnalysis.
LLVM_ABI void visit(ConstVisitor, const Function *F=nullptr) const
LLVM_ABI const CtxProfFlatProfile flatten() const
LLVM_ABI bool isInSpecializedModule() const
LLVM_ABI void update(Visitor, const Function &F)
function_ref< void(PGOCtxProfContext &)> Visitor
function_ref< void(const PGOCtxProfContext &)> ConstVisitor
LLVM_ABI const CtxProfFlatIndirectCallProfile flattenVirtCalls() const
bool isFunctionKnown(const Function &F) const
A node (context) in the loaded contextual profile, suitable for mutation during IPO passes.
std::map< GlobalValue::GUID, PGOCtxProfContext > CallTargetMapTy
LLVM_ABI Expected< PGOCtxProfile > loadProfiles()
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
uint64_t getBBCount(const BasicBlock &BB)
ProfileAnnotatorImpl(const Function &F, ArrayRef< uint64_t > Counters)
LLVM_ABI ~ProfileAnnotator()
LLVM_ABI bool getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, uint64_t &FalseCount) const
LLVM_ABI bool getOutgoingBranchWeights(BasicBlock &BB, SmallVectorImpl< uint64_t > &Profile, uint64_t &MaxCount) const
LLVM_ABI uint64_t getBBCount(const BasicBlock &BB) const
LLVM_ABI ProfileAnnotator(const Function &F, ArrayRef< uint64_t > RawCounters)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition SetVector.h:59
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void resize(size_type N)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Target - Wrapper for Target specific information.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Pass manager infrastructure for declaring and invalidating analyses.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:577
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
std::map< GlobalValue::GUID, SmallVector< uint64_t, 1 > > CtxProfFlatProfile
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:626
auto pred_size(const MachineBasicBlock *BB)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1943
DenseMap< GlobalValue::GUID, DenseMap< uint32_t, FlatIndirectTargets > > CtxProfFlatIndirectCallProfile
LLVM_ABI bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
Definition CFG.cpp:361
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI void convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile)
@ TotalRootEntryCount
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:851
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29