//===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the contextual profile analysis, which maintains contextual
// profiling info through IPO passes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/CtxProfAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Analysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/ProfileData/PGOCtxProfReader.h"
#include "llvm/ProfileData/PGOCtxProfWriter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <deque>
#include <memory>

#define DEBUG_TYPE "ctx_prof"

using namespace llvm;

cl::opt<std::string>
    UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
                  cl::desc("Use the specified contextual profile file"));

static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(
    "ctx-profile-printer-level",
    cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden,
    cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything,
                          "everything", "print everything - most verbose"),
               clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml",
                          "just the yaml representation of the profile")),
    cl::desc("Verbosity level of the contextual profile printer pass."));

static cl::opt<bool> ForceIsInSpecializedModule(
    "ctx-profile-force-is-specialized", cl::init(false),
    cl::desc("Treat the given module as-if it were containing the "
             "post-thinlink module containing the root"));

const char *AssignGUIDPass::GUIDMetadataName = "guid";

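// ProfileAnnotatorImpl rebuilds basic block and edge counts for a single
// function from its raw (flat) counter vector: instrumented blocks seed the
// counts, and the remaining block/edge counts are recovered by flow
// conservation - a block's count equals the sum of its incoming edge counts
// and also the sum of its outgoing edge counts.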
namespace llvm {
class ProfileAnnotatorImpl final {
  friend class ProfileAnnotator;
  class BBInfo;
  struct EdgeInfo {
    BBInfo *const Src;
    BBInfo *const Dest;
    std::optional<uint64_t> Count;

    explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
  };

  class BBInfo {
    std::optional<uint64_t> Count;
    // OutEdges is dimensioned to match the number of terminator operands.
    // Entries in the vector match the index in the terminator operand list. In
    // some cases - see `shouldExcludeEdge` and its implementation - an entry
    // will be nullptr.
    // InEdges doesn't have the above constraint.
    SmallVector<EdgeInfo *> OutEdges;
    SmallVector<EdgeInfo *> InEdges;
    size_t UnknownCountOutEdges = 0;
    size_t UnknownCountInEdges = 0;

    // Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
    // because all the edge counters must be known.
    // Return std::nullopt if there were no edges to sum. The user can decide
    // how to interpret that.
    std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
                                       bool AssumeAllKnown) const {
      std::optional<uint64_t> Sum;
      for (const auto *E : Edges) {
        // `Edges` may be `OutEdges`, in which case `E` could be nullptr.
        if (E) {
          if (!Sum.has_value())
            Sum = 0;
          *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
        }
      }
      return Sum;
    }

    bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
      assert(!Count.has_value());
      Count = getEdgeSum(Edges, true);
      return Count.has_value();
    }

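    // When this BB's count is known and exactly one incident edge count is
    // still unknown, flow conservation forces that edge's value: it is this
    // BB's count minus the sum of the already-known edge counts (clamped at
    // 0).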
    void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
      uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
      uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
      EdgeInfo *E = nullptr;
      for (auto *I : Edges)
        if (I && !I->Count.has_value()) {
          E = I;
#ifdef NDEBUG
          break;
#else
          assert((!E || E == I) &&
                 "Expected exactly one edge to have an unknown count, "
                 "found a second one");
          continue;
#endif
        }
      assert(E && "Expected exactly one edge to have an unknown count");
      assert(!E->Count.has_value());
      E->Count = EdgeVal;
      assert(E->Src->UnknownCountOutEdges > 0);
      assert(E->Dest->UnknownCountInEdges > 0);
      --E->Src->UnknownCountOutEdges;
      --E->Dest->UnknownCountInEdges;
    }

  public:
    BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
        : Count(Count) {
      // For in edges, we just want to pre-allocate enough space, since we know
      // it at this stage. For out edges, we will insert edges at the indices
      // corresponding to positions in this BB's terminator instruction, so we
      // construct a default (nullptr values)-initialized vector. A nullptr edge
      // corresponds to those that are excluded (see shouldExcludeEdge).
      InEdges.reserve(NumInEdges);
      OutEdges.resize(NumOutEdges);
    }

    bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
      if (!UnknownCountOutEdges) {
        return computeCountFrom(OutEdges);
      }
      return false;
    }

    bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
      if (!UnknownCountInEdges) {
        return computeCountFrom(InEdges);
      }
      return false;
    }

    void addInEdge(EdgeInfo &Info) {
      InEdges.push_back(&Info);
      ++UnknownCountInEdges;
    }

    // For the out edges, we care about the position we place them in, which is
    // the position in the terminator instruction's successor list (at
    // construction). Later, we build branch_weights metadata with edge
    // frequency values matching these positions.
    void addOutEdge(size_t Index, EdgeInfo &Info) {
      OutEdges[Index] = &Info;
      ++UnknownCountOutEdges;
    }

    bool hasCount() const { return Count.has_value(); }

    uint64_t getCount() const { return *Count; }

    bool trySetSingleUnknownInEdgeCount() {
      if (UnknownCountInEdges == 1) {
        setSingleUnknownEdgeCount(InEdges);
        return true;
      }
      return false;
    }

    bool trySetSingleUnknownOutEdgeCount() {
      if (UnknownCountOutEdges == 1) {
        setSingleUnknownEdgeCount(OutEdges);
        return true;
      }
      return false;
    }
    size_t getNumOutEdges() const { return OutEdges.size(); }

    uint64_t getEdgeCount(size_t Index) const {
      if (auto *E = OutEdges[Index])
        return *E->Count;
      return 0U;
    }
  };


  const Function &F;
  ArrayRef<uint64_t> Counters;
  // To be accessed through getBBInfo() after construction.
  std::map<const BasicBlock *, BBInfo> BBInfos;
  std::vector<EdgeInfo> EdgeInfos;

  // The only criterion for exclusion is faux suspend -> exit edges in presplit
  // coroutines. The API serves for readability, currently.
  bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
    return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
  }

  BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }

  const BBInfo &getBBInfo(const BasicBlock &BB) const {
    return BBInfos.find(&BB)->second;
  }

  // Validation, run after we propagate the counters: every BB and every edge
  // must have an assigned counter value.
  bool allCountersAreAssigned() const {
    for (const auto &BBInfo : BBInfos)
      if (!BBInfo.second.hasCount())
        return false;
    for (const auto &EdgeInfo : EdgeInfos)
      if (!EdgeInfo.Count.has_value())
        return false;
    return true;
  }

  /// Check that all paths from the entry basic block that use edges with
  /// non-zero counts arrive at a basic block with no successors (i.e. "exit")
  bool allTakenPathsExit() const {
    std::deque<const BasicBlock *> Worklist;
    DenseSet<const BasicBlock *> Visited;
    Worklist.push_back(&F.getEntryBlock());
    bool HitExit = false;
    while (!Worklist.empty()) {
      const auto *BB = Worklist.front();
      Worklist.pop_front();
      if (!Visited.insert(BB).second)
        continue;
      if (succ_size(BB) == 0) {
        if (isa<UnreachableInst>(BB->getTerminator()))
          return false;
        HitExit = true;
        continue;
      }
      if (succ_size(BB) == 1) {
        Worklist.push_back(BB->getUniqueSuccessor());
        continue;
      }
      const auto &BBInfo = getBBInfo(*BB);
      bool HasAWayOut = false;
      for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
        const auto *Succ = BB->getTerminator()->getSuccessor(I);
        if (!shouldExcludeEdge(*BB, *Succ)) {
          if (BBInfo.getEdgeCount(I) > 0) {
            HasAWayOut = true;
            Worklist.push_back(Succ);
          }
        }
      }
      if (!HasAWayOut)
        return false;
    }
    return HitExit;
  }

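  // Check that every select residing in a block with a non-zero count has
  // select ("step") instrumentation whose counter value is also non-zero.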
262
263 bool allNonColdSelectsHaveProfile() const {
264 for (const auto &BB : F) {
265 if (getBBInfo(BB).getCount() > 0) {
266 for (const auto &I : BB) {
267 if (const auto *SI = dyn_cast<SelectInst>(&I)) {
268 if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation(
269 *const_cast<SelectInst *>(SI))) {
270 auto Index = Inst->getIndex()->getZExtValue();
271 assert(Index < Counters.size());
272 if (Counters[Index] == 0)
273 return false;
274 }
275 }
276 }
277 }
278 }
279 return true;
280 }

  // This is an adaptation of PGOUseFunc::populateCounters.
  // FIXME(mtrofin): look into factoring the code to share one implementation.
  void propagateCounterValues() {
    bool KeepGoing = true;
    while (KeepGoing) {
      KeepGoing = false;
      for (const auto &BB : F) {
        auto &Info = getBBInfo(BB);
        if (!Info.hasCount())
          KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
                       Info.tryTakeCountFromKnownInEdges(BB);
        if (Info.hasCount()) {
          KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();
          KeepGoing |= Info.trySetSingleUnknownInEdgeCount();
        }
      }
    }
    assert(allCountersAreAssigned() &&
           "[ctx-prof] Expected all counters to have been assigned.");
    assert(allTakenPathsExit() &&
           "[ctx-prof] Encountered a BB with more than one successor, where "
           "all outgoing edges have a 0 count. This occurs in non-exiting "
           "functions (message pumps, usually) which are not supported in the "
           "contextual profiling case");
    assert(allNonColdSelectsHaveProfile() &&
           "[ctx-prof] All non-cold select instructions were expected to have "
           "a profile.");
  }

public:
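  // Build the BBInfo/EdgeInfo graph for F: seed block counts from the
  // llvm.instrprof.increment instrumentation (and from unreachable-terminated
  // blocks, which must have a count of 0), wire up the non-excluded CFG
  // edges, then run the propagation above until everything has a value.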
  ProfileAnnotatorImpl(const Function &F, ArrayRef<uint64_t> Counters)
      : F(F), Counters(Counters) {
    assert(!F.isDeclaration());
    assert(!Counters.empty());
    size_t NrEdges = 0;
    for (const auto &BB : F) {
      std::optional<uint64_t> Count;
      if (auto *Ins = CtxProfAnalysis::getBBInstrumentation(
              const_cast<BasicBlock &>(BB))) {
        auto Index = Ins->getIndex()->getZExtValue();
        assert(Index < Counters.size() &&
               "The index must be inside the counters vector by construction - "
               "tripping this assertion indicates a bug in how the contextual "
               "profile is managed by IPO transforms");
        (void)Index;
        Count = Counters[Ins->getIndex()->getZExtValue()];
      } else if (isa<UnreachableInst>(BB.getTerminator())) {
        // The program presumably didn't crash.
        Count = 0;
      }
      auto [It, Ins] =
          BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
      (void)Ins;
      assert(Ins && "We iterate through the function's BBs, no reason to "
                    "insert one more than once");
      NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
        return !shouldExcludeEdge(BB, *Succ);
      });
    }
    // Pre-allocate the vector, we want references to its contents to be
    // stable.
    EdgeInfos.reserve(NrEdges);
    for (const auto &BB : F) {
      auto &Info = getBBInfo(BB);
      for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
        const auto *Succ = BB.getTerminator()->getSuccessor(I);
        if (!shouldExcludeEdge(BB, *Succ)) {
          auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
          Info.addOutEdge(I, EI);
          getBBInfo(*Succ).addInEdge(EI);
        }
      }
    }
    assert(EdgeInfos.capacity() == NrEdges &&
           "The capacity of EdgeInfos should have stayed unchanged while it "
           "was populated, because we need pointers to its contents to be "
           "stable");
    propagateCounterValues();
  }

  uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); }
};

} // namespace llvm

ProfileAnnotator::ProfileAnnotator(const Function &F,
                                   ArrayRef<uint64_t> RawCounters)
    : PImpl(std::make_unique<ProfileAnnotatorImpl>(F, RawCounters)) {}

ProfileAnnotator::~ProfileAnnotator() = default;

uint64_t ProfileAnnotator::getBBCount(const BasicBlock &BB) const {
  return PImpl->getBBCount(BB);
}

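// Derive the true/false counts for a select: the select's "step"
// instrumentation counts how many times the true arm was taken; the false
// count is the parent block's count minus that (saturating at 0).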
bool ProfileAnnotator::getSelectInstrProfile(SelectInst &SI,
                                             uint64_t &TrueCount,
                                             uint64_t &FalseCount) const {
  const auto &BBInfo = PImpl->getBBInfo(*SI.getParent());
  TrueCount = FalseCount = 0;
  if (BBInfo.getCount() == 0)
    return false;

  auto *Step = CtxProfAnalysis::getSelectInstrumentation(SI);
  if (!Step)
    return false;
  auto Index = Step->getIndex()->getZExtValue();
  assert(Index < PImpl->Counters.size() &&
         "The index of the step instruction must be inside the counters "
         "vector by construction - tripping this assertion indicates a bug "
         "in how the contextual profile is managed by IPO transforms");
  auto TotalCount = BBInfo.getCount();
  TrueCount = PImpl->Counters[Index];
  FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
  return true;
}

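// Collect BB's outgoing edge counts, in successor order, into Profile, and
// report the largest one via MaxCount. Returns false if BB has fewer than two
// successors or if all outgoing edges have a zero count.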
bool ProfileAnnotator::getOutgoingBranchWeights(
    BasicBlock &BB, SmallVectorImpl<uint64_t> &Profile,
    uint64_t &MaxCount) const {
  Profile.clear();

  if (succ_size(&BB) < 2)
    return false;

  auto *Term = BB.getTerminator();
  Profile.resize(Term->getNumSuccessors());

  const auto &BBInfo = PImpl->getBBInfo(BB);
  MaxCount = 0;
  for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
       ++SuccIdx) {
    uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
    if (EdgeCount > MaxCount)
      MaxCount = EdgeCount;
    Profile[SuccIdx] = EdgeCount;
  }
  return MaxCount > 0;
}

PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
  for (auto &F : M.functions()) {
    if (F.isDeclaration())
      continue;
    if (F.getMetadata(GUIDMetadataName))
      continue;
    const GlobalValue::GUID GUID = F.getGUID();
    F.setMetadata(GUIDMetadataName,
                  MDNode::get(M.getContext(),
                              {ConstantAsMetadata::get(ConstantInt::get(
                                  Type::getInt64Ty(M.getContext()), GUID))}));
  }
  return PreservedAnalyses::none();
}

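// Return the GUID for F: defined functions carry it in the "guid" metadata
// attached by AssignGUIDPass::run above; declarations fall back to the GUID
// derived from their (external) name.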
GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) {
  if (F.isDeclaration()) {
    assert(GlobalValue::isExternalLinkage(F.getLinkage()));
    return F.getGUID();
  }
  auto *MD = F.getMetadata(GUIDMetadataName);
  assert(MD && "guid not found for defined function");
  return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
                               ->getValue()
                               ->stripPointerCasts())
      ->getZExtValue();
}

AnalysisKey CtxProfAnalysis::Key;

CtxProfAnalysis::CtxProfAnalysis(std::optional<StringRef> Profile)
    : Profile([&]() -> std::optional<StringRef> {
        if (Profile)
          return *Profile;
        if (UseCtxProfile.getNumOccurrences())
          return UseCtxProfile;
        return std::nullopt;
      }()) {}

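// Load and validate the contextual profile, decide whether this module is a
// "specialized" (root-containing) module - and if so trim the profile to the
// roots defined here - then record, for each defined function, how many
// counters and callsites its instrumentation currently uses.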
PGOContextualProfile CtxProfAnalysis::run(Module &M,
                                          ModuleAnalysisManager &MAM) {
  if (!Profile)
    return {};
  auto MB = MemoryBuffer::getFile(*Profile);
  if (auto EC = MB.getError()) {
    M.getContext().emitError("could not open contextual profile file: " +
                             EC.message());
    return {};
  }
  PGOCtxProfileReader Reader(MB.get()->getBuffer());
  auto MaybeProfiles = Reader.loadProfiles();
  if (!MaybeProfiles) {
    M.getContext().emitError("contextual profile file is invalid: " +
                             toString(MaybeProfiles.takeError()));
    return {};
  }

  // FIXME: We should drive this from ThinLTO, but for the time being, use the
  // module name as indicator.
  // We want to *only* keep the contextual profiles in modules that capture
  // context trees. That allows us to compute specific PSIs, for example.
  auto DetermineRootsInModule = [&M]() -> const DenseSet<GlobalValue::GUID> {
    DenseSet<GlobalValue::GUID> ProfileRootsInModule;
    auto ModName = M.getName();
    auto Filename = sys::path::filename(ModName);
    // Drop the file extension.
    Filename = Filename.substr(0, Filename.find_last_of('.'));
    // See if the remaining stem parses as a GUID.
    APInt Guid;
    // getAsInteger returns true if there are more chars to read other than the
    // integer. So the "false" test is what we want.
    if (!Filename.getAsInteger(0, Guid))
      ProfileRootsInModule.insert(Guid.getZExtValue());
    return ProfileRootsInModule;
  };
  const auto ProfileRootsInModule = DetermineRootsInModule();
  PGOContextualProfile Result;

  // The logic from here on allows for modules that contain - by design - more
  // than one root. We currently don't support that, because the determination
  // happens based on the module name matching the root guid, but the logic can
  // avoid assuming that.
  if (!ProfileRootsInModule.empty()) {
    Result.IsInSpecializedModule = true;
    // Trim first the roots that aren't in this module.
    for (auto &[RootGuid, _] :
         llvm::make_early_inc_range(MaybeProfiles->Contexts))
      if (!ProfileRootsInModule.contains(RootGuid))
        MaybeProfiles->Contexts.erase(RootGuid);
    // We can also drop the flat profiles.
    MaybeProfiles->FlatProfiles.clear();
  }

  for (const auto &F : M) {
    if (F.isDeclaration())
      continue;
    auto GUID = AssignGUIDPass::getGUID(F);
    assert(GUID && "guid not found for defined function");
    const auto &Entry = F.begin();
    uint32_t MaxCounters = 0; // we expect at least a counter.
    for (const auto &I : *Entry)
      if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {
        MaxCounters =
            static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
        break;
      }
    if (!MaxCounters)
      continue;
    uint32_t MaxCallsites = 0;
    for (const auto &BB : F)
      for (const auto &I : BB)
        if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {
          MaxCallsites =
              static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
          break;
        }
    auto [It, Ins] = Result.FuncInfo.insert(
        {GUID, PGOContextualProfile::FunctionInfo(F.getName())});
    (void)Ins;
    assert(Ins);
    It->second.NextCallsiteIndex = MaxCallsites;
    It->second.NextCounterIndex = MaxCounters;
  }
  // If we made it this far, the Result is valid - which we mark by setting
  // .Profiles.
  Result.Profiles = std::move(*MaybeProfiles);
  Result.initIndex();
  return Result;
}

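// Return the GUID under which F's profile data is indexed, or 0 if F is not a
// function this profile knows about.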
GlobalValue::GUID
PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const {
  if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end())
    return It->first;
  return 0;
}

CtxProfAnalysisPrinterPass::CtxProfAnalysisPrinterPass(raw_ostream &OS)
    : OS(OS), Mode(PrintLevel) {}

PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
                                                  ModuleAnalysisManager &MAM) {
  CtxProfAnalysis::Result &C = MAM.getResult<CtxProfAnalysis>(M);
  if (C.contexts().empty()) {
    OS << "No contextual profile was provided.\n";
    return PreservedAnalyses::all();
  }

  if (Mode == PrintMode::Everything) {
    OS << "Function Info:\n";
    for (const auto &[Guid, FuncInfo] : C.FuncInfo)
      OS << Guid << " : " << FuncInfo.Name
         << ". MaxCounterID: " << FuncInfo.NextCounterIndex
         << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
  }

  if (Mode == PrintMode::Everything)
    OS << "\nCurrent Profile:\n";
  convertCtxProfToYaml(OS, C.profiles());
  OS << "\n";
  if (Mode == PrintMode::YAML)
    return PreservedAnalyses::all();

  OS << "\nFlat Profile:\n";
  auto Flat = C.flatten();
  for (const auto &[Guid, Counters] : Flat) {
    OS << Guid << " : ";
    for (auto V : Counters)
      OS << V << " ";
    OS << "\n";
  }
  return PreservedAnalyses::all();
}

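// Find the llvm.instrprof.callsite intrinsic instrumenting CB: it is expected
// among the instructions immediately preceding CB, with no other call in
// between (which the loop below asserts).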
InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) {
  if (!InstrProfCallsite::canInstrumentCallsite(CB))
    return nullptr;
  for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
    if (auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
      return IPC;
    assert(!isa<CallBase>(Prev) &&
           "didn't expect to find another call, that's not the callsite "
           "instrumentation, before an instrumentable callsite");
  }
  return nullptr;
}

InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
  for (auto &I : BB)
    if (auto *Incr = dyn_cast<InstrProfIncrementInst>(&I))
      if (!isa<InstrProfIncrementInstStep>(&I))
        return Incr;
  return nullptr;
}

InstrProfIncrementInstStep *
CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
  Instruction *Prev = &SI;
  while ((Prev = Prev->getPrevNode()))
    if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
      return Step;
  return nullptr;
}

template <class ProfTy>
static void preorderVisitOneRoot(ProfTy &Profile,
                                 function_ref<void(ProfTy &)> Visitor) {
  std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
    Visitor(Ctx);
    for (auto &[_, SubCtxSet] : Ctx.callsites())
      for (auto &[__, Subctx] : SubCtxSet)
        Traverser(Subctx);
  };
  Traverser(Profile);
}

template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
                          function_ref<void(ProfTy &)> Visitor) {
  for (auto &[_, P] : Profiles)
    preorderVisitOneRoot<ProfTy>(P, Visitor);
}

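// Thread all loaded contexts into per-function intrusive lists, in preorder,
// so visit()/update() can iterate just the contexts belonging to one function
// without re-walking the whole context tree.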
void PGOContextualProfile::initIndex() {
  // Initialize the head of the index list for each function. We don't need it
  // after this point.
  DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
  for (auto &[Guid, FI] : FuncInfo)
    InsertionPoints[Guid] = &FI.Index;
  preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
      Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {
        auto InsertIt = InsertionPoints.find(Ctx.guid());
        if (InsertIt == InsertionPoints.end())
          return;
        // Insert at the end of the list. Since we traverse in preorder, it
        // means that when we iterate the list from the beginning, we'd
        // encounter the contexts in the order we would have, should we have
        // performed a full preorder traversal.
        InsertIt->second->Next = &Ctx;
        Ctx.Previous = InsertIt->second;
        InsertIt->second = &Ctx;
      });
}

bool PGOContextualProfile::isInSpecializedModule() const {
  return ForceIsInSpecializedModule.getNumOccurrences() > 0
             ? ForceIsInSpecializedModule
             : IsInSpecializedModule;
}

void PGOContextualProfile::update(Visitor V, const Function &F) {
  assert(isFunctionKnown(F));
  GlobalValue::GUID G = getDefinedFunctionGUID(F);
  for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
       Node = Node->Next)
    V(*reinterpret_cast<PGOCtxProfContext *>(Node));
}

void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
  if (!F)
    return preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
                         const PGOCtxProfContext>(Profiles.Contexts, V);
  assert(isFunctionKnown(*F));
  GlobalValue::GUID G = getDefinedFunctionGUID(*F);
  for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
       Node = Node->Next)
    V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
}

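// Collapse the context tree into a flat profile: for every GUID, sum the
// counters of all contexts (and "unhandled" entries) for that GUID, scaling
// each root's subtree by the root's total entry count, which acts as the
// sampling factor.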
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
  CtxProfFlatProfile Flat;
  auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
                        const SmallVectorImpl<uint64_t> &From,
                        uint64_t SamplingRate) {
    if (Into.empty())
      Into.resize(From.size());
    assert(Into.size() == From.size() &&
           "All contexts corresponding to a function should have the exact "
           "same number of counters.");
    for (size_t I = 0, E = Into.size(); I < E; ++I)
      Into[I] += From[I] * SamplingRate;
  };

  for (const auto &[_, CtxRoot] : Profiles.Contexts) {
    const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
    preorderVisitOneRoot<const PGOCtxProfContext>(
        CtxRoot, [&](const PGOCtxProfContext &Ctx) {
          Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
        });

    for (const auto &[G, Unh] : CtxRoot.getUnhandled())
      Accummulate(Flat[G], Unh, SamplingFactor);
  }
  // We don't sample "Flat" currently, so sampling rate is 1.
  for (const auto &[G, FC] : Profiles.FlatProfiles)
    Accummulate(Flat[G], FC, /*SamplingRate=*/1);
  return Flat;
}

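// For each function GUID and callsite ID, compute the total number of times
// each callee GUID was entered across all contexts, again scaled by the
// root's total entry count.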
const CtxProfFlatIndirectCallProfile
PGOContextualProfile::flattenVirtCalls() const {
  CtxProfFlatIndirectCallProfile Ret;
  for (const auto &[_, CtxRoot] : Profiles.Contexts) {
    const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
    preorderVisitOneRoot<const PGOCtxProfContext>(
        CtxRoot, [&](const PGOCtxProfContext &Ctx) {
          auto &Targets = Ret[Ctx.guid()];
          for (const auto &[ID, SubctxSet] : Ctx.callsites())
            for (const auto &Subctx : SubctxSet)
              Targets[ID][Subctx.first] +=
                  Subctx.second.getEntrycount() * TotalRootEntryCount;
        });
  }
  return Ret;
}

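// Using the contextual profile rooted at IC's caller, record (callsite,
// target) pairs for targets that the profile observed at this callsite and
// that are marked always_inline in this module - these become candidates for
// indirect call promotion.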
void CtxProfAnalysis::collectIndirectCallPromotionList(
    CallBase &IC, Result &Profile,
    SetVector<std::pair<CallBase *, Function *>> &Candidates) {
  const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
  if (!Instr)
    return;
  Module &M = *IC.getParent()->getModule();
  const uint32_t CallID = Instr->getIndex()->getZExtValue();
  Profile.visit(
      [&](const PGOCtxProfContext &Ctx) {
        const auto &Targets = Ctx.callsites().find(CallID);
        if (Targets == Ctx.callsites().end())
          return;
        for (const auto &[Guid, _] : Targets->second)
          if (auto Name = Profile.getFunctionName(Guid); !Name.empty())
            if (auto *Target = M.getFunction(Name))
              if (Target->hasFnAttribute(Attribute::AlwaysInline))
                Candidates.insert({&IC, Target});
      },
      IC.getCaller());
}