30#define DEBUG_TYPE "ctx_prof"
35 cl::desc(
"Use the specified contextual profile file"));
38 "ctx-profile-printer-level",
41 "everything",
"print everything - most verbose"),
42 clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML,
"yaml",
43 "just the yaml representation of the profile")),
44 cl::desc(
"Verbosity level of the contextual profile printer pass."));
47 "ctx-profile-force-is-specialized",
cl::init(
false),
48 cl::desc(
"Treat the given module as-if it were containing the "
49 "post-thinlink module containing the root"));
60 std::optional<uint64_t> Count;
62 explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
66 std::optional<uint64_t> Count;
74 size_t UnknownCountOutEdges = 0;
75 size_t UnknownCountInEdges = 0;
82 bool AssumeAllKnown)
const {
83 std::optional<uint64_t> Sum;
84 for (
const auto *E : Edges) {
89 *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
96 assert(!Count.has_value());
97 Count = getEdgeSum(Edges,
true);
98 return Count.has_value();
102 uint64_t KnownSum = getEdgeSum(Edges,
false).value_or(0U);
103 uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0
U;
104 EdgeInfo *E =
nullptr;
105 for (
auto *
I : Edges)
106 if (
I && !
I->Count.has_value()) {
112 "Expected exactly one edge to have an unknown count, "
113 "found a second one");
117 assert(E &&
"Expected exactly one edge to have an unknown count");
118 assert(!E->Count.has_value());
120 assert(E->Src->UnknownCountOutEdges > 0);
121 assert(E->Dest->UnknownCountInEdges > 0);
122 --E->Src->UnknownCountOutEdges;
123 --E->Dest->UnknownCountInEdges;
127 BBInfo(
size_t NumInEdges,
size_t NumOutEdges, std::optional<uint64_t> Count)
135 OutEdges.
resize(NumOutEdges);
138 bool tryTakeCountFromKnownOutEdges(
const BasicBlock &BB) {
139 if (!UnknownCountOutEdges) {
140 return computeCountFrom(OutEdges);
145 bool tryTakeCountFromKnownInEdges(
const BasicBlock &BB) {
146 if (!UnknownCountInEdges) {
147 return computeCountFrom(InEdges);
152 void addInEdge(EdgeInfo &Info) {
154 ++UnknownCountInEdges;
161 void addOutEdge(
size_t Index, EdgeInfo &Info) {
163 ++UnknownCountOutEdges;
166 bool hasCount()
const {
return Count.has_value(); }
168 uint64_t getCount()
const {
return *Count; }
170 bool trySetSingleUnknownInEdgeCount() {
171 if (UnknownCountInEdges == 1) {
172 setSingleUnknownEdgeCount(InEdges);
178 bool trySetSingleUnknownOutEdgeCount() {
179 if (UnknownCountOutEdges == 1) {
180 setSingleUnknownEdgeCount(OutEdges);
185 size_t getNumOutEdges()
const {
return OutEdges.
size(); }
187 uint64_t getEdgeCount(
size_t Index)
const {
188 if (
auto *E = OutEdges[Index])
197 std::map<const BasicBlock *, BBInfo> BBInfos;
198 std::vector<EdgeInfo> EdgeInfos;
206 BBInfo &getBBInfo(
const BasicBlock &BB) {
return BBInfos.find(&BB)->second; }
208 const BBInfo &getBBInfo(
const BasicBlock &BB)
const {
209 return BBInfos.find(&BB)->second;
214 bool allCountersAreAssigned()
const {
215 for (
const auto &BBInfo : BBInfos)
216 if (!BBInfo.second.hasCount())
218 for (
const auto &EdgeInfo : EdgeInfos)
219 if (!EdgeInfo.Count.has_value())
226 bool allTakenPathsExit()
const {
227 std::deque<const BasicBlock *> Worklist;
229 Worklist.push_back(&
F.getEntryBlock());
230 bool HitExit =
false;
231 while (!Worklist.empty()) {
232 const auto *BB = Worklist.
front();
233 Worklist.pop_front();
234 if (!Visited.
insert(BB).second)
246 const auto &BBInfo = getBBInfo(*BB);
247 bool HasAWayOut =
false;
250 if (!shouldExcludeEdge(*BB, *Succ)) {
251 if (BBInfo.getEdgeCount(
I) > 0) {
253 Worklist.push_back(Succ);
263 bool allNonColdSelectsHaveProfile()
const {
264 for (
const auto &BB : F) {
265 if (getBBInfo(BB).getCount() > 0) {
266 for (
const auto &
I : BB) {
267 if (
const auto *SI = dyn_cast<SelectInst>(&
I)) {
270 auto Index = Inst->getIndex()->getZExtValue();
272 if (Counters[Index] == 0)
284 void propagateCounterValues() {
285 bool KeepGoing =
true;
288 for (
const auto &BB : F) {
289 auto &
Info = getBBInfo(BB);
290 if (!
Info.hasCount())
291 KeepGoing |=
Info.tryTakeCountFromKnownOutEdges(BB) ||
292 Info.tryTakeCountFromKnownInEdges(BB);
293 if (
Info.hasCount()) {
294 KeepGoing |=
Info.trySetSingleUnknownOutEdgeCount();
295 KeepGoing |=
Info.trySetSingleUnknownInEdgeCount();
299 assert(allCountersAreAssigned() &&
300 "[ctx-prof] Expected all counters have been assigned.");
301 assert(allTakenPathsExit() &&
302 "[ctx-prof] Encountered a BB with more than one successor, where "
303 "all outgoing edges have a 0 count. This occurs in non-exiting "
304 "functions (message pumps, usually) which are not supported in the "
305 "contextual profiling case");
306 assert(allNonColdSelectsHaveProfile() &&
307 "[ctx-prof] All non-cold select instructions were expected to have "
317 for (
const auto &BB :
F) {
318 std::optional<uint64_t> Count;
321 auto Index = Ins->getIndex()->getZExtValue();
323 "The index must be inside the counters vector by construction - "
324 "tripping this assertion indicates a bug in how the contextual "
325 "profile is managed by IPO transforms");
327 Count =
Counters[Ins->getIndex()->getZExtValue()];
328 }
else if (isa<UnreachableInst>(BB.getTerminator())) {
335 assert(Ins &&
"We iterate through the function's BBs, no reason to "
336 "insert one more than once");
338 return !shouldExcludeEdge(BB, *Succ);
342 EdgeInfos.reserve(NrEdges);
343 for (
const auto &BB :
F) {
344 auto &
Info = getBBInfo(BB);
345 for (
auto I = 0U;
I < BB.getTerminator()->getNumSuccessors(); ++
I) {
346 const auto *Succ = BB.getTerminator()->getSuccessor(
I);
347 if (!shouldExcludeEdge(BB, *Succ)) {
348 auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
349 Info.addOutEdge(
I, EI);
350 getBBInfo(*Succ).addInEdge(EI);
354 assert(EdgeInfos.capacity() == NrEdges &&
355 "The capacity of EdgeInfos should have stayed unchanged it was "
356 "populated, because we need pointers to its contents to be stable");
357 propagateCounterValues();
372 return PImpl->getBBCount(BB);
378 const auto &BBInfo = PImpl->getBBInfo(*SI.getParent());
379 TrueCount = FalseCount = 0;
380 if (BBInfo.getCount() == 0)
386 auto Index = Step->getIndex()->getZExtValue();
388 "The index of the step instruction must be inside the "
389 "counters vector by "
390 "construction - tripping this assertion indicates a bug in "
391 "how the contextual profile is managed by IPO transforms");
392 auto TotalCount = BBInfo.getCount();
393 TrueCount = PImpl->Counters[Index];
394 FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
407 Profile.resize(Term->getNumSuccessors());
409 const auto &BBInfo = PImpl->getBBInfo(BB);
411 for (
unsigned SuccIdx = 0,
Size = BBInfo.getNumOutEdges(); SuccIdx <
Size;
413 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
414 if (EdgeCount > MaxCount)
415 MaxCount = EdgeCount;
422 for (
auto &
F : M.functions()) {
423 if (
F.isDeclaration())
430 {ConstantAsMetadata::get(ConstantInt::get(
431 Type::getInt64Ty(M.getContext()), GUID))}));
437 if (
F.isDeclaration()) {
442 assert(MD &&
"guid not found for defined function");
443 return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
445 ->stripPointerCasts())
465 M.getContext().emitError(
"could not open contextual profile file: " +
471 if (!MaybeProfiles) {
472 M.getContext().emitError(
"contextual profile file is invalid: " +
473 toString(MaybeProfiles.takeError()));
483 auto ModName = M.getName();
486 Filename = Filename.substr(0, Filename.find_last_of(
'.'));
491 if (!Filename.getAsInteger(0,
Guid))
492 ProfileRootsInModule.
insert(
Guid.getZExtValue());
493 return ProfileRootsInModule;
495 const auto ProfileRootsInModule = DetermineRootsInModule();
502 if (!ProfileRootsInModule.empty()) {
503 Result.IsInSpecializedModule =
true;
505 for (
auto &[RootGuid,
_] :
507 if (!ProfileRootsInModule.contains(RootGuid))
508 MaybeProfiles->Contexts.erase(RootGuid);
510 MaybeProfiles->FlatProfiles.clear();
513 for (
const auto &
F : M) {
514 if (
F.isDeclaration())
517 assert(GUID &&
"guid not found for defined function");
518 const auto &Entry =
F.begin();
520 for (
const auto &
I : *Entry)
521 if (
auto *
C = dyn_cast<InstrProfIncrementInst>(&
I)) {
523 static_cast<uint32_t>(
C->getNumCounters()->getZExtValue());
529 for (
const auto &BB :
F)
530 for (
const auto &
I : BB)
531 if (
auto *
C = dyn_cast<InstrProfCallsite>(&
I)) {
533 static_cast<uint32_t>(
C->getNumCounters()->getZExtValue());
536 auto [It, Ins] =
Result.FuncInfo.insert(
537 {GUID, PGOContextualProfile::FunctionInfo(
F.getName())});
540 It->second.NextCallsiteIndex = MaxCallsites;
541 It->second.NextCounterIndex = MaxCounters;
545 Result.Profiles = std::move(*MaybeProfiles);
551PGOContextualProfile::getDefinedFunctionGUID(
const Function &
F)
const {
563 if (
C.contexts().empty()) {
564 OS <<
"No contextual profile was provided.\n";
569 OS <<
"Function Info:\n";
570 for (
const auto &[
Guid, FuncInfo] :
C.FuncInfo)
571 OS <<
Guid <<
" : " << FuncInfo.Name
572 <<
". MaxCounterID: " << FuncInfo.NextCounterIndex
573 <<
". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex <<
"\n";
577 OS <<
"\nCurrent Profile:\n";
583 OS <<
"\nFlat Profile:\n";
584 auto Flat =
C.flatten();
598 if (
auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
600 assert(!isa<CallBase>(Prev) &&
601 "didn't expect to find another call, that's not the callsite "
602 "instrumentation, before an instrumentable callsite");
609 if (
auto *Incr = dyn_cast<InstrProfIncrementInst>(&
I))
610 if (!isa<InstrProfIncrementInstStep>(&
I))
619 if (
auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
624template <
class ProfTy>
627 std::function<void(ProfTy &)> Traverser = [&](
auto &Ctx) {
629 for (
auto &[
_, SubCtxSet] : Ctx.callsites())
630 for (
auto &[__, Subctx] : SubCtxSet)
636template <
class ProfilesTy,
class ProfTy>
639 for (
auto &[
_,
P] : Profiles)
640 preorderVisitOneRoot<ProfTy>(
P, Visitor);
643void PGOContextualProfile::initIndex() {
647 for (
auto &[
Guid, FI] : FuncInfo)
648 InsertionPoints[
Guid] = &FI.Index;
649 preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
651 auto InsertIt = InsertionPoints.find(Ctx.guid());
652 if (InsertIt == InsertionPoints.end())
658 InsertIt->second->Next = &Ctx;
659 Ctx.Previous = InsertIt->second;
660 InsertIt->second = &Ctx;
667 : IsInSpecializedModule;
673 for (
auto *Node = FuncInfo.find(
G)->second.Index.Next; Node;
684 for (
const auto *Node = FuncInfo.find(
G)->second.Index.Next; Node;
697 "All contexts corresponding to a function should have the exact "
698 "same number of counters.");
699 for (
size_t I = 0, E = Into.
size();
I < E; ++
I)
700 Into[
I] +=
From[
I] * SamplingRate;
703 for (
const auto &[
_, CtxRoot] : Profiles.Contexts) {
704 const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
705 preorderVisitOneRoot<const PGOCtxProfContext>(
707 Accummulate(Flat[Ctx.
guid()], Ctx.
counters(), SamplingFactor);
710 for (
const auto &[
G, Unh] : CtxRoot.getUnhandled())
711 Accummulate(Flat[
G], Unh, SamplingFactor);
714 for (
const auto &[
G, FC] : Profiles.FlatProfiles)
715 Accummulate(Flat[
G], FC, 1);
722 for (
const auto &[
_, CtxRoot] : Profiles.Contexts) {
724 preorderVisitOneRoot<const PGOCtxProfContext>(
726 auto &Targets = Ret[Ctx.
guid()];
727 for (
const auto &[
ID, SubctxSet] : Ctx.
callsites())
728 for (
const auto &Subctx : SubctxSet)
729 Targets[
ID][Subctx.first] +=
738 SetVector<std::pair<CallBase *, Function *>> &Candidates) {
743 const uint32_t CallID = Instr->getIndex()->getZExtValue();
749 for (
const auto &[
Guid,
_] : Targets->second)
752 if (
Target->hasFnAttribute(Attribute::AlwaysInline))
753 Candidates.insert({&IC, Target});
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockVerifier::State From
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void preorderVisit(ProfilesTy &Profiles, function_ref< void(ProfTy &)> Visitor)
static void preorderVisitOneRoot(ProfTy &Profile, function_ref< void(ProfTy &)> Visitor)
static cl::opt< CtxProfAnalysisPrinterPass::PrintMode > PrintLevel("ctx-profile-printer-level", cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden, cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, "everything", "print everything - most verbose"), clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml", "just the yaml representation of the profile")), cl::desc("Verbosity level of the contextual profile printer pass."))
static cl::opt< bool > ForceIsInSpecializedModule("ctx-profile-force-is-specialized", cl::init(false), cl::desc("Treat the given module as-if it were containing the " "post-thinlink module containing the root"))
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
Reader for contextual iFDO profile, which comes in bitstream format.
ModuleAnalysisManager MAM
Class for arbitrary precision integers.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static LLVM_ABI uint64_t getGUID(const Function &F)
static LLVM_ABI const char * GUIDMetadataName
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Assign a GUID if one is not already assign, as a function metadata named GUIDMetadataName.
LLVM Basic Block Representation.
const Instruction & front() const
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI CtxProfAnalysisPrinterPass(raw_ostream &OS)
LLVM_ABI PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM)
static LLVM_ABI InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static LLVM_ABI InstrProfIncrementInstStep * getSelectInstrumentation(SelectInst &SI)
Get the step instrumentation associated with a select
static LLVM_ABI void collectIndirectCallPromotionList(CallBase &IC, Result &Profile, SetVector< std::pair< CallBase *, Function * > > &Candidates)
static LLVM_ABI InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
static LLVM_ABI AnalysisKey Key
PGOContextualProfile Result
LLVM_ABI CtxProfAnalysis(std::optional< StringRef > Profile=std::nullopt)
Implements a dense probed hash-table based set.
Represents either an error or a value T.
std::error_code getError() const
static bool isExternalLinkage(LinkageTypes Linkage)
This represents the llvm.instrprof.callsite intrinsic.
static bool canInstrumentCallsite(const CallBase &CB)
This represents the llvm.instrprof.increment.step intrinsic.
This represents the llvm.instrprof.increment intrinsic.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
The instrumented contextual profile, produced by the CtxProfAnalysis.
LLVM_ABI void visit(ConstVisitor, const Function *F=nullptr) const
LLVM_ABI const CtxProfFlatProfile flatten() const
LLVM_ABI bool isInSpecializedModule() const
LLVM_ABI void update(Visitor, const Function &F)
LLVM_ABI const CtxProfFlatIndirectCallProfile flattenVirtCalls() const
bool isFunctionKnown(const Function &F) const
A node (context) in the loaded contextual profile, suitable for mutation during IPO passes.
GlobalValue::GUID guid() const
const SmallVectorImpl< uint64_t > & counters() const
std::map< GlobalValue::GUID, PGOCtxProfContext > CallTargetMapTy
const CallsiteMapTy & callsites() const
LLVM_ABI Expected< PGOCtxProfile > loadProfiles()
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
uint64_t getBBCount(const BasicBlock &BB)
ProfileAnnotatorImpl(const Function &F, ArrayRef< uint64_t > Counters)
LLVM_ABI ~ProfileAnnotator()
LLVM_ABI bool getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, uint64_t &FalseCount) const
LLVM_ABI bool getOutgoingBranchWeights(BasicBlock &BB, SmallVectorImpl< uint64_t > &Profile, uint64_t &MaxCount) const
LLVM_ABI uint64_t getBBCount(const BasicBlock &BB) const
LLVM_ABI ProfileAnnotator(const Function &F, ArrayRef< uint64_t > RawCounters)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Target - Wrapper for Target specific information.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
Pass manager infrastructure for declaring and invalidating analyses.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto pred_size(const MachineBasicBlock *BB)
std::map< GlobalValue::GUID, SmallVector< uint64_t, 1 > > CtxProfFlatProfile
auto succ_size(const MachineBasicBlock *BB)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
LLVM_ABI bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
const char * toString(DWARFSectionKind Kind)
LLVM_ABI void convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile)
Implement std::hash so that hash_code can be used in STL containers.
A special type used by analysis passes to provide an address that identifies that particular analysis...