LLVM 22.0.0git
MemProfUse.cpp
Go to the documentation of this file.
1//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemProfUsePass which reads memory profiling data
10// and uses it to add metadata to instructions to guide optimization.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
22#include "llvm/IR/Function.h"
24#include "llvm/IR/Module.h"
29#include "llvm/Support/BLAKE3.h"
31#include "llvm/Support/Debug.h"
35#include <map>
36#include <set>
37
38using namespace llvm;
39using namespace llvm::memprof;
40
41#define DEBUG_TYPE "memprof"
42
43namespace llvm {
47} // namespace llvm
48
49// By default disable matching of allocation profiles onto operator new that
50// already explicitly pass a hot/cold hint, since we don't currently
51// override these hints anyway.
53 "memprof-match-hot-cold-new",
55 "Match allocation profiles onto existing hot/cold operator new calls"),
56 cl::Hidden, cl::init(false));
57
58static cl::opt<bool>
59 ClPrintMemProfMatchInfo("memprof-print-match-info",
60 cl::desc("Print matching stats for each allocation "
61 "context in this module's profiles"),
62 cl::Hidden, cl::init(false));
63
64static cl::opt<bool>
65 SalvageStaleProfile("memprof-salvage-stale-profile",
66 cl::desc("Salvage stale MemProf profile"),
67 cl::init(false), cl::Hidden);
68
70 "memprof-attach-calleeguids",
72 "Attach calleeguids as value profile metadata for indirect calls."),
73 cl::init(true), cl::Hidden);
74
76 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
77 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
78
80 "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
81 cl::desc("If true, annotate the static data section prefix"));
82
83// Matching statistics
84STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
85STATISTIC(NumOfMemProfMismatch,
86 "Number of functions having mismatched memory profile hash.");
87STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
88STATISTIC(NumOfMemProfAllocContextProfiles,
89 "Number of alloc contexts in memory profile.");
90STATISTIC(NumOfMemProfCallSiteProfiles,
91 "Number of callsites in memory profile.");
92STATISTIC(NumOfMemProfMatchedAllocContexts,
93 "Number of matched memory profile alloc contexts.");
94STATISTIC(NumOfMemProfMatchedAllocs,
95 "Number of matched memory profile allocs.");
96STATISTIC(NumOfMemProfMatchedCallSites,
97 "Number of matched memory profile callsites.");
98STATISTIC(NumOfMemProfHotGlobalVars,
99 "Number of global vars annotated with 'hot' section prefix.");
100STATISTIC(NumOfMemProfColdGlobalVars,
101 "Number of global vars annotated with 'unlikely' section prefix.");
102STATISTIC(NumOfMemProfUnknownGlobalVars,
103 "Number of global vars with unknown hotness (no section prefix).");
104STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
105 "Number of global vars with user-specified section (not annotated).");
106
108 ArrayRef<uint64_t> InlinedCallStack,
109 LLVMContext &Ctx) {
110 I.setMetadata(LLVMContext::MD_callsite,
111 buildCallstackMetadata(InlinedCallStack, Ctx));
112}
113
115 uint32_t Column) {
118 HashBuilder.add(Function, LineOffset, Column);
120 uint64_t Id;
121 std::memcpy(&Id, Hash.data(), sizeof(Hash));
122 return Id;
123}
124
127}
128
131 uint64_t FullStackId) {
132 SmallVector<uint64_t> StackIds;
133 for (const auto &StackFrame : AllocInfo->CallStack)
134 StackIds.push_back(computeStackId(StackFrame));
135 auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
136 AllocInfo->Info.getAllocCount(),
137 AllocInfo->Info.getTotalLifetime());
138 std::vector<ContextTotalSize> ContextSizeInfo;
140 auto TotalSize = AllocInfo->Info.getTotalSize();
141 assert(TotalSize);
142 assert(FullStackId != 0);
143 ContextSizeInfo.push_back({FullStackId, TotalSize});
144 }
145 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
146 return AllocType;
147}
148
149// Return true if InlinedCallStack, computed from a call instruction's debug
150// info, is a prefix of ProfileCallStack, a list of Frames from profile data
151// (either the allocation data or a callsite).
152static bool
154 ArrayRef<uint64_t> InlinedCallStack) {
155 return ProfileCallStack.size() >= InlinedCallStack.size() &&
156 llvm::equal(ProfileCallStack.take_front(InlinedCallStack.size()),
157 InlinedCallStack, [](const Frame &F, uint64_t StackId) {
158 return computeStackId(F) == StackId;
159 });
160}
161
162static bool isAllocationWithHotColdVariant(const Function *Callee,
163 const TargetLibraryInfo &TLI) {
164 if (!Callee)
165 return false;
166 LibFunc Func;
167 if (!TLI.getLibFunc(*Callee, Func))
168 return false;
169 switch (Func) {
170 case LibFunc_Znwm:
171 case LibFunc_ZnwmRKSt9nothrow_t:
172 case LibFunc_ZnwmSt11align_val_t:
173 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
174 case LibFunc_Znam:
175 case LibFunc_ZnamRKSt9nothrow_t:
176 case LibFunc_ZnamSt11align_val_t:
177 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
178 case LibFunc_size_returning_new:
179 case LibFunc_size_returning_new_aligned:
180 return true;
181 case LibFunc_Znwm12__hot_cold_t:
182 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
183 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
184 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
185 case LibFunc_Znam12__hot_cold_t:
186 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
187 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
188 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
189 case LibFunc_size_returning_new_hot_cold:
190 case LibFunc_size_returning_new_aligned_hot_cold:
192 default:
193 return false;
194 }
195}
196
199 AllocationType AllocType = AllocationType::None;
200};
201
204 function_ref<bool(uint64_t)> IsPresentInProfile) {
206
207 auto GetOffset = [](const DILocation *DIL) {
208 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
209 0xffff;
210 };
211
212 for (Function &F : M) {
213 if (F.isDeclaration())
214 continue;
215
216 for (auto &BB : F) {
217 for (auto &I : BB) {
218 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
219 continue;
220
221 auto *CB = dyn_cast<CallBase>(&I);
222 auto *CalledFunction = CB->getCalledFunction();
223 // Disregard indirect calls and intrinsics.
224 if (!CalledFunction || CalledFunction->isIntrinsic())
225 continue;
226
227 StringRef CalleeName = CalledFunction->getName();
228 // True if we are calling a heap allocation function that supports
229 // hot/cold variants.
230 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
231 // True for the first iteration below, indicating that we are looking at
232 // a leaf node.
233 bool IsLeaf = true;
234 for (const DILocation *DIL = I.getDebugLoc(); DIL;
235 DIL = DIL->getInlinedAt()) {
236 StringRef CallerName = DIL->getSubprogramLinkageName();
237 assert(!CallerName.empty() &&
238 "Be sure to enable -fdebug-info-for-profiling");
239 uint64_t CallerGUID = memprof::getGUID(CallerName);
240 uint64_t CalleeGUID = memprof::getGUID(CalleeName);
241 // Pretend that we are calling a function with GUID == 0 if we are
242 // in the inline stack leading to a heap allocation function.
243 if (IsAlloc) {
244 if (IsLeaf) {
245 // For leaf nodes, set CalleeGUID to 0 without consulting
246 // IsPresentInProfile.
247 CalleeGUID = 0;
248 } else if (!IsPresentInProfile(CalleeGUID)) {
249 // In addition to the leaf case above, continue to set CalleeGUID
250 // to 0 as long as we don't see CalleeGUID in the profile.
251 CalleeGUID = 0;
252 } else {
253 // Once we encounter a callee that exists in the profile, stop
254 // setting CalleeGUID to 0.
255 IsAlloc = false;
256 }
257 }
258
259 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
260 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
261 CalleeName = CallerName;
262 IsLeaf = false;
263 }
264 }
265 }
266 }
267
268 // Sort each call list by the source location.
269 for (auto &[CallerGUID, CallList] : Calls) {
270 llvm::sort(CallList);
271 CallList.erase(llvm::unique(CallList), CallList.end());
272 }
273
274 return Calls;
275}
276
279 const TargetLibraryInfo &TLI) {
281
283 MemProfReader->getMemProfCallerCalleePairs();
285 extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
286 return CallsFromProfile.contains(GUID);
287 });
288
289 // Compute an undrift map for each CallerGUID.
290 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
291 auto It = CallsFromProfile.find(CallerGUID);
292 if (It == CallsFromProfile.end())
293 continue;
294 const auto &ProfileAnchors = It->second;
295
296 LocToLocMap Matchings;
297 longestCommonSequence<LineLocation, GlobalValue::GUID>(
298 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
299 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
300 [[maybe_unused]] bool Inserted =
301 UndriftMaps.try_emplace(CallerGUID, std::move(Matchings)).second;
302
303 // The insertion must succeed because we visit each GUID exactly once.
304 assert(Inserted);
305 }
306
307 return UndriftMaps;
308}
309
310// Given a MemProfRecord, undrift all the source locations present in the
311// record in place.
312static void
314 memprof::MemProfRecord &MemProfRec) {
315 // Undrift a call stack in place.
316 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
317 for (auto &F : CallStack) {
318 auto I = UndriftMaps.find(F.Function);
319 if (I == UndriftMaps.end())
320 continue;
321 auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
322 if (J == I->second.end())
323 continue;
324 auto &NewLoc = J->second;
325 F.LineOffset = NewLoc.LineOffset;
326 F.Column = NewLoc.Column;
327 }
328 };
329
330 for (auto &AS : MemProfRec.AllocSites)
331 UndriftCallStack(AS.CallStack);
332
333 for (auto &CS : MemProfRec.CallSites)
334 UndriftCallStack(CS.Frames);
335}
336
337// Helper function to process CalleeGuids and create value profile metadata
339 ArrayRef<GlobalValue::GUID> CalleeGuids) {
340 if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
341 return;
342
343 if (I.getMetadata(LLVMContext::MD_prof)) {
344 uint64_t Unused;
345 // TODO: When merging is implemented, increase this to a typical ICP value
346 // (e.g., 3-6) For now, we only need to check if existing data exists, so 1
347 // is sufficient
348 auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
349 /*MaxNumValueData=*/1, Unused);
350 // We don't know how to merge value profile data yet.
351 if (!ExistingVD.empty()) {
352 return;
353 }
354 }
355
357 uint64_t TotalCount = 0;
358
359 for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
360 InstrProfValueData VD;
361 VD.Value = CalleeGUID;
362 // For MemProf, we don't have actual call counts, so we assign
363 // a weight of 1 to each potential target.
364 // TODO: Consider making this weight configurable or increasing it to
365 // improve effectiveness for ICP.
366 VD.Count = 1;
367 VDs.push_back(VD);
368 TotalCount += VD.Count;
369 }
370
371 if (!VDs.empty()) {
372 annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
373 VDs.size());
374 }
375}
376
377static void
379 ArrayRef<uint64_t> InlinedCallStack, LLVMContext &Ctx,
380 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
381 const std::set<const AllocationInfo *> &AllocInfoSet,
382 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
383 &FullStackIdToAllocMatchInfo) {
384 // We may match this instruction's location list to multiple MIB
385 // contexts. Add them to a Trie specialized for trimming the contexts to
386 // the minimal needed to disambiguate contexts with unique behavior.
387 CallStackTrie AllocTrie(&ORE, MaxColdSize);
388 uint64_t TotalSize = 0;
389 uint64_t TotalColdSize = 0;
390 for (auto *AllocInfo : AllocInfoSet) {
391 // Check the full inlined call stack against this one.
392 // If we found and thus matched all frames on the call, include
393 // this MIB.
395 InlinedCallStack)) {
396 NumOfMemProfMatchedAllocContexts++;
397 uint64_t FullStackId = 0;
399 FullStackId = computeFullStackId(AllocInfo->CallStack);
400 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
401 TotalSize += AllocInfo->Info.getTotalSize();
402 if (AllocType == AllocationType::Cold)
403 TotalColdSize += AllocInfo->Info.getTotalSize();
404 // Record information about the allocation if match info printing
405 // was requested.
407 assert(FullStackId != 0);
408 FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
409 InlinedCallStack.size())] = {
410 AllocInfo->Info.getTotalSize(), AllocType};
411 }
412 }
413 }
414 // If the threshold for the percent of cold bytes is less than 100%,
415 // and not all bytes are cold, see if we should still hint this
416 // allocation as cold without context sensitivity.
417 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
418 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
419 AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, "dominant");
420 return;
421 }
422
423 // We might not have matched any to the full inlined call stack.
424 // But if we did, create and attach metadata, or a function attribute if
425 // all contexts have identical profiled behavior.
426 if (!AllocTrie.empty()) {
427 NumOfMemProfMatchedAllocs++;
428 // MemprofMDAttached will be false if a function attribute was
429 // attached.
430 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
431 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
432 if (MemprofMDAttached) {
433 // Add callsite metadata for the instruction's location list so that
434 // it is simpler later on to identify which part of the MIB contexts
435 // are from this particular instruction (including during inlining,
436 // when the callsite metadata will be updated appropriately).
437 // FIXME: can this be changed to strip out the matching stack
438 // context ids from the MIB contexts and not add any callsite
439 // metadata here to save space?
440 addCallsiteMetadata(I, InlinedCallStack, Ctx);
441 }
442 }
443}
444
445// Helper struct for maintaining refs to callsite data. As an alternative we
446// could store a pointer to the CallSiteInfo struct but we also need the frame
447// index. Using ArrayRefs instead makes it a little easier to read.
449 // Subset of frames for the corresponding CallSiteInfo.
451 // Potential targets for indirect calls.
453
454 // Only compare Frame contents.
455 // Use pointer-based equality instead of ArrayRef's operator== which does
456 // element-wise comparison. We want to check if it's the same slice of the
457 // underlying array, not just equivalent content.
458 bool operator==(const CallSiteEntry &Other) const {
459 return Frames.data() == Other.Frames.data() &&
460 Frames.size() == Other.Frames.size();
461 }
462};
463
465 size_t operator()(const CallSiteEntry &Entry) const {
466 return computeFullStackId(Entry.Frames);
467 }
468};
469
470static void handleCallSite(
471 Instruction &I, const Function *CalledFunction,
472 ArrayRef<uint64_t> InlinedCallStack,
473 const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries,
474 Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites) {
475 auto &Ctx = M.getContext();
476 for (const auto &CallSiteEntry : CallSiteEntries) {
477 // If we found and thus matched all frames on the call, create and
478 // attach call stack metadata.
480 InlinedCallStack)) {
481 NumOfMemProfMatchedCallSites++;
482 addCallsiteMetadata(I, InlinedCallStack, Ctx);
483
484 // Try to attach indirect call metadata if possible.
485 if (!CalledFunction)
487
488 // Only need to find one with a matching call stack and add a single
489 // callsite metadata.
490
491 // Accumulate call site matching information upon request.
493 std::vector<uint64_t> CallStack;
494 append_range(CallStack, InlinedCallStack);
495 MatchedCallSites.insert(std::move(CallStack));
496 }
497 break;
498 }
499 }
500}
501
502static void readMemprof(Module &M, Function &F,
504 const TargetLibraryInfo &TLI,
505 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
506 &FullStackIdToAllocMatchInfo,
507 std::set<std::vector<uint64_t>> &MatchedCallSites,
509 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
510 auto &Ctx = M.getContext();
511 // Previously we used getIRPGOFuncName() here. If F is local linkage,
512 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
513 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
514 // contain FileName's prefix. It caused local linkage function can't
515 // find MemProfRecord. So we use getName() now.
516 // 'unique-internal-linkage-names' can make MemProf work better for local
517 // linkage function.
518 auto FuncName = F.getName();
519 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);
520 std::optional<memprof::MemProfRecord> MemProfRec;
521 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
522 if (Err) {
523 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
524 auto Err = IPE.get();
525 bool SkipWarning = false;
526 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
527 << ": ");
528 if (Err == instrprof_error::unknown_function) {
529 NumOfMemProfMissing++;
530 SkipWarning = !PGOWarnMissing;
531 LLVM_DEBUG(dbgs() << "unknown function");
532 } else if (Err == instrprof_error::hash_mismatch) {
533 NumOfMemProfMismatch++;
534 SkipWarning =
537 (F.hasComdat() ||
539 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
540 }
541
542 if (SkipWarning)
543 return;
544
545 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
546 Twine(" Hash = ") + std::to_string(FuncGUID))
547 .str();
548
549 Ctx.diagnose(
550 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
551 });
552 return;
553 }
554
555 NumOfMemProfFunc++;
556
557 // If requested, undrift MemProfRecord so that the source locations in it
558 // match those in the IR.
560 undriftMemProfRecord(UndriftMaps, *MemProfRec);
561
562 // Detect if there are non-zero column numbers in the profile. If not,
563 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
564 // columns in the IR). The profiled binary might have been built with
565 // column numbers disabled, for example.
566 bool ProfileHasColumns = false;
567
568 // Build maps of the location hash to all profile data with that leaf location
569 // (allocation info and the callsites).
570 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
571
572 // For the callsites we need to record slices of the frame array (see comments
573 // below where the map entries are added) along with their CalleeGuids.
574 std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
575 LocHashToCallSites;
576 for (auto &AI : MemProfRec->AllocSites) {
577 NumOfMemProfAllocContextProfiles++;
578 // Associate the allocation info with the leaf frame. The later matching
579 // code will match any inlined call sequences in the IR with a longer prefix
580 // of call stack frames.
581 uint64_t StackId = computeStackId(AI.CallStack[0]);
582 LocHashToAllocInfo[StackId].insert(&AI);
583 ProfileHasColumns |= AI.CallStack[0].Column;
584 }
585 for (auto &CS : MemProfRec->CallSites) {
586 NumOfMemProfCallSiteProfiles++;
587 // Need to record all frames from leaf up to and including this function,
588 // as any of these may or may not have been inlined at this point.
589 unsigned Idx = 0;
590 for (auto &StackFrame : CS.Frames) {
591 uint64_t StackId = computeStackId(StackFrame);
592 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
593 ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
594 LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
595
596 ProfileHasColumns |= StackFrame.Column;
597 // Once we find this function, we can stop recording.
598 if (StackFrame.Function == FuncGUID)
599 break;
600 }
601 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
602 }
603
604 auto GetOffset = [](const DILocation *DIL) {
605 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
606 0xffff;
607 };
608
609 // Now walk the instructions, looking up the associated profile data using
610 // debug locations.
611 for (auto &BB : F) {
612 for (auto &I : BB) {
613 if (I.isDebugOrPseudoInst())
614 continue;
615 // We are only interested in calls (allocation or interior call stack
616 // context calls).
617 auto *CI = dyn_cast<CallBase>(&I);
618 if (!CI)
619 continue;
620 auto *CalledFunction = CI->getCalledFunction();
621 if (CalledFunction && CalledFunction->isIntrinsic())
622 continue;
623 // List of call stack ids computed from the location hashes on debug
624 // locations (leaf to inlined at root).
625 SmallVector<uint64_t, 8> InlinedCallStack;
626 // Was the leaf location found in one of the profile maps?
627 bool LeafFound = false;
628 // If leaf was found in a map, iterators pointing to its location in both
629 // of the maps. It might exist in neither, one, or both (the latter case
630 // can happen because we don't currently have discriminators to
631 // distinguish the case when a single line/col maps to both an allocation
632 // and another callsite).
633 auto AllocInfoIter = LocHashToAllocInfo.end();
634 auto CallSitesIter = LocHashToCallSites.end();
635 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
636 DIL = DIL->getInlinedAt()) {
637 // Use C++ linkage name if possible. Need to compile with
638 // -fdebug-info-for-profiling to get linkage name.
639 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
640 if (Name.empty())
641 Name = DIL->getScope()->getSubprogram()->getName();
642 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);
643 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
644 ProfileHasColumns ? DIL->getColumn() : 0);
645 // Check if we have found the profile's leaf frame. If yes, collect
646 // the rest of the call's inlined context starting here. If not, see if
647 // we find a match further up the inlined context (in case the profile
648 // was missing debug frames at the leaf).
649 if (!LeafFound) {
650 AllocInfoIter = LocHashToAllocInfo.find(StackId);
651 CallSitesIter = LocHashToCallSites.find(StackId);
652 if (AllocInfoIter != LocHashToAllocInfo.end() ||
653 CallSitesIter != LocHashToCallSites.end())
654 LeafFound = true;
655 }
656 if (LeafFound)
657 InlinedCallStack.push_back(StackId);
658 }
659 // If leaf not in either of the maps, skip inst.
660 if (!LeafFound)
661 continue;
662
663 // First add !memprof metadata from allocation info, if we found the
664 // instruction's leaf location in that map, and if the rest of the
665 // instruction's locations match the prefix Frame locations on an
666 // allocation context with the same leaf.
667 if (AllocInfoIter != LocHashToAllocInfo.end() &&
668 // Only consider allocations which support hinting.
669 isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
670 handleAllocSite(I, CI, InlinedCallStack, Ctx, ORE, MaxColdSize,
671 AllocInfoIter->second, FullStackIdToAllocMatchInfo);
672 else if (CallSitesIter != LocHashToCallSites.end())
673 // Otherwise, add callsite metadata. If we reach here then we found the
674 // instruction's leaf location in the callsites map and not the
675 // allocation map.
676 handleCallSite(I, CalledFunction, InlinedCallStack,
677 CallSitesIter->second, M, MatchedCallSites);
678 }
679 }
680}
681
682MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
684 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
685 if (!FS)
686 this->FS = vfs::getRealFileSystem();
687}
688
690 // Return immediately if the module doesn't contain any function or global
691 // variables.
692 if (M.empty() && M.globals().empty())
693 return PreservedAnalyses::all();
694
695 LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
696 auto &Ctx = M.getContext();
697 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
698 if (Error E = ReaderOrErr.takeError()) {
699 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
700 Ctx.diagnose(
701 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
702 });
703 return PreservedAnalyses::all();
704 }
705
706 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
707 std::move(ReaderOrErr.get());
708 if (!MemProfReader) {
709 Ctx.diagnose(DiagnosticInfoPGOProfile(
710 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
711 return PreservedAnalyses::all();
712 }
713
714 if (!MemProfReader->hasMemoryProfile()) {
715 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
716 "Not a memory profile"));
717 return PreservedAnalyses::all();
718 }
719
720 const bool Changed =
721 annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
722
723 // If the module doesn't contain any function, return after we process all
724 // global variables.
725 if (M.empty())
726 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
727
728 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
729
733 UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
734
735 // Map from the stack hash and matched frame count of each allocation context
736 // in the function profiles to the total profiled size (bytes) and allocation
737 // type.
738 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
739 FullStackIdToAllocMatchInfo;
740
741 // Set of the matched call sites, each expressed as a sequence of an inline
742 // call stack.
743 std::set<std::vector<uint64_t>> MatchedCallSites;
744
745 uint64_t MaxColdSize = 0;
746 if (auto *MemProfSum = MemProfReader->getMemProfSummary())
747 MaxColdSize = MemProfSum->getMaxColdTotalSize();
748
749 for (auto &F : M) {
750 if (F.isDeclaration())
751 continue;
752
755 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
756 MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
757 }
758
760 for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
761 auto [Id, Length] = IdLengthPair;
762 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
763 << " context with id " << Id << " has total profiled size "
764 << Info.TotalSize << " is matched with " << Length << " frames\n";
765 }
766
767 for (const auto &CallStack : MatchedCallSites) {
768 errs() << "MemProf callsite match for inline call stack";
769 for (uint64_t StackId : CallStack)
770 errs() << " " << StackId;
771 errs() << "\n";
772 }
773 }
774
776}
777
778// Returns true iff the global variable has custom section either by
779// __attribute__((section("name")))
780// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
781// or #pragma clang section directives
782// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
783static bool hasExplicitSectionName(const GlobalVariable &GVar) {
784 if (GVar.hasSection())
785 return true;
786
787 auto Attrs = GVar.getAttributes();
788 if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
789 Attrs.hasAttribute("relro-section") ||
790 Attrs.hasAttribute("rodata-section"))
791 return true;
792 return false;
793}
794
795bool MemProfUsePass::annotateGlobalVariables(
796 Module &M, const memprof::DataAccessProfData *DataAccessProf) {
797 if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
798 return false;
799
800 if (!DataAccessProf) {
801 M.getContext().diagnose(DiagnosticInfoPGOProfile(
802 MemoryProfileFileName.data(),
803 StringRef("Data access profiles not found in memprof. Ignore "
804 "-memprof-annotate-static-data-prefix."),
805 DS_Warning));
806 return false;
807 }
808
809 bool Changed = false;
810 // Iterate all global variables in the module and annotate them based on
811 // data access profiles. Note it's up to the linker to decide how to map input
812 // sections to output sections, and one conservative practice is to map
813 // unlikely-prefixed ones to unlikely output section, and map the rest
814 // (hot-prefixed or prefix-less) to the canonical output section.
815 for (GlobalVariable &GVar : M.globals()) {
816 assert(!GVar.getSectionPrefix().has_value() &&
817 "GVar shouldn't have section prefix yet");
818 if (GVar.isDeclarationForLinker())
819 continue;
820
821 if (hasExplicitSectionName(GVar)) {
822 ++NumOfMemProfExplicitSectionGlobalVars;
823 LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName()
824 << " has explicit section name. Skip annotating.\n");
825 continue;
826 }
827
828 StringRef Name = GVar.getName();
829 // Skip string literals as their mangled names don't stay stable across
830 // binary releases.
831 // TODO: Track string content hash in the profiles and compute it inside the
832 // compiler to categeorize the hotness string literals.
833 if (Name.starts_with(".str")) {
834
835 LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
836 continue;
837 }
838
839 // DataAccessProfRecord's get* methods will canonicalize the name under the
840 // hood before looking it up, so optimizer doesn't need to do it.
841 std::optional<DataAccessProfRecord> Record =
842 DataAccessProf->getProfileRecord(Name);
843 // Annotate a global variable as hot if it has non-zero sampled count, and
844 // annotate it as cold if it's seen in the profiled binary
845 // file but doesn't have any access sample.
846 // For logging, optimization remark emitter requires a llvm::Function, but
847 // it's not well defined how to associate a global variable with a function.
848 // So we just print out the static data section prefix in LLVM_DEBUG.
849 if (Record && Record->AccessCount > 0) {
850 ++NumOfMemProfHotGlobalVars;
851 GVar.setSectionPrefix("hot");
852 Changed = true;
853 LLVM_DEBUG(dbgs() << "Global variable " << Name
854 << " is annotated as hot\n");
855 } else if (DataAccessProf->isKnownColdSymbol(Name)) {
856 ++NumOfMemProfColdGlobalVars;
857 GVar.setSectionPrefix("unlikely");
858 Changed = true;
859 LLVM_DEBUG(dbgs() << "Global variable " << Name
860 << " is annotated as unlikely\n");
861 } else {
862 ++NumOfMemProfUnknownGlobalVars;
863 LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
864 }
865 }
866
867 return Changed;
868}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::unordered_set< CallSiteEntry, CallSiteEntryHash > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites)
Definition: MemProfUse.cpp:470
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
Definition: MemProfUse.cpp:107
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
Definition: MemProfUse.cpp:162
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
Definition: MemProfUse.cpp:313
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
Definition: MemProfUse.cpp:114
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
Definition: MemProfUse.cpp:338
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static bool hasExplicitSectionName(const GlobalVariable &GVar)
Definition: MemProfUse.cpp:783
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
Definition: MemProfUse.cpp:129
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize)
Definition: MemProfUse.cpp:502
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
Definition: MemProfUse.cpp:378
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
Definition: MemProfUse.cpp:153
AllocType
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:224
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:200
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:142
const T * data() const
Definition: ArrayRef.h:144
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
Debug location.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
iterator end()
Definition: DenseMap.h:87
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:168
Diagnostic information for the PGO profiler.
Base class for error info classes.
Definition: Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:52
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:106
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:54
AttributeSet getAttributes() const
Return the attribute set for this global.
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition: HashBuilder.h:66
Interface to help hash various types through a hasher type.
Definition: HashBuilder.h:139
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Definition: HashBuilder.h:149
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:585
instrprof_error get() const
Definition: InstrProf.h:465
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:257
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: MemProfUse.cpp:689
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
Definition: MemProfUse.cpp:682
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:151
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
Definition: MemProfUse.cpp:278
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
Definition: MemProfUse.cpp:203
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition: MemProf.cpp:344
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition: MemProfUse.h:66
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:477
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:990
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2095
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition: BLAKE3.h:35
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1334
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extracts the value profile data from Inst and returns them if Inst is annotated with value profile dat...
Definition: InstrProf.cpp:1402
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfUse.cpp:45
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ DS_Warning
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2107
cl::opt< bool > NoPGOWarnMismatchComdatWeak
uint64_t TotalSize
Definition: MemProfUse.cpp:198
size_t operator()(const CallSiteEntry &Entry) const
Definition: MemProfUse.cpp:465
ArrayRef< GlobalValue::GUID > CalleeGuids
Definition: MemProfUse.cpp:452
bool operator==(const CallSiteEntry &Other) const
Definition: MemProfUse.cpp:458
ArrayRef< Frame > Frames
Definition: MemProfUse.cpp:450
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
Definition: MemProf.h:245
uint32_t LineOffset
Definition: MemProf.h:250
llvm::SmallVector< CallSiteInfo > CallSites
Definition: MemProf.h:522
llvm::SmallVector< AllocationInfo > AllocSites
Definition: MemProf.h:520