50#include <unordered_map>
// NOTE(review): this region is damaged by extraction — original source line
// numbers are fused into the text and many lines are missing. Code left
// token-identical; comments only.
// Debug type string used by the STATISTIC/LLVM_DEBUG machinery below.
55#define DEBUG_TYPE "memprof-context-disambiguation"
// Counters for function-clone creation (whole-program analysis vs ThinLTO
// backend). The STATISTIC(...) openers for the next three were lost in
// extraction; only the description strings remain.
58 "Number of function clones created during whole program analysis");
60 "Number of function clones created during ThinLTO backend");
62 "Number of functions that had clones created during ThinLTO backend");
// Counters for static allocations by inferred temperature (not-cold vs cold),
// tracked separately for whole-program analysis and the ThinLTO backend.
63STATISTIC(AllocTypeNotCold,
          "Number of not cold static allocations (possibly "
64          "cloned) during whole program analysis");
65STATISTIC(AllocTypeCold,
          "Number of cold static allocations (possibly cloned) "
66          "during whole program analysis");
68 "Number of not cold static allocations (possibly cloned) during "
70STATISTIC(AllocTypeColdThinBackend,
          "Number of cold static allocations "
71          "(possibly cloned) during ThinLTO backend");
73 "Number of original (not cloned) allocations with memprof profiles "
74 "during ThinLTO backend");
76 AllocVersionsThinBackend,
77 "Number of allocation versions (including clones) during ThinLTO backend");
79 "Maximum number of allocation versions created for an original "
80 "allocation during ThinLTO backend");
82 "Number of unclonable ambigous allocations during ThinLTO backend");
// Counters for graph hygiene and tail-call discovery.
84 "Number of edges removed due to mismatched callees (profiled vs IR)");
86 "Number of profiled callees found via tail calls");
88 "Aggregate depth of profiled callees found via tail calls");
90 "Maximum depth of profiled callees found via tail calls");
92 "Number of profiled callees found via multiple tail call chains");
// Counters for backedge handling and clone merging.
93STATISTIC(DeferredBackedges,
          "Number of backedges with deferred cloning");
94STATISTIC(NewMergedNodes,
          "Number of new nodes created during merging");
95STATISTIC(NonNewMergedNodes,
          "Number of non new nodes used during merging");
97 "Number of missing alloc nodes for context ids");
99 "Number of calls skipped during cloning due to unexpected operand");
101 "Number of callsites assigned to call multiple non-matching clones");
// Counters for the iterative merge loop.
102STATISTIC(TotalMergeInvokes,
          "Number of merge invocations for nodes");
103STATISTIC(TotalMergeIters,
          "Number of merge iterations for nodes");
104STATISTIC(MaxMergeIters,
          "Max merge iterations for nodes");
// NOTE(review): cl::opt declarations for this pass. Extraction dropped the
// option variable names/types for most entries — only the cl::desc() payloads
// survive. Code left token-identical; comments only.
// Dot-graph export controls.
109 cl::desc(
    "Specify the path prefix of the MemProf dot files."));
113 cl::desc(
    "Export graph to dot files."));
118 cl::desc(
    "Iteratively apply merging on a node to catch new callers"));
// -memprof-dot-scope: limit which nodes get exported (all / alloc / context).
128 "memprof-dot-scope",
    cl::desc(
        "Scope of graph to export to dot"),
133 "Export only nodes with contexts feeding given "
134 "-memprof-dot-alloc-id"),
136 "Export only nodes with given -memprof-dot-context-id")));
// Ids selecting which alloc/context to export or highlight in the dot output.
140 cl::desc(
    "Id of alloc to export if -memprof-dot-scope=alloc "
141 "or to highlight if -memprof-dot-scope=all"));
145 cl::desc(
    "Id of context to export if -memprof-dot-scope=context or to "
146 "highlight otherwise"));
// Debug dump / verification toggles.
150 cl::desc(
    "Dump CallingContextGraph to stdout after each stage."));
154 cl::desc(
    "Perform verification checks on CallingContextGraph."));
158 cl::desc(
    "Perform frequent verification checks on nodes."));
// Testing hook: feed a summary index to the ThinLTO backend via opt.
161 "memprof-import-summary",
162 cl::desc(
    "Import summary to use for testing the ThinLTO backend via opt"),
// Tail-call frame search depth limit.
168 cl::desc(
    "Max depth to recursively search for missing "
169 "frames through tail calls."));
// Recursion/cloning policy knobs.
174 cl::desc(
    "Allow cloning of callsites involved in recursive cycles"));
178 cl::desc(
    "Allow cloning of contexts through recursive cycles"));
185 cl::desc(
    "Merge clones before assigning functions"));
194 cl::desc(
    "Allow cloning of contexts having recursive cycles"));
// Promotion thresholds / environment flags.
200 cl::desc(
    "Minimum absolute count for promoted target to be inlinable"));
211 cl::desc(
    "Linking with hot/cold operator new interfaces"));
216 "Require target function definition when promoting indirect calls"));
// CRTP base class: a graph of allocation and callsite context nodes, built
// from memprof metadata (IR form) or summary index (ThinLTO form). DerivedCCG
// supplies the representation-specific hooks invoked via static_cast below.
// NOTE(review): many interior lines were lost in extraction (fused original
// line numbers jump); code left token-identical, comments only.
237template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
238class CallsiteContextGraph {
// Default/copy/move construction is trivial.
240 CallsiteContextGraph() =
    default;
241 CallsiteContextGraph(
    const CallsiteContextGraph &) =
    default;
242 CallsiteContextGraph(CallsiteContextGraph &&) =
    default;
// Main driver entry points: identify clones, then assign them to functions.
249 void identifyClones();
256 bool assignFunctions();
263 const CallsiteContextGraph &CCG) {
// Graph/DOT traits need access to internals for visualization.
268 friend struct GraphTraits<
269 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
270 friend struct DOTGraphTraits<
271 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
273 void exportToDot(std::string Label)
    const;
// FuncInfo: a (function, clone number) pair; clone 0 is the original.
276 struct FuncInfo final
277 :
    public std::pair<FuncTy *, unsigned > {
278 using Base = std::pair<FuncTy *, unsigned>;
279 FuncInfo(
    const Base &
    B) : Base(
    B) {}
280 FuncInfo(FuncTy *
    F =
    nullptr,
    unsigned CloneNo = 0) : Base(
    F, CloneNo) {}
281 explicit operator bool()
    const {
    return this->first !=
    nullptr; }
282 FuncTy *
    func()
    const {
    return this->first; }
283 unsigned cloneNo()
    const {
    return this->second; }
// CallInfo: a (call, clone number) pair identifying a callsite version.
287 struct CallInfo final :
    public std::pair<CallTy, unsigned > {
288 using Base = std::pair<CallTy, unsigned>;
289 CallInfo(
    const Base &
    B) : Base(
    B) {}
290 CallInfo(CallTy
    Call =
    nullptr,
    unsigned CloneNo = 0)
291 : Base(
    Call, CloneNo) {}
292 explicit operator bool()
    const {
    return (
    bool)this->first; }
293 CallTy call()
    const {
    return this->first; }
294 unsigned cloneNo()
    const {
    return this->second; }
295 void setCloneNo(
    unsigned N) { this->second =
    N; }
296 void print(raw_ostream &OS)
    const {
297 if (!
    operator bool()) {
303 OS <<
    "\t(clone " << cloneNo() <<
    ")";
309 friend raw_ostream &
    operator<<(raw_ostream &OS,
    const CallInfo &
    Call) {
// ContextNode state (the struct opener was lost in extraction):
// Recursive marks a stack frame seen more than once in a single context.
329 bool Recursive =
    false;
// Union of AllocationType bits over contexts reaching this node.
333 uint8_t AllocTypes = 0;
// Stack id (callsite node) or first context id (allocation node).
352 uint64_t OrigStackOrAllocId = 0;
// Edges to callees/callers; shared_ptr because both endpoints hold them.
356 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
360 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
// When a node has no callee edges, context info may be derived from the
// caller edges instead (presumably for leaf/alloc nodes — elided here).
364 bool useCallerEdgesForContextInfo()
    const {
369 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
// Collect the union of context ids over the relevant edge list.
381 DenseSet<uint32_t> getContextIds()
    const {
387 for (
    auto &
    Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
389 DenseSet<uint32_t> ContextIds;
392 CalleeEdges, useCallerEdgesForContextInfo()
394 : std::vector<std::shared_ptr<ContextEdge>>());
395 for (
    const auto &
    Edge : Edges)
// OR together the alloc-type bits of the relevant edges.
402 uint8_t computeAllocType()
    const {
404 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
405 uint8_t
    AllocType = (uint8_t)AllocationType::None;
407 CalleeEdges, useCallerEdgesForContextInfo()
409 : std::vector<std::shared_ptr<ContextEdge>>());
410 for (
    const auto &
    Edge : Edges) {
// True if no context ids reach this node via any relevant edge.
421 bool emptyContextIds()
    const {
423 CalleeEdges, useCallerEdgesForContextInfo()
425 : std::vector<std::shared_ptr<ContextEdge>>());
426 for (
    const auto &
    Edge : Edges) {
427 if (!
    Edge->getContextIds().empty())
// Clone bookkeeping: Clones on the original node, CloneOf on each clone.
434 std::vector<ContextNode *> Clones;
437 ContextNode *CloneOf =
    nullptr;
439 ContextNode(
    bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
441 ContextNode(
    bool IsAllocation, CallInfo
    C)
442 : IsAllocation(IsAllocation), Call(
    C) {}
// Register Clone under the original node (not under an existing clone).
444 void addClone(ContextNode *Clone) {
446 CloneOf->Clones.push_back(Clone);
447 Clone->CloneOf = CloneOf;
449 Clones.push_back(Clone);
451 Clone->CloneOf =
    this;
455 ContextNode *getOrigNode() {
462 unsigned int ContextId);
// Edge lookup/removal helpers (defined out of line below).
464 ContextEdge *findEdgeFromCallee(
    const ContextNode *Callee);
465 ContextEdge *findEdgeFromCaller(
    const ContextNode *Caller);
466 void eraseCalleeEdge(
    const ContextEdge *
    Edge);
467 void eraseCallerEdge(
    const ContextEdge *
    Edge);
469 void setCall(CallInfo
    C) { Call =
    C; }
471 bool hasCall()
    const {
    return (
    bool)Call.call(); }
473 void printCall(raw_ostream &OS)
    const { Call.print(OS); }
// A node is considered removed once its AllocTypes collapse to None.
477 bool isRemoved()
    const {
483 (AllocTypes == (uint8_t)AllocationType::None) ==
485 return AllocTypes == (uint8_t)AllocationType::None;
489 void print(raw_ostream &OS)
    const;
491 friend raw_ostream &
    operator<<(raw_ostream &OS,
    const ContextNode &Node) {
// ContextEdge state (struct opener elided): alloc-type bits, backedge flag,
// and the set of context ids flowing along this caller->callee edge.
505 uint8_t AllocTypes = 0;
513 bool IsBackedge =
    false;
516 DenseSet<uint32_t> ContextIds;
518 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t
    AllocType,
519 DenseSet<uint32_t> ContextIds)
520 : Callee(Callee), Caller(Caller), AllocTypes(
    AllocType),
521 ContextIds(std::
    move(ContextIds)) {}
523 DenseSet<uint32_t> &getContextIds() {
    return ContextIds; }
// clear() resets the edge to the removed state; isRemoved() checks it.
527 inline void clear() {
529 AllocTypes = (uint8_t)AllocationType::None;
537 inline bool isRemoved()
    const {
538 if (Callee || Caller)
543 assert(AllocTypes == (uint8_t)AllocationType::None);
544 assert(ContextIds.empty());
549 void print(raw_ostream &OS)
    const;
551 friend raw_ostream &
    operator<<(raw_ostream &OS,
    const ContextEdge &
    Edge) {
// Graph maintenance passes over None-typed edges.
559 void removeNoneTypeCalleeEdges(ContextNode *Node);
560 void removeNoneTypeCallerEdges(ContextNode *Node);
562 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,
563 DenseSet<const ContextNode *> &Visited);
// Map a CallStack of frames to the stack ids with context nodes.
568 template <
    class NodeT,
    class IteratorT>
569 std::vector<uint64_t>
570 getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);
// Graph construction: one node per allocation, then per-MIB stack nodes.
574 ContextNode *addAllocNode(CallInfo
    Call,
    const FuncTy *
    F);
577 template <
    class NodeT,
    class IteratorT>
578 void addStackNodesForMIB(ContextNode *AllocNode,
579 CallStack<NodeT, IteratorT> &StackContext,
580 CallStack<NodeT, IteratorT> &CallsiteContext,
587 void updateStackNodes();
591 void handleCallsitesWithMultipleTargets();
594 void markBackedges();
604 bool partitionCallsByCallee(
606 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
// Bookkeeping maps: calls-with-metadata per function, owning function per
// node, and the context ids selected for dot export.
610 MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
613 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
617 DenseSet<uint32_t> DotAllocContextIds;
620 using EdgeIter =
    typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
// Per-call matching info used while assigning stack nodes to calls.
625 struct CallContextInfo {
629 std::vector<uint64_t> StackIds;
634 DenseSet<uint32_t> ContextIds;
// Remove an edge from both endpoint lists; EI lets callers keep iterating.
643 void removeEdgeFromGraph(ContextEdge *
    Edge, EdgeIter *EI =
    nullptr,
644 bool CalleeIter =
    true);
652 void assignStackNodesPostOrder(
653 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
654 DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
655 DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
// Context-id duplication for diverging stack sequences, plus propagation of
// the new ids up the caller chains.
660 DenseSet<uint32_t> duplicateContextIds(
661 const DenseSet<uint32_t> &StackSequenceContextIds,
662 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
665 void propagateDuplicateContextIds(
666 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
671 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
673 DenseSet<uint32_t> RemainingContextIds);
// CRTP dispatch: forward to the derived graph's representation-specific
// implementations via static_cast.
678 uint64_t getStackId(uint64_t IdOrIndex)
    const {
679 return static_cast<const DerivedCCG *
    >(
    this)->getStackId(IdOrIndex);
689 calleesMatch(CallTy
    Call, EdgeIter &EI,
690 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);
694 const FuncTy *getCalleeFunc(CallTy
    Call) {
695 return static_cast<DerivedCCG *
    >(
    this)->getCalleeFunc(
    Call);
701 bool calleeMatchesFunc(
702 CallTy
    Call,
    const FuncTy *Func,
    const FuncTy *CallerFunc,
703 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
704 return static_cast<DerivedCCG *
    >(
    this)->calleeMatchesFunc(
705 Call, Func, CallerFunc, FoundCalleeChain);
709 bool sameCallee(CallTy Call1, CallTy Call2) {
710 return static_cast<DerivedCCG *
    >(
    this)->sameCallee(Call1, Call2);
715 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
    Call) {
716 return static_cast<DerivedCCG *
    >(
    this)->getStackIdsWithContextNodesForCall(
721 uint64_t getLastStackId(CallTy
    Call) {
722 return static_cast<DerivedCCG *
    >(
    this)->getLastStackId(
    Call);
// Bump the matching statistic, then let the derived class rewrite the call.
727 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
728 static_cast<DerivedCCG *
    >(
    this)->updateAllocationCall(
    Call,
    AllocType);
733 return static_cast<const DerivedCCG *
    >(
    this)->getAllocationCallType(
    Call);
738 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
739 static_cast<DerivedCCG *
    >(
    this)->updateCall(CallerCall, CalleeFunc);
745 FuncInfo cloneFunctionForCallsite(
746 FuncInfo &Func, CallInfo &
    Call, DenseMap<CallInfo, CallInfo> &CallMap,
747 std::vector<CallInfo> &CallsWithMetadataInFunc,
    unsigned CloneNo) {
748 return static_cast<DerivedCCG *
    >(
    this)->cloneFunctionForCallsite(
749 Func,
    Call, CallMap, CallsWithMetadataInFunc, CloneNo);
754 std::string getLabel(
    const FuncTy *Func,
    const CallTy
    Call,
755 unsigned CloneNo)
    const {
756 return static_cast<const DerivedCCG *
    >(
    this)->getLabel(Func,
    Call, CloneNo);
// Node factory: ownership lives in NodeOwner; NodeId is 1-based (size after
// push_back).
760 ContextNode *createNewNode(
    bool IsAllocation,
    const FuncTy *
    F =
    nullptr,
761 CallInfo
    C = CallInfo()) {
762 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
    C));
763 auto *NewNode = NodeOwner.back().get();
765 NodeToCallingFunc[NewNode] =
    F;
766 NewNode->NodeId = NodeOwner.size();
// Node lookup by call or stack id (defined out of line below).
771 ContextNode *getNodeForInst(
    const CallInfo &
    C);
772 ContextNode *getNodeForAlloc(
    const CallInfo &
    C);
773 ContextNode *getNodeForStackId(uint64_t StackId);
// Alloc-type computation/intersection over context-id sets.
777 uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds)
    const;
782 uint8_t intersectAllocTypesImpl(
    const DenseSet<uint32_t> &Node1Ids,
783 const DenseSet<uint32_t> &Node2Ids)
    const;
787 uint8_t intersectAllocTypes(
    const DenseSet<uint32_t> &Node1Ids,
788 const DenseSet<uint32_t> &Node2Ids)
    const;
// Edge-moving primitives used during cloning and merging.
795 moveEdgeToNewCalleeClone(
    const std::shared_ptr<ContextEdge> &
    Edge,
796 DenseSet<uint32_t> ContextIdsToMove = {});
802 void moveEdgeToExistingCalleeClone(
    const std::shared_ptr<ContextEdge> &
    Edge,
803 ContextNode *NewCallee,
804 bool NewClone =
    false,
805 DenseSet<uint32_t> ContextIdsToMove = {});
812 void moveCalleeEdgeToNewCaller(
    const std::shared_ptr<ContextEdge> &
    Edge,
813 ContextNode *NewCaller);
// Recursive DFS workers for backedge marking and clone merging.
816 void markBackedges(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
817 DenseSet<const ContextNode *> &CurrentStack);
821 mergeClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
822 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);
824 void mergeNodeCalleeClones(
825 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
826 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode);
829 void findOtherCallersToShareMerge(
830 ContextNode *Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
831 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
832 DenseSet<ContextNode *> &OtherCallersToShareMerge);
838 void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
839 const DenseSet<uint32_t> &AllocContextIds);
// Global maps: per-context allocation type/size info, stack-id and call to
// node lookups, and node ownership. LastContextId is the id allocator.
842 DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
848 DenseMap<uint32_t, std::vector<ContextTotalSize>> ContextIdToContextSizeInfos;
854 DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
857 MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
858 MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
861 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
867 unsigned int LastContextId = 0;
// Free-standing aliases re-exporting the graph's nested types so helper
// templates below can name them directly (the `using X =` left-hand sides
// were lost in extraction; only the right-hand sides remain).
870template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
872 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
873template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
875 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
876template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
878 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
879template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
881 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
// IR-level instantiation of the CRTP graph: FuncTy=Function, CallTy is
// presumably Instruction* (the base template argument line is elided).
// Implements the representation-specific hooks the base dispatches to.
884class ModuleCallsiteContextGraph
885 :
    public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
888 ModuleCallsiteContextGraph(
890 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
893 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
    Function,
// Identity for IR (stack ids are stored directly, not indexed).
896 uint64_t getStackId(uint64_t IdOrIndex)
    const;
// Match a profiled callee against the IR call target, possibly discovering
// intermediate tail-call frames (FoundCalleeChain).
898 bool calleeMatchesFunc(
899 Instruction *
    Call,
    const Function *Func,
    const Function *CallerFunc,
900 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
901 bool sameCallee(Instruction *Call1, Instruction *Call2);
902 bool findProfiledCalleeThroughTailCalls(
903 const Function *ProfiledCallee,
    Value *CurCallee,
    unsigned Depth,
904 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
905 bool &FoundMultipleCalleeChains);
906 uint64_t getLastStackId(Instruction *
    Call);
907 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
    Call);
910 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
// Function cloning for a callsite clone number (IR-level implementation).
911 CallsiteContextGraph<ModuleCallsiteContextGraph,
    Function,
913 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
    Call,
914 DenseMap<CallInfo, CallInfo> &CallMap,
915 std::vector<CallInfo> &CallsWithMetadataInFunc,
917 std::string getLabel(
    const Function *Func,
    const Instruction *
    Call,
918 unsigned CloneNo)
    const;
// Callback returning the remark emitter for a given function.
921 llvm::function_ref<OptimizationRemarkEmitter &(
    Function *)> OREGetter;
// Summary-index "call" handle: a PointerUnion over the two summary record
// kinds (CallsiteInfo for callsites, AllocInfo for allocations), with
// implicit constructors so either pointer converts to an IndexCall.
927struct IndexCall :
    public PointerUnion<CallsiteInfo *, AllocInfo *> {
928 IndexCall() : PointerUnion() {}
929 IndexCall(std::nullptr_t) : IndexCall() {}
930 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
931 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
932 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
// operator-> yields the IndexCall itself so call-like syntax works.
934 IndexCall *operator->() {
    return this; }
936 void print(raw_ostream &OS)
    const {
937 PointerUnion<CallsiteInfo *, AllocInfo *>
    Base = *
    this;
// Summary-index instantiation of the CRTP graph, used for whole-program
// (ThinLTO index) analysis: FuncTy=FunctionSummary, CallTy=IndexCall.
962class IndexCallsiteContextGraph
963 :
    public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
966 IndexCallsiteContextGraph(
967 ModuleSummaryIndex &Index,
// Destructor flushes synthesized callsite records (created for tail-call
// chains) back into their owning function summaries.
971 ~IndexCallsiteContextGraph() {
976 for (
    auto &
    I : FunctionCalleesToSynthesizedCallsiteInfos) {
978 for (
    auto &Callsite :
    I.second)
979 FS->addCallsite(*Callsite.second);
984 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
// Index stores stack-id *indices*; this maps an index back to the id.
987 uint64_t getStackId(uint64_t IdOrIndex)
    const;
988 const FunctionSummary *getCalleeFunc(IndexCall &
    Call);
989 bool calleeMatchesFunc(
990 IndexCall &
    Call,
    const FunctionSummary *Func,
991 const FunctionSummary *CallerFunc,
992 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
993 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
994 bool findProfiledCalleeThroughTailCalls(
995 ValueInfo ProfiledCallee, ValueInfo CurCallee,
    unsigned Depth,
996 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
997 bool &FoundMultipleCalleeChains);
998 uint64_t getLastStackId(IndexCall &
    Call);
999 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
    Call);
1002 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1003 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1004 IndexCall>::FuncInfo
1005 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
    Call,
1006 DenseMap<CallInfo, CallInfo> &CallMap,
1007 std::vector<CallInfo> &CallsWithMetadataInFunc,
1009 std::string getLabel(
    const FunctionSummary *Func,
    const IndexCall &
    Call,
1010 unsigned CloneNo)
    const;
// Reverse map from summary to its ValueInfo, plus the owning index.
1014 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1016 const ModuleSummaryIndex &Index;
// Callsite records synthesized for tail-call frames, keyed by function then
// callee; flushed into the index by the destructor above.
1024 std::unordered_map<FunctionSummary *,
1025 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1026 FunctionCalleesToSynthesizedCallsiteInfos;
// DenseMapInfo specializations (specialization headers elided by extraction):
// presumably for CallInfo (pair<IndexCall, unsigned>) and IndexCall itself,
// delegating to the underlying pair/PointerUnion traits.
1038 :
    public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1041 :
    public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
// Compare a saved per-edge alloc-type vector against the current callee
// edges element-wise; None on either side matches anything, otherwise both
// must reduce to the same type via allocTypeToUse.
1063template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1064bool allocTypesMatch(
1065 const std::vector<uint8_t> &InAllocTypes,
1066 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
// Caller guarantees a 1:1 correspondence between the two sequences.
1070 assert(InAllocTypes.size() == Edges.size());
1072 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1074 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
// None is treated as a wildcard on either side.
1078 if (l == (uint8_t)AllocationType::None ||
1079 r->AllocTypes == (uint8_t)AllocationType::None)
1081 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
// Variant of allocTypesMatch for an existing clone: the clone's callee edges
// may be a subset/reordering of the original node's, so build a callee->type
// map from the clone and compare against the original's edge order.
1090template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1091bool allocTypesMatchClone(
1092 const std::vector<uint8_t> &InAllocTypes,
1093 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
// InAllocTypes is aligned with the *original* node's callee edges.
1094 const ContextNode<DerivedCCG, FuncTy, CallTy> *
    Node = Clone->CloneOf;
1098 assert(InAllocTypes.size() ==
    Node->CalleeEdges.size());
// Index the clone's callee edges by callee node.
1102 for (
    const auto &
    E : Clone->CalleeEdges) {
1104 EdgeCalleeMap[
    E->Callee] =
    E->AllocTypes;
// Walk the original's callee edges in order; a callee missing from the
// clone, or a differing effective type, is handled in the (elided) bodies.
1108 for (
    unsigned I = 0;
    I <
    Node->CalleeEdges.size();
    I++) {
1109 auto Iter = EdgeCalleeMap.
    find(
    Node->CalleeEdges[
    I]->Callee);
1111 if (Iter == EdgeCalleeMap.
    end())
1119 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
    I]))
// Look up the node for a call: allocation map first, then the
// non-allocation (callsite) map. Lines between the two lookups (the early
// return on a found alloc node) were elided by extraction.
1127template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1128typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1129CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1130 const CallInfo &
    C) {
1131 ContextNode *
    Node = getNodeForAlloc(
    C);
1135 return NonAllocationCallToContextNodeMap.lookup(
    C);
// Look up the allocation node for a call; MapVector::lookup returns nullptr
// (default value) when absent.
1138template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1139typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1140CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1141 const CallInfo &
    C) {
1142 return AllocationCallToContextNodeMap.lookup(
    C);
// Look up the context node for a stack id; the not-found return (presumably
// nullptr) was elided by extraction.
1145template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1146typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1147CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1149 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1150 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1151 return StackEntryNode->second;
// Add ContextId to an existing edge to Caller if one exists; otherwise
// create a fresh caller edge seeded with just this context id.
1155template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1156void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1158 unsigned int ContextId) {
// Reuse an existing edge to this caller when present.
1159 for (
    auto &
    Edge : CallerEdges) {
1160 if (
    Edge->Caller == Caller) {
1162 Edge->getContextIds().insert(ContextId);
// No existing edge: create one (the symmetric push onto the caller's
// CalleeEdges list is in lines elided by extraction — TODO confirm).
1166 std::shared_ptr<ContextEdge>
    Edge = std::make_shared<ContextEdge>(
1167 this, Caller, (uint8_t)
    AllocType, DenseSet<uint32_t>({ContextId}));
1168 CallerEdges.push_back(
    Edge);
// Remove Edge from both endpoints' edge lists. When the caller is iterating
// one of the lists, EI is advanced via erase() on that side; CalleeIter says
// which side the iterator belongs to.
1172template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1173void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1174 ContextEdge *
    Edge, EdgeIter *EI,
    bool CalleeIter) {
// Capture pre-removal sizes so the asserts below can check shrinkage.
1190 auto CalleeCallerCount =
    Callee->CallerEdges.size();
1191 auto CallerCalleeCount =
    Caller->CalleeEdges.size();
1196 }
    else if (CalleeIter) {
1198 *EI =
    Caller->CalleeEdges.erase(*EI);
1201 *EI =
    Callee->CallerEdges.erase(*EI);
// Both endpoint lists must have lost at least one entry.
1203 assert(
    Callee->CallerEdges.size() < CalleeCallerCount);
1204 assert(
    Caller->CalleeEdges.size() < CallerCalleeCount);
// Strip callee edges whose alloc type collapsed to None; iteration uses the
// EI-advancing form of removeEdgeFromGraph so erasure is safe mid-loop.
1207template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1208void CallsiteContextGraph<
1209 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1210 for (
    auto EI =
    Node->CalleeEdges.begin(); EI !=
    Node->CalleeEdges.end();) {
1212 if (
    Edge->AllocTypes == (uint8_t)AllocationType::None) {
1214 removeEdgeFromGraph(
    Edge.get(), &EI,
    true);
// Caller-side counterpart of removeNoneTypeCalleeEdges: detach the edge from
// the caller's callee list, then erase it from this node's caller list.
1220template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1221void CallsiteContextGraph<
1222 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1223 for (
    auto EI =
    Node->CallerEdges.begin(); EI !=
    Node->CallerEdges.end();) {
1225 if (
    Edge->AllocTypes == (uint8_t)AllocationType::None) {
1227 Edge->Caller->eraseCalleeEdge(
    Edge.get());
1228 EI =
    Node->CallerEdges.erase(EI);
// Linear scan of this node's callee edges for the one targeting Callee
// (returns it; the not-found return was elided by extraction).
1234template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1235typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1236CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1237 findEdgeFromCallee(
    const ContextNode *Callee) {
1238 for (
    const auto &
    Edge : CalleeEdges)
1239 if (
    Edge->Callee == Callee)
// Linear scan of this node's caller edges for the one coming from Caller.
1244template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1245typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1246CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1247 findEdgeFromCaller(
    const ContextNode *Caller) {
1248 for (
    const auto &
    Edge : CallerEdges)
1249 if (
    Edge->Caller == Caller)
// Erase the shared_ptr entry in CalleeEdges that owns Edge. The find call's
// opener (presumably llvm::find_if assigning EI) was elided by extraction.
1254template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1255void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1256 eraseCalleeEdge(
    const ContextEdge *
    Edge) {
1258 CalleeEdges, [
    Edge](
    const std::shared_ptr<ContextEdge> &CalleeEdge) {
1259 return CalleeEdge.get() ==
    Edge;
// The edge must be present — callers only erase edges they hold.
1261 assert(EI != CalleeEdges.end());
1262 CalleeEdges.erase(EI);
// Symmetric counterpart of eraseCalleeEdge for the CallerEdges list.
1265template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1266void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1267 eraseCallerEdge(
    const ContextEdge *
    Edge) {
1269 CallerEdges, [
    Edge](
    const std::shared_ptr<ContextEdge> &CallerEdge) {
1270 return CallerEdge.get() ==
    Edge;
1272 assert(EI != CallerEdges.end());
1273 CallerEdges.erase(EI);
// OR together the allocation types of all given context ids; the early-exit
// once both Cold and NotCold bits are set (BothTypes) was elided.
1276template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1277uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1278 DenseSet<uint32_t> &ContextIds)
    const {
1280 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1281 uint8_t
    AllocType = (uint8_t)AllocationType::None;
1282 for (
    auto Id : ContextIds) {
// .at() asserts every id has a recorded allocation type.
1283 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
// OR the allocation types of ids present in BOTH sets (Node1Ids is iterated,
// Node2Ids is the membership filter — callers pass the smaller set first).
1291template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1293CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1294 const DenseSet<uint32_t> &Node1Ids,
1295 const DenseSet<uint32_t> &Node2Ids)
    const {
1297 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1298 uint8_t
    AllocType = (uint8_t)AllocationType::None;
1299 for (
    auto Id : Node1Ids) {
// Skip ids not in the intersection.
1300 if (!Node2Ids.
    count(Id))
1302 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
// Dispatch to intersectAllocTypesImpl with the smaller set first, so the
// iterated set is minimal and the larger set is only probed.
1310template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1311uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1312 const DenseSet<uint32_t> &Node1Ids,
1313 const DenseSet<uint32_t> &Node2Ids)
    const {
1314 if (Node1Ids.
    size() < Node2Ids.
    size())
1315 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1317 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
// Create and register the allocation node for Call in function F. The node
// records the current LastContextId and starts with AllocTypes = None;
// addStackNodesForMIB fills in per-context types afterwards.
1320template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1321typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1322CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1323 CallInfo
    Call,
    const FuncTy *
    F) {
1325 ContextNode *AllocNode = createNewNode(
    true,
    F,
    Call);
1326 AllocationCallToContextNodeMap[
    Call] = AllocNode;
// For allocation nodes OrigStackOrAllocId holds a context id watermark.
1328 AllocNode->OrigStackOrAllocId = LastContextId;
1331 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
// Assign a fresh context id for one MIB (memory info block), record its
// allocation type and size info, then walk the MIB's stack frames creating
// or reusing a ContextNode per stack id and threading caller edges carrying
// the new context id from the allocation outward.
1347template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1348template <
class NodeT,
class IteratorT>
1349void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1350 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
// New context id = ++LastContextId; remember its allocation type.
1358 ContextIdToAllocationType[++LastContextId] =
    AllocType;
1360 if (!ContextSizeInfo.
    empty()) {
1361 auto &
    Entry = ContextIdToContextSizeInfos[LastContextId];
// Fold this context's type into the allocation node.
1366 AllocNode->AllocTypes |= (uint8_t)
    AllocType;
1371 ContextNode *PrevNode = AllocNode;
// StackIdSet detects repeated frames (direct recursion) in this context.
1375 SmallSet<uint64_t, 8> StackIdSet;
1378 ContextIter != StackContext.
    end(); ++ContextIter) {
1379 auto StackId = getStackId(*ContextIter);
1380 ContextNode *StackNode = getNodeForStackId(StackId);
// First time we see this stack id: create its node.
1382 StackNode = createNewNode(
    false);
1383 StackEntryIdToContextNodeMap[StackId] = StackNode;
1384 StackNode->OrigStackOrAllocId = StackId;
// Repeated frame within one context marks the node recursive.
1391 StackNode->Recursive =
    true;
1393 StackNode->AllocTypes |= (uint8_t)
    AllocType;
1394 PrevNode->addOrUpdateCallerEdge(StackNode,
    AllocType, LastContextId);
1395 PrevNode = StackNode;
// Mint a fresh context id for every id in StackSequenceContextIds, recording
// old->new in OldToNewContextIds, copying the allocation type, and keeping
// the dot-export id set in sync. Returns the set of new ids.
1399template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1401CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1402 const DenseSet<uint32_t> &StackSequenceContextIds,
1403 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1404 DenseSet<uint32_t> NewContextIds;
1405 for (
    auto OldId : StackSequenceContextIds) {
1406 NewContextIds.
    insert(++LastContextId);
1407 OldToNewContextIds[OldId].insert(LastContextId);
// Every duplicated id must already have a known allocation type.
1408 assert(ContextIdToAllocationType.count(OldId));
1410 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
// If the old id was selected for dot export, export the new one too.
1411 if (DotAllocContextIds.
    contains(OldId))
1412 DotAllocContextIds.
    insert(LastContextId);
1414 return NewContextIds;
// Push newly duplicated context ids up the caller chains: starting from each
// allocation node, any caller edge whose id set intersects the old ids gains
// the corresponding new ids, recursing while additions keep happening.
1417template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1418void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1419 propagateDuplicateContextIds(
1420 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
// Map an edge's current ids to the set of new ids they imply.
1422 auto GetNewIds = [&OldToNewContextIds](
    const DenseSet<uint32_t> &ContextIds) {
1423 DenseSet<uint32_t> NewIds;
1424 for (
    auto Id : ContextIds)
1425 if (
    auto NewId = OldToNewContextIds.find(Id);
1426 NewId != OldToNewContextIds.end())
// Recursive worker (self-referential lambda passed to itself).
1432 auto UpdateCallers = [&](ContextNode *
    Node,
1433 DenseSet<const ContextEdge *> &Visited,
1434 auto &&UpdateCallers) ->
    void {
1435 for (
    const auto &
    Edge :
    Node->CallerEdges) {
1439 ContextNode *NextNode =
    Edge->Caller;
1440 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
    Edge->getContextIds());
// Only recurse if this edge actually gained ids.
1443 if (!NewIdsToAdd.
    empty()) {
1444 Edge->getContextIds().insert_range(NewIdsToAdd);
1445 UpdateCallers(NextNode, Visited, UpdateCallers);
// Seed the walk from every allocation node.
1450 DenseSet<const ContextEdge *> Visited;
1451 for (
    auto &Entry : AllocationCallToContextNodeMap) {
1453 UpdateCallers(Node, Visited, UpdateCallers);
// Rewire edges from OrigNode onto NewNode for the contexts in
// RemainingContextIds, either towards callees or towards callers. Handles
// recursive contexts (ids appearing on multiple orig edges) specially so a
// single id can be split across edges, and prunes orig edges left empty.
1457template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1458void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1459 ContextNode *NewNode, ContextNode *OrigNode,
    bool TowardsCallee,
1462 DenseSet<uint32_t> RemainingContextIds) {
// Pick which edge list we are splitting.
1464 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
// An id seen on more than one orig edge marks a recursive context.
1465 DenseSet<uint32_t> RecursiveContextIds;
1466 DenseSet<uint32_t> AllCallerContextIds;
1471 for (
    auto &CE : OrigEdges) {
1472 AllCallerContextIds.
    reserve(
    CE->getContextIds().size());
1473 for (
    auto Id :
    CE->getContextIds())
1474 if (!AllCallerContextIds.
    insert(Id).second)
1475 RecursiveContextIds.
    insert(Id);
1479 for (
    auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
// Split this edge's ids into those moving to NewNode and the rest.
1481 DenseSet<uint32_t> NewEdgeContextIds;
1482 DenseSet<uint32_t> NotFoundContextIds;
1486 set_subtract(
    Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1487 NotFoundContextIds);
// Fast path: no recursion, the not-found ids become the new remainder.
1490 if (RecursiveContextIds.
    empty()) {
1493 RemainingContextIds.
    swap(NotFoundContextIds);
// Recursive case: only retire non-recursive ids from the remainder, so a
// recursive id can still match later edges.
1503 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1505 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1508 if (NewEdgeContextIds.
    empty()) {
// Create the edge on the appropriate side and register it at both ends.
1512 if (TowardsCallee) {
1513 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1514 auto NewEdge = std::make_shared<ContextEdge>(
1515 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1516 NewNode->CalleeEdges.push_back(NewEdge);
1517 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1519 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1520 auto NewEdge = std::make_shared<ContextEdge>(
1521 NewNode,
    Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1522 NewNode->CallerEdges.push_back(NewEdge);
1523 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
// Original edge drained of all its ids: remove it from the graph.
1526 if (
    Edge->getContextIds().empty()) {
1527 removeEdgeFromGraph(
    Edge.get(), &EI, TowardsCallee);
// Sanity helper for -memprof-verify-ccg style checks (function name elided
// by extraction — presumably checkEdge): a live edge must carry context ids.
1534template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1536 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1540 assert(!Edge->ContextIds.empty());
// Node verification helper (name elided — presumably checkNode): skips
// removed nodes, then checks that the node's context ids equal the union of
// its caller-edge ids and of its callee-edge ids, optionally validating each
// edge (CheckEdges) along the way.
1543template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1545 bool CheckEdges =
    true) {
1546 if (
    Node->isRemoved())
1550 auto NodeContextIds =
    Node->getContextIds();
// Caller-side union check.
1554 if (
    Node->CallerEdges.size()) {
1556 Node->CallerEdges.front()->ContextIds);
1560 set_union(CallerEdgeContextIds, Edge->ContextIds);
1567 NodeContextIds == CallerEdgeContextIds ||
// Callee-side union check.
1570 if (
    Node->CalleeEdges.size()) {
1572 Node->CalleeEdges.front()->ContextIds);
1576 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1582 NodeContextIds == CalleeEdgeContextIds);
1591 for (
    const auto &
    E :
    Node->CalleeEdges)
// Post-order walk over the context graph that assigns matched calls to stack
// nodes. For each node it first recurses through the caller edges, then (for
// non-allocation nodes that have matching calls recorded in
// StackIdToMatchingCalls) it either reuses the existing node (single call,
// single stack id) or creates a new node covering the call's stack id
// sequence, moving the saved context ids onto the new node's edges.
// NOTE(review): many interior lines are elided in this listing; behavior
// notes below are limited to what the visible statements show.
1597template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1598void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1599 assignStackNodesPostOrder(
1600 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
1601 DenseMap<uint64_t, std::vector<CallContextInfo>>
1602 &StackIdToMatchingCalls,
1603 DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
// Iterate over a copy of the caller edges, since recursion can mutate the
// underlying edge list (removed edges are checked and skipped).
1611 auto CallerEdges =
Node->CallerEdges;
1612 for (
auto &
Edge : CallerEdges) {
1614 if (
Edge->isRemoved()) {
1618 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1619 CallToMatchingCall);
// Only non-allocation nodes with recorded matching calls are processed.
1628 if (
Node->IsAllocation ||
1629 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1632 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
// Fast path: a single matching call with a single stack id can reuse this
// node directly (unless the node is recursive).
1636 if (Calls.size() == 1) {
1637 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1638 if (Ids.size() == 1) {
1639 assert(SavedContextIds.empty());
1641 assert(Node == getNodeForStackId(Ids[0]));
1642 if (
Node->Recursive)
1645 NonAllocationCallToContextNodeMap[
Call] =
Node;
1654 uint64_t LastId =
Node->OrigStackOrAllocId;
1655 ContextNode *LastNode = getNodeForStackId(LastId);
1658 assert(LastNode == Node);
1660 ContextNode *LastNode =
Node;
// Snapshot of the last node's context ids, consumed as calls are matched.
1665 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1667 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1668 bool CreatedNode =
false;
1669 for (
unsigned I = 0;
I < Calls.size();
1670 I++, PrevIterCreatedNode = CreatedNode) {
1671 CreatedNode =
false;
1672 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
// Empty SavedContextIds means this call was folded into a previously
// processed matching call; record it on that node's MatchingCalls list.
1675 if (SavedContextIds.empty()) {
1682 auto MatchingCall = CallToMatchingCall[
Call];
1683 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1687 assert(
I > 0 && !PrevIterCreatedNode);
1690 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1695 assert(LastId == Ids.back());
1704 ContextNode *PrevNode = LastNode;
// Walk the stack id sequence bottom-up, validating the chain of existing
// stack nodes and caller edges.
1708 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1710 ContextNode *CurNode = getNodeForStackId(Id);
1714 assert(!CurNode->Recursive);
1716 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1728 if (SavedContextIds.empty()) {
// Create the replacement node for this call and splice it into the graph
// between the first and last stack nodes of the sequence.
1737 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1738 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1740 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1742 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1748 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1753 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
// Strip the moved context ids from the old intermediate edges, removing
// edges that become empty, and recompute each node's alloc type.
1758 for (
auto Id : Ids) {
1759 ContextNode *CurNode = getNodeForStackId(Id);
1766 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1773 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1774 if (PrevEdge->getContextIds().empty())
1775 removeEdgeFromGraph(PrevEdge);
1780 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1781 ? (uint8_t)AllocationType::None
1782 : CurNode->computeAllocType();
1787 for (
auto Id : Ids) {
1788 ContextNode *CurNode = getNodeForStackId(Id);
// Matches each non-allocation call with memprof metadata to the sequence of
// stack nodes implied by its callsite metadata, grouping calls by the last
// stack id in their sequence, then kicks off the post-order assignment pass.
// NOTE(review): many interior lines are elided in this listing.
1797template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1798void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// Bucket calls by the last stack id in their context-node-backed sequence.
1806 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
1807 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1808 for (
auto &
Call : CallsWithMetadata) {
// Allocation calls already have nodes; only callsite calls are bucketed.
1810 if (AllocationCallToContextNodeMap.count(
Call))
1812 auto StackIdsWithContextNodes =
1813 getStackIdsWithContextNodesForCall(
Call.call());
1816 if (StackIdsWithContextNodes.empty())
1820 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1821 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
1831 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
1835 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
1836 for (
auto &It : StackIdToMatchingCalls) {
1837 auto &Calls = It.getSecond();
// Single call with a single id needs no further matching work here.
1839 if (Calls.size() == 1) {
1840 auto &Ids = Calls[0].StackIds;
1841 if (Ids.size() == 1)
// Sort calls deterministically: longer stack sequences first, then by the
// sequence itself, then by original function order (via FuncToIndex).
1854 DenseMap<const FuncTy *, unsigned> FuncToIndex;
1855 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
1856 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
1859 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1860 return A.StackIds.size() >
B.StackIds.size() ||
1861 (
A.StackIds.size() ==
B.StackIds.size() &&
1862 (
A.StackIds <
B.StackIds ||
1863 (
A.StackIds ==
B.StackIds &&
1864 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
1870 uint64_t LastId = It.getFirst();
1871 ContextNode *LastNode = getNodeForStackId(LastId);
1875 if (LastNode->Recursive)
1880 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1888 DenseSet<const FuncTy *> MatchingIdsFuncSet;
1891 for (
unsigned I = 0;
I < Calls.size();
I++) {
1892 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1893 assert(SavedContextIds.empty());
1894 assert(LastId == Ids.back());
// Reset the per-sequence function set when the stack id sequence changes.
1899 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1900 MatchingIdsFuncSet.
clear();
// Intersect context ids along the stack sequence, walking bottom-up.
1907 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
1909 ContextNode *PrevNode = LastNode;
1910 ContextNode *CurNode = LastNode;
1915 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1917 CurNode = getNodeForStackId(Id);
1921 if (CurNode->Recursive) {
1926 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1947 if (StackSequenceContextIds.
empty()) {
// If this call's sequence ends before the node's last id, remove ids
// flowing out through LastNode's caller edges (they belong elsewhere).
1960 if (Ids.back() != getLastStackId(
Call)) {
1961 for (
const auto &PE : LastNode->CallerEdges) {
1962 set_subtract(StackSequenceContextIds, PE->getContextIds());
1963 if (StackSequenceContextIds.
empty())
1967 if (StackSequenceContextIds.
empty())
1979 MatchingIdsFuncSet.
insert(Func);
// Look ahead for other calls with the same sequence: same-function calls
// are folded via CallToMatchingCall; different functions force context id
// duplication so each function gets its own id set.
1986 bool DuplicateContextIds =
false;
1987 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1988 auto &CallCtxInfo = Calls[J];
1989 auto &NextIds = CallCtxInfo.StackIds;
1992 auto *NextFunc = CallCtxInfo.Func;
1993 if (NextFunc != Func) {
1996 DuplicateContextIds =
true;
1999 auto &NextCall = CallCtxInfo.Call;
2000 CallToMatchingCall[NextCall] =
Call;
2011 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2012 StackSequenceContextIds.
size());
2015 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2016 : StackSequenceContextIds;
2017 assert(!SavedContextIds.empty());
2019 if (!DuplicateContextIds) {
// Non-duplicated ids are consumed from the last node's remaining set.
2023 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2024 if (LastNodeContextIds.
empty())
2031 propagateDuplicateContextIds(OldToNewContextIds);
// Finally assign the matched calls to nodes via a post-order traversal
// rooted at each allocation node.
2041 DenseSet<const ContextNode *> Visited;
2042 for (
auto &Entry : AllocationCallToContextNodeMap)
2043 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2044 CallToMatchingCall);
// Returns the last (outermost visible) stack id in the call's !callsite
// metadata context, for the IR (module) flavor of the graph.
2049uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2050 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2052 return CallsiteContext.
back();
// Returns the last stack id of the callsite context, for the summary-index
// flavor: the stored value is an index that must be mapped through the
// module summary index's stack id table.
2055uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2057 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2060 return Index.getStackIdAtIndex(CallsiteContext.
back());
// Fragment: parses the numeric suffix after the final '.' in a function's
// name as a base-10 clone number. NOTE(review): the enclosing function's
// signature (original lines before 2082) is elided from this listing —
// presumably a memprof-clone-name helper; confirm against the full source.
2082 auto Pos =
F.getName().find_last_of(
'.');
2085 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
// Builds a human-readable label for a call (used in debug printing / dot
// export), module flavor. NOTE(review): the body (original lines after 2093)
// is elided from this listing.
2091std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2092 const Instruction *
Call,
2093 unsigned CloneNo)
const {
// Builds a human-readable label for a call in the summary-index flavor,
// using the function summary's ValueInfo (via FSToVIMap) for the caller
// name; allocations are labeled "-> alloc", callsites name their clone's
// callee.
2099std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2100 const IndexCall &
Call,
2101 unsigned CloneNo)
const {
2102 auto VI = FSToVIMap.find(Func);
2103 assert(VI != FSToVIMap.end());
2106 return CallerName +
" -> alloc";
2109 return CallerName +
" -> " +
2111 Callsite->Clones[CloneNo]);
// Returns the stack ids from the call's !callsite metadata that currently
// have context nodes in the graph (module flavor); delegates to the shared
// template helper.
2115std::vector<uint64_t>
2116ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2117 Instruction *
Call) {
2118 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2120 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
// Summary-index counterpart of the above: builds the callsite context from
// the CallsiteInfo record and delegates to the shared template helper.
2124std::vector<uint64_t>
2125IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2127 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2129 return getStackIdsWithContextNodes<CallsiteInfo,
2130 SmallVector<unsigned>::const_iterator>(
// Shared helper: walks the callsite context leaf-to-root and collects each
// stack id that has a context node, stopping (per the visible structure) at
// the first id without one.
2134template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2135template <
class NodeT,
class IteratorT>
2136std::vector<uint64_t>
2137CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2138 CallStack<NodeT, IteratorT> &CallsiteContext) {
2139 std::vector<uint64_t> StackIds;
2140 for (
auto IdOrIndex : CallsiteContext) {
// Map the raw metadata value to a stack id (index flavor maps indices).
2141 auto StackId = getStackId(IdOrIndex);
2142 ContextNode *
Node = getNodeForStackId(StackId);
2145 StackIds.push_back(StackId);
// Module (regular LTO / pre-link) graph constructor: scans every instruction
// for !memprof / !callsite metadata, creates allocation nodes plus their MIB
// stack nodes, records callsite-only calls, and then updates/links the stack
// node chains. The consumed !memprof and !callsite metadata is removed.
// NOTE(review): interior lines are elided in this listing.
2150ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2152 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2153 :
Mod(
M), OREGetter(OREGetter) {
2155 std::vector<CallInfo> CallsWithMetadata;
2156 for (
auto &BB :
F) {
2157 for (
auto &
I : BB) {
// Allocation calls carry !memprof (one MIB operand per allocation context).
2160 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2161 CallsWithMetadata.push_back(&
I);
2162 auto *AllocNode = addAllocNode(&
I, &
F);
2163 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2167 for (
auto &MDOp : MemProfMD->operands()) {
// Optional per-context total size info (operands beyond the first two).
2169 std::vector<ContextTotalSize> ContextSizeInfo;
2171 if (MIBMD->getNumOperands() > 2) {
2172 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2173 MDNode *ContextSizePair =
2182 ContextSizeInfo.push_back({FullStackId, TotalSize});
2188 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2189 AllocNode, StackContext, CallsiteContext,
2195 DotAllocContextIds = AllocNode->getContextIds();
// Metadata is consumed here; cloning decisions are carried by the graph.
2199 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2200 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
// Non-allocation calls with only !callsite metadata are recorded for the
// later stack-node matching pass.
2203 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2204 CallsWithMetadata.push_back(&
I);
2208 if (!CallsWithMetadata.empty())
2209 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2213 dbgs() <<
"CCG before updating call stack chains:\n";
2218 exportToDot(
"prestackupdate");
2223 exportToDot(
"poststackupdate");
2225 handleCallsitesWithMultipleTargets();
// Strip remaining !callsite metadata from all recorded calls.
2230 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2231 for (
auto &
Call : FuncEntry.second)
2232 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// Summary-index (ThinLTO whole-program) graph constructor: iterates all
// prevailing function summaries, creating allocation nodes from AllocInfo
// MIB records and callsite entries from CallsiteInfo records, then performs
// the same stack-chain update as the module flavor.
// NOTE(review): interior lines are elided in this listing.
2235IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2239 : Index(Index), isPrevailing(isPrevailing) {
2240 for (
auto &
I : Index) {
2241 auto VI = Index.getValueInfo(
I);
2242 for (
auto &S : VI.getSummaryList()) {
// Only prevailing copies contribute to the graph.
2251 !isPrevailing(VI.getGUID(), S.get()))
2256 std::vector<CallInfo> CallsWithMetadata;
2257 if (!
FS->allocs().empty()) {
2258 for (
auto &AN :
FS->mutableAllocs()) {
2263 if (AN.MIBs.empty())
2265 IndexCall AllocCall(&AN);
2266 CallsWithMetadata.push_back(AllocCall);
2267 auto *AllocNode = addAllocNode(AllocCall, FS);
2275 AN.ContextSizeInfos.size() == AN.MIBs.size());
2277 for (
auto &MIB : AN.MIBs) {
// Optional per-context size info parallels the MIB list when present.
2280 std::vector<ContextTotalSize> ContextSizeInfo;
2281 if (!AN.ContextSizeInfos.empty()) {
2282 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2283 ContextSizeInfo.push_back({FullStackId, TotalSize});
2285 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2286 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2293 DotAllocContextIds = AllocNode->getContextIds();
// Initialize the single (original) version's alloc type from the node.
2299 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2303 if (!
FS->callsites().empty())
2304 for (
auto &SN :
FS->mutableCallsites()) {
2305 IndexCall StackNodeCall(&SN);
2306 CallsWithMetadata.push_back(StackNodeCall);
2309 if (!CallsWithMetadata.empty())
2310 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2312 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2318 dbgs() <<
"CCG before updating call stack chains:\n";
2323 exportToDot(
"prestackupdate");
2328 exportToDot(
"poststackupdate");
2330 handleCallsitesWithMultipleTargets();
// Resolves callsite nodes whose profiled callees don't all match a single IR
// call: tries each candidate call (node's call plus MatchingCalls) against
// the node's callee edges, removing mismatched edges; nodes left with no
// matching call have their call cleared. Finally the map is compacted and
// entries for newly created (partitioned / tail-call) nodes are added.
// NOTE(review): interior lines are elided in this listing.
2335template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2336void CallsiteContextGraph<DerivedCCG, FuncTy,
2337 CallTy>::handleCallsitesWithMultipleTargets() {
2352 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2353 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2354 auto *
Node = Entry.second;
// Candidate calls: the node's primary call followed by its MatchingCalls.
2363 std::vector<CallInfo> AllCalls;
2364 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2365 AllCalls.push_back(
Node->Call);
// If the calls invoke different callees, split the node per callee first.
2379 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2382 auto It = AllCalls.begin();
2384 for (; It != AllCalls.end(); ++It) {
2387 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2390 if (!Edge->Callee->hasCall())
2392 assert(NodeToCallingFunc.count(Edge->Callee));
2394 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
// Found a call matching all callee edges; promote it to the node's call.
2403 if (
Node->Call != ThisCall) {
2404 Node->setCall(ThisCall);
2415 Node->MatchingCalls.clear();
// No candidate matched: drop the node's call so it is ignored later.
2418 if (It == AllCalls.end()) {
2419 RemovedEdgesWithMismatchedCallees++;
2423 Node->setCall(CallInfo());
2428 for (++It; It != AllCalls.end(); ++It) {
2432 Node->MatchingCalls.push_back(ThisCall);
// Compact: remove entries whose node lost its call or was re-keyed.
2441 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2442 return !it.second->hasCall() || it.second->Call != it.first;
2446 for (
auto &[
Call,
Node] : NewCallToNode)
2447 NonAllocationCallToContextNodeMap[
Call] =
Node;
2451 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2452 NonAllocationCallToContextNodeMap[
Call] =
Node;
// Splits a callsite node when its candidate calls target different callees:
// groups the calls by callee function, maps each profiled callee edge to its
// group, then moves each callee edge onto either the original node (first
// matching group), a fresh per-group node, or a shared "unmatched callees"
// node. None-type caller edges are pruned from all affected nodes at the end.
// NOTE(review): interior lines are elided in this listing.
2455template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2456bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2458 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
// Per-callee bucket of calls plus the node (if any) created for the bucket.
2462 struct CallsWithSameCallee {
2463 std::vector<CallInfo> Calls;
2464 ContextNode *
Node =
nullptr;
2470 for (
auto ThisCall : AllCalls) {
2471 auto *
F = getCalleeFunc(
ThisCall.call());
2473 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
// Map each profiled callee edge to the bucket for its calling function.
2482 for (
const auto &Edge :
Node->CalleeEdges) {
2483 if (!Edge->Callee->hasCall())
2485 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2486 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2487 CalleeNodeToCallInfo[Edge->Callee] =
2488 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
// Nothing matched any call's callee: no partitioning to do.
2494 if (CalleeNodeToCallInfo.
empty())
2506 ContextNode *UnmatchedCalleesNode =
nullptr;
2508 bool UsedOrigNode =
false;
// Iterate over a copy since edges are moved while iterating.
2513 auto CalleeEdges =
Node->CalleeEdges;
2514 for (
auto &Edge : CalleeEdges) {
2515 if (!Edge->Callee->hasCall())
2520 ContextNode *CallerNodeToUse =
nullptr;
// Edges whose callee matched no candidate call go to a shared catch-all
// node with no assigned call.
2524 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2525 if (!UnmatchedCalleesNode)
2526 UnmatchedCalleesNode =
2527 createNewNode(
false, NodeToCallingFunc[
Node]);
2528 CallerNodeToUse = UnmatchedCalleesNode;
2532 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
// First matched bucket reuses the original node; later buckets get a new
// node carrying their bucket's first call.
2535 if (!UsedOrigNode) {
2538 Node->MatchingCalls.clear();
2539 UsedOrigNode =
true;
2542 createNewNode(
false, NodeToCallingFunc[
Node]);
2546 Info->Node->setCall(
Info->Calls.front());
2552 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2554 CallerNodeToUse =
Info->Node;
2558 if (CallerNodeToUse ==
Node)
2561 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
// Clean up caller edges that became None-typed due to the moves above.
2568 for (
auto &
I : CalleeNodeToCallInfo)
2569 removeNoneTypeCallerEdges(
I.second->Node);
2570 if (UnmatchedCalleesNode)
2571 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2572 removeNoneTypeCallerEdges(
Node);
// In the summary-index flavor the raw value is an index into the module
// summary index's stack id table; map it to the real stack id.
2582uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2585 return Index.getStackIdAtIndex(IdOrIndex);
// Checks whether the IR call's callee matches the profiled callee of the
// edge at EI. If the match goes through a chain of tail calls, synthesizes
// nodes/edges for the discovered chain (reusing tail-call nodes already in
// TailCallToContextNodeMap), splices it between caller and callee, and
// removes the original direct edge. Returns false on a failed match.
// NOTE(review): interior lines are elided in this listing.
2588template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2589bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2590 CallTy
Call, EdgeIter &EI,
2591 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2593 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2594 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2597 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2598 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
// Direct match (empty chain) needs no graph surgery.
2603 if (FoundCalleeChain.empty())
// Local helper (elided header): add or merge an edge Callee <- Caller,
// accumulating the original edge's context ids and alloc types.
2607 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2611 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2612 CurEdge->AllocTypes |=
Edge->AllocTypes;
2617 auto NewEdge = std::make_shared<ContextEdge>(
2618 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2619 Callee->CallerEdges.push_back(NewEdge);
// Inserting before EI keeps the caller's iterator position valid.
2620 if (Caller ==
Edge->Caller) {
2624 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2627 "Iterator position not restored after insert and increment");
2629 Caller->CalleeEdges.push_back(NewEdge);
// Create (or reuse) a node per tail call in the chain, wiring edges from
// the profiled callee up to the caller.
2634 auto *CurCalleeNode =
Edge->Callee;
2635 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2636 ContextNode *NewNode =
nullptr;
2638 if (TailCallToContextNodeMap.
count(NewCall)) {
2639 NewNode = TailCallToContextNodeMap[NewCall];
2640 NewNode->AllocTypes |=
Edge->AllocTypes;
2642 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2644 NewNode = createNewNode(
false, Func, NewCall);
2645 TailCallToContextNodeMap[NewCall] = NewNode;
2646 NewNode->AllocTypes =
Edge->AllocTypes;
2650 AddEdge(NewNode, CurCalleeNode);
2652 CurCalleeNode = NewNode;
2656 AddEdge(
Edge->Caller, CurCalleeNode);
// The original direct edge is now redundant; remove it (updates EI).
2664 removeEdgeFromGraph(
Edge.get(), &EI,
true);
// Recursively searches CalleeFunc's tail calls for a unique chain reaching
// ProfiledCallee, recording the chain's callsites in FoundCalleeChain and
// updating the tail-call statistics. Returns true only if exactly one chain
// was found; multiple chains set FoundMultipleCalleeChains.
// NOTE(review): interior lines (incl. depth limiting) are elided here.
2676bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2677 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2678 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2679 bool &FoundMultipleCalleeChains) {
2686 FoundCalleeChain.push_back({Callsite,
F});
2701 bool FoundSingleCalleeChain =
false;
2702 for (
auto &BB : *CalleeFunc) {
2703 for (
auto &
I : BB) {
// Only tail calls can hide the profiled callee from the IR callsite.
2705 if (!CB || !CB->isTailCall())
2707 auto *CalledValue = CB->getCalledOperand();
2708 auto *CalledFunction = CB->getCalledFunction();
2709 if (CalledValue && !CalledFunction) {
2710 CalledValue = CalledValue->stripPointerCasts();
2717 assert(!CalledFunction &&
2718 "Expected null called function in callsite for alias");
2721 if (!CalledFunction)
// Direct hit on the profiled callee ends this branch of the search.
2723 if (CalledFunction == ProfiledCallee) {
2724 if (FoundSingleCalleeChain) {
2725 FoundMultipleCalleeChains =
true;
2728 FoundSingleCalleeChain =
true;
2729 FoundProfiledCalleeCount++;
2730 FoundProfiledCalleeDepth +=
Depth;
2731 if (
Depth > FoundProfiledCalleeMaxDepth)
2732 FoundProfiledCalleeMaxDepth =
Depth;
2733 SaveCallsiteInfo(&
I, CalleeFunc);
2734 }
// Otherwise recurse one level deeper through this tail call.
else if (findProfiledCalleeThroughTailCalls(
2735 ProfiledCallee, CalledFunction,
Depth + 1,
2736 FoundCalleeChain, FoundMultipleCalleeChains)) {
2739 assert(!FoundMultipleCalleeChains);
2740 if (FoundSingleCalleeChain) {
2741 FoundMultipleCalleeChains =
true;
2744 FoundSingleCalleeChain =
true;
2745 SaveCallsiteInfo(&
I, CalleeFunc);
2746 }
else if (FoundMultipleCalleeChains)
2751 return FoundSingleCalleeChain;
// Returns the statically known callee of an IR call, looking through pointer
// casts; indirect calls (and calls with no callee operand) have no callee.
2754const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2756 if (!CB->getCalledOperand() || CB->isIndirectCall())
2758 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
// Returns true if the IR call targets Func, either directly, through an
// alias, or through a unique chain of tail calls (populating
// FoundCalleeChain in that case). Indirect calls never match.
2765bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2766 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
2767 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2769 if (!CB->getCalledOperand() || CB->isIndirectCall())
2771 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
// Direct match, or match through an alias of Func.
2773 if (CalleeFunc == Func)
2776 if (Alias && Alias->getAliasee() == Func)
// Fall back to searching through (possibly nested) tail calls.
2787 bool FoundMultipleCalleeChains =
false;
2788 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2790 FoundMultipleCalleeChains)) {
2791 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2792 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2793 <<
" that actually called " << CalleeVal->getName()
2794 << (FoundMultipleCalleeChains
2795 ?
" (found multiple possible chains)"
2798 if (FoundMultipleCalleeChains)
2799 FoundProfiledCalleeNonUniquelyCount++;
// Returns true if both IR calls resolve to the same statically known callee
// (after stripping pointer casts); indirect calls never compare equal.
2806bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
2807 Instruction *Call2) {
2809 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2811 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2814 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2816 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2818 return CalleeFunc1 == CalleeFunc2;
// Summary-index analog of the module-level tail-call search: walks the
// prevailing summary's call edges that have the tail-call flag, looking for
// a unique chain from CurCallee to ProfiledCallee. Because the index has no
// real instructions, synthesized CallsiteInfo records are created (and
// cached per function/callee pair) to represent the chain's callsites.
// NOTE(review): interior lines are elided in this listing.
2821bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2822 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
2823 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2824 bool &FoundMultipleCalleeChains) {
// Create (once per FS/callee pair) a synthesized CallsiteInfo and append it
// to the discovered chain.
2830 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
2833 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2834 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
2837 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
2838 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
2839 CallsiteInfo *NewCallsiteInfo =
2840 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2841 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2848 bool FoundSingleCalleeChain =
false;
2851 !isPrevailing(CurCallee.
getGUID(), S.get()))
// Resolve aliases to the aliasee's ValueInfo before scanning call edges.
2856 auto FSVI = CurCallee;
2859 FSVI = AS->getAliaseeVI();
2860 for (
auto &CallEdge :
FS->calls()) {
2861 if (!CallEdge.second.hasTailCall())
// Direct hit on the profiled callee ends this branch of the search.
2863 if (CallEdge.first == ProfiledCallee) {
2864 if (FoundSingleCalleeChain) {
2865 FoundMultipleCalleeChains =
true;
2868 FoundSingleCalleeChain =
true;
2869 FoundProfiledCalleeCount++;
2870 FoundProfiledCalleeDepth +=
Depth;
2871 if (
Depth > FoundProfiledCalleeMaxDepth)
2872 FoundProfiledCalleeMaxDepth =
Depth;
2873 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
// Keep the FS -> ValueInfo mapping consistent for label printing.
2875 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2876 FSToVIMap[
FS] = FSVI;
2877 }
else if (findProfiledCalleeThroughTailCalls(
2878 ProfiledCallee, CallEdge.first,
Depth + 1,
2879 FoundCalleeChain, FoundMultipleCalleeChains)) {
2882 assert(!FoundMultipleCalleeChains);
2883 if (FoundSingleCalleeChain) {
2884 FoundMultipleCalleeChains =
true;
2887 FoundSingleCalleeChain =
true;
2888 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2890 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2891 FSToVIMap[
FS] = FSVI;
2892 }
else if (FoundMultipleCalleeChains)
2897 return FoundSingleCalleeChain;
// Resolves an index callsite's callee ValueInfo to its FunctionSummary;
// callees with no summaries yield no callee.
// NOTE(review): the summary-resolution body (after line 2903) is elided.
2900const FunctionSummary *
2901IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
2903 if (
Callee.getSummaryList().empty())
// Summary-index analog of the module-level callee match: the callsite's
// callee ValueInfo must equal Func's ValueInfo (from FSToVIMap), match via
// an alias, or be reachable through a unique tail-call chain.
2908bool IndexCallsiteContextGraph::calleeMatchesFunc(
2909 IndexCall &
Call,
const FunctionSummary *Func,
2910 const FunctionSummary *CallerFunc,
2911 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2915 AliasSummary *Alias =
2916 Callee.getSummaryList().empty()
2919 assert(FSToVIMap.count(Func));
2920 auto FuncVI = FSToVIMap[
Func];
2921 if (Callee == FuncVI ||
// Fall back to searching through (possibly nested) tail calls.
2936 bool FoundMultipleCalleeChains =
false;
2937 if (!findProfiledCalleeThroughTailCalls(
2938 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2939 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2940 <<
" from " << FSToVIMap[CallerFunc]
2941 <<
" that actually called " << Callee
2942 << (FoundMultipleCalleeChains
2943 ?
" (found multiple possible chains)"
2946 if (FoundMultipleCalleeChains)
2947 FoundProfiledCalleeNonUniquelyCount++;
// Returns true if both index callsites resolve to the same callee.
// NOTE(review): the lines computing Callee1/Callee2 (2955-2956) are elided
// from this listing.
2954bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2957 return Callee1 == Callee2;
// Debug convenience: dumps this node (presumably print() to dbgs(); the body
// is elided from this listing).
2960template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2961void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// Prints a node: address, flags, matching calls, node id, sorted context
// ids, callee/caller edges (with peer node ids), and clone relationships.
2967template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2968void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2969 raw_ostream &OS)
const {
2970 OS <<
"Node " <<
this <<
"\n";
2974 OS <<
" (recursive)";
2976 if (!MatchingCalls.
empty()) {
2977 OS <<
"\tMatchingCalls:\n";
2978 for (
auto &MatchingCall : MatchingCalls) {
2980 MatchingCall.print(OS);
2984 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
2986 OS <<
"\tContextIds:";
// Sort ids for deterministic output.
2988 auto ContextIds = getContextIds();
2989 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2990 std::sort(SortedIds.begin(), SortedIds.end());
2991 for (
auto Id : SortedIds)
2994 OS <<
"\tCalleeEdges:\n";
2995 for (
auto &
Edge : CalleeEdges)
2996 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
2998 OS <<
"\tCallerEdges:\n";
2999 for (
auto &
Edge : CallerEdges)
3000 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
// A node lists either its clones or the original it was cloned from.
3002 if (!Clones.empty()) {
3005 for (
auto *
C : Clones) {
3009 OS <<
C <<
" NodeId: " <<
C->NodeId;
3012 }
else if (CloneOf) {
3013 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
// Debug convenience: dumps this edge (presumably print() to dbgs(); the body
// is elided from this listing).
3017template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3018void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// Prints an edge: endpoints, a "(BE)" marker for backedges, and its sorted
// context ids.
3024template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3025void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3026 raw_ostream &OS)
const {
3027 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3028 << (IsBackedge ?
" (BE)" :
"")
3030 OS <<
" ContextIds:";
// Sort ids for deterministic output.
3031 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3032 std::sort(SortedIds.begin(), SortedIds.end());
3033 for (
auto Id : SortedIds)
// Debug convenience: dumps the whole graph (body elided from this listing).
3037template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3038void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// Prints every live (non-removed) node of the graph using the GraphTraits
// node iteration.
3042template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3043void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3044 raw_ostream &OS)
const {
3045 OS <<
"Callsite Context Graph:\n";
3046 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3048 if (
Node->isRemoved())
// Emits a "MemProf hinting" report line per recorded context size entry of
// every live allocation node: allocation type, full stack id, total size,
// and the context id, noting when the chosen alloc type differs from the
// type on the call (e.g. overridden by cold byte percent).
3055template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3056void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3057 raw_ostream &OS)
const {
3058 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3060 if (
Node->isRemoved())
3062 if (!
Node->IsAllocation)
3064 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3065 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
// Sort ids for deterministic output.
3066 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3067 std::sort(SortedIds.begin(), SortedIds.end());
3068 for (
auto Id : SortedIds) {
3069 auto TypeI = ContextIdToAllocationType.find(Id);
3070 assert(TypeI != ContextIdToAllocationType.end());
3071 auto CSI = ContextIdToContextSizeInfos.find(Id);
3072 if (CSI != ContextIdToContextSizeInfos.end()) {
3073 for (
auto &
Info : CSI->second) {
3074 OS <<
"MemProf hinting: "
3076 <<
" full allocation context " <<
Info.FullStackId
3077 <<
" with total size " <<
Info.TotalSize <<
" is "
// Flag contexts whose effective type differs from the call's own type.
3079 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3081 <<
" due to cold byte percent";
3083 OS <<
" (context id " <<
Id <<
")";
// Whole-graph validation: iterates all nodes (via GraphTraits) and checks
// each node and its caller edges (elided lines presumably call checkNode /
// checkEdge — confirm against the full source).
3091template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3092void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3093 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3096 for (
auto &
Edge :
Node->CallerEdges)
// GraphTraits specialization: exposes the CallsiteContextGraph to LLVM's
// generic graph algorithms and GraphWriter. The entry node is the first
// node owned by the graph; edges are navigated through the shared_ptr edge
// lists (most member functions are elided from this listing).
3101template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3103 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3104 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3106 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3122 return G->NodeOwner.begin()->get();
3125 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3126 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
// DOTGraphTraits specialization: controls dot export appearance. Node labels
// show the original stack/alloc id and call label; tooltips carry context
// ids; fill color encodes alloc type (notcold/cold/both) with optional
// highlighting; clones are drawn dashed blue; backedges dotted. Removed
// nodes are hidden. (Interior lines are elided from this listing.)
3145template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3159 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Node label: orig id (or "Alloc"), the call's label, and status markers.
3165 std::string LabelString =
3166 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3169 LabelString +=
"\n";
3170 if (
Node->hasCall()) {
3171 auto Func =
G->NodeToCallingFunc.find(
Node);
3172 assert(Func !=
G->NodeToCallingFunc.end());
3174 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3176 LabelString +=
"null call";
3177 if (
Node->Recursive)
3178 LabelString +=
" (recursive)";
3180 LabelString +=
" (external)";
// Node attributes: tooltip with ids, color by alloc type, clone styling.
3186 auto ContextIds =
Node->getContextIds();
3190 bool Highlight =
false;
3199 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3200 getContextIds(ContextIds) +
"\"")
3204 AttributeString +=
",fontsize=\"30\"";
3206 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3208 if (
Node->CloneOf) {
3209 AttributeString +=
",color=\"blue\"";
3210 AttributeString +=
",style=\"filled,bold,dashed\"";
3212 AttributeString +=
",style=\"filled\"";
3213 return AttributeString;
// Edge attributes: tooltip with the edge's ids, alloc-type color, dotted
// style for backedges, heavier pen for highlighted edges.
3218 auto &Edge = *(ChildIter.getCurrent());
3223 bool Highlight =
false;
3232 auto Color = getColor(Edge->AllocTypes, Highlight);
3233 std::string AttributeString =
3234 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3236 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3239 if (Edge->IsBackedge)
3240 AttributeString +=
",style=\"dotted\"";
3243 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3244 return AttributeString;
// Removed nodes are omitted from the rendered graph.
3250 if (
Node->isRemoved())
// Context id stringification: full sorted list up to 100 ids, otherwise
// just a count.
3263 std::string IdString =
"ContextIds:";
3264 if (ContextIds.
size() < 100) {
3265 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3266 std::sort(SortedIds.begin(), SortedIds.end());
3267 for (
auto Id : SortedIds)
3268 IdString += (
" " +
Twine(Id)).str();
3270 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
// Alloc-type color scheme; highlighted variants are stronger shades.
3275 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3281 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3283 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3284 if (AllocTypes == (uint8_t)AllocationType::Cold)
3285 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3287 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3288 return Highlight ?
"magenta" :
"mediumorchid1";
// Stable per-node dot identifier derived from the node's address.
3292 static std::string getNodeId(NodeRef Node) {
3293 std::stringstream SStream;
3294 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3295 std::string
Result = SStream.str();
// Writes the graph to a dot file whose name incorporates Label (body is
// elided). The preceding template header (line 3304) belongs to another
// small elided definition not visible in this listing.
3304template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3309template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3310void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3311 std::string Label)
const {
// Creates a fresh clone of the edge's callee node (registered via addClone,
// inheriting the original's MatchingCalls) and then delegates to
// moveEdgeToExistingCalleeClone with NewClone=true.
3316template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3317typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3318CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3319 const std::shared_ptr<ContextEdge> &
Edge,
3320 DenseSet<uint32_t> ContextIdsToMove) {
3322 assert(NodeToCallingFunc.count(Node));
3323 ContextNode *Clone =
3324 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3325 Node->addClone(Clone);
3326 Clone->MatchingCalls =
Node->MatchingCalls;
3327 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
// Moves the given caller edge (or the ContextIdsToMove subset of it) from
// its current callee onto NewCallee, a clone of the same original node.
// Handles both full moves (re-point or merge the edge) and partial moves
// (split off a new edge), then pushes the moved context ids down through the
// old callee's callee edges so the subgraph below stays consistent.
// Recursive (self) edges are redirected onto the new callee.
// NOTE(review): interior lines are elided in this listing.
3332template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3333void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3334 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3335 ContextNode *NewCallee,
bool NewClone,
3336 DenseSet<uint32_t> ContextIdsToMove) {
// The move is only valid between clones of the same original node.
3339 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3341 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3343 ContextNode *OldCallee =
Edge->Callee;
3347 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
// Empty set means "move everything on the edge".
3351 if (ContextIdsToMove.
empty())
3352 ContextIdsToMove =
Edge->getContextIds();
// Full move: merge into an existing edge to NewCallee, or re-point this one.
3356 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3359 NewCallee->AllocTypes |=
Edge->AllocTypes;
3361 if (ExistingEdgeToNewCallee) {
3364 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3365 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3366 assert(
Edge->ContextIds == ContextIdsToMove);
3367 removeEdgeFromGraph(
Edge.get());
3370 Edge->Callee = NewCallee;
3371 NewCallee->CallerEdges.push_back(
Edge);
3373 OldCallee->eraseCallerEdge(
Edge.get());
// Partial move: only ContextIdsToMove migrates; the original edge keeps the
// remainder and gets its alloc type recomputed.
3380 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3381 if (ExistingEdgeToNewCallee) {
3384 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3385 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3388 auto NewEdge = std::make_shared<ContextEdge>(
3389 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3390 Edge->Caller->CalleeEdges.push_back(NewEdge);
3391 NewCallee->CallerEdges.push_back(NewEdge);
3395 NewCallee->AllocTypes |= CallerEdgeAllocType;
3397 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
// Propagate the moved ids through the old callee's outgoing (callee) edges
// so they now flow via NewCallee instead.
3402 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3403 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
// A recursive self-edge on the old callee must target the new callee.
3407 if (CalleeToUse == OldCallee) {
3411 if (EdgeIsRecursive) {
3415 CalleeToUse = NewCallee;
3419 DenseSet<uint32_t> EdgeContextIdsToMove =
3421 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3422 OldCalleeEdge->AllocTypes =
3423 computeAllocType(OldCalleeEdge->getContextIds());
3430 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3431 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3432 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3436 auto NewEdge = std::make_shared<ContextEdge>(
3437 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3438 EdgeContextIdsToMove);
3439 NewCallee->CalleeEdges.push_back(NewEdge);
3440 NewEdge->Callee->CallerEdges.push_back(NewEdge);
// Recompute the old callee's alloc type, which must be None iff it has no
// remaining context ids.
3444 OldCallee->AllocTypes = OldCallee->computeAllocType();
3446 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3447 OldCallee->emptyContextIds());
3451 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3454 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
// Repoints the given Edge so that its caller becomes NewCaller, moving all of
// the edge's context ids with it. A self-recursive edge (Caller == Callee)
// also has its callee redirected to NewCaller so it stays self-recursive.
// The ids are then pulled up from OldCaller's caller edges onto caller edges
// of NewCaller, and both nodes' alloc types are recomputed.
3460template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3461void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3462 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3463 ContextNode *NewCaller) {
3464 auto *OldCallee =
Edge->Callee;
3465 auto *NewCallee = OldCallee;
// For a recursive edge the callee must follow the caller to NewCaller.
3468 bool Recursive =
Edge->Caller ==
Edge->Callee;
3470 NewCallee = NewCaller;
3472 ContextNode *OldCaller =
Edge->Caller;
3473 OldCaller->eraseCalleeEdge(
Edge.get());
// Merge into a pre-existing NewCaller->NewCallee edge when possible.
3477 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3479 if (ExistingEdgeToNewCaller) {
// Fold Edge's ids/types into the existing edge and neuter Edge.
3482 ExistingEdgeToNewCaller->getContextIds().insert_range(
3483 Edge->getContextIds());
3484 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3485 Edge->ContextIds.clear();
3486 Edge->AllocTypes = (uint8_t)AllocationType::None;
3487 OldCallee->eraseCallerEdge(
Edge.get());
// Otherwise reattach Edge under NewCaller.
3490 Edge->Caller = NewCaller;
3491 NewCaller->CalleeEdges.push_back(
Edge);
// Only the recursive case changes the callee end of the edge.
3493 assert(NewCallee == NewCaller);
3496 Edge->Callee = NewCallee;
3497 NewCallee->CallerEdges.push_back(
Edge);
3498 OldCallee->eraseCallerEdge(
Edge.get());
3504 NewCaller->AllocTypes |=
Edge->AllocTypes;
// Whether NewCaller had no callers yet influences the elided logic below
// (exact use not visible in this extraction).
3511 bool IsNewNode = NewCaller->CallerEdges.empty();
// Pull the moved ids up from OldCaller's caller edges onto NewCaller.
3520 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3521 auto OldCallerCaller = OldCallerEdge->Caller;
3525 OldCallerEdge->getContextIds(),
Edge->getContextIds());
// Recursion at the caller level: redirect to NewCaller as well.
3526 if (OldCaller == OldCallerCaller) {
3527 OldCallerCaller = NewCaller;
// Remove moved ids from the old caller edge and refresh its alloc type.
3533 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3534 OldCallerEdge->AllocTypes =
3535 computeAllocType(OldCallerEdge->getContextIds());
// Merge into an existing caller edge on NewCaller or create a new one.
3540 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3544 if (ExistingCallerEdge) {
3545 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3546 ExistingCallerEdge->AllocTypes |=
3547 computeAllocType(EdgeContextIdsToMove);
3550 auto NewEdge = std::make_shared<ContextEdge>(
3551 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3552 EdgeContextIdsToMove);
3553 NewCaller->CallerEdges.push_back(NewEdge);
3554 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
// OldCaller lost ids; recompute and sanity-check its aggregate type.
3559 OldCaller->AllocTypes = OldCaller->computeAllocType();
3561 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3562 OldCaller->emptyContextIds());
// Verification sweeps over both nodes' caller edges (bodies elided).
3566 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3569 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
// Removes None-typed callee edges from Node, its clones, and (recursively)
// every caller reachable from it, using Visited to terminate on cycles.
3575template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3576void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3577 recursivelyRemoveNoneTypeCalleeEdges(
3578 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3583 removeNoneTypeCalleeEdges(Node);
// Also process all clones of this node.
3585 for (
auto *Clone :
Node->Clones)
3586 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate over a copy of CallerEdges since recursion may mutate the list.
3590 auto CallerEdges =
Node->CallerEdges;
3591 for (
auto &
Edge : CallerEdges) {
// Skip edges removed by an earlier iteration.
3593 if (
Edge->isRemoved()) {
3597 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
// Driver for backedge marking: launches the stack-tracking DFS from each
// live non-allocation callsite node.
3602template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3603void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3608 DenseSet<const ContextNode *> Visited;
// CurrentStack holds the nodes on the active DFS path; an edge back into
// it is a backedge.
3609 DenseSet<const ContextNode *> CurrentStack;
3610 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3612 if (
Node->isRemoved())
// Only start the DFS from roots-like nodes (condition partially elided);
// nodes with callers are reached from elsewhere.
3615 if (!
Node->CallerEdges.empty())
3617 markBackedges(Node, Visited, CurrentStack);
// DFS helper: marks a callee edge as a backedge (IsBackedge = true) when its
// target is already on the current DFS stack. Visited prevents re-walking
// nodes; CurrentStack tracks the active path.
3623template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3624void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3625 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3626 DenseSet<const ContextNode *> &CurrentStack) {
3627 auto I = Visited.
insert(Node);
3631 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3632 auto *
Callee = CalleeEdge->Callee;
// Already-visited callee: it is a backedge only if still on the stack.
3633 if (Visited.
count(Callee)) {
3636 if (CurrentStack.
count(Callee))
3637 CalleeEdge->IsBackedge =
true;
// Unvisited callee: push, recurse, pop.
3640 CurrentStack.
insert(Callee);
3641 markBackedges(Callee, Visited, CurrentStack);
3642 CurrentStack.
erase(Callee);
// Driver for clone identification: walks up from every allocation node,
// then cleans up None-typed callee edges left behind by cloning.
3646template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3647void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3648 DenseSet<const ContextNode *> Visited;
3649 for (
auto &Entry : AllocationCallToContextNodeMap) {
// Seed each walk with the allocation's full context id set.
3651 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
// Second pass: purge edges whose alloc type became None during cloning.
3654 for (
auto &Entry : AllocationCallToContextNodeMap)
3655 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
// Core cloning analysis for one node. Walks caller edges bottom-up from the
// allocation (restricted to AllocContextIds), and splits Node into clones so
// that each clone sees a single allocation type (cold vs not-cold) where the
// profile makes that possible. Backedges are deferred and handled after the
// non-backedge callers.
3668template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3669void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3670 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3671 const DenseSet<uint32_t> &AllocContextIds) {
3681 if (!
Node->hasCall())
// Recurse into callers first (copy the list; recursion mutates it).
3700 auto CallerEdges =
Node->CallerEdges;
3701 for (
auto &
Edge : CallerEdges) {
3703 if (
Edge->isRemoved()) {
// Defer backedges to avoid premature cloning around cycles.
3709 if (
Edge->IsBackedge) {
3716 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3717 identifyClones(
Edge->Caller, Visited, AllocContextIds);
// Sort caller edges into a deterministic cloning order: empty-id edges
// last, otherwise by alloc-type priority, tie-broken by smallest context id.
3740 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3744 [&](
const std::shared_ptr<ContextEdge> &
A,
3745 const std::shared_ptr<ContextEdge> &
B) {
3748 if (A->ContextIds.empty())
3754 if (B->ContextIds.empty())
3757 if (A->AllocTypes == B->AllocTypes)
3760 return *A->ContextIds.begin() < *B->ContextIds.begin();
3761 return AllocTypeCloningPriority[A->AllocTypes] <
3762 AllocTypeCloningPriority[B->AllocTypes];
3765 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// Context ids seen on more than one caller edge indicate recursion through
// this node; they get special filtering below.
3767 DenseSet<uint32_t> RecursiveContextIds;
3772 DenseSet<uint32_t> AllCallerContextIds;
3773 for (
auto &CE :
Node->CallerEdges) {
3776 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3777 for (
auto Id :
CE->getContextIds())
// A second insertion of the same id means it arrived via two edges.
3778 if (!AllCallerContextIds.
insert(Id).second)
3779 RecursiveContextIds.
insert(Id);
// Main cloning loop over a copy of the (sorted) caller edges.
3789 auto CallerEdges =
Node->CallerEdges;
3790 for (
auto &CallerEdge : CallerEdges) {
3792 if (CallerEdge->isRemoved()) {
3796 assert(CallerEdge->Callee == Node);
// Callers without a real call can't be updated, so don't clone for them.
3805 if (!CallerEdge->Caller->hasCall())
// Restrict to the allocation's context ids (filter partially elided).
3810 auto CallerEdgeContextsForAlloc =
3812 if (!RecursiveContextIds.
empty())
3813 CallerEdgeContextsForAlloc =
3815 if (CallerEdgeContextsForAlloc.empty())
3818 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// Per-callee-edge alloc types restricted to this caller's contexts, used
// to decide whether an existing clone matches.
3822 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3823 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3824 for (
auto &CalleeEdge :
Node->CalleeEdges)
3825 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3826 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3842 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
3843 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// If this (non-backedge) caller already agrees with Node's alloc type and
// callee-edge types, no clone is needed for it.
3844 if (!CallerEdge->IsBackedge &&
3845 allocTypeToUse(CallerAllocTypeForAlloc) ==
3846 allocTypeToUse(
Node->AllocTypes) &&
3847 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3848 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
// Count backedges whose cloning had to be deferred.
3852 if (CallerEdge->IsBackedge) {
3856 DeferredBackedges++;
// Deferred backedge caller: analyze it now with this edge's contexts, then
// re-derive this edge's id set since the caller may have been split.
3869 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
3870 !Visited.
count(CallerEdge->Caller)) {
3871 const auto OrigIdCount = CallerEdge->getContextIds().size();
3874 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
3875 removeNoneTypeCalleeEdges(CallerEdge->Caller);
3879 bool UpdatedEdge =
false;
// Caller lost ids to its clones: find the clone edge that now carries them.
3880 if (OrigIdCount > CallerEdge->getContextIds().size()) {
3881 for (
auto E :
Node->CallerEdges) {
3883 if (
E->Caller->CloneOf != CallerEdge->Caller)
3887 auto CallerEdgeContextsForAllocNew =
3889 if (CallerEdgeContextsForAllocNew.empty())
3899 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
// The recursion may have removed or shrunk this edge entirely.
3909 if (CallerEdge->isRemoved())
3919 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
3920 if (CallerEdgeContextsForAlloc.empty())
// Recompute types after the deferred-backedge adjustments.
3925 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3926 CalleeEdgeAllocTypesForCallerEdge.clear();
3927 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3928 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3929 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// Look for an existing clone whose alloc type matches this caller.
3935 ContextNode *Clone =
nullptr;
3936 for (
auto *CurClone :
Node->Clones) {
3937 if (allocTypeToUse(CurClone->AllocTypes) !=
3938 allocTypeToUse(CallerAllocTypeForAlloc))
3945 assert(!BothSingleAlloc ||
3946 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3952 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3953 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
// Move this caller edge to the matching clone, or spawn a new clone.
3961 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
3962 CallerEdgeContextsForAlloc);
3964 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
3967 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
3974 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
// IR (regular LTO / module) flavor: tags the allocation call with a
// "memprof" attribute carrying the chosen allocation type string, and emits
// an optimization remark recording the annotation.
3980void ModuleCallsiteContextGraph::updateAllocationCall(
3985 "memprof", AllocTypeString);
3988 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
3989 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3991 <<
" marked with memprof allocation attribute "
3992 <<
ore::NV(
"Attribute", AllocTypeString));
// Summary-index (ThinLTO) flavor: records the allocation type in the
// AllocInfo's Versions entry for this clone (update itself elided here;
// only the bounds assertion is visible).
3995void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
3999 assert(AI->Versions.size() >
Call.cloneNo());
// Reads back the allocation type previously attached to an IR allocation
// call: None if there is no "memprof" fn attribute, otherwise Cold iff the
// attribute's string value is "cold".
4004ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4006 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4007 return AllocationType::None;
4008 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4009 ? AllocationType::Cold
4010 : AllocationType::NotCold;
// Summary-index flavor: reads the allocation type for this clone out of the
// AllocInfo Versions array (the return itself is elided in this extraction;
// the visible assertion bounds-checks the clone number).
4014IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4016 assert(AI->Versions.size() >
Call.cloneNo());
// IR flavor: redirects CallerCall to the assigned clone of its callee.
// Counts a mismatch (MismatchedCloneAssignments) when the call was already
// assigned a different callee clone number, rewrites the called function for
// non-zero clones, and emits a remark describing the assignment.
4020void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4021 FuncInfo CalleeFunc) {
4022 auto *CurF = getCalleeFunc(CallerCall.call());
4023 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
// Diagnose conflicting clone assignments for the same callsite.
4030 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4032 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4034 MismatchedCloneAssignments++;
// Clone 0 is the original function; only non-zero clones need a rewrite.
4037 if (NewCalleeCloneNo > 0)
4038 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4039 OREGetter(CallerCall.call()->getFunction())
4040 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4041 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4042 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4043 <<
" assigned to call function clone "
4044 <<
ore::NV(
"Callee", CalleeFunc.func()));
// Summary-index flavor: records the callee clone number for this callsite
// clone in CI->Clones, counting (and logging) a mismatch if a different
// non-zero assignment was already present.
4047void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4048 FuncInfo CalleeFunc) {
4051 "Caller cannot be an allocation which should not have profiled calls");
4052 assert(CI->Clones.size() > CallerCall.cloneNo());
4053 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4054 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
// 0 means "not yet assigned", so only a differing non-zero value conflicts.
4059 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4061 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4063 MismatchedCloneAssignments++;
4065 CurCalleeCloneNo = NewCalleeCloneNo;
4077 SP->replaceLinkageName(MDName);
4081 TempDISubprogram NewDecl = Decl->
clone();
4082 NewDecl->replaceLinkageName(MDName);
// IR flavor: materializes function clone number CloneNo for Func (clone
// creation itself elided here), gives it the expected unique name, updates
// CallMap for the calls carrying memprof metadata, emits a remark, and
// returns the new FuncInfo {NewFunc, CloneNo}.
4086CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4088ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4089 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4090 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
// The clone's name must not collide with an existing function.
4095 assert(!
Func.func()->getParent()->getFunction(Name));
4096 NewFunc->setName(Name);
// Map each metadata-bearing call in the original to its copy in the clone
// (mapping update elided; only the clone-0 precondition is visible).
4098 for (
auto &Inst : CallsWithMetadataInFunc) {
4100 assert(Inst.cloneNo() == 0);
4103 OREGetter(
Func.func())
4104 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4105 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4106 return {NewFunc, CloneNo};
// Summary-index flavor: no IR is cloned; instead each alloc/callsite summary
// grows a new slot (Versions / Clones entry, initialized to 0) for clone
// CloneNo, and CallMap maps the call to the same call with the new clone
// number. Returns the original function paired with CloneNo.
4109CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4110 IndexCall>::FuncInfo
4111IndexCallsiteContextGraph::cloneFunctionForCallsite(
4112 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4113 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4127 for (
auto &Inst : CallsWithMetadataInFunc) {
4129 assert(Inst.cloneNo() == 0);
// Allocation summaries get a new version slot...
4131 assert(AI->Versions.size() == CloneNo);
4134 AI->Versions.push_back(0);
// ...and callsite summaries a new clone slot.
4137 assert(CI && CI->Clones.size() == CloneNo);
4140 CI->Clones.push_back(0);
4142 CallMap[Inst] = {Inst.call(), CloneNo};
4144 return {
Func.func(), CloneNo};
// Driver for clone merging: builds a map from each context id to its owning
// allocation node (original node, via getOrigNode(), for both the node and
// its clones), then walks up from every allocation node and its clones
// merging redundant callee clones. Optionally dumps/exports the graph after.
4161template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4162void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4168 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4169 for (
auto &Entry : AllocationCallToContextNodeMap) {
4171 for (
auto Id :
Node->getContextIds())
4172 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4173 for (
auto *Clone :
Node->Clones) {
4174 for (
auto Id : Clone->getContextIds())
4175 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4182 DenseSet<const ContextNode *> Visited;
4183 for (
auto &Entry : AllocationCallToContextNodeMap) {
4186 mergeClones(Node, Visited, ContextIdToAllocationNode);
// Copy the clone list since merging can modify it.
4192 auto Clones =
Node->Clones;
4193 for (
auto *Clone : Clones)
4194 mergeClones(Clone, Visited, ContextIdToAllocationNode);
// Debug dump / dot export of the post-merge graph (guards elided).
4198 dbgs() <<
"CCG after merging:\n";
4202 exportToDot(
"aftermerge");
// Recursive merging for one node: repeatedly walks caller edges, recursing
// into any not-yet-visited caller, until a full pass finds nothing new
// (FoundUnvisited stays false). The loop re-snapshots CallerEdges each
// iteration because recursion can mutate the edge list. Statistics track the
// number of invocations and iteration counts. Finally merges this node's own
// callee clones.
4210template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4211void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4212 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4213 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4223 bool FoundUnvisited =
true;
4225 while (FoundUnvisited) {
4227 FoundUnvisited =
false;
// Snapshot: recursion below can add/remove caller edges.
4230 auto CallerEdges =
Node->CallerEdges;
4231 for (
auto CallerEdge : CallerEdges) {
// Skip edges that were redirected away from Node meanwhile.
4233 if (CallerEdge->Callee != Node)
4238 FoundUnvisited =
true;
4239 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
// Bookkeeping for merge statistics.
4243 TotalMergeInvokes++;
4244 TotalMergeIters += Iters;
4245 if (Iters > MaxMergeIters)
4246 MaxMergeIters = Iters;
// Callers are done; now merge this node's callee clones.
4249 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
// Merges, for a single caller Node, multiple callee edges that target
// different clones of the same original callee into one merge node. Other
// callers that share exactly the same set of callee clones (found via
// findOtherCallersToShareMerge) are redirected into the same merge node so
// their edges don't block reuse.
4252template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4253void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4254 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4255 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4257 if (
Node->emptyContextIds())
// Group this node's callee edges by the original (un-cloned) callee node.
4262 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4263 OrigNodeToCloneEdges;
4264 for (
const auto &
E :
Node->CalleeEdges) {
4269 OrigNodeToCloneEdges[
Base].push_back(
E);
// Deterministic processing order: fewer caller edges first, originals
// before clones, then by smallest context id.
4275 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4276 const std::shared_ptr<ContextEdge> &
B) {
4277 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4278 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4279 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4281 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4285 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4290 for (
auto Entry : OrigNodeToCloneEdges) {
4293 auto &CalleeEdges =
Entry.second;
4294 auto NumCalleeClones = CalleeEdges.size();
// Nothing to merge if Node only calls one clone of this callee.
4296 if (NumCalleeClones == 1)
// Other callers that call exactly the same clone set may share the merge.
4307 DenseSet<ContextNode *> OtherCallersToShareMerge;
4308 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4309 OtherCallersToShareMerge);
4314 ContextNode *MergeNode =
nullptr;
4315 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4316 for (
auto CalleeEdge : CalleeEdges) {
4317 auto *OrigCallee = CalleeEdge->Callee;
// A clone with a single caller can itself serve as the merge node.
4323 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4324 MergeNode = OrigCallee;
4325 NonNewMergedNodes++;
// Likewise if every other caller of this clone is in the sharing set.
4332 if (!OtherCallersToShareMerge.
empty()) {
4333 bool MoveAllCallerEdges =
true;
4334 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4335 if (CalleeCallerE == CalleeEdge)
4337 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4338 MoveAllCallerEdges =
false;
4344 if (MoveAllCallerEdges) {
4345 MergeNode = OrigCallee;
4346 NonNewMergedNodes++;
// Otherwise fold this edge into the existing merge node, or create one.
4353 assert(MergeNode != OrigCallee);
4354 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4357 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
// Redirect the sharing callers' edges into the merge node too.
4362 if (!OtherCallersToShareMerge.
empty()) {
// Snapshot: moving edges mutates OrigCallee->CallerEdges.
4366 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4367 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4368 if (CalleeCallerE == CalleeEdge)
4370 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4372 CallerToMoveCount[CalleeCallerE->Caller]++;
4373 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
// Moving edges can leave None-typed leftovers; clean both nodes.
4377 removeNoneTypeCalleeEdges(OrigCallee);
4378 removeNoneTypeCalleeEdges(MergeNode);
// Identifies caller nodes (other than Node) that call the exact same set of
// callee clones in CalleeEdges, and whose context ids reach no allocation
// nodes beyond those already reached through Node's edges. Such callers can
// safely share the merged callee node. Results are returned in
// OtherCallersToShareMerge.
4396template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4397void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4398 findOtherCallersToShareMerge(
4400 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4401 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4402 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4403 auto NumCalleeClones = CalleeEdges.size();
// Per-caller count of how many of the callee clones it also calls; a
// count of 0 marks a disqualified caller.
4406 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4409 unsigned PossibleOtherCallerNodes = 0;
// If the first (fewest-caller) clone has a single caller, no other caller
// can possibly share all clones.
4413 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
// Pass 1: count shared-callee edges per other caller, and collect the
// allocation nodes reached through each of Node's callee edges.
4419 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4420 for (
auto CalleeEdge : CalleeEdges) {
4421 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4424 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4425 if (CalleeCallerEdges->Caller == Node) {
4426 assert(CalleeCallerEdges == CalleeEdge);
4429 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
// Caller becomes a candidate once it reaches the qualifying count
// (threshold expression elided in this extraction).
4432 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4434 PossibleOtherCallerNodes++;
4438 for (
auto Id : CalleeEdge->getContextIds()) {
4439 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
// Context id with no recorded allocation node: count and skip.
4443 MissingAllocForContextId++;
4446 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
// Pass 2: disqualify candidates whose edges reach allocation nodes not
// reached by Node's corresponding edge.
4453 for (
auto CalleeEdge : CalleeEdges) {
4454 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4456 if (!PossibleOtherCallerNodes)
4458 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4460 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4462 if (CalleeCallerE == CalleeEdge)
4466 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4471 for (
auto Id : CalleeCallerE->getContextIds()) {
4472 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4477 if (!CurCalleeAllocNodes.contains(
Alloc)) {
// Zeroing the count disqualifies this caller permanently.
4478 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4479 PossibleOtherCallerNodes--;
4486 if (!PossibleOtherCallerNodes)
// Emit the surviving candidates: those sharing edges to every clone.
4491 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4492 if (
Count != NumCalleeClones)
4494 OtherCallersToShareMerge.
insert(OtherCaller);
// Assigns each callsite-graph clone to a concrete function clone, creating
// new function clones as needed, then rewrites allocation and call sites
// (via updateAllocationCall / updateCall) to reflect the assignments.
// Returns whether changes were made (return sites elided in this
// extraction).
4529template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4530bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// Records, per callsite node, which function clone its calls should target.
4537 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4541 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4542 const FuncInfo &CalleeFunc) {
4544 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
// Per function clone: the mapping from original calls to their copies in
// that clone.
4548 struct FuncCloneInfo {
4553 DenseMap<CallInfo, CallInfo> CallMap;
// Process each function that has calls carrying memprof metadata.
4558 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4559 FuncInfo OrigFunc(Func);
// FuncCloneInfos[i] describes function clone number i of this function.
4564 std::vector<FuncCloneInfo> FuncCloneInfos;
4565 for (
auto &
Call : CallsWithMetadata) {
4566 ContextNode *
Node = getNodeForInst(
Call);
// Only nodes that were actually cloned need function-level assignment.
4570 if (!Node ||
Node->Clones.empty())
4573 "Not having a call should have prevented cloning");
// Which callsite clone currently lives in which function clone.
4577 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
// Binds CallsiteClone into FuncClone: records the mapping and rewrites
// the clone's call (and matching calls) to the copies inside FuncClone.
4581 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4583 ContextNode *CallsiteClone,
4586 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4588 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4589 DenseMap<CallInfo, CallInfo> &CallMap =
4590 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4591 CallInfo CallClone(
Call);
4592 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4593 CallClone = It->second;
4594 CallsiteClone->setCall(CallClone);
// Matching calls share the same clone assignment.
4596 for (
auto &MatchingCall :
Node->MatchingCalls) {
4597 CallInfo CallClone(MatchingCall);
4598 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4599 CallClone = It->second;
4601 MatchingCall = CallClone;
// Worklist of callsite clones of this node awaiting an assignment.
4608 std::deque<ContextNode *> ClonesWorklist;
4610 if (!
Node->emptyContextIds())
4611 ClonesWorklist.push_back(Node);
4617 unsigned NodeCloneCount = 0;
4618 while (!ClonesWorklist.empty()) {
4619 ContextNode *Clone = ClonesWorklist.front();
4620 ClonesWorklist.pop_front();
// Need a function clone that doesn't exist yet for this callsite clone.
4629 if (FuncCloneInfos.size() < NodeCloneCount) {
// The first callsite clone can use the original function.
4631 if (NodeCloneCount == 1) {
4636 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4637 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4641 FuncCloneInfos.push_back(
4642 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4643 AssignCallsiteCloneToFuncClone(
4644 OrigFunc,
Call, Clone,
4645 AllocationCallToContextNodeMap.count(
Call));
4646 for (
auto &CE : Clone->CallerEdges) {
// Ignore callers we cannot update.
4648 if (!
CE->Caller->hasCall())
4650 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
// Otherwise create a brand-new function clone. First check whether any
// caller was already assigned to a clone of this function.
4660 FuncInfo PreviousAssignedFuncClone;
4662 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4663 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4665 bool CallerAssignedToCloneOfFunc =
false;
4666 if (EI != Clone->CallerEdges.end()) {
4667 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4668 PreviousAssignedFuncClone =
4669 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4670 CallerAssignedToCloneOfFunc =
true;
// Materialize the new function clone.
4675 DenseMap<CallInfo, CallInfo> NewCallMap;
4676 unsigned CloneNo = FuncCloneInfos.size();
4677 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4678 "should already exist in the map");
4679 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4680 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4681 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4682 FunctionClonesAnalysis++;
// No prior caller assignment: bind this clone (and its callers) to the
// new function clone directly.
4688 if (!CallerAssignedToCloneOfFunc) {
4689 AssignCallsiteCloneToFuncClone(
4690 NewFuncClone,
Call, Clone,
4691 AllocationCallToContextNodeMap.count(
Call));
4692 for (
auto &CE : Clone->CallerEdges) {
4694 if (!
CE->Caller->hasCall())
4696 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// Prior assignment exists: migrate callers previously bound to that
// function clone over to the new one, splitting their callee nodes.
4708 auto CallerEdges = Clone->CallerEdges;
4709 for (
auto CE : CallerEdges) {
4711 if (
CE->isRemoved()) {
4717 if (!
CE->Caller->hasCall())
// Only migrate callers bound to the previously-assigned func clone.
4720 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
4724 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
4725 PreviousAssignedFuncClone)
4728 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
// Each sibling callee of the migrated caller must be cloned into the
// new function clone as well.
4741 auto CalleeEdges =
CE->Caller->CalleeEdges;
4742 for (
auto CalleeEdge : CalleeEdges) {
4745 if (CalleeEdge->isRemoved()) {
4750 ContextNode *
Callee = CalleeEdge->Callee;
4754 if (Callee == Clone || !
Callee->hasCall())
// Skip recursive self-edges.
4759 if (Callee == CalleeEdge->Caller)
4761 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
4762 removeNoneTypeCalleeEdges(NewClone);
4765 removeNoneTypeCalleeEdges(Callee);
4766 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
// Inherit the callee's existing function assignment if any.
4770 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
4771 RecordCalleeFuncOfCallsite(
4772 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
// Point the new callsite clone at the call copy living inside the
// new function clone (found via the clone-0 original call).
4780 CallInfo OrigCall(
Callee->getOrigNode()->Call);
4781 OrigCall.setCloneNo(0);
4782 DenseMap<CallInfo, CallInfo> &CallMap =
4783 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
4785 CallInfo NewCall(CallMap[OrigCall]);
4787 NewClone->setCall(NewCall);
4789 for (
auto &MatchingCall : NewClone->MatchingCalls) {
4790 CallInfo OrigMatchingCall(MatchingCall);
4791 OrigMatchingCall.setCloneNo(0);
4793 CallInfo NewCall(CallMap[OrigMatchingCall]);
4796 MatchingCall = NewCall;
// Returns the first function clone not yet hosting a callsite clone of
// this node; asserts one exists.
4805 auto FindFirstAvailFuncClone = [&]() {
4810 for (
auto &CF : FuncCloneInfos) {
4811 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
4812 return CF.FuncClone;
4815 "Expected an available func clone for this callsite clone");
// Reconcile this callsite clone with its callers' existing function
// assignments, splitting further when callers disagree.
4832 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
4833 FuncInfo FuncCloneAssignedToCurCallsiteClone;
4837 auto CloneCallerEdges = Clone->CallerEdges;
4838 for (
auto &
Edge : CloneCallerEdges) {
4842 if (
Edge->isRemoved())
4845 if (!
Edge->Caller->hasCall())
4849 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
4850 FuncInfo FuncCloneCalledByCaller =
4851 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
// Conflict: the caller's function clone already hosts a different
// callsite clone, or this clone is bound to a different func clone.
4861 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
4862 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
4870 (FuncCloneAssignedToCurCallsiteClone &&
4871 FuncCloneAssignedToCurCallsiteClone !=
4872 FuncCloneCalledByCaller)) {
// Reuse a callsite clone already split off for that func clone...
4887 if (FuncCloneToNewCallsiteCloneMap.count(
4888 FuncCloneCalledByCaller)) {
4889 ContextNode *NewClone =
4890 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4891 moveEdgeToExistingCalleeClone(
Edge, NewClone);
4893 removeNoneTypeCalleeEdges(NewClone);
// ...or split off a fresh one and queue it for assignment.
4896 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4897 removeNoneTypeCalleeEdges(NewClone);
4898 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4901 ClonesWorklist.push_back(NewClone);
4902 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4906 removeNoneTypeCalleeEdges(Clone);
// No conflict: adopt the caller's function clone for this clone.
4914 if (!FuncCloneAssignedToCurCallsiteClone) {
4915 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4917 AssignCallsiteCloneToFuncClone(
4918 FuncCloneCalledByCaller,
Call, Clone,
4919 AllocationCallToContextNodeMap.count(
Call));
4923 assert(FuncCloneAssignedToCurCallsiteClone ==
4924 FuncCloneCalledByCaller);
// Caller has no assignment yet: pick the first free function clone and
// propagate it to this caller.
4933 if (!FuncCloneAssignedToCurCallsiteClone) {
4934 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4935 assert(FuncCloneAssignedToCurCallsiteClone);
4937 AssignCallsiteCloneToFuncClone(
4938 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
4939 AllocationCallToContextNodeMap.count(
Call));
4941 assert(FuncCloneToCurNodeCloneMap
4942 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4944 RecordCalleeFuncOfCallsite(
Edge->Caller,
4945 FuncCloneAssignedToCurCallsiteClone);
// After all caller edges: if still unassigned (e.g. no eligible
// callers), fall back to the first available function clone.
4965 if (!FuncCloneAssignedToCurCallsiteClone) {
4966 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4967 assert(FuncCloneAssignedToCurCallsiteClone &&
4968 "No available func clone for this callsite clone");
4969 AssignCallsiteCloneToFuncClone(
4970 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
4971 AllocationCallToContextNodeMap.contains(
Call));
// Verification sweeps over the node's and clones' edges (bodies elided).
4976 for (
const auto &PE :
Node->CalleeEdges)
4978 for (
const auto &CE :
Node->CallerEdges)
4980 for (
auto *Clone :
Node->Clones) {
4982 for (
const auto &PE : Clone->CalleeEdges)
4984 for (
const auto &CE : Clone->CallerEdges)
4992 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
// Recursive lambda: walks clones and callers from each allocation node and
// rewrites allocation calls and callsite calls per the assignments above.
4994 auto UpdateCalls = [&](ContextNode *
Node,
4995 DenseSet<const ContextNode *> &Visited,
4996 auto &&UpdateCalls) {
4997 auto Inserted = Visited.insert(Node);
5001 for (
auto *Clone :
Node->Clones)
5002 UpdateCalls(Clone, Visited, UpdateCalls);
5004 for (
auto &
Edge :
Node->CallerEdges)
5005 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5009 if (!
Node->hasCall() ||
Node->emptyContextIds())
5012 if (
Node->IsAllocation) {
5013 auto AT = allocTypeToUse(
Node->AllocTypes);
// When size info is available, tally cold bytes to decide (condition
// partially elided) whether to force a Cold annotation.
5019 !ContextIdToContextSizeInfos.empty()) {
5020 uint64_t TotalCold = 0;
5022 for (
auto Id :
Node->getContextIds()) {
5023 auto TypeI = ContextIdToAllocationType.find(Id);
5024 assert(TypeI != ContextIdToAllocationType.end());
5025 auto CSI = ContextIdToContextSizeInfos.find(Id);
5026 if (CSI != ContextIdToContextSizeInfos.end()) {
5027 for (
auto &
Info : CSI->second) {
5029 if (TypeI->second == AllocationType::Cold)
5030 TotalCold +=
Info.TotalSize;
5035 AT = AllocationType::Cold;
5037 updateAllocationCall(
Node->Call, AT);
// Non-allocation callsite: update it (and its matching calls) to target
// the assigned callee function clone, if any.
5042 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5045 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5046 updateCall(
Node->Call, CalleeFunc);
5048 for (
auto &
Call :
Node->MatchingCalls)
5049 updateCall(
Call, CalleeFunc);
// Kick off the rewrite from every allocation node.
5057 DenseSet<const ContextNode *> Visited;
5058 for (
auto &Entry : AllocationCallToContextNodeMap)
5059 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5073 FunctionsClonedThinBackend++;
5074 for (
unsigned I = 1;
I < NumClones;
I++) {
5075 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5077 FunctionClonesThinBackend++;
5080 for (
auto &BB : *NewF) {
5081 for (
auto &Inst : BB) {
5082 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5083 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5087 auto *PrevF = M.getFunction(Name);
5091 assert(PrevF->isDeclaration());
5092 NewF->takeName(PrevF);
5093 PrevF->replaceAllUsesWith(NewF);
5094 PrevF->eraseFromParent();
5096 NewF->setName(Name);
5099 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5102 if (!FuncToAliasMap.count(&
F))
5104 for (
auto *
A : FuncToAliasMap[&
F]) {
5106 auto *PrevA = M.getNamedAlias(Name);
5108 A->getType()->getPointerAddressSpace(),
5109 A->getLinkage(), Name, NewF);
5110 NewA->copyAttributesFrom(
A);
5114 assert(PrevA->isDeclaration());
5115 NewA->takeName(PrevA);
5116 PrevA->replaceAllUsesWith(NewA);
5117 PrevA->eraseFromParent();
5128 const Function *CallingFunc =
nullptr) {
5147 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5153 if (!SrcFileMD &&
F.isDeclaration()) {
5157 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5162 assert(SrcFileMD || OrigName ==
F.getName());
5164 StringRef SrcFile = M.getSourceFileName();
5176 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5177 F.getName().contains(
'.')) {
5178 OrigName =
F.getName().rsplit(
'.').first;
5187 assert(TheFnVI ||
F.isDeclaration());
// Sets up the state needed for indirect-call promotion during the ThinLTO
// backend: creates the ICallPromotionAnalysis and the InstrProfSymtab for
// the module. On symtab-creation failure an error is reported on the module
// context (the failure return is elided in this extraction).
5191bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5193 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5194 Symtab = std::make_unique<InstrProfSymtab>();
// Symtab->create can fail; surface the error text via the LLVMContext.
5205 if (
Error E = Symtab->create(M,
true,
false)) {
5206 std::string SymtabFailure =
toString(std::move(
E));
5207 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5220 auto MIBIter = AllocNode.
MIBs.begin();
5221 for (
auto &MDOp : MemProfMD->
operands()) {
5223 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5228 auto ContextIterBegin =
5232 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5234 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5239 if (LastStackContextId == *ContextIter)
5241 LastStackContextId = *ContextIter;
5242 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5252bool MemProfContextDisambiguation::applyImport(
Module &M) {
5259 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5261 for (
auto &
A :
M.aliases()) {
5262 auto *Aliasee =
A.getAliaseeObject();
5264 FuncToAliasMap[
F].insert(&
A);
5267 if (!initializeIndirectCallPromotionInfo(M))
5274 OptimizationRemarkEmitter ORE(&
F);
5277 bool ClonesCreated =
false;
5278 unsigned NumClonesCreated = 0;
5279 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
5289 if (ClonesCreated) {
5290 assert(NumClonesCreated == NumClones);
5297 ClonesCreated =
true;
5298 NumClonesCreated = NumClones;
5301 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5316 if (CalledFunction != CB->getCalledOperand() &&
5317 (!GA || CalledFunction != GA->getAliaseeObject())) {
5318 SkippedCallsCloning++;
5324 auto CalleeOrigName = CalledFunction->getName();
5325 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5328 if (!StackNode.
Clones[J])
5330 auto NewF =
M.getOrInsertFunction(
5332 CalledFunction->getFunctionType());
5346 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5347 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5349 <<
" assigned to call function clone "
5350 <<
ore::NV(
"Callee", NewF.getCallee()));
5364 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5368 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5370 "enable-import-metadata is needed to emit thinlto_src_module");
5371 StringRef SrcModule =
5374 if (GVS->modulePath() == SrcModule) {
5375 GVSummary = GVS.get();
5379 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5389 if (
FS->allocs().empty() &&
FS->callsites().empty())
5392 auto SI =
FS->callsites().begin();
5393 auto AI =
FS->allocs().begin();
5398 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5401 for (
auto CallsiteIt =
FS->callsites().rbegin();
5402 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5403 auto &Callsite = *CallsiteIt;
5407 if (!Callsite.StackIdIndices.empty())
5409 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5418 for (
auto &BB :
F) {
5419 for (
auto &
I : BB) {
5425 auto *CalledValue = CB->getCalledOperand();
5426 auto *CalledFunction = CB->getCalledFunction();
5427 if (CalledValue && !CalledFunction) {
5428 CalledValue = CalledValue->stripPointerCasts();
5435 assert(!CalledFunction &&
5436 "Expected null called function in callsite for alias");
5440 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5441 I.getMetadata(LLVMContext::MD_callsite));
5442 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5446 if (CB->getAttributes().hasFnAttr(
"memprof")) {
5448 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5449 ? AllocTypeColdThinBackend++
5450 : AllocTypeNotColdThinBackend++;
5451 OrigAllocsThinBackend++;
5452 AllocVersionsThinBackend++;
5453 if (!MaxAllocVersionsThinBackend)
5454 MaxAllocVersionsThinBackend = 1;
5461 auto &AllocNode = *(AI++);
5469 CloneFuncIfNeeded(AllocNode.Versions.size());
5471 OrigAllocsThinBackend++;
5472 AllocVersionsThinBackend += AllocNode.Versions.size();
5473 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5474 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5484 if (AllocNode.Versions.size() == 1 &&
5487 AllocationType::NotCold ||
5489 AllocationType::None);
5490 UnclonableAllocsThinBackend++;
5496 return Type == ((uint8_t)AllocationType::NotCold |
5497 (uint8_t)AllocationType::Cold);
5501 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5503 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5506 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5507 : AllocTypeNotColdThinBackend++;
5522 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5523 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5525 <<
" marked with memprof allocation attribute "
5526 <<
ore::NV(
"Attribute", AllocTypeString));
5528 }
else if (!CallsiteContext.empty()) {
5529 if (!CalledFunction) {
5533 assert(!CI || !CI->isInlineAsm());
5543 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
5549 CloneFuncIfNeeded(NumClones);
5554 assert(SI !=
FS->callsites().end());
5555 auto &StackNode = *(
SI++);
5561 for (
auto StackId : CallsiteContext) {
5563 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
5569 CloneCallsite(StackNode, CB, CalledFunction);
5571 }
else if (CB->isTailCall() && CalledFunction) {
5574 ValueInfo CalleeVI =
5576 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
5577 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
5578 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
5579 CloneCallsite(Callsite->second, CB, CalledFunction);
5586 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
5596 for (
auto &BB :
F) {
5597 for (
auto &
I : BB) {
5600 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
5601 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
5609unsigned MemProfContextDisambiguation::recordICPInfo(
5614 uint32_t NumCandidates;
5615 uint64_t TotalCount;
5616 auto CandidateProfileData =
5617 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
5619 if (CandidateProfileData.empty())
5625 bool ICPNeeded =
false;
5626 unsigned NumClones = 0;
5627 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
5628 for (
const auto &Candidate : CandidateProfileData) {
5630 auto CalleeValueInfo =
5632 ImportSummary->getValueInfo(Candidate.Value);
5635 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
5637 auto &StackNode = *(
SI++);
5642 [](
unsigned CloneNo) { return CloneNo != 0; });
5652 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
5653 TotalCount, CallsiteInfoStartIndex});
5657void MemProfContextDisambiguation::performICP(
5659 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
5661 OptimizationRemarkEmitter &ORE) {
5668 for (
auto &
Info : ICallAnalysisInfo) {
5671 auto TotalCount =
Info.TotalCount;
5672 unsigned NumPromoted = 0;
5673 unsigned NumClones = 0;
5675 for (
auto &Candidate :
Info.CandidateProfileData) {
5686 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
5687 if (TargetFunction ==
nullptr ||
5695 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
5696 <<
"Memprof cannot promote indirect call: target with md5sum "
5697 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
5706 const char *Reason =
nullptr;
5709 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
5710 <<
"Memprof cannot promote indirect call to "
5711 <<
ore::NV(
"TargetFunction", TargetFunction)
5712 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
5723 CallBase *CBClone = CB;
5724 for (
unsigned J = 0; J < NumClones; J++) {
5733 TotalCount, isSamplePGO, &ORE);
5734 auto *TargetToUse = TargetFunction;
5737 if (StackNode.
Clones[J]) {
5756 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5758 <<
" promoted and assigned to call function clone "
5759 <<
ore::NV(
"Callee", TargetToUse));
5763 TotalCount -= Candidate.Count;
5768 CallBase *CBClone = CB;
5769 for (
unsigned J = 0; J < NumClones; J++) {
5774 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
5777 if (TotalCount != 0)
5779 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
5780 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
5785template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
5786bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
5788 dbgs() <<
"CCG before cloning:\n";
5792 exportToDot(
"postbuild");
5805 dbgs() <<
"CCG after cloning:\n";
5809 exportToDot(
"cloned");
5811 bool Changed = assignFunctions();
5814 dbgs() <<
"CCG after assigning function clones:\n";
5818 exportToDot(
"clonefuncassign");
5821 printTotalSizes(
errs());
5826bool MemProfContextDisambiguation::processModule(
5828 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
5833 return applyImport(M);
5846 ModuleCallsiteContextGraph CCG(M, OREGetter);
5847 return CCG.process();
5852 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
5857 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
5861 "-memprof-dot-scope=context requires -memprof-dot-context-id");
5865 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
5866 "-memprof-dot-context-id");
5867 if (ImportSummary) {
5877 auto ReadSummaryFile =
5879 if (!ReadSummaryFile) {
5886 if (!ImportSummaryForTestingOrErr) {
5892 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
5893 ImportSummary = ImportSummaryForTesting.get();
5902 if (!processModule(M, OREGetter))
5919 IndexCallsiteContextGraph CCG(Index, isPrevailing);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...