50#include <unordered_map>
55#define DEBUG_TYPE "memprof-context-disambiguation"
58 "Number of function clones created during whole program analysis");
60 "Number of function clones created during ThinLTO backend");
62 "Number of functions that had clones created during ThinLTO backend");
63STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
64 "cloned) during whole program analysis");
65STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
66 "during whole program analysis");
68 "Number of not cold static allocations (possibly cloned) during "
70STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
71 "(possibly cloned) during ThinLTO backend");
73 "Number of original (not cloned) allocations with memprof profiles "
74 "during ThinLTO backend");
76 AllocVersionsThinBackend,
77 "Number of allocation versions (including clones) during ThinLTO backend");
79 "Maximum number of allocation versions created for an original "
80 "allocation during ThinLTO backend");
82 "Number of unclonable ambigous allocations during ThinLTO backend");
84 "Number of edges removed due to mismatched callees (profiled vs IR)");
86 "Number of profiled callees found via tail calls");
88 "Aggregate depth of profiled callees found via tail calls");
90 "Maximum depth of profiled callees found via tail calls");
92 "Number of profiled callees found via multiple tail call chains");
93STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
94STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
95STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
97 "Number of missing alloc nodes for context ids");
99 "Number of calls skipped during cloning due to unexpected operand");
101 "Number of callsites assigned to call multiple non-matching clones");
102STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
103STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
104STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
109 cl::desc(
"Specify the path prefix of the MemProf dot files."));
113 cl::desc(
"Export graph to dot files."));
118 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
128 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
133 "Export only nodes with contexts feeding given "
134 "-memprof-dot-alloc-id"),
136 "Export only nodes with given -memprof-dot-context-id")));
140 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
141 "or to highlight if -memprof-dot-scope=all"));
145 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
146 "highlight otherwise"));
150 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
154 cl::desc(
"Perform verification checks on CallingContextGraph."));
158 cl::desc(
"Perform frequent verification checks on nodes."));
161 "memprof-import-summary",
162 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
168 cl::desc(
"Max depth to recursively search for missing "
169 "frames through tail calls."));
174 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
178 cl::desc(
"Allow cloning of contexts through recursive cycles"));
185 cl::desc(
"Merge clones before assigning functions"));
194 cl::desc(
"Allow cloning of contexts having recursive cycles"));
200 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
211 cl::desc(
"Linking with hot/cold operator new interfaces"));
216 "Require target function definition when promoting indirect calls"));
237template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
238class CallsiteContextGraph {
240 CallsiteContextGraph() =
default;
241 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
242 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
249 void identifyClones();
256 bool assignFunctions();
263 const CallsiteContextGraph &CCG) {
269 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
271 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
273 void exportToDot(std::string Label)
const;
276 struct FuncInfo final
277 :
public std::pair<FuncTy *, unsigned > {
278 using Base = std::pair<FuncTy *, unsigned>;
279 FuncInfo(
const Base &
B) :
Base(
B) {}
280 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
281 explicit operator bool()
const {
return this->first !=
nullptr; }
282 FuncTy *
func()
const {
return this->first; }
283 unsigned cloneNo()
const {
return this->second; }
287 struct CallInfo final :
public std::pair<CallTy, unsigned > {
288 using Base = std::pair<CallTy, unsigned>;
290 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
292 explicit operator bool()
const {
return (
bool)this->first; }
293 CallTy call()
const {
return this->first; }
294 unsigned cloneNo()
const {
return this->second; }
295 void setCloneNo(
unsigned N) { this->second =
N; }
297 if (!
operator bool()) {
303 OS <<
"\t(clone " << cloneNo() <<
")";
326 bool Recursive =
false;
353 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
357 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
361 bool useCallerEdgesForContextInfo()
const {
366 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
384 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
385 Count +=
Edge->getContextIds().size();
388 auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
389 CalleeEdges, useCallerEdgesForContextInfo()
391 : std::vector<std::shared_ptr<ContextEdge>>());
392 for (
const auto &Edge : Edges)
399 uint8_t computeAllocType()
const {
403 auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
404 CalleeEdges, useCallerEdgesForContextInfo()
406 : std::vector<std::shared_ptr<ContextEdge>>());
407 for (
const auto &Edge : Edges) {
418 bool emptyContextIds()
const {
419 auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
420 CalleeEdges, useCallerEdgesForContextInfo()
422 : std::vector<std::shared_ptr<ContextEdge>>());
423 for (
const auto &Edge : Edges) {
424 if (!
Edge->getContextIds().empty())
431 std::vector<ContextNode *> Clones;
434 ContextNode *CloneOf =
nullptr;
436 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
438 ContextNode(
bool IsAllocation,
CallInfo C)
439 : IsAllocation(IsAllocation),
Call(
C) {}
441 void addClone(ContextNode *Clone) {
443 CloneOf->Clones.push_back(Clone);
444 Clone->CloneOf = CloneOf;
446 Clones.push_back(Clone);
448 Clone->CloneOf =
this;
452 ContextNode *getOrigNode() {
459 unsigned int ContextId);
461 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
462 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
463 void eraseCalleeEdge(
const ContextEdge *Edge);
464 void eraseCallerEdge(
const ContextEdge *Edge);
468 bool hasCall()
const {
return (
bool)
Call.call(); }
474 bool isRemoved()
const {
480 (AllocTypes == (
uint8_t)AllocationType::None) ==
482 return AllocTypes == (
uint8_t)AllocationType::None;
510 bool IsBackedge =
false;
518 ContextIds(
std::
move(ContextIds)) {}
524 inline void clear() {
526 AllocTypes = (
uint8_t)AllocationType::None;
534 inline bool isRemoved()
const {
535 if (Callee || Caller)
556 void removeNoneTypeCalleeEdges(ContextNode *
Node);
557 void removeNoneTypeCallerEdges(ContextNode *
Node);
559 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
565 template <
class NodeT,
class IteratorT>
566 std::vector<uint64_t>
571 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
574 template <
class NodeT,
class IteratorT>
575 void addStackNodesForMIB(ContextNode *AllocNode,
584 void updateStackNodes();
588 void handleCallsitesWithMultipleTargets();
591 void markBackedges();
601 bool partitionCallsByCallee(
603 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
610 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
617 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
622 struct CallContextInfo {
626 std::vector<uint64_t> StackIds;
640 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
641 bool CalleeIter =
true);
649 void assignStackNodesPostOrder(
662 void propagateDuplicateContextIds(
668 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
676 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
686 calleesMatch(CallTy Call, EdgeIter &EI,
691 const FuncTy *getCalleeFunc(CallTy Call) {
692 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(Call);
698 bool calleeMatchesFunc(
699 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
700 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
701 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
702 Call, Func, CallerFunc, FoundCalleeChain);
706 bool sameCallee(CallTy Call1, CallTy Call2) {
707 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
712 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
713 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
718 uint64_t getLastStackId(CallTy Call) {
719 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
724 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
725 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
730 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(Call);
735 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
736 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
742 FuncInfo cloneFunctionForCallsite(
744 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
745 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
746 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
751 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
752 unsigned CloneNo)
const {
753 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
757 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
759 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
760 auto *NewNode = NodeOwner.back().get();
762 NodeToCallingFunc[NewNode] =
F;
767 ContextNode *getNodeForInst(
const CallInfo &
C);
768 ContextNode *getNodeForAlloc(
const CallInfo &
C);
769 ContextNode *getNodeForStackId(
uint64_t StackId);
791 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
798 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
799 ContextNode *NewCallee,
800 bool NewClone =
false,
808 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
809 ContextNode *NewCaller);
820 void mergeNodeCalleeClones(
825 void findOtherCallersToShareMerge(
826 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
857 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
863 unsigned int LastContextId = 0;
866template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
868 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
869template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
871 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
872template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
874 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
875template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
877 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
880class ModuleCallsiteContextGraph
881 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
884 ModuleCallsiteContextGraph(
889 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
894 bool calleeMatchesFunc(
896 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
898 bool findProfiledCalleeThroughTailCalls(
900 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
901 bool &FoundMultipleCalleeChains);
903 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
906 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
907 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
909 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
911 std::vector<CallInfo> &CallsWithMetadataInFunc,
914 unsigned CloneNo)
const;
923struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
925 IndexCall(std::nullptr_t) : IndexCall() {}
930 IndexCall *operator->() {
return this; }
934 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(
Base)) {
937 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(
Base);
958class IndexCallsiteContextGraph
959 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
962 IndexCallsiteContextGraph(
967 ~IndexCallsiteContextGraph() {
972 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
974 for (
auto &Callsite :
I.second)
975 FS->addCallsite(*Callsite.second);
980 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
985 bool calleeMatchesFunc(
988 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
989 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
990 bool findProfiledCalleeThroughTailCalls(
992 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
993 bool &FoundMultipleCalleeChains);
994 uint64_t getLastStackId(IndexCall &Call);
995 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
998 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
1000 IndexCall>::FuncInfo
1001 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
1003 std::vector<CallInfo> &CallsWithMetadataInFunc,
1005 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
1006 unsigned CloneNo)
const;
1010 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1021 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1022 FunctionCalleesToSynthesizedCallsiteInfos;
1034 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1037 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1050 ((
uint8_t)AllocationType::NotCold | (
uint8_t)AllocationType::Cold))
1051 return AllocationType::NotCold;
1059template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1060bool allocTypesMatch(
1061 const std::vector<uint8_t> &InAllocTypes,
1062 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1066 assert(InAllocTypes.size() == Edges.size());
1068 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1070 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1074 if (l == (uint8_t)AllocationType::None ||
1075 r->AllocTypes == (uint8_t)AllocationType::None)
1077 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1086template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1087bool allocTypesMatchClone(
1088 const std::vector<uint8_t> &InAllocTypes,
1089 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1090 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1094 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1098 for (
const auto &E : Clone->CalleeEdges) {
1100 EdgeCalleeMap[E->Callee] = E->AllocTypes;
1104 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1105 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1107 if (Iter == EdgeCalleeMap.
end())
1112 if (InAllocTypes[
I] == (
uint8_t)AllocationType::None ||
1113 Iter->second == (
uint8_t)AllocationType::None)
1115 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
// Map a call to its ContextNode: allocation calls resolve through
// getNodeForAlloc; other callsites fall back to a lookup in
// NonAllocationCallToContextNodeMap (lookup returns nullptr when absent).
1123template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1124typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1125CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1127 ContextNode *
Node = getNodeForAlloc(
C);
1131 return NonAllocationCallToContextNodeMap.lookup(
C);
// Map an allocation call to its ContextNode via
// AllocationCallToContextNodeMap; lookup() yields nullptr when the call has
// no recorded node.
1134template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1135typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1136CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1138 return AllocationCallToContextNodeMap.lookup(
C);
// Map a stack frame id to its ContextNode using StackEntryIdToContextNodeMap;
// returns the mapped node when an entry exists.
1141template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1142typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1143CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1145 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1146 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1147 return StackEntryNode->second;
// Record ContextId on the edge from the given Caller: reuse an existing
// caller edge when one is found, otherwise create a new shared ContextEdge
// and link it into both this node's CallerEdges and the Caller's CalleeEdges.
// (Method name is on an elided line; presumably addOrUpdateCallerEdge —
// confirm against the full source.)
1151template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1152void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1154 unsigned int ContextId) {
1155 for (
auto &Edge : CallerEdges) {
1156 if (
Edge->Caller == Caller) {
1158 Edge->getContextIds().insert(ContextId);
1162 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1164 CallerEdges.push_back(Edge);
1165 Caller->CalleeEdges.push_back(Edge);
// Unlink Edge from both endpoints: the callee's CallerEdges and the caller's
// CalleeEdges. When an iterator EI into one of those vectors is supplied,
// erase through it so the caller's loop stays valid; CalleeIter selects which
// side EI indexes. The edge counts saved up front feed the shrink asserts at
// the end.
1168template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1169void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1170 ContextEdge *Edge, EdgeIter *EI,
bool CalleeIter) {
1171 assert(!EI || (*EI)->get() == Edge);
1186 auto CalleeCallerCount =
Callee->CallerEdges.size();
1187 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1190 Callee->eraseCallerEdge(Edge);
1191 Caller->eraseCalleeEdge(Edge);
1192 }
else if (CalleeIter) {
1193 Callee->eraseCallerEdge(Edge);
1194 *EI =
Caller->CalleeEdges.erase(*EI);
1196 Caller->eraseCalleeEdge(Edge);
1197 *EI =
Callee->CallerEdges.erase(*EI);
1199 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1200 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
// Drop all callee edges of Node whose AllocTypes is None, removing each from
// the graph via removeEdgeFromGraph (which advances the iterator EI).
1203template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1204void CallsiteContextGraph<
1205 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
1206 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1208 if (
Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1210 removeEdgeFromGraph(
Edge.get(), &EI,
true);
// Drop all caller edges of Node whose AllocTypes is None, unlinking each from
// the caller's CalleeEdges and erasing it from Node's CallerEdges in place.
1216template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1217void CallsiteContextGraph<
1218 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *
Node) {
1219 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1221 if (
Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1223 Edge->Caller->eraseCalleeEdge(
Edge.get());
1224 EI =
Node->CallerEdges.erase(EI);
// Linear scan of this node's CalleeEdges for the edge whose Callee matches
// the given node.
1230template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1231typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1232CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1233 findEdgeFromCallee(
const ContextNode *Callee) {
1234 for (
const auto &Edge : CalleeEdges)
1235 if (
Edge->Callee == Callee)
// Linear scan of this node's CallerEdges for the edge whose Caller matches
// the given node.
1240template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1241typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1242CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1243 findEdgeFromCaller(
const ContextNode *Caller) {
1244 for (
const auto &Edge : CallerEdges)
1245 if (
Edge->Caller == Caller)
// Erase the callee edge matching Edge by pointer identity; the edge must
// exist (asserted before the erase).
1250template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1251void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1252 eraseCalleeEdge(
const ContextEdge *Edge) {
1254 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1255 return CalleeEdge.get() == Edge;
1257 assert(EI != CalleeEdges.end());
1258 CalleeEdges.erase(EI);
// Erase the caller edge matching Edge by pointer identity; the edge must
// exist (asserted before the erase).
1261template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1262void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1263 eraseCallerEdge(
const ContextEdge *Edge) {
1265 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1266 return CallerEdge.get() == Edge;
1268 assert(EI != CallerEdges.end());
1269 CallerEdges.erase(EI);
1272template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1273uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1276 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1278 for (
auto Id : ContextIds) {
// Accumulate allocation-type bits only for context ids present in both sets:
// Node1Ids is iterated, and ids missing from Node2Ids are skipped.
1287template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1289CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1293 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1295 for (
auto Id : Node1Ids) {
1296 if (!Node2Ids.
count(Id))
// Symmetric wrapper over intersectAllocTypesImpl that iterates the smaller
// id set for efficiency (the impl loops over its first argument).
1306template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1307uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1310 if (Node1Ids.
size() < Node2Ids.
size())
1311 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1313 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1316template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1317typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1318CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1320 assert(!getNodeForAlloc(Call));
1321 ContextNode *AllocNode = createNewNode(
true,
F, Call);
1322 AllocationCallToContextNodeMap[
Call] = AllocNode;
1324 AllocNode->OrigStackOrAllocId = LastContextId;
1327 AllocNode->AllocTypes = (
uint8_t)AllocationType::None;
1336 if (AllocTypes & (
uint8_t)AllocationType::NotCold)
1338 if (AllocTypes & (
uint8_t)AllocationType::Cold)
1343template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1344template <
class NodeT,
class IteratorT>
1345void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1354 ContextIdToAllocationType[++LastContextId] =
AllocType;
1356 if (!ContextSizeInfo.
empty()) {
1357 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1367 ContextNode *PrevNode = AllocNode;
1374 ContextIter != StackContext.
end(); ++ContextIter) {
1375 auto StackId = getStackId(*ContextIter);
1376 ContextNode *
StackNode = getNodeForStackId(StackId);
1379 StackEntryIdToContextNodeMap[StackId] =
StackNode;
1380 StackNode->OrigStackOrAllocId = StackId;
// Assign a fresh context id for each id in StackSequenceContextIds: record
// the old->new mapping, copy the allocation type for the new id, and keep
// DotAllocContextIds membership in sync (for dot-file export filtering).
// Returns the set of newly assigned ids.
1395template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1397CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1401 for (
auto OldId : StackSequenceContextIds) {
1402 NewContextIds.
insert(++LastContextId);
1403 OldToNewContextIds[OldId].insert(LastContextId);
1404 assert(ContextIdToAllocationType.count(OldId));
1406 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1407 if (DotAllocContextIds.
contains(OldId))
1408 DotAllocContextIds.
insert(LastContextId);
1410 return NewContextIds;
1413template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1414void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1415 propagateDuplicateContextIds(
1420 for (
auto Id : ContextIds)
1421 if (
auto NewId = OldToNewContextIds.find(Id);
1422 NewId != OldToNewContextIds.end())
1428 auto UpdateCallers = [&](ContextNode *
Node,
1430 auto &&UpdateCallers) ->
void {
1431 for (
const auto &Edge :
Node->CallerEdges) {
1435 ContextNode *NextNode =
Edge->Caller;
1439 if (!NewIdsToAdd.
empty()) {
1440 Edge->getContextIds().insert_range(NewIdsToAdd);
1441 UpdateCallers(NextNode, Visited, UpdateCallers);
1447 for (
auto &Entry : AllocationCallToContextNodeMap) {
1449 UpdateCallers(
Node, Visited, UpdateCallers);
1453template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1454void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1455 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1460 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1467 for (
auto &CE : OrigEdges) {
1468 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1469 for (
auto Id :
CE->getContextIds())
1470 if (!AllCallerContextIds.
insert(Id).second)
1471 RecursiveContextIds.
insert(Id);
1475 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1482 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1483 NotFoundContextIds);
1486 if (RecursiveContextIds.
empty()) {
1489 RemainingContextIds.
swap(NotFoundContextIds);
1501 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1504 if (NewEdgeContextIds.
empty()) {
1508 if (TowardsCallee) {
1509 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1510 auto NewEdge = std::make_shared<ContextEdge>(
1511 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1512 NewNode->CalleeEdges.push_back(NewEdge);
1513 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1515 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1516 auto NewEdge = std::make_shared<ContextEdge>(
1517 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1518 NewNode->CallerEdges.push_back(NewEdge);
1519 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1522 if (
Edge->getContextIds().empty()) {
1523 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1530template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1532 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1539template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1541 bool CheckEdges =
true) {
1542 if (
Node->isRemoved())
1546 auto NodeContextIds =
Node->getContextIds();
1550 if (
Node->CallerEdges.size()) {
1552 Node->CallerEdges.front()->ContextIds);
1555 checkEdge<DerivedCCG, FuncTy, CallTy>(
Edge);
1563 NodeContextIds == CallerEdgeContextIds ||
1566 if (
Node->CalleeEdges.size()) {
1568 Node->CalleeEdges.front()->ContextIds);
1571 checkEdge<DerivedCCG, FuncTy, CallTy>(
Edge);
1578 NodeContextIds == CalleeEdgeContextIds);
1587 for (
const auto &E :
Node->CalleeEdges)
1593template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1594void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1595 assignStackNodesPostOrder(
1598 &StackIdToMatchingCalls,
1607 auto CallerEdges =
Node->CallerEdges;
1608 for (
auto &Edge : CallerEdges) {
1610 if (
Edge->isRemoved()) {
1614 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1615 CallToMatchingCall);
1624 if (
Node->IsAllocation ||
1625 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1628 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1632 if (Calls.size() == 1) {
1633 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1634 if (Ids.size() == 1) {
1635 assert(SavedContextIds.empty());
1638 if (
Node->Recursive)
1640 Node->setCall(Call);
1641 NonAllocationCallToContextNodeMap[
Call] =
Node;
1651 ContextNode *LastNode = getNodeForStackId(LastId);
1656 ContextNode *LastNode =
Node;
1663 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1664 bool CreatedNode =
false;
1665 for (
unsigned I = 0;
I < Calls.size();
1666 I++, PrevIterCreatedNode = CreatedNode) {
1667 CreatedNode =
false;
1668 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1671 if (SavedContextIds.empty()) {
1676 if (!CallToMatchingCall.
contains(Call))
1678 auto MatchingCall = CallToMatchingCall[
Call];
1679 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1683 assert(
I > 0 && !PrevIterCreatedNode);
1686 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1691 assert(LastId == Ids.back());
1700 ContextNode *PrevNode = LastNode;
1704 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1706 ContextNode *CurNode = getNodeForStackId(Id);
1710 assert(!CurNode->Recursive);
1712 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1724 if (SavedContextIds.empty()) {
1733 ContextNode *NewNode = createNewNode(
false, Func, Call);
1734 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1736 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1738 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1744 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1749 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1754 for (
auto Id : Ids) {
1755 ContextNode *CurNode = getNodeForStackId(Id);
1762 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1769 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1770 if (PrevEdge->getContextIds().empty())
1771 removeEdgeFromGraph(PrevEdge);
1776 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1777 ? (
uint8_t)AllocationType::None
1778 : CurNode->computeAllocType();
1782 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode,
true);
1783 for (
auto Id : Ids) {
1784 ContextNode *CurNode = getNodeForStackId(Id);
1787 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode,
true);
1793template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1794void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1803 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1804 for (
auto &Call : CallsWithMetadata) {
1806 if (AllocationCallToContextNodeMap.count(Call))
1808 auto StackIdsWithContextNodes =
1809 getStackIdsWithContextNodesForCall(
Call.call());
1812 if (StackIdsWithContextNodes.empty())
1816 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1817 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1832 for (
auto &It : StackIdToMatchingCalls) {
1833 auto &Calls = It.getSecond();
1835 if (Calls.size() == 1) {
1836 auto &Ids = Calls[0].StackIds;
1837 if (Ids.size() == 1)
1851 for (
const auto &[
Idx, CallCtxInfo] :
enumerate(Calls))
1852 FuncToIndex.
insert({CallCtxInfo.Func,
Idx});
1855 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1856 return A.StackIds.size() >
B.StackIds.size() ||
1857 (
A.StackIds.size() ==
B.StackIds.size() &&
1858 (
A.StackIds <
B.StackIds ||
1859 (
A.StackIds ==
B.StackIds &&
1860 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
1867 ContextNode *LastNode = getNodeForStackId(LastId);
1871 if (LastNode->Recursive)
1887 for (
unsigned I = 0;
I < Calls.size();
I++) {
1888 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1889 assert(SavedContextIds.empty());
1890 assert(LastId == Ids.back());
1895 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1896 MatchingIdsFuncSet.
clear();
1905 ContextNode *PrevNode = LastNode;
1906 ContextNode *CurNode = LastNode;
1911 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1913 CurNode = getNodeForStackId(Id);
1917 if (CurNode->Recursive) {
1922 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1943 if (StackSequenceContextIds.
empty()) {
1956 if (Ids.back() != getLastStackId(Call)) {
1957 for (
const auto &PE : LastNode->CallerEdges) {
1958 set_subtract(StackSequenceContextIds, PE->getContextIds());
1959 if (StackSequenceContextIds.
empty())
1963 if (StackSequenceContextIds.
empty())
1975 MatchingIdsFuncSet.
insert(Func);
1982 bool DuplicateContextIds =
false;
1983 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1984 auto &CallCtxInfo = Calls[J];
1985 auto &NextIds = CallCtxInfo.StackIds;
1988 auto *NextFunc = CallCtxInfo.Func;
1989 if (NextFunc != Func) {
1992 DuplicateContextIds =
true;
1995 auto &NextCall = CallCtxInfo.Call;
1996 CallToMatchingCall[NextCall] =
Call;
2007 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2008 StackSequenceContextIds.
size());
2011 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2012 : StackSequenceContextIds;
2013 assert(!SavedContextIds.empty());
2015 if (!DuplicateContextIds) {
2019 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2020 if (LastNodeContextIds.
empty())
2027 propagateDuplicateContextIds(OldToNewContextIds);
2038 for (
auto &Entry : AllocationCallToContextNodeMap)
2039 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2040 CallToMatchingCall);
2047 Call->getMetadata(LLVMContext::MD_callsite));
2048 return CallsiteContext.
back();
// Summary-index flavor: the innermost (last) stack id of a callsite, mapped
// through the index's stack id table; Call must wrap a CallsiteInfo.
2051uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
2052 assert(isa<CallsiteInfo *>(Call));
2054 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
2056 return Index.getStackIdAtIndex(CallsiteContext.
back());
2078 auto Pos =
F.getName().find_last_of(
'.');
2081 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
// Dot-graph label for a module IR call: "<caller fn> -> <called fn>" built
// from the call's containing function and its direct callee.
2087std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2089 unsigned CloneNo)
const {
2090 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
2091 cast<CallBase>(Call)->getCalledFunction()->getName())
2095std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2096 const IndexCall &Call,
2097 unsigned CloneNo)
const {
2098 auto VI = FSToVIMap.find(Func);
2099 assert(VI != FSToVIMap.end());
2101 if (isa<AllocInfo *>(Call))
2102 return CallerName +
" -> alloc";
2104 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call);
2105 return CallerName +
" -> " +
2107 Callsite->Clones[CloneNo]);
2111std::vector<uint64_t>
2112ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2115 Call->getMetadata(LLVMContext::MD_callsite));
2116 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2120std::vector<uint64_t>
2121IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
2122 assert(isa<CallsiteInfo *>(Call));
2124 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
2130template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2131template <
class NodeT,
class IteratorT>
2132std::vector<uint64_t>
2133CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2135 std::vector<uint64_t> StackIds;
2136 for (
auto IdOrIndex : CallsiteContext) {
2137 auto StackId = getStackId(IdOrIndex);
2138 ContextNode *
Node = getNodeForStackId(StackId);
2141 StackIds.push_back(StackId);
2146ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2149 :
Mod(
M), OREGetter(OREGetter) {
2151 std::vector<CallInfo> CallsWithMetadata;
2152 for (
auto &BB :
F) {
2153 for (
auto &
I : BB) {
2154 if (!isa<CallBase>(
I))
2156 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2157 CallsWithMetadata.push_back(&
I);
2158 auto *AllocNode = addAllocNode(&
I, &
F);
2159 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2163 for (
auto &MDOp : MemProfMD->operands()) {
2164 auto *MIBMD = cast<const MDNode>(MDOp);
2165 std::vector<ContextTotalSize> ContextSizeInfo;
2167 if (MIBMD->getNumOperands() > 2) {
2168 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2169 MDNode *ContextSizePair =
2170 dyn_cast<MDNode>(MIBMD->getOperand(
I));
2172 uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
2175 uint64_t TotalSize = mdconst::dyn_extract<ConstantInt>(
2178 ContextSizeInfo.push_back({FullStackId, TotalSize});
2184 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2185 AllocNode, StackContext, CallsiteContext,
2191 DotAllocContextIds = AllocNode->getContextIds();
2192 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
2195 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2196 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2199 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2200 CallsWithMetadata.push_back(&
I);
2204 if (!CallsWithMetadata.empty())
2205 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2209 dbgs() <<
"CCG before updating call stack chains:\n";
2214 exportToDot(
"prestackupdate");
2219 exportToDot(
"poststackupdate");
2221 handleCallsitesWithMultipleTargets();
2226 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2227 for (
auto &Call : FuncEntry.second)
2228 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2231IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2236 for (
auto &
I : Index) {
2238 for (
auto &S :
VI.getSummaryList()) {
2247 !isPrevailing(
VI.getGUID(), S.get()))
2249 auto *
FS = dyn_cast<FunctionSummary>(S.get());
2252 std::vector<CallInfo> CallsWithMetadata;
2253 if (!
FS->allocs().empty()) {
2254 for (
auto &AN :
FS->mutableAllocs()) {
2259 if (AN.MIBs.empty())
2261 IndexCall AllocCall(&AN);
2262 CallsWithMetadata.push_back(AllocCall);
2263 auto *AllocNode = addAllocNode(AllocCall, FS);
2271 AN.ContextSizeInfos.size() == AN.MIBs.size());
2273 for (
auto &MIB : AN.MIBs) {
2276 std::vector<ContextTotalSize> ContextSizeInfo;
2277 if (!AN.ContextSizeInfos.empty()) {
2278 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2279 ContextSizeInfo.push_back({FullStackId, TotalSize});
2281 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2282 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2289 DotAllocContextIds = AllocNode->getContextIds();
2290 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
2295 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2299 if (!
FS->callsites().empty())
2300 for (
auto &SN :
FS->mutableCallsites()) {
2301 IndexCall StackNodeCall(&SN);
2302 CallsWithMetadata.push_back(StackNodeCall);
2305 if (!CallsWithMetadata.empty())
2306 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2308 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2314 dbgs() <<
"CCG before updating call stack chains:\n";
2319 exportToDot(
"prestackupdate");
2324 exportToDot(
"poststackupdate");
2326 handleCallsitesWithMultipleTargets();
2331template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2332void CallsiteContextGraph<DerivedCCG, FuncTy,
2333 CallTy>::handleCallsitesWithMultipleTargets() {
2348 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2349 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2359 std::vector<CallInfo> AllCalls;
2360 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2361 AllCalls.push_back(
Node->Call);
2375 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2378 auto It = AllCalls.begin();
2380 for (; It != AllCalls.end(); ++It) {
2383 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2386 if (!
Edge->Callee->hasCall())
2388 assert(NodeToCallingFunc.count(
Edge->Callee));
2390 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2399 if (
Node->Call != ThisCall) {
2400 Node->setCall(ThisCall);
2411 Node->MatchingCalls.clear();
2414 if (It == AllCalls.end()) {
2415 RemovedEdgesWithMismatchedCallees++;
2424 for (++It; It != AllCalls.end(); ++It) {
2428 Node->MatchingCalls.push_back(ThisCall);
2437 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2438 return !it.second->hasCall() || it.second->Call != it.first;
2442 for (
auto &[Call,
Node] : NewCallToNode)
2443 NonAllocationCallToContextNodeMap[
Call] =
Node;
2447 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
2448 NonAllocationCallToContextNodeMap[
Call] =
Node;
2451template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2452bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2454 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2458 struct CallsWithSameCallee {
2459 std::vector<CallInfo> Calls;
2460 ContextNode *
Node =
nullptr;
2466 for (
auto ThisCall : AllCalls) {
2467 auto *
F = getCalleeFunc(
ThisCall.call());
2469 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2478 for (
const auto &Edge :
Node->CalleeEdges) {
2479 if (!
Edge->Callee->hasCall())
2481 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2482 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2483 CalleeNodeToCallInfo[
Edge->Callee] =
2484 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2490 if (CalleeNodeToCallInfo.
empty())
2502 ContextNode *UnmatchedCalleesNode =
nullptr;
2504 bool UsedOrigNode =
false;
2509 auto CalleeEdges =
Node->CalleeEdges;
2510 for (
auto &Edge : CalleeEdges) {
2511 if (!
Edge->Callee->hasCall())
2516 ContextNode *CallerNodeToUse =
nullptr;
2521 if (!UnmatchedCalleesNode)
2522 UnmatchedCalleesNode =
2523 createNewNode(
false, NodeToCallingFunc[
Node]);
2524 CallerNodeToUse = UnmatchedCalleesNode;
2528 auto *
Info = CalleeNodeToCallInfo[
Edge->Callee];
2531 if (!UsedOrigNode) {
2534 Node->MatchingCalls.clear();
2535 UsedOrigNode =
true;
2538 createNewNode(
false, NodeToCallingFunc[
Node]);
2542 Info->Node->setCall(
Info->Calls.front());
2548 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2550 CallerNodeToUse =
Info->Node;
2554 if (CallerNodeToUse ==
Node)
2557 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2564 for (
auto &
I : CalleeNodeToCallInfo)
2565 removeNoneTypeCallerEdges(
I.second->Node);
2566 if (UnmatchedCalleesNode)
2567 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2568 removeNoneTypeCallerEdges(
Node);
2581 return Index.getStackIdAtIndex(IdOrIndex);
2584template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2585bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2586 CallTy Call, EdgeIter &EI,
2589 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2590 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2593 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2594 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
2599 if (FoundCalleeChain.empty())
2603 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2607 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2608 CurEdge->AllocTypes |=
Edge->AllocTypes;
2613 auto NewEdge = std::make_shared<ContextEdge>(
2614 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2615 Callee->CallerEdges.push_back(NewEdge);
2616 if (Caller ==
Edge->Caller) {
2620 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2623 "Iterator position not restored after insert and increment");
2625 Caller->CalleeEdges.push_back(NewEdge);
2630 auto *CurCalleeNode =
Edge->Callee;
2631 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2632 ContextNode *NewNode =
nullptr;
2634 if (TailCallToContextNodeMap.
count(NewCall)) {
2635 NewNode = TailCallToContextNodeMap[NewCall];
2636 NewNode->AllocTypes |=
Edge->AllocTypes;
2638 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2640 NewNode = createNewNode(
false, Func, NewCall);
2641 TailCallToContextNodeMap[NewCall] = NewNode;
2642 NewNode->AllocTypes =
Edge->AllocTypes;
2646 AddEdge(NewNode, CurCalleeNode);
2648 CurCalleeNode = NewNode;
2652 AddEdge(
Edge->Caller, CurCalleeNode);
2660 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2672bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2674 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2675 bool &FoundMultipleCalleeChains) {
2682 FoundCalleeChain.push_back({Callsite,
F});
2685 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
2687 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
2689 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2697 bool FoundSingleCalleeChain =
false;
2698 for (
auto &BB : *CalleeFunc) {
2699 for (
auto &
I : BB) {
2700 auto *CB = dyn_cast<CallBase>(&
I);
2701 if (!CB || !CB->isTailCall())
2703 auto *CalledValue = CB->getCalledOperand();
2704 auto *CalledFunction = CB->getCalledFunction();
2705 if (CalledValue && !CalledFunction) {
2706 CalledValue = CalledValue->stripPointerCasts();
2708 CalledFunction = dyn_cast<Function>(CalledValue);
2712 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2713 assert(!CalledFunction &&
2714 "Expected null called function in callsite for alias");
2715 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2717 if (!CalledFunction)
2719 if (CalledFunction == ProfiledCallee) {
2720 if (FoundSingleCalleeChain) {
2721 FoundMultipleCalleeChains =
true;
2724 FoundSingleCalleeChain =
true;
2725 FoundProfiledCalleeCount++;
2726 FoundProfiledCalleeDepth +=
Depth;
2727 if (
Depth > FoundProfiledCalleeMaxDepth)
2728 FoundProfiledCalleeMaxDepth =
Depth;
2729 SaveCallsiteInfo(&
I, CalleeFunc);
2730 }
else if (findProfiledCalleeThroughTailCalls(
2731 ProfiledCallee, CalledFunction,
Depth + 1,
2732 FoundCalleeChain, FoundMultipleCalleeChains)) {
2735 assert(!FoundMultipleCalleeChains);
2736 if (FoundSingleCalleeChain) {
2737 FoundMultipleCalleeChains =
true;
2740 FoundSingleCalleeChain =
true;
2741 SaveCallsiteInfo(&
I, CalleeFunc);
2742 }
else if (FoundMultipleCalleeChains)
2747 return FoundSingleCalleeChain;
2751 auto *CB = dyn_cast<CallBase>(Call);
2752 if (!CB->getCalledOperand() || CB->isIndirectCall())
2754 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2755 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2757 return dyn_cast<Function>(Alias->getAliasee());
2758 return dyn_cast<Function>(CalleeVal);
2761bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2763 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2764 auto *CB = dyn_cast<CallBase>(Call);
2765 if (!CB->getCalledOperand() || CB->isIndirectCall())
2767 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2768 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2769 if (CalleeFunc == Func)
2771 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2772 if (Alias && Alias->getAliasee() == Func)
2783 bool FoundMultipleCalleeChains =
false;
2784 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2786 FoundMultipleCalleeChains)) {
2787 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2788 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2789 <<
" that actually called " << CalleeVal->getName()
2790 << (FoundMultipleCalleeChains
2791 ?
" (found multiple possible chains)"
2794 if (FoundMultipleCalleeChains)
2795 FoundProfiledCalleeNonUniquelyCount++;
// Module-IR variant of sameCallee(): determines whether two call
// instructions resolve, after stripPointerCasts() on the called operand,
// to the same direct callee Function (compared by pointer equality of the
// dyn_cast<Function> results on the final line).
// NOTE(review): this excerpt is missing interleaved original lines — the
// second signature parameter, the "return false;" bodies of the two
// indirect-call guard ifs, and the closing brace were dropped by
// extraction. Confirm against the upstream file before relying on the
// exact control flow shown here.
2802bool ModuleCallsiteContextGraph::sameCallee(
Instruction *Call1,
2804 auto *CB1 = cast<CallBase>(Call1);
2805 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2807 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2808 auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
2809 auto *CB2 = cast<CallBase>(Call2);
2810 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2812 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2813 auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
2814 return CalleeFunc1 == CalleeFunc2;
2817bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2819 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2820 bool &FoundMultipleCalleeChains) {
2829 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2830 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2833 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2836 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2837 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2844 bool FoundSingleCalleeChain =
false;
2847 !isPrevailing(CurCallee.
getGUID(), S.get()))
2849 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2852 auto FSVI = CurCallee;
2853 auto *AS = dyn_cast<AliasSummary>(S.get());
2855 FSVI = AS->getAliaseeVI();
2856 for (
auto &CallEdge :
FS->calls()) {
2857 if (!CallEdge.second.hasTailCall())
2859 if (CallEdge.first == ProfiledCallee) {
2860 if (FoundSingleCalleeChain) {
2861 FoundMultipleCalleeChains =
true;
2864 FoundSingleCalleeChain =
true;
2865 FoundProfiledCalleeCount++;
2866 FoundProfiledCalleeDepth +=
Depth;
2867 if (
Depth > FoundProfiledCalleeMaxDepth)
2868 FoundProfiledCalleeMaxDepth =
Depth;
2869 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2871 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2872 FSToVIMap[
FS] = FSVI;
2873 }
else if (findProfiledCalleeThroughTailCalls(
2874 ProfiledCallee, CallEdge.first,
Depth + 1,
2875 FoundCalleeChain, FoundMultipleCalleeChains)) {
2878 assert(!FoundMultipleCalleeChains);
2879 if (FoundSingleCalleeChain) {
2880 FoundMultipleCalleeChains =
true;
2883 FoundSingleCalleeChain =
true;
2884 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2886 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2887 FSToVIMap[
FS] = FSVI;
2888 }
else if (FoundMultipleCalleeChains)
2893 return FoundSingleCalleeChain;
2897IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
2899 if (
Callee.getSummaryList().empty())
2901 return dyn_cast<FunctionSummary>(
Callee.getSummaryList()[0]->getBaseObject());
2904bool IndexCallsiteContextGraph::calleeMatchesFunc(
2907 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2912 Callee.getSummaryList().empty()
2914 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2915 assert(FSToVIMap.count(Func));
2916 auto FuncVI = FSToVIMap[
Func];
2917 if (Callee == FuncVI ||
2932 bool FoundMultipleCalleeChains =
false;
2933 if (!findProfiledCalleeThroughTailCalls(
2934 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2935 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2936 <<
" from " << FSToVIMap[CallerFunc]
2937 <<
" that actually called " << Callee
2938 << (FoundMultipleCalleeChains
2939 ?
" (found multiple possible chains)"
2942 if (FoundMultipleCalleeChains)
2943 FoundProfiledCalleeNonUniquelyCount++;
// Summary-index (ThinLTO) variant of sameCallee(): two callsite summaries
// target the same callee iff their recorded Callee ValueInfos compare
// equal.
// NOTE(review): the dyn_cast_if_present<CallsiteInfo *> result is
// dereferenced unconditionally, so callers are expected to pass only
// IndexCalls that wrap CallsiteInfo (not AllocInfo) — verify at callsites.
// NOTE(review): the function's closing brace was dropped by extraction
// from this excerpt.
2950bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2951 ValueInfo Callee1 = dyn_cast_if_present<CallsiteInfo *>(Call1)->Callee;
2952 ValueInfo Callee2 = dyn_cast_if_present<CallsiteInfo *>(Call2)->Callee;
2953 return Callee1 == Callee2;
2956template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2957void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
2963template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2964void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2966 OS <<
"Node " <<
this <<
"\n";
2970 OS <<
" (recursive)";
2972 if (!MatchingCalls.
empty()) {
2973 OS <<
"\tMatchingCalls:\n";
2974 for (
auto &MatchingCall : MatchingCalls) {
2976 MatchingCall.print(
OS);
2981 OS <<
"\tContextIds:";
2983 auto ContextIds = getContextIds();
2984 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2985 std::sort(SortedIds.begin(), SortedIds.end());
2986 for (
auto Id : SortedIds)
2989 OS <<
"\tCalleeEdges:\n";
2990 for (
auto &Edge : CalleeEdges)
2991 OS <<
"\t\t" << *
Edge <<
"\n";
2992 OS <<
"\tCallerEdges:\n";
2993 for (
auto &Edge : CallerEdges)
2994 OS <<
"\t\t" << *
Edge <<
"\n";
2995 if (!Clones.empty()) {
2997 }
else if (CloneOf) {
2998 OS <<
"\tClone of " << CloneOf <<
"\n";
3002template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3003void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3009template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3010void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3013 << (IsBackedge ?
" (BE)" :
"")
3015 OS <<
" ContextIds:";
3016 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3017 std::sort(SortedIds.begin(), SortedIds.end());
3018 for (
auto Id : SortedIds)
3022template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3023void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3027template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3028void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3030 OS <<
"Callsite Context Graph:\n";
3031 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3032 for (
const auto Node : nodes<GraphType>(
this)) {
3033 if (
Node->isRemoved())
3040template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3041void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3043 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3044 for (
const auto Node : nodes<GraphType>(
this)) {
3045 if (
Node->isRemoved())
3047 if (!
Node->IsAllocation)
3050 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3051 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3052 std::sort(SortedIds.begin(), SortedIds.end());
3053 for (
auto Id : SortedIds) {
3054 auto TypeI = ContextIdToAllocationType.find(Id);
3055 assert(TypeI != ContextIdToAllocationType.end());
3056 auto CSI = ContextIdToContextSizeInfos.find(Id);
3057 if (CSI != ContextIdToContextSizeInfos.end()) {
3058 for (
auto &Info : CSI->second) {
3059 OS <<
"MemProf hinting: "
3061 <<
" full allocation context " <<
Info.FullStackId
3062 <<
" with total size " <<
Info.TotalSize <<
" is "
3064 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3066 <<
" due to cold byte percent";
3068 OS <<
" (context id " <<
Id <<
")";
3076template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3077void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3078 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3079 for (
const auto Node : nodes<GraphType>(
this)) {
3080 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
3081 for (
auto &Edge :
Node->CallerEdges)
3082 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
3086template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3088 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3089 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3091 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3107 return G->NodeOwner.begin()->get();
3110 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3111 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3119 decltype(&GetCallee)>;
3130template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3144 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3150 std::string LabelString =
3151 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
3152 Twine(Node->OrigStackOrAllocId))
3154 LabelString +=
"\n";
3155 if (Node->hasCall()) {
3156 auto Func =
G->NodeToCallingFunc.find(Node);
3157 assert(Func !=
G->NodeToCallingFunc.end());
3159 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
3161 LabelString +=
"null call";
3162 if (Node->Recursive)
3163 LabelString +=
" (recursive)";
3165 LabelString +=
" (external)";
3171 auto ContextIds = Node->getContextIds();
3175 bool Highlight =
false;
3184 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(Node) +
" " +
3185 getContextIds(ContextIds) +
"\"")
3189 AttributeString +=
",fontsize=\"30\"";
3191 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes, Highlight) +
"\"")
3193 if (Node->CloneOf) {
3194 AttributeString +=
",color=\"blue\"";
3195 AttributeString +=
",style=\"filled,bold,dashed\"";
3197 AttributeString +=
",style=\"filled\"";
3198 return AttributeString;
3203 auto &
Edge = *(ChildIter.getCurrent());
3208 bool Highlight =
false;
3217 auto Color = getColor(
Edge->AllocTypes, Highlight);
3218 std::string AttributeString =
3219 (
Twine(
"tooltip=\"") + getContextIds(
Edge->ContextIds) +
"\"" +
3221 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3224 if (
Edge->IsBackedge)
3225 AttributeString +=
",style=\"dotted\"";
3228 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3229 return AttributeString;
3235 if (Node->isRemoved())
3248 std::string IdString =
"ContextIds:";
3249 if (ContextIds.
size() < 100) {
3250 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3251 std::sort(SortedIds.begin(), SortedIds.end());
3252 for (
auto Id : SortedIds)
3253 IdString += (
" " +
Twine(Id)).str();
3255 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
3260 static std::string getColor(
uint8_t AllocTypes,
bool Highlight) {
3268 return !DoHighlight || Highlight ?
"brown1" :
"lightpink";
3270 return !DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3273 return Highlight ?
"magenta" :
"mediumorchid1";
// Build a unique DOT-graph identifier for a context node from its
// address, formatted as hex with an "N0x" prefix via std::stringstream.
// NOTE(review): the trailing "return Result;" (and closing brace) of the
// original were dropped by extraction from this excerpt.
3277 static std::string getNodeId(NodeRef
Node) {
3278 std::stringstream SStream;
3279 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
3280 std::string
Result = SStream.str();
3286 static bool DoHighlight;
3289template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3291 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>::DoHighlight =
3294template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3295void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3296 std::string Label)
const {
3301template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3302typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3303CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3304 const std::shared_ptr<ContextEdge> &Edge,
3308 ContextNode *Clone =
3309 createNewNode(
Node->IsAllocation, NodeToCallingFunc[
Node],
Node->Call);
3310 Node->addClone(Clone);
3311 Clone->MatchingCalls =
Node->MatchingCalls;
3312 moveEdgeToExistingCalleeClone(Edge, Clone,
true,
3317template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3318void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3319 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
3320 ContextNode *NewCallee,
bool NewClone,
3324 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3326 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3328 ContextNode *OldCallee =
Edge->Callee;
3332 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3336 if (ContextIdsToMove.
empty())
3337 ContextIdsToMove =
Edge->getContextIds();
3341 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3344 NewCallee->AllocTypes |=
Edge->AllocTypes;
3346 if (ExistingEdgeToNewCallee) {
3349 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3350 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3351 assert(
Edge->ContextIds == ContextIdsToMove);
3352 removeEdgeFromGraph(
Edge.get());
3355 Edge->Callee = NewCallee;
3356 NewCallee->CallerEdges.push_back(Edge);
3358 OldCallee->eraseCallerEdge(
Edge.get());
3365 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3366 if (ExistingEdgeToNewCallee) {
3369 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3370 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3373 auto NewEdge = std::make_shared<ContextEdge>(
3374 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3375 Edge->Caller->CalleeEdges.push_back(NewEdge);
3376 NewCallee->CallerEdges.push_back(NewEdge);
3380 NewCallee->AllocTypes |= CallerEdgeAllocType;
3382 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3387 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3388 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3392 if (CalleeToUse == OldCallee) {
3396 if (EdgeIsRecursive) {
3397 assert(OldCalleeEdge == Edge);
3400 CalleeToUse = NewCallee;
3406 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3407 OldCalleeEdge->AllocTypes =
3408 computeAllocType(OldCalleeEdge->getContextIds());
3415 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3416 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3417 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3421 auto NewEdge = std::make_shared<ContextEdge>(
3422 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3423 EdgeContextIdsToMove);
3424 NewCallee->CalleeEdges.push_back(NewEdge);
3425 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3429 OldCallee->AllocTypes = OldCallee->computeAllocType();
3432 OldCallee->emptyContextIds());
3434 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
3435 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
3436 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3437 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
3439 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3440 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
3445template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3446void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3447 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
3448 ContextNode *NewCaller) {
3449 auto *OldCallee =
Edge->Callee;
3450 auto *NewCallee = OldCallee;
3453 bool Recursive =
Edge->Caller ==
Edge->Callee;
3455 NewCallee = NewCaller;
3457 ContextNode *OldCaller =
Edge->Caller;
3458 OldCaller->eraseCalleeEdge(
Edge.get());
3462 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3464 if (ExistingEdgeToNewCaller) {
3467 ExistingEdgeToNewCaller->getContextIds().insert_range(
3468 Edge->getContextIds());
3469 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3470 Edge->ContextIds.clear();
3472 OldCallee->eraseCallerEdge(
Edge.get());
3475 Edge->Caller = NewCaller;
3476 NewCaller->CalleeEdges.push_back(Edge);
3478 assert(NewCallee == NewCaller);
3481 Edge->Callee = NewCallee;
3482 NewCallee->CallerEdges.push_back(Edge);
3483 OldCallee->eraseCallerEdge(
Edge.get());
3489 NewCaller->AllocTypes |=
Edge->AllocTypes;
3496 bool IsNewNode = NewCaller->CallerEdges.empty();
3505 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3506 auto OldCallerCaller = OldCallerEdge->Caller;
3510 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3511 if (OldCaller == OldCallerCaller) {
3512 OldCallerCaller = NewCaller;
3518 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3519 OldCallerEdge->AllocTypes =
3520 computeAllocType(OldCallerEdge->getContextIds());
3525 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3529 if (ExistingCallerEdge) {
3530 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3531 ExistingCallerEdge->AllocTypes |=
3532 computeAllocType(EdgeContextIdsToMove);
3535 auto NewEdge = std::make_shared<ContextEdge>(
3536 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3537 EdgeContextIdsToMove);
3538 NewCaller->CallerEdges.push_back(NewEdge);
3539 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3544 OldCaller->AllocTypes = OldCaller->computeAllocType();
3547 OldCaller->emptyContextIds());
3549 checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller,
false);
3550 checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller,
false);
3551 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3552 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
3554 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3555 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
3560template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3561void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3562 recursivelyRemoveNoneTypeCalleeEdges(
3568 removeNoneTypeCalleeEdges(
Node);
3570 for (
auto *Clone :
Node->Clones)
3571 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3575 auto CallerEdges =
Node->CallerEdges;
3576 for (
auto &Edge : CallerEdges) {
3578 if (
Edge->isRemoved()) {
3582 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3587template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3588void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3595 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3597 if (
Node->isRemoved())
3600 if (!
Node->CallerEdges.empty())
3602 markBackedges(
Node, Visited, CurrentStack);
// Recursive DFS helper for backedge marking: walks Node's callee edges
// and flags an edge IsBackedge when its callee is already on the current
// DFS stack (CurrentStack), i.e. the edge closes a cycle. Callee nodes in
// Visited are skipped (only the stack-membership check runs for them);
// otherwise the callee is pushed onto CurrentStack, recursed into, and
// popped afterwards.
// NOTE(review): several original lines (signature parameters and parts of
// the Visited-handling branch) were dropped by extraction; this fragment
// is incomplete as shown.
3608template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3609void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3616 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3617 auto *
Callee = CalleeEdge->Callee;
3618 if (Visited.
count(Callee)) {
3621 if (CurrentStack.
count(Callee))
3622 CalleeEdge->IsBackedge =
true;
3625 CurrentStack.
insert(Callee);
3626 markBackedges(Callee, Visited, CurrentStack);
3627 CurrentStack.
erase(Callee);
3631template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3632void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3634 for (
auto &Entry : AllocationCallToContextNodeMap) {
3636 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3639 for (
auto &Entry : AllocationCallToContextNodeMap)
3640 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3653template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3654void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3658 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
3666 if (!
Node->hasCall())
3685 auto CallerEdges =
Node->CallerEdges;
3686 for (
auto &Edge : CallerEdges) {
3688 if (
Edge->isRemoved()) {
3694 if (
Edge->IsBackedge) {
3701 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3702 identifyClones(
Edge->Caller, Visited, AllocContextIds);
3725 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3729 [&](
const std::shared_ptr<ContextEdge> &
A,
3730 const std::shared_ptr<ContextEdge> &
B) {
3733 if (A->ContextIds.empty())
3739 if (B->ContextIds.empty())
3742 if (A->AllocTypes == B->AllocTypes)
3745 return *A->ContextIds.begin() < *B->ContextIds.begin();
3746 return AllocTypeCloningPriority[A->AllocTypes] <
3747 AllocTypeCloningPriority[B->AllocTypes];
3758 for (
auto &CE :
Node->CallerEdges) {
3761 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3762 for (
auto Id :
CE->getContextIds())
3763 if (!AllCallerContextIds.
insert(Id).second)
3764 RecursiveContextIds.
insert(Id);
3774 auto CallerEdges =
Node->CallerEdges;
3775 for (
auto &CallerEdge : CallerEdges) {
3777 if (CallerEdge->isRemoved()) {
3790 if (!CallerEdge->Caller->hasCall())
3795 auto CallerEdgeContextsForAlloc =
3797 if (!RecursiveContextIds.
empty())
3798 CallerEdgeContextsForAlloc =
3800 if (CallerEdgeContextsForAlloc.empty())
3803 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3807 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3808 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3809 for (
auto &CalleeEdge :
Node->CalleeEdges)
3810 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3811 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3829 if (!CallerEdge->IsBackedge &&
3830 allocTypeToUse(CallerAllocTypeForAlloc) ==
3831 allocTypeToUse(
Node->AllocTypes) &&
3832 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3833 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
3837 if (CallerEdge->IsBackedge) {
3841 DeferredBackedges++;
3854 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
3855 !Visited.
count(CallerEdge->Caller)) {
3856 const auto OrigIdCount = CallerEdge->getContextIds().size();
3859 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
3860 removeNoneTypeCalleeEdges(CallerEdge->Caller);
3864 bool UpdatedEdge =
false;
3865 if (OrigIdCount > CallerEdge->getContextIds().size()) {
3866 for (
auto E :
Node->CallerEdges) {
3868 if (E->Caller->CloneOf != CallerEdge->Caller)
3872 auto CallerEdgeContextsForAllocNew =
3874 if (CallerEdgeContextsForAllocNew.empty())
3884 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
3894 if (CallerEdge->isRemoved())
3904 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
3905 if (CallerEdgeContextsForAlloc.empty())
3910 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3911 CalleeEdgeAllocTypesForCallerEdge.clear();
3912 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3913 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3914 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3920 ContextNode *Clone =
nullptr;
3921 for (
auto *CurClone :
Node->Clones) {
3922 if (allocTypeToUse(CurClone->AllocTypes) !=
3923 allocTypeToUse(CallerAllocTypeForAlloc))
3930 assert(!BothSingleAlloc ||
3931 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3937 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3938 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3946 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
3947 CallerEdgeContextsForAlloc);
3949 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
3962 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
3965void ModuleCallsiteContextGraph::updateAllocationCall(
3969 "memprof", AllocTypeString);
3970 cast<CallBase>(
Call.call())->addFnAttr(
A);
3971 OREGetter(
Call.call()->getFunction())
3973 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3975 <<
" marked with memprof allocation attribute "
3976 <<
ore::NV(
"Attribute", AllocTypeString));
3979void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
3981 auto *AI = cast<AllocInfo *>(
Call.call());
3983 assert(AI->Versions.size() >
Call.cloneNo());
3988ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3989 const auto *CB = cast<CallBase>(
Call.call());
3990 if (!CB->getAttributes().hasFnAttr(
"memprof"))
3992 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
3998IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3999 const auto *AI = cast<AllocInfo *>(
Call.call());
4000 assert(AI->Versions.size() >
Call.cloneNo());
4004void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
4005 FuncInfo CalleeFunc) {
4006 auto *CurF = getCalleeFunc(CallerCall.call());
4007 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4014 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4016 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4018 MismatchedCloneAssignments++;
4021 if (NewCalleeCloneNo > 0)
4022 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4023 OREGetter(CallerCall.call()->getFunction())
4025 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4026 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4027 <<
" assigned to call function clone "
4028 <<
ore::NV(
"Callee", CalleeFunc.func()));
4031void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
4032 FuncInfo CalleeFunc) {
4033 auto *CI = cast<CallsiteInfo *>(CallerCall.call());
4035 "Caller cannot be an allocation which should not have profiled calls");
4036 assert(CI->Clones.size() > CallerCall.cloneNo());
4037 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4038 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4043 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4045 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4047 MismatchedCloneAssignments++;
4049 CurCalleeCloneNo = NewCalleeCloneNo;
4061 SP->replaceLinkageName(MDName);
4065 TempDISubprogram NewDecl = Decl->
clone();
4066 NewDecl->replaceLinkageName(MDName);
4070CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4072ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4074 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4080 NewFunc->setName(
Name);
4082 for (
auto &Inst : CallsWithMetadataInFunc) {
4084 assert(Inst.cloneNo() == 0);
4085 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
4087 OREGetter(
Func.func())
4089 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4090 return {NewFunc, CloneNo};
4094 IndexCall>::FuncInfo
4095IndexCallsiteContextGraph::cloneFunctionForCallsite(
4097 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4102 assert(CloneNo == (isa<AllocInfo *>(
Call.call())
4103 ? cast<AllocInfo *>(
Call.call())->Versions.size()
4104 : cast<CallsiteInfo *>(
Call.call())->Clones.size()));
4111 for (
auto &Inst : CallsWithMetadataInFunc) {
4113 assert(Inst.cloneNo() == 0);
4114 if (
auto *AI = dyn_cast<AllocInfo *>(Inst.call())) {
4115 assert(AI->Versions.size() == CloneNo);
4118 AI->Versions.push_back(0);
4120 auto *CI = cast<CallsiteInfo *>(Inst.call());
4121 assert(CI && CI->Clones.size() == CloneNo);
4124 CI->Clones.push_back(0);
4126 CallMap[Inst] = {Inst.call(), CloneNo};
4128 return {
Func.func(), CloneNo};
4145template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4146void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4153 for (
auto &Entry : AllocationCallToContextNodeMap) {
4155 for (
auto Id :
Node->getContextIds())
4156 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4157 for (
auto *Clone :
Node->Clones) {
4158 for (
auto Id : Clone->getContextIds())
4159 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4167 for (
auto &Entry : AllocationCallToContextNodeMap) {
4170 mergeClones(
Node, Visited, ContextIdToAllocationNode);
4176 auto Clones =
Node->Clones;
4177 for (
auto *Clone : Clones)
4178 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4182 dbgs() <<
"CCG after merging:\n";
4186 exportToDot(
"aftermerge");
4194template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4195void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4207 bool FoundUnvisited =
true;
4209 while (FoundUnvisited) {
4211 FoundUnvisited =
false;
4214 auto CallerEdges =
Node->CallerEdges;
4215 for (
auto CallerEdge : CallerEdges) {
4217 if (CallerEdge->Callee !=
Node)
4222 FoundUnvisited =
true;
4223 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4227 TotalMergeInvokes++;
4228 TotalMergeIters += Iters;
4229 if (Iters > MaxMergeIters)
4230 MaxMergeIters = Iters;
4233 mergeNodeCalleeClones(
Node, Visited, ContextIdToAllocationNode);
4236template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4237void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4241 if (
Node->emptyContextIds())
4247 OrigNodeToCloneEdges;
4248 for (
const auto &E :
Node->CalleeEdges) {
4249 auto *
Callee = E->Callee;
4253 OrigNodeToCloneEdges[
Base].push_back(E);
4259 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4260 const std::shared_ptr<ContextEdge> &
B) {
4261 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4262 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4263 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4265 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4269 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4274 for (
auto Entry : OrigNodeToCloneEdges) {
4277 auto &CalleeEdges =
Entry.second;
4278 auto NumCalleeClones = CalleeEdges.size();
4280 if (NumCalleeClones == 1)
4292 findOtherCallersToShareMerge(
Node, CalleeEdges, ContextIdToAllocationNode,
4293 OtherCallersToShareMerge);
4298 ContextNode *MergeNode =
nullptr;
4300 for (
auto CalleeEdge : CalleeEdges) {
4301 auto *OrigCallee = CalleeEdge->Callee;
4307 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4308 MergeNode = OrigCallee;
4309 NonNewMergedNodes++;
4316 if (!OtherCallersToShareMerge.
empty()) {
4317 bool MoveAllCallerEdges =
true;
4318 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4319 if (CalleeCallerE == CalleeEdge)
4321 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4322 MoveAllCallerEdges =
false;
4328 if (MoveAllCallerEdges) {
4329 MergeNode = OrigCallee;
4330 NonNewMergedNodes++;
4337 assert(MergeNode != OrigCallee);
4338 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4341 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4346 if (!OtherCallersToShareMerge.
empty()) {
4350 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4351 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4352 if (CalleeCallerE == CalleeEdge)
4354 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4356 CallerToMoveCount[CalleeCallerE->Caller]++;
4357 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4361 removeNoneTypeCalleeEdges(OrigCallee);
4362 removeNoneTypeCalleeEdges(MergeNode);
4380template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4381void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4382 findOtherCallersToShareMerge(
4384 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4387 auto NumCalleeClones = CalleeEdges.size();
4393 unsigned PossibleOtherCallerNodes = 0;
4397 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4404 for (
auto CalleeEdge : CalleeEdges) {
4405 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4408 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4409 if (CalleeCallerEdges->Caller ==
Node) {
4410 assert(CalleeCallerEdges == CalleeEdge);
4413 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4416 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4418 PossibleOtherCallerNodes++;
4422 for (
auto Id : CalleeEdge->getContextIds()) {
4423 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4427 MissingAllocForContextId++;
4430 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4437 for (
auto CalleeEdge : CalleeEdges) {
4438 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4440 if (!PossibleOtherCallerNodes)
4442 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4444 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4446 if (CalleeCallerE == CalleeEdge)
4450 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4455 for (
auto Id : CalleeCallerE->getContextIds()) {
4456 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4461 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4462 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4463 PossibleOtherCallerNodes--;
4470 if (!PossibleOtherCallerNodes)
4475 for (
auto &[OtherCaller, Count] : OtherCallersToSharedCalleeEdgeCount) {
4476 if (Count != NumCalleeClones)
4478 OtherCallersToShareMerge.
insert(OtherCaller);
4513template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4514bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4515 bool Changed =
false;
4525 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4526 const FuncInfo &CalleeFunc) {
4528 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4532 struct FuncCloneInfo {
4542 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4543 FuncInfo OrigFunc(Func);
4548 std::vector<FuncCloneInfo> FuncCloneInfos;
4549 for (
auto &Call : CallsWithMetadata) {
4550 ContextNode *
Node = getNodeForInst(Call);
4557 "Not having a call should have prevented cloning");
4561 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4565 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4567 ContextNode *CallsiteClone,
4570 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4572 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4574 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4576 if (
auto It = CallMap.
find(Call); It != CallMap.
end())
4577 CallClone = It->second;
4578 CallsiteClone->setCall(CallClone);
4580 for (
auto &MatchingCall :
Node->MatchingCalls) {
4582 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4583 CallClone = It->second;
4585 MatchingCall = CallClone;
4592 std::deque<ContextNode *> ClonesWorklist;
4594 if (!
Node->emptyContextIds())
4595 ClonesWorklist.push_back(
Node);
4601 unsigned NodeCloneCount = 0;
4602 while (!ClonesWorklist.empty()) {
4603 ContextNode *Clone = ClonesWorklist.front();
4604 ClonesWorklist.pop_front();
4607 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
4613 if (FuncCloneInfos.size() < NodeCloneCount) {
4615 if (NodeCloneCount == 1) {
4620 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
4621 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4625 FuncCloneInfos.push_back(
4627 AssignCallsiteCloneToFuncClone(
4628 OrigFunc, Call, Clone,
4629 AllocationCallToContextNodeMap.count(Call));
4630 for (
auto &CE : Clone->CallerEdges) {
4632 if (!
CE->Caller->hasCall())
4634 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
4644 FuncInfo PreviousAssignedFuncClone;
4646 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
4647 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4649 bool CallerAssignedToCloneOfFunc =
false;
4650 if (EI != Clone->CallerEdges.end()) {
4651 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4652 PreviousAssignedFuncClone =
4653 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4654 CallerAssignedToCloneOfFunc =
true;
4660 unsigned CloneNo = FuncCloneInfos.size();
4661 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4662 "should already exist in the map");
4663 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4664 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
4665 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4666 FunctionClonesAnalysis++;
4672 if (!CallerAssignedToCloneOfFunc) {
4673 AssignCallsiteCloneToFuncClone(
4674 NewFuncClone, Call, Clone,
4675 AllocationCallToContextNodeMap.count(Call));
4676 for (
auto &CE : Clone->CallerEdges) {
4678 if (!
CE->Caller->hasCall())
4680 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4692 auto CallerEdges = Clone->CallerEdges;
4693 for (
auto CE : CallerEdges) {
4695 if (
CE->isRemoved()) {
4701 if (!
CE->Caller->hasCall())
4704 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
4708 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
4709 PreviousAssignedFuncClone)
4712 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4725 auto CalleeEdges =
CE->Caller->CalleeEdges;
4726 for (
auto CalleeEdge : CalleeEdges) {
4729 if (CalleeEdge->isRemoved()) {
4734 ContextNode *
Callee = CalleeEdge->Callee;
4738 if (Callee == Clone || !
Callee->hasCall())
4743 if (Callee == CalleeEdge->Caller)
4745 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
4746 removeNoneTypeCalleeEdges(NewClone);
4749 removeNoneTypeCalleeEdges(Callee);
4750 assert(NewClone->AllocTypes != (
uint8_t)AllocationType::None);
4754 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
4755 RecordCalleeFuncOfCallsite(
4756 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
4765 OrigCall.setCloneNo(0);
4767 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
4769 CallInfo NewCall(CallMap[OrigCall]);
4771 NewClone->setCall(NewCall);
4773 for (
auto &MatchingCall : NewClone->MatchingCalls) {
4774 CallInfo OrigMatchingCall(MatchingCall);
4775 OrigMatchingCall.setCloneNo(0);
4777 CallInfo NewCall(CallMap[OrigMatchingCall]);
4780 MatchingCall = NewCall;
4789 auto FindFirstAvailFuncClone = [&]() {
4794 for (
auto &CF : FuncCloneInfos) {
4795 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
4796 return CF.FuncClone;
4799 "Expected an available func clone for this callsite clone");
4816 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
4817 FuncInfo FuncCloneAssignedToCurCallsiteClone;
4821 auto CloneCallerEdges = Clone->CallerEdges;
4822 for (
auto &Edge : CloneCallerEdges) {
4826 if (
Edge->isRemoved())
4829 if (!
Edge->Caller->hasCall())
4833 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
4834 FuncInfo FuncCloneCalledByCaller =
4835 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4845 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
4846 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
4854 (FuncCloneAssignedToCurCallsiteClone &&
4855 FuncCloneAssignedToCurCallsiteClone !=
4856 FuncCloneCalledByCaller)) {
4871 if (FuncCloneToNewCallsiteCloneMap.count(
4872 FuncCloneCalledByCaller)) {
4873 ContextNode *NewClone =
4874 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4875 moveEdgeToExistingCalleeClone(Edge, NewClone);
4877 removeNoneTypeCalleeEdges(NewClone);
4880 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge);
4881 removeNoneTypeCalleeEdges(NewClone);
4882 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4885 ClonesWorklist.push_back(NewClone);
4886 assert(NewClone->AllocTypes != (
uint8_t)AllocationType::None);
4890 removeNoneTypeCalleeEdges(Clone);
4898 if (!FuncCloneAssignedToCurCallsiteClone) {
4899 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4901 AssignCallsiteCloneToFuncClone(
4902 FuncCloneCalledByCaller, Call, Clone,
4903 AllocationCallToContextNodeMap.count(Call));
4907 assert(FuncCloneAssignedToCurCallsiteClone ==
4908 FuncCloneCalledByCaller);
4917 if (!FuncCloneAssignedToCurCallsiteClone) {
4918 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4919 assert(FuncCloneAssignedToCurCallsiteClone);
4921 AssignCallsiteCloneToFuncClone(
4922 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4923 AllocationCallToContextNodeMap.count(Call));
4925 assert(FuncCloneToCurNodeCloneMap
4926 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4928 RecordCalleeFuncOfCallsite(
Edge->Caller,
4929 FuncCloneAssignedToCurCallsiteClone);
4949 if (!FuncCloneAssignedToCurCallsiteClone) {
4950 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4951 assert(FuncCloneAssignedToCurCallsiteClone &&
4952 "No available func clone for this callsite clone");
4953 AssignCallsiteCloneToFuncClone(
4954 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4955 AllocationCallToContextNodeMap.contains(Call));
4959 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
4960 for (
const auto &PE :
Node->CalleeEdges)
4961 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4962 for (
const auto &CE :
Node->CallerEdges)
4963 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4964 for (
auto *Clone :
Node->Clones) {
4965 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
4966 for (
const auto &PE : Clone->CalleeEdges)
4967 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4968 for (
const auto &CE : Clone->CallerEdges)
4969 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4976 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
4978 auto UpdateCalls = [&](ContextNode *
Node,
4980 auto &&UpdateCalls) {
4985 for (
auto *Clone :
Node->Clones)
4986 UpdateCalls(Clone, Visited, UpdateCalls);
4988 for (
auto &Edge :
Node->CallerEdges)
4989 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
4993 if (!
Node->hasCall() ||
Node->emptyContextIds())
4996 if (
Node->IsAllocation) {
4997 auto AT = allocTypeToUse(
Node->AllocTypes);
5003 !ContextIdToContextSizeInfos.empty()) {
5006 for (
auto Id :
Node->getContextIds()) {
5007 auto TypeI = ContextIdToAllocationType.find(Id);
5008 assert(TypeI != ContextIdToAllocationType.end());
5009 auto CSI = ContextIdToContextSizeInfos.find(Id);
5010 if (CSI != ContextIdToContextSizeInfos.end()) {
5011 for (
auto &Info : CSI->second) {
5013 if (TypeI->second == AllocationType::Cold)
5014 TotalCold +=
Info.TotalSize;
5019 AT = AllocationType::Cold;
5021 updateAllocationCall(
Node->Call, AT);
5026 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
5029 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5030 updateCall(
Node->Call, CalleeFunc);
5032 for (
auto &Call :
Node->MatchingCalls)
5033 updateCall(Call, CalleeFunc);
5042 for (
auto &Entry : AllocationCallToContextNodeMap)
5043 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5057 FunctionsClonedThinBackend++;
5058 for (
unsigned I = 1;
I < NumClones;
I++) {
5059 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5061 FunctionClonesThinBackend++;
5064 for (
auto &BB : *NewF) {
5065 for (
auto &Inst : BB) {
5066 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5067 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5071 auto *PrevF = M.getFunction(
Name);
5075 assert(PrevF->isDeclaration());
5076 NewF->takeName(PrevF);
5077 PrevF->replaceAllUsesWith(NewF);
5078 PrevF->eraseFromParent();
5080 NewF->setName(
Name);
5083 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5086 if (!FuncToAliasMap.count(&
F))
5088 for (
auto *
A : FuncToAliasMap[&
F]) {
5090 auto *PrevA = M.getNamedAlias(
Name);
5092 A->getType()->getPointerAddressSpace(),
5093 A->getLinkage(),
Name, NewF);
5094 NewA->copyAttributesFrom(
A);
5098 assert(PrevA->isDeclaration());
5099 NewA->takeName(PrevA);
5100 PrevA->replaceAllUsesWith(NewA);
5101 PrevA->eraseFromParent();
5112 const Function *CallingFunc =
nullptr) {
5131 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5137 if (!SrcFileMD &&
F.isDeclaration()) {
5141 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5146 assert(SrcFileMD || OrigName ==
F.getName());
5148 StringRef SrcFile = M.getSourceFileName();
5150 SrcFile = dyn_cast<MDString>(SrcFileMD->getOperand(0))->getString();
5151 std::string OrigId = GlobalValue::getGlobalIdentifier(
5160 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5161 F.getName().contains(
'.')) {
5162 OrigName =
F.getName().rsplit(
'.').first;
5163 OrigId = GlobalValue::getGlobalIdentifier(
5171 assert(TheFnVI ||
F.isDeclaration());
5175bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5177 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5178 Symtab = std::make_unique<InstrProfSymtab>();
5189 if (
Error E = Symtab->create(M,
true,
false)) {
5190 std::string SymtabFailure =
toString(std::move(E));
5191 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5204 auto MIBIter = AllocNode.
MIBs.begin();
5205 for (
auto &MDOp : MemProfMD->
operands()) {
5207 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5208 auto *MIBMD = cast<const MDNode>(MDOp);
5212 auto ContextIterBegin =
5216 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5218 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5223 if (LastStackContextId == *ContextIter)
5225 LastStackContextId = *ContextIter;
5226 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5236bool MemProfContextDisambiguation::applyImport(
Module &M) {
5238 bool Changed =
false;
5243 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5245 for (
auto &
A :
M.aliases()) {
5246 auto *Aliasee =
A.getAliaseeObject();
5247 if (
auto *
F = dyn_cast<Function>(Aliasee))
5248 FuncToAliasMap[
F].insert(&
A);
5251 if (!initializeIndirectCallPromotionInfo(M))
5261 bool ClonesCreated =
false;
5262 unsigned NumClonesCreated = 0;
5263 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
5273 if (ClonesCreated) {
5274 assert(NumClonesCreated == NumClones);
5281 ClonesCreated =
true;
5282 NumClonesCreated = NumClones;
5288 CloneFuncIfNeeded(
StackNode.Clones.size());
5299 auto *GA = dyn_cast_or_null<GlobalAlias>(CB->getCalledOperand());
5300 if (CalledFunction != CB->getCalledOperand() &&
5301 (!GA || CalledFunction != GA->getAliaseeObject())) {
5302 SkippedCallsCloning++;
5308 auto CalleeOrigName = CalledFunction->getName();
5309 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
5314 auto NewF =
M.getOrInsertFunction(
5316 CalledFunction->getFunctionType());
5322 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
5331 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5333 <<
" assigned to call function clone "
5334 <<
ore::NV(
"Callee", NewF.getCallee()));
5352 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5354 "enable-import-metadata is needed to emit thinlto_src_module");
5356 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
5358 if (GVS->modulePath() == SrcModule) {
5359 GVSummary = GVS.get();
5363 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5368 if (isa<AliasSummary>(GVSummary))
5371 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
5373 if (
FS->allocs().empty() &&
FS->callsites().empty())
5376 auto SI =
FS->callsites().begin();
5377 auto AI =
FS->allocs().begin();
5385 for (
auto CallsiteIt =
FS->callsites().rbegin();
5386 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5387 auto &Callsite = *CallsiteIt;
5391 if (!Callsite.StackIdIndices.empty())
5393 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5402 for (
auto &BB :
F) {
5403 for (
auto &
I : BB) {
5404 auto *CB = dyn_cast<CallBase>(&
I);
5409 auto *CalledValue = CB->getCalledOperand();
5410 auto *CalledFunction = CB->getCalledFunction();
5411 if (CalledValue && !CalledFunction) {
5412 CalledValue = CalledValue->stripPointerCasts();
5414 CalledFunction = dyn_cast<Function>(CalledValue);
5418 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
5419 assert(!CalledFunction &&
5420 "Expected null called function in callsite for alias");
5421 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
5425 I.getMetadata(LLVMContext::MD_callsite));
5426 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5430 if (CB->getAttributes().hasFnAttr(
"memprof")) {
5432 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5433 ? AllocTypeColdThinBackend++
5434 : AllocTypeNotColdThinBackend++;
5435 OrigAllocsThinBackend++;
5436 AllocVersionsThinBackend++;
5437 if (!MaxAllocVersionsThinBackend)
5438 MaxAllocVersionsThinBackend = 1;
5445 auto &AllocNode = *(AI++);
5453 CloneFuncIfNeeded(AllocNode.Versions.size());
5455 OrigAllocsThinBackend++;
5456 AllocVersionsThinBackend += AllocNode.Versions.size();
5457 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5458 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5468 if (AllocNode.Versions.size() == 1 &&
5474 UnclonableAllocsThinBackend++;
5480 return Type == ((uint8_t)AllocationType::NotCold |
5481 (uint8_t)AllocationType::Cold);
5485 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5491 : AllocTypeNotColdThinBackend++;
5503 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
5506 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5508 <<
" marked with memprof allocation attribute "
5509 <<
ore::NV(
"Attribute", AllocTypeString));
5511 }
else if (!CallsiteContext.empty()) {
5512 if (!CalledFunction) {
5515 auto *CI = dyn_cast<CallInst>(CB);
5516 assert(!CI || !CI->isInlineAsm());
5519 assert(CalledValue && !isa<Constant>(CalledValue));
5526 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
5532 CloneFuncIfNeeded(NumClones);
5537 assert(SI !=
FS->callsites().end());
5543 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
5544 for (
auto StackId : CallsiteContext) {
5552 CloneCallsite(
StackNode, CB, CalledFunction);
5554 }
else if (CB->isTailCall() && CalledFunction) {
5559 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
5560 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
5561 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
5562 CloneCallsite(Callsite->second, CB, CalledFunction);
5569 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
5579 for (
auto &BB :
F) {
5580 for (
auto &
I : BB) {
5581 if (!isa<CallBase>(
I))
5583 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
5584 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
5592unsigned MemProfContextDisambiguation::recordICPInfo(
5599 auto CandidateProfileData =
5600 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
5602 if (CandidateProfileData.empty())
5608 bool ICPNeeded =
false;
5609 unsigned NumClones = 0;
5610 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
5611 for (
const auto &Candidate : CandidateProfileData) {
5613 auto CalleeValueInfo =
5618 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
5625 [](
unsigned CloneNo) { return CloneNo != 0; });
5635 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
5636 TotalCount, CallsiteInfoStartIndex});
5640void MemProfContextDisambiguation::performICP(
5642 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
5651 for (
auto &Info : ICallAnalysisInfo) {
5654 auto TotalCount =
Info.TotalCount;
5655 unsigned NumPromoted = 0;
5656 unsigned NumClones = 0;
5658 for (
auto &Candidate :
Info.CandidateProfileData) {
5669 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
5670 if (TargetFunction ==
nullptr ||
5679 <<
"Memprof cannot promote indirect call: target with md5sum "
5680 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
5689 const char *Reason =
nullptr;
5693 <<
"Memprof cannot promote indirect call to "
5694 <<
ore::NV(
"TargetFunction", TargetFunction)
5695 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
5707 for (
unsigned J = 0; J < NumClones; J++) {
5710 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
5716 TotalCount, isSamplePGO, &ORE);
5717 auto *TargetToUse = TargetFunction;
5722 cast<Function>(
M.getOrInsertFunction(
5739 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5741 <<
" promoted and assigned to call function clone "
5742 <<
ore::NV(
"Callee", TargetToUse));
5746 TotalCount -= Candidate.Count;
5752 for (
unsigned J = 0; J < NumClones; J++) {
5755 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
5757 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
5760 if (TotalCount != 0)
5763 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
5768template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
5769bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
5771 dbgs() <<
"CCG before cloning:\n";
5775 exportToDot(
"postbuild");
5788 dbgs() <<
"CCG after cloning:\n";
5792 exportToDot(
"cloned");
5794 bool Changed = assignFunctions();
5797 dbgs() <<
"CCG after assigning function clones:\n";
5801 exportToDot(
"clonefuncassign");
5804 printTotalSizes(
errs());
5809bool MemProfContextDisambiguation::processModule(
5816 return applyImport(M);
5829 ModuleCallsiteContextGraph CCG(M, OREGetter);
5830 return CCG.process();
5835 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
5840 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
5844 "-memprof-dot-scope=context requires -memprof-dot-context-id");
5848 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
5849 "-memprof-dot-context-id");
5850 if (ImportSummary) {
5860 auto ReadSummaryFile =
5862 if (!ReadSummaryFile) {
5869 if (!ImportSummaryForTestingOrErr) {
5875 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
5876 ImportSummary = ImportSummaryForTesting.get();
5885 if (!processModule(M, OREGetter))
5902 IndexCallsiteContextGraph CCG(Index, isPrevailing);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
cl::opt< unsigned > MinClonedColdBytePercent
static std::string getAllocTypeString(uint8_t AllocTypes)
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
static unsigned getMemProfCloneNum(const Function &F)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Lightweight error class with error context and mandatory checking.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Module * getParent()
Get the module that this global value is contained inside of...
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
void stable_sort(R &&Range)
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
InterleavedRange< Range > interleaved(const Range &R, StringRef Separator=", ", StringRef Prefix="", StringRef Suffix="")
Output range R as a sequence of interleaved elements.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
const char * toString(DWARFSectionKind Kind)
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
Summary of memprof callsite metadata.
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
static SimpleType getSimplifiedValue(IndexCall &Val)
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...