52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
114 cl::desc(
"Specify the path prefix of the MemProf dot files."));
118 cl::desc(
"Export graph to dot files."));
123 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
133 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
138 "Export only nodes with contexts feeding given "
139 "-memprof-dot-alloc-id"),
141 "Export only nodes with given -memprof-dot-context-id")));
145 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
146 "or to highlight if -memprof-dot-scope=all"));
150 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
151 "highlight otherwise"));
155 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
159 cl::desc(
"Perform verification checks on CallingContextGraph."));
163 cl::desc(
"Perform frequent verification checks on nodes."));
166 "memprof-import-summary",
167 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
173 cl::desc(
"Max depth to recursively search for missing "
174 "frames through tail calls."));
179 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
183 cl::desc(
"Allow cloning of contexts through recursive cycles"));
190 cl::desc(
"Merge clones before assigning functions"));
199 cl::desc(
"Allow cloning of contexts having recursive cycles"));
205 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
216 cl::desc(
"Linking with hot/cold operator new interfaces"));
221 "Require target function definition when promoting indirect calls"));
243template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
244class CallsiteContextGraph {
246 CallsiteContextGraph() =
default;
247 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
248 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
255 void identifyClones();
262 bool assignFunctions();
269 const CallsiteContextGraph &CCG) {
275 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
277 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
279 void exportToDot(std::string Label)
const;
282 struct FuncInfo final
283 :
public std::pair<FuncTy *, unsigned > {
284 using Base = std::pair<FuncTy *, unsigned>;
286 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
287 explicit operator bool()
const {
return this->first !=
nullptr; }
288 FuncTy *
func()
const {
return this->first; }
289 unsigned cloneNo()
const {
return this->second; }
293 struct CallInfo final :
public std::pair<CallTy, unsigned > {
294 using Base = std::pair<CallTy, unsigned>;
296 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
298 explicit operator bool()
const {
return (
bool)this->first; }
299 CallTy call()
const {
return this->first; }
300 unsigned cloneNo()
const {
return this->second; }
301 void setCloneNo(
unsigned N) { this->second =
N; }
303 if (!
operator bool()) {
309 OS <<
"\t(clone " << cloneNo() <<
")";
335 bool Recursive =
false;
362 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
366 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
370 bool useCallerEdgesForContextInfo()
const {
375 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
393 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
394 Count += Edge->getContextIds().size();
398 CalleeEdges, useCallerEdgesForContextInfo()
400 : std::vector<std::shared_ptr<ContextEdge>>());
401 for (
const auto &Edge : Edges)
408 uint8_t computeAllocType()
const {
413 CalleeEdges, useCallerEdgesForContextInfo()
415 : std::vector<std::shared_ptr<ContextEdge>>());
416 for (
const auto &Edge : Edges) {
427 bool emptyContextIds()
const {
429 CalleeEdges, useCallerEdgesForContextInfo()
431 : std::vector<std::shared_ptr<ContextEdge>>());
432 for (
const auto &Edge : Edges) {
433 if (!Edge->getContextIds().empty())
440 std::vector<ContextNode *> Clones;
443 ContextNode *CloneOf =
nullptr;
445 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
447 ContextNode(
bool IsAllocation, CallInfo
C)
448 : IsAllocation(IsAllocation),
Call(
C) {}
450 void addClone(ContextNode *Clone) {
452 CloneOf->Clones.push_back(Clone);
453 Clone->CloneOf = CloneOf;
455 Clones.push_back(Clone);
457 Clone->CloneOf =
this;
461 ContextNode *getOrigNode() {
468 unsigned int ContextId);
470 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
471 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
472 void eraseCalleeEdge(
const ContextEdge *Edge);
473 void eraseCallerEdge(
const ContextEdge *Edge);
475 void setCall(CallInfo
C) {
Call =
C; }
477 bool hasCall()
const {
return (
bool)
Call.call(); }
483 bool isRemoved()
const {
519 bool IsBackedge =
false;
526 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
527 ContextIds(std::move(ContextIds)) {}
533 inline void clear() {
543 inline bool isRemoved()
const {
544 if (Callee || Caller)
565 void removeNoneTypeCalleeEdges(ContextNode *
Node);
566 void removeNoneTypeCallerEdges(ContextNode *
Node);
568 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
574 template <
class NodeT,
class IteratorT>
575 std::vector<uint64_t>
580 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
583 template <
class NodeT,
class IteratorT>
584 void addStackNodesForMIB(ContextNode *AllocNode,
593 void updateStackNodes();
597 void handleCallsitesWithMultipleTargets();
600 void markBackedges();
610 bool partitionCallsByCallee(
612 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
619 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
626 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
631 struct CallContextInfo {
635 std::vector<uint64_t> StackIds;
649 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
650 bool CalleeIter =
true);
658 void assignStackNodesPostOrder(
671 void propagateDuplicateContextIds(
677 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
685 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
695 calleesMatch(CallTy
Call, EdgeIter &EI,
700 const FuncTy *getCalleeFunc(CallTy
Call) {
701 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
707 bool calleeMatchesFunc(
708 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
709 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
710 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
711 Call, Func, CallerFunc, FoundCalleeChain);
715 bool sameCallee(CallTy Call1, CallTy Call2) {
716 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
721 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
722 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
728 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
734 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
739 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
744 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
745 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
751 FuncInfo cloneFunctionForCallsite(
753 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
754 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
755 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
760 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
761 unsigned CloneNo)
const {
762 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
766 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
767 CallInfo
C = CallInfo()) {
768 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
769 auto *NewNode = NodeOwner.back().get();
771 NodeToCallingFunc[NewNode] =
F;
772 NewNode->NodeId = NodeOwner.size();
777 ContextNode *getNodeForInst(
const CallInfo &
C);
778 ContextNode *getNodeForAlloc(
const CallInfo &
C);
779 ContextNode *getNodeForStackId(
uint64_t StackId);
801 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
808 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
809 ContextNode *NewCallee,
810 bool NewClone =
false,
818 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
819 ContextNode *NewCaller);
830 void mergeNodeCalleeClones(
835 void findOtherCallersToShareMerge(
836 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
867 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
873 unsigned int LastContextId = 0;
876template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
878 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
879template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
881 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
882template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
884 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
885template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
887 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
890class ModuleCallsiteContextGraph
891 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
894 ModuleCallsiteContextGraph(
896 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
899 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
902 uint64_t getStackId(uint64_t IdOrIndex)
const;
904 bool calleeMatchesFunc(
905 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
906 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
907 bool sameCallee(Instruction *Call1, Instruction *Call2);
908 bool findProfiledCalleeThroughTailCalls(
909 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
910 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
911 bool &FoundMultipleCalleeChains);
912 uint64_t getLastStackId(Instruction *
Call);
913 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
916 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
917 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
919 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
920 DenseMap<CallInfo, CallInfo> &CallMap,
921 std::vector<CallInfo> &CallsWithMetadataInFunc,
923 std::string getLabel(
const Function *Func,
const Instruction *
Call,
924 unsigned CloneNo)
const;
927 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
933struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
934 IndexCall() : PointerUnion() {}
935 IndexCall(std::nullptr_t) : IndexCall() {}
936 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
937 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
938 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
940 IndexCall *operator->() {
return this; }
942 void print(raw_ostream &OS)
const {
943 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
968class IndexCallsiteContextGraph
969 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
972 IndexCallsiteContextGraph(
973 ModuleSummaryIndex &Index,
977 ~IndexCallsiteContextGraph() {
982 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
984 for (
auto &Callsite :
I.second)
985 FS->addCallsite(*Callsite.second);
990 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
993 uint64_t getStackId(uint64_t IdOrIndex)
const;
994 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
995 bool calleeMatchesFunc(
996 IndexCall &
Call,
const FunctionSummary *Func,
997 const FunctionSummary *CallerFunc,
998 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
999 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1000 bool findProfiledCalleeThroughTailCalls(
1001 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1002 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1003 bool &FoundMultipleCalleeChains);
1004 uint64_t getLastStackId(IndexCall &
Call);
1005 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1008 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1009 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1010 IndexCall>::FuncInfo
1011 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1012 DenseMap<CallInfo, CallInfo> &CallMap,
1013 std::vector<CallInfo> &CallsWithMetadataInFunc,
1015 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1016 unsigned CloneNo)
const;
1020 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1022 const ModuleSummaryIndex &Index;
1030 std::unordered_map<FunctionSummary *,
1031 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1032 FunctionCalleesToSynthesizedCallsiteInfos;
1044 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1047 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
// Returns true when the saved per-edge allocation types (InAllocTypes) are
// consistent, element-wise, with the allocation types on the given context
// edges. An AllocationType::None on either side is handled specially before
// the allocTypeToUse comparison — presumably treated as a trivial match;
// TODO confirm against the full source.
// NOTE(review): interior lines of this definition (e.g. the std::equal call
// header and the comparison lambda's opening) are missing from this
// extraction; code below kept byte-identical.
1069template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1070bool allocTypesMatch(
1071 const std::vector<uint8_t> &InAllocTypes,
1072 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
// Each saved alloc type must line up with exactly one edge.
1076 assert(InAllocTypes.size() == Edges.size());
1078 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1080 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1084 if (l == (uint8_t)AllocationType::None ||
1085 r->AllocTypes == (uint8_t)AllocationType::None)
// Otherwise compare the effective (hint-relevant) allocation types.
1087 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
// Variant of allocTypesMatch that compares the saved allocation types
// (indexed by the original node's callee edges) against the callee edges of
// one of its clones. Builds a callee-node -> alloc-type map from the clone's
// edges, then checks each of the original node's callee edges against it.
// NOTE(review): interior lines of this definition are missing from this
// extraction; code below kept byte-identical.
1096template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1097bool allocTypesMatchClone(
1098 const std::vector<uint8_t> &InAllocTypes,
1099 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1100 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
// InAllocTypes was recorded from the original (cloned-from) node's edges.
1104 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
// Map each callee reachable from the clone to that edge's alloc types.
1108 for (
const auto &
E : Clone->CalleeEdges) {
1110 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1114 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1115 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
// Clone has no edge to this callee — presumably a mismatch; TODO confirm.
1117 if (Iter == EdgeCalleeMap.
end())
// Compare effective alloc types for the shared callee.
1125 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
// Looks up the context node for a call: first tries the allocation map, then
// falls back to the non-allocation (interior callsite) map.
// NOTE(review): the lines between the getNodeForAlloc call and the final
// lookup (presumably an early return when Node is non-null) are missing from
// this extraction; code below kept byte-identical.
1133template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1134typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1135CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1136 const CallInfo &
C) {
1137 ContextNode *
Node = getNodeForAlloc(
C);
// lookup() returns nullptr when C is not a known non-allocation callsite.
1141 return NonAllocationCallToContextNodeMap.lookup(
C);
// Returns the context node created for this allocation call, or nullptr
// (DenseMap::lookup default) if C is not a recorded allocation.
1144template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1145typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1146CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1147 const CallInfo &
C) {
1148 return AllocationCallToContextNodeMap.lookup(
C);
// Returns the context node recorded for the given stack id, if any.
// NOTE(review): the parameter line (uint64_t StackId) and the fallthrough
// return are missing from this extraction; code below kept byte-identical.
1151template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1152typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1153CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1155 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1156 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1157 return StackEntryNode->second;
// Adds ContextId to an existing caller edge to Caller if one exists
// (updating its id set); otherwise creates a new ContextEdge from this node
// to Caller carrying just that context id and the given alloc type.
// NOTE(review): some interior lines (e.g. the |= of AllocType into the
// existing edge, early return) are missing from this extraction; code below
// kept byte-identical.
1161template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1162void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1164 unsigned int ContextId) {
// Reuse an existing edge to this caller when present.
1165 for (
auto &
Edge : CallerEdges) {
1166 if (
Edge->Caller == Caller) {
1168 Edge->getContextIds().insert(ContextId);
// No existing edge: create one carrying only this context id.
1172 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1173 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1174 CallerEdges.push_back(
Edge);
// Removes Edge from both its callee's CallerEdges and its caller's
// CalleeEdges. When an iterator EI into one of those vectors is supplied,
// the erase goes through the iterator (CalleeIter selects which side EI
// indexes) so the caller's loop stays valid.
// NOTE(review): interior lines (Callee/Caller locals, the non-iterator
// branch, the erase of the opposite side) are missing from this extraction;
// code below kept byte-identical.
1178template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1179void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1180 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
// Pre-sizes used to assert the erases below actually removed entries.
1196 auto CalleeCallerCount =
Callee->CallerEdges.size();
1197 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1202 }
else if (CalleeIter) {
1204 *EI =
Caller->CalleeEdges.erase(*EI);
1207 *EI =
Callee->CallerEdges.erase(*EI);
1209 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1210 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
// Removes all of Node's callee edges whose AllocTypes is None. Iteration
// uses an explicit iterator because removeEdgeFromGraph erases via &EI.
// NOTE(review): interior lines (the Edge local from *EI, the else/++EI arm)
// are missing from this extraction; code below kept byte-identical.
1213template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1214void CallsiteContextGraph<
1215 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1216 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1218 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
// true => EI iterates the callee-edge vector being erased from.
1220 removeEdgeFromGraph(
Edge.get(), &EI,
true);
// Removes all of Node's caller edges whose AllocTypes is None. Unlike the
// callee-side variant this erases both directions inline: the caller's
// mirror callee edge first, then this node's caller edge via the iterator.
// NOTE(review): interior lines (the Edge local, the else/++EI arm) are
// missing from this extraction; code below kept byte-identical.
1226template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1227void CallsiteContextGraph<
1228 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1229 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1231 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
// Keep the graph symmetric: drop the caller's view of this edge too.
1233 Edge->Caller->eraseCalleeEdge(
Edge.get());
1234 EI =
Node->CallerEdges.erase(EI);
// Linear search of this node's callee edges for the edge whose Callee is the
// given node.
// NOTE(review): the return statements (edge pointer / nullptr fallthrough)
// are missing from this extraction; code below kept byte-identical.
1240template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1241typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1242CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1243 findEdgeFromCallee(
const ContextNode *Callee) {
1244 for (
const auto &
Edge : CalleeEdges)
1245 if (
Edge->Callee == Callee)
// Linear search of this node's caller edges for the edge whose Caller is the
// given node. Mirror of findEdgeFromCallee.
// NOTE(review): the return statements are missing from this extraction; code
// below kept byte-identical.
1250template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1251typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1252CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1253 findEdgeFromCaller(
const ContextNode *Caller) {
1254 for (
const auto &
Edge : CallerEdges)
1255 if (
Edge->Caller == Caller)
// Erases the given edge from this node's CalleeEdges, asserting it exists.
// Only this node's side is touched; the callee's mirror CallerEdges entry is
// the caller's responsibility.
// NOTE(review): the find_if call opening ("auto EI = ...") and the lambda's
// closing are missing from this extraction; code below kept byte-identical.
1260template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1261void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1262 eraseCalleeEdge(
const ContextEdge *
Edge) {
// Identify the shared_ptr wrapping exactly this raw edge pointer.
1264 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1265 return CalleeEdge.get() ==
Edge;
1267 assert(EI != CalleeEdges.end());
1268 CalleeEdges.erase(EI);
// Erases the given edge from this node's CallerEdges, asserting it exists.
// Mirror of eraseCalleeEdge.
// NOTE(review): the find_if call opening and the lambda's closing are
// missing from this extraction; code below kept byte-identical.
1271template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1272void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1273 eraseCallerEdge(
const ContextEdge *
Edge) {
// Identify the shared_ptr wrapping exactly this raw edge pointer.
1275 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1276 return CallerEdge.get() ==
Edge;
1278 assert(EI != CallerEdges.end());
1279 CallerEdges.erase(EI);
// ORs together the allocation types of all given context ids. The
// Cold|NotCold constant is presumably used to short-circuit once both bits
// are set — the early-exit lines are missing from this extraction; TODO
// confirm. Code below kept byte-identical.
1282template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1283uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1284 DenseSet<uint32_t> &ContextIds)
const {
1286 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1287 uint8_t
AllocType = (uint8_t)AllocationType::None;
1288 for (
auto Id : ContextIds) {
// .at() asserts every id has a recorded allocation type.
1289 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
// ORs together the allocation types of the context ids present in BOTH
// Node1Ids and Node2Ids (ids only in Node1Ids are skipped via the count()
// guard). Caller passes the smaller set first (see intersectAllocTypes).
// NOTE(review): the return type line and some interior lines (continue /
// early-exit / return) are missing from this extraction; code below kept
// byte-identical.
1297template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1299CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1300 const DenseSet<uint32_t> &Node1Ids,
1301 const DenseSet<uint32_t> &Node2Ids)
const {
1303 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1304 uint8_t
AllocType = (uint8_t)AllocationType::None;
1305 for (
auto Id : Node1Ids) {
// Only ids shared by both sets contribute.
1306 if (!Node2Ids.
count(Id))
1308 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
// Wrapper over intersectAllocTypesImpl that iterates the smaller id set for
// efficiency (membership tests run against the larger set).
1316template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1317uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1318 const DenseSet<uint32_t> &Node1Ids,
1319 const DenseSet<uint32_t> &Node2Ids)
const {
1320 if (Node1Ids.
size() < Node2Ids.
size())
1321 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1323 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
// Creates a new allocation context node for Call in function F, registers it
// in AllocationCallToContextNodeMap, and initializes it with no alloc type
// (types are ORed in later as MIB stack contexts are added — see
// addStackNodesForMIB).
// NOTE(review): some interior lines (e.g. around OrigStackOrAllocId /
// LastContextId bookkeeping, the return) are missing from this extraction;
// code below kept byte-identical.
1326template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1327typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1328CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1329 CallInfo
Call,
const FuncTy *
F) {
1331 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1332 AllocationCallToContextNodeMap[
Call] = AllocNode;
// Allocation nodes reuse this field for an id — here tied to LastContextId.
1334 AllocNode->OrigStackOrAllocId = LastContextId;
// None until contexts with real alloc types are attached.
1337 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
// Walks one MIB's stack context, assigning a fresh context id
// (++LastContextId) with the MIB's allocation type, then creating or reusing
// a stack ContextNode per frame and chaining caller edges from the
// allocation node outward (PrevNode tracks the previous frame's node).
// NOTE(review): a large number of interior lines are missing from this
// extraction (parameter list tail, the context-size bookkeeping body, the
// loop header over StackContext, the recursion/StackIdSet check); code below
// kept byte-identical.
1353template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1354template <
class NodeT,
class IteratorT>
1355void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1356 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
// Each MIB gets a brand new context id carrying its allocation type.
1364 ContextIdToAllocationType[++LastContextId] =
AllocType;
1366 if (!ContextSizeInfo.
empty()) {
1367 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1372 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1377 ContextNode *PrevNode = AllocNode;
// Used (below, partially elided) to detect repeated frames => Recursive.
1381 SmallSet<uint64_t, 8> StackIdSet;
1384 ContextIter != StackContext.
end(); ++ContextIter) {
1385 auto StackId = getStackId(*ContextIter);
1386 ContextNode *StackNode = getNodeForStackId(StackId);
// First time we see this frame: materialize its node.
1388 StackNode = createNewNode(
false);
1389 StackEntryIdToContextNodeMap[StackId] = StackNode;
1390 StackNode->OrigStackOrAllocId = StackId;
1397 StackNode->Recursive =
true;
1399 StackNode->AllocTypes |= (uint8_t)
AllocType;
// Link previous frame (or the alloc node) to this frame for this context.
1400 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1401 PrevNode = StackNode;
// For each id in StackSequenceContextIds, allocates a fresh context id
// inheriting the old id's allocation type, records the old->new mapping in
// OldToNewContextIds, and returns the set of new ids. Dot-export membership
// (DotAllocContextIds) is propagated to the duplicates as well.
// NOTE(review): the return-type line (presumably DenseSet<uint32_t>) is
// missing from this extraction; code below kept byte-identical.
1405template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1407CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1408 const DenseSet<uint32_t> &StackSequenceContextIds,
1409 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1410 DenseSet<uint32_t> NewContextIds;
1411 for (
auto OldId : StackSequenceContextIds) {
1412 NewContextIds.
insert(++LastContextId);
1413 OldToNewContextIds[OldId].insert(LastContextId);
1414 assert(ContextIdToAllocationType.count(OldId));
// The duplicate keeps the original's allocation type.
1416 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1417 if (DotAllocContextIds.
contains(OldId))
1418 DotAllocContextIds.
insert(LastContextId);
1420 return NewContextIds;
// Propagates newly duplicated context ids up the graph: starting from every
// allocation node, recursively walks caller edges and, wherever an edge
// carries an old id that was duplicated, inserts the corresponding new ids
// and continues upward. GetNewIds translates an edge's id set through
// OldToNewContextIds; UpdateCallers is the recursive walker (passed to
// itself for recursion, with a Visited set to bound the traversal).
// NOTE(review): several interior lines are missing from this extraction
// (NewIds accumulation/return in GetNewIds, the Visited insert/check, Node
// extraction from the map Entry); code below kept byte-identical.
1423template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1424void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1425 propagateDuplicateContextIds(
1426 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
// Translate a set of (possibly old) ids into the new ids minted for them.
1428 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1429 DenseSet<uint32_t> NewIds;
1430 for (
auto Id : ContextIds)
1431 if (
auto NewId = OldToNewContextIds.find(Id);
1432 NewId != OldToNewContextIds.end())
// Recursive caller-edge walker; takes itself as a parameter to recurse.
1438 auto UpdateCallers = [&](ContextNode *
Node,
1439 DenseSet<const ContextEdge *> &Visited,
1440 auto &&UpdateCallers) ->
void {
1441 for (
const auto &
Edge :
Node->CallerEdges) {
1445 ContextNode *NextNode =
Edge->Caller;
1446 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
// Only continue upward along edges that actually gained new ids.
1449 if (!NewIdsToAdd.
empty()) {
1450 Edge->getContextIds().insert_range(NewIdsToAdd);
1451 UpdateCallers(NextNode, Visited, UpdateCallers);
1456 DenseSet<const ContextEdge *> Visited;
// Allocation nodes are the roots of all contexts.
1457 for (
auto &Entry : AllocationCallToContextNodeMap) {
1459 UpdateCallers(Node, Visited, UpdateCallers);
// Hooks NewNode into the graph next to OrigNode: for each of OrigNode's
// edges on the chosen side (callee edges when TowardsCallee, caller edges
// otherwise), splits off the portion of the edge's context ids that belongs
// to RemainingContextIds onto a new edge attached to NewNode. Ids appearing
// on more than one original edge are tracked in RecursiveContextIds and
// handled specially so they are not consumed on first sight.
// NOTE(review): many interior lines are missing from this extraction (the
// OrigEdges reference declaration, the Edge local, the recursive-id set
// intersection at 1509-1510, several loop-continuation lines); code below
// kept byte-identical.
1463template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1464void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1465 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1468 DenseSet<uint32_t> RemainingContextIds) {
1470 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
// Ids seen on 2+ edges are "recursive" and must not be greedily consumed.
1471 DenseSet<uint32_t> RecursiveContextIds;
1472 DenseSet<uint32_t> AllCallerContextIds;
1477 for (
auto &CE : OrigEdges) {
1478 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1479 for (
auto Id :
CE->getContextIds())
1480 if (!AllCallerContextIds.
insert(Id).second)
1481 RecursiveContextIds.
insert(Id);
1485 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1487 DenseSet<uint32_t> NewEdgeContextIds;
1488 DenseSet<uint32_t> NotFoundContextIds;
// Moves ids found on this edge into NewEdgeContextIds; the rest go to
// NotFoundContextIds.
1492 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1493 NotFoundContextIds);
1496 if (RecursiveContextIds.
empty()) {
// Fast path: nothing recursive, so whatever wasn't found here is all
// that remains to look for on later edges.
1499 RemainingContextIds.
swap(NotFoundContextIds);
1509 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1511 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1514 if (NewEdgeContextIds.
empty()) {
// Create the mirrored edge pair on whichever side we are splicing.
1518 if (TowardsCallee) {
1519 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1520 auto NewEdge = std::make_shared<ContextEdge>(
1521 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1522 NewNode->CalleeEdges.push_back(NewEdge);
1523 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1525 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1526 auto NewEdge = std::make_shared<ContextEdge>(
1527 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1528 NewNode->CallerEdges.push_back(NewEdge);
1529 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
// Original edge lost all its ids to the new node: remove it.
1532 if (
Edge->getContextIds().empty()) {
1533 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
// Sanity check on a single context edge — asserts it carries at least one
// context id. NOTE(review): the function signature line (presumably
// "static void checkEdge(") and other assertions are missing from this
// extraction; code below kept byte-identical.
1540template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1542 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1546 assert(!Edge->ContextIds.empty());
// Sanity check on a context node: its own context ids must equal the union
// of its caller edges' ids (with exceptions elided here) and the union of
// its callee edges' ids; optionally (CheckEdges) each callee edge is checked
// too. NOTE(review): the signature line (presumably "static void
// checkNode(ContextNode *Node, ...") and many interior lines (set_union
// initializations, the assert wrappers, loop headers) are missing from this
// extraction; code below kept byte-identical.
1549template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1551 bool CheckEdges =
true) {
// Removed nodes are exempt from the invariants below.
1552 if (
Node->isRemoved())
1556 auto NodeContextIds =
Node->getContextIds();
// Caller side: union of caller-edge ids must match the node's ids.
1560 if (
Node->CallerEdges.size()) {
1562 Node->CallerEdges.front()->ContextIds);
1566 set_union(CallerEdgeContextIds, Edge->ContextIds);
1573 NodeContextIds == CallerEdgeContextIds ||
// Callee side: union of callee-edge ids must match the node's ids.
1576 if (
Node->CalleeEdges.size()) {
1578 Node->CalleeEdges.front()->ContextIds);
1582 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1588 NodeContextIds == CalleeEdgeContextIds);
1597 for (
const auto &
E :
Node->CalleeEdges)
// Post-order DFS over caller edges that, at each stack node owning matched
// calls (StackIdToMatchingCalls keyed by the node's original stack id),
// assigns those calls to context nodes: the trivial single-call/single-id
// case maps the call to the existing node; otherwise a new node per call is
// created (createNewNode) and spliced in via connectNewNode on both the
// callee and caller sides, with the consumed context ids subtracted from the
// original chain's edges. CallToMatchingCall redirects calls whose ids were
// fully claimed by an earlier duplicate.
// NOTE(review): this definition is heavily fragmented in this extraction —
// the Visited insert/check, several loop bodies, early continues, and most
// closing braces are missing; code below kept byte-identical.
1603template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1604void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1605 assignStackNodesPostOrder(
1606 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
1607 DenseMap<uint64_t, std::vector<CallContextInfo>>
1608 &StackIdToMatchingCalls,
1609 DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
// Copy the edge list: recursion below may mutate Node->CallerEdges.
1617 auto CallerEdges =
Node->CallerEdges;
1618 for (
auto &
Edge : CallerEdges) {
1620 if (
Edge->isRemoved()) {
// Recurse first => children handled before this node (post-order).
1624 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1625 CallToMatchingCall);
// Only non-allocation stack nodes with matched calls need processing.
1634 if (
Node->IsAllocation ||
1635 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1638 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
// Trivial case: one call matching exactly this one stack node.
1642 if (Calls.size() == 1) {
1643 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1644 if (Ids.size() == 1) {
1645 assert(SavedContextIds.empty());
1647 assert(Node == getNodeForStackId(Ids[0]));
1648 if (
Node->Recursive)
1651 NonAllocationCallToContextNodeMap[
Call] =
Node;
1660 uint64_t LastId =
Node->OrigStackOrAllocId;
1661 ContextNode *LastNode = getNodeForStackId(LastId);
1664 assert(LastNode == Node);
1666 ContextNode *LastNode =
Node;
// Snapshot before edges are modified by the per-call splicing below.
1671 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1673 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1674 bool CreatedNode =
false;
1675 for (
unsigned I = 0;
I < Calls.size();
1676 I++, PrevIterCreatedNode = CreatedNode) {
1677 CreatedNode =
false;
1678 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
// No saved ids: this call shares a node with an earlier matching call.
1681 if (SavedContextIds.empty()) {
1688 auto MatchingCall = CallToMatchingCall[
Call];
1689 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1693 assert(
I > 0 && !PrevIterCreatedNode);
1696 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1701 assert(LastId == Ids.back());
1710 ContextNode *PrevNode = LastNode;
// Validate the chain of stack nodes for this call's id sequence.
1714 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1716 ContextNode *CurNode = getNodeForStackId(Id);
1720 assert(!CurNode->Recursive);
1722 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1734 if (SavedContextIds.empty()) {
// Materialize a dedicated node for this call and splice it in.
1743 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1744 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1746 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1748 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
// Connect toward callees at the first node of the sequence...
1754 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
// ...and toward callers at the last node.
1759 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
// Remove the claimed ids from the original chain's interior edges.
1764 for (
auto Id : Ids) {
1765 ContextNode *CurNode = getNodeForStackId(Id);
1772 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1779 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1780 if (PrevEdge->getContextIds().empty())
1781 removeEdgeFromGraph(PrevEdge);
1786 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1787 ? (uint8_t)AllocationType::None
1788 : CurNode->computeAllocType();
1793 for (
auto Id : Ids) {
1794 ContextNode *CurNode = getNodeForStackId(Id);
1803template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1804void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1812 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
1813 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1814 for (
auto &
Call : CallsWithMetadata) {
1816 if (AllocationCallToContextNodeMap.count(
Call))
1818 auto StackIdsWithContextNodes =
1819 getStackIdsWithContextNodesForCall(
Call.call());
1822 if (StackIdsWithContextNodes.empty())
1826 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1827 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
1837 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
1841 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
1842 for (
auto &It : StackIdToMatchingCalls) {
1843 auto &Calls = It.getSecond();
1845 if (Calls.size() == 1) {
1846 auto &Ids = Calls[0].StackIds;
1847 if (Ids.size() == 1)
1860 DenseMap<const FuncTy *, unsigned> FuncToIndex;
1861 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
1862 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
1865 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1866 return A.StackIds.size() >
B.StackIds.size() ||
1867 (
A.StackIds.size() ==
B.StackIds.size() &&
1868 (
A.StackIds <
B.StackIds ||
1869 (
A.StackIds ==
B.StackIds &&
1870 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
1876 uint64_t LastId = It.getFirst();
1877 ContextNode *LastNode = getNodeForStackId(LastId);
1881 if (LastNode->Recursive)
1886 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1894 DenseSet<const FuncTy *> MatchingIdsFuncSet;
1897 for (
unsigned I = 0;
I < Calls.size();
I++) {
1898 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1899 assert(SavedContextIds.empty());
1900 assert(LastId == Ids.back());
1905 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1906 MatchingIdsFuncSet.
clear();
1913 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
1915 ContextNode *PrevNode = LastNode;
1916 ContextNode *CurNode = LastNode;
1921 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1923 CurNode = getNodeForStackId(Id);
1927 if (CurNode->Recursive) {
1932 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1953 if (StackSequenceContextIds.
empty()) {
1966 if (Ids.back() != getLastStackId(
Call)) {
1967 for (
const auto &PE : LastNode->CallerEdges) {
1968 set_subtract(StackSequenceContextIds, PE->getContextIds());
1969 if (StackSequenceContextIds.
empty())
1973 if (StackSequenceContextIds.
empty())
1985 MatchingIdsFuncSet.
insert(Func);
1992 bool DuplicateContextIds =
false;
1993 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1994 auto &CallCtxInfo = Calls[J];
1995 auto &NextIds = CallCtxInfo.StackIds;
1998 auto *NextFunc = CallCtxInfo.Func;
1999 if (NextFunc != Func) {
2002 DuplicateContextIds =
true;
2005 auto &NextCall = CallCtxInfo.Call;
2006 CallToMatchingCall[NextCall] =
Call;
2017 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2018 StackSequenceContextIds.
size());
2021 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2022 : StackSequenceContextIds;
2023 assert(!SavedContextIds.empty());
2025 if (!DuplicateContextIds) {
2029 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2030 if (LastNodeContextIds.
empty())
2037 propagateDuplicateContextIds(OldToNewContextIds);
2047 DenseSet<const ContextNode *> Visited;
2048 for (
auto &Entry : AllocationCallToContextNodeMap)
2049 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2050 CallToMatchingCall);
2055uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2056 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2058 return CallsiteContext.
back();
2061uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2063 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2066 return Index.getStackIdAtIndex(CallsiteContext.
back());
2088 auto Pos =
F.getName().find_last_of(
'.');
2091 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2097std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2098 const Instruction *
Call,
2099 unsigned CloneNo)
const {
2105std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2106 const IndexCall &
Call,
2107 unsigned CloneNo)
const {
2108 auto VI = FSToVIMap.find(Func);
2109 assert(VI != FSToVIMap.end());
2112 return CallerName +
" -> alloc";
2115 return CallerName +
" -> " +
2117 Callsite->Clones[CloneNo]);
2121std::vector<uint64_t>
2122ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2123 Instruction *
Call) {
2124 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2126 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2130std::vector<uint64_t>
2131IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2133 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2135 return getStackIdsWithContextNodes<CallsiteInfo,
2136 SmallVector<unsigned>::const_iterator>(
2140template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2141template <
class NodeT,
class IteratorT>
2142std::vector<uint64_t>
2143CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2144 CallStack<NodeT, IteratorT> &CallsiteContext) {
2145 std::vector<uint64_t> StackIds;
2146 for (
auto IdOrIndex : CallsiteContext) {
2147 auto StackId = getStackId(IdOrIndex);
2148 ContextNode *
Node = getNodeForStackId(StackId);
2151 StackIds.push_back(StackId);
2156ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2158 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2159 :
Mod(
M), OREGetter(OREGetter) {
2161 std::vector<CallInfo> CallsWithMetadata;
2162 for (
auto &BB :
F) {
2163 for (
auto &
I : BB) {
2166 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2167 CallsWithMetadata.push_back(&
I);
2168 auto *AllocNode = addAllocNode(&
I, &
F);
2169 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2173 for (
auto &MDOp : MemProfMD->operands()) {
2175 std::vector<ContextTotalSize> ContextSizeInfo;
2177 if (MIBMD->getNumOperands() > 2) {
2178 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2179 MDNode *ContextSizePair =
2188 ContextSizeInfo.push_back({FullStackId, TotalSize});
2194 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2195 AllocNode, StackContext, CallsiteContext,
2201 DotAllocContextIds = AllocNode->getContextIds();
2205 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2206 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2209 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2210 CallsWithMetadata.push_back(&
I);
2214 if (!CallsWithMetadata.empty())
2215 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2219 dbgs() <<
"CCG before updating call stack chains:\n";
2224 exportToDot(
"prestackupdate");
2229 exportToDot(
"poststackupdate");
2231 handleCallsitesWithMultipleTargets();
2236 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2237 for (
auto &
Call : FuncEntry.second)
2238 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2241IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2245 : Index(Index), isPrevailing(isPrevailing) {
2246 for (
auto &
I : Index) {
2247 auto VI = Index.getValueInfo(
I);
2248 for (
auto &S : VI.getSummaryList()) {
2257 !isPrevailing(VI.getGUID(), S.get()))
2262 std::vector<CallInfo> CallsWithMetadata;
2263 if (!
FS->allocs().empty()) {
2264 for (
auto &AN :
FS->mutableAllocs()) {
2269 if (AN.MIBs.empty())
2271 IndexCall AllocCall(&AN);
2272 CallsWithMetadata.push_back(AllocCall);
2273 auto *AllocNode = addAllocNode(AllocCall, FS);
2281 AN.ContextSizeInfos.size() == AN.MIBs.size());
2283 for (
auto &MIB : AN.MIBs) {
2286 std::vector<ContextTotalSize> ContextSizeInfo;
2287 if (!AN.ContextSizeInfos.empty()) {
2288 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2289 ContextSizeInfo.push_back({FullStackId, TotalSize});
2291 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2292 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2299 DotAllocContextIds = AllocNode->getContextIds();
2305 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2309 if (!
FS->callsites().empty())
2310 for (
auto &SN :
FS->mutableCallsites()) {
2311 IndexCall StackNodeCall(&SN);
2312 CallsWithMetadata.push_back(StackNodeCall);
2315 if (!CallsWithMetadata.empty())
2316 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2318 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2324 dbgs() <<
"CCG before updating call stack chains:\n";
2329 exportToDot(
"prestackupdate");
2334 exportToDot(
"poststackupdate");
2336 handleCallsitesWithMultipleTargets();
2341template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2342void CallsiteContextGraph<DerivedCCG, FuncTy,
2343 CallTy>::handleCallsitesWithMultipleTargets() {
2358 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2359 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2360 auto *
Node = Entry.second;
2369 std::vector<CallInfo> AllCalls;
2370 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2371 AllCalls.push_back(
Node->Call);
2385 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2388 auto It = AllCalls.begin();
2390 for (; It != AllCalls.end(); ++It) {
2393 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2396 if (!Edge->Callee->hasCall())
2398 assert(NodeToCallingFunc.count(Edge->Callee));
2400 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2409 if (
Node->Call != ThisCall) {
2410 Node->setCall(ThisCall);
2421 Node->MatchingCalls.clear();
2424 if (It == AllCalls.end()) {
2425 RemovedEdgesWithMismatchedCallees++;
2429 Node->setCall(CallInfo());
2434 for (++It; It != AllCalls.end(); ++It) {
2438 Node->MatchingCalls.push_back(ThisCall);
2447 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2448 return !it.second->hasCall() || it.second->Call != it.first;
2452 for (
auto &[
Call,
Node] : NewCallToNode)
2453 NonAllocationCallToContextNodeMap[
Call] =
Node;
2457 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2458 NonAllocationCallToContextNodeMap[
Call] =
Node;
2461template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2462bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2464 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2468 struct CallsWithSameCallee {
2469 std::vector<CallInfo> Calls;
2470 ContextNode *
Node =
nullptr;
2476 for (
auto ThisCall : AllCalls) {
2477 auto *
F = getCalleeFunc(
ThisCall.call());
2479 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2488 for (
const auto &Edge :
Node->CalleeEdges) {
2489 if (!Edge->Callee->hasCall())
2491 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2492 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2493 CalleeNodeToCallInfo[Edge->Callee] =
2494 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2500 if (CalleeNodeToCallInfo.
empty())
2512 ContextNode *UnmatchedCalleesNode =
nullptr;
2514 bool UsedOrigNode =
false;
2519 auto CalleeEdges =
Node->CalleeEdges;
2520 for (
auto &Edge : CalleeEdges) {
2521 if (!Edge->Callee->hasCall())
2526 ContextNode *CallerNodeToUse =
nullptr;
2530 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2531 if (!UnmatchedCalleesNode)
2532 UnmatchedCalleesNode =
2533 createNewNode(
false, NodeToCallingFunc[
Node]);
2534 CallerNodeToUse = UnmatchedCalleesNode;
2538 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2541 if (!UsedOrigNode) {
2544 Node->MatchingCalls.clear();
2545 UsedOrigNode =
true;
2548 createNewNode(
false, NodeToCallingFunc[
Node]);
2552 Info->Node->setCall(
Info->Calls.front());
2558 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2560 CallerNodeToUse =
Info->Node;
2564 if (CallerNodeToUse ==
Node)
2567 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2574 for (
auto &
I : CalleeNodeToCallInfo)
2575 removeNoneTypeCallerEdges(
I.second->Node);
2576 if (UnmatchedCalleesNode)
2577 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2578 removeNoneTypeCallerEdges(
Node);
2588uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2591 return Index.getStackIdAtIndex(IdOrIndex);
2594template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2595bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2596 CallTy
Call, EdgeIter &EI,
2597 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2599 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2600 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2603 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2604 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2609 if (FoundCalleeChain.empty())
2613 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2617 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2618 CurEdge->AllocTypes |=
Edge->AllocTypes;
2623 auto NewEdge = std::make_shared<ContextEdge>(
2624 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2625 Callee->CallerEdges.push_back(NewEdge);
2626 if (Caller ==
Edge->Caller) {
2630 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2633 "Iterator position not restored after insert and increment");
2635 Caller->CalleeEdges.push_back(NewEdge);
2640 auto *CurCalleeNode =
Edge->Callee;
2641 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2642 ContextNode *NewNode =
nullptr;
2644 if (TailCallToContextNodeMap.
count(NewCall)) {
2645 NewNode = TailCallToContextNodeMap[NewCall];
2646 NewNode->AllocTypes |=
Edge->AllocTypes;
2648 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2650 NewNode = createNewNode(
false, Func, NewCall);
2651 TailCallToContextNodeMap[NewCall] = NewNode;
2652 NewNode->AllocTypes =
Edge->AllocTypes;
2656 AddEdge(NewNode, CurCalleeNode);
2658 CurCalleeNode = NewNode;
2662 AddEdge(
Edge->Caller, CurCalleeNode);
2670 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2682bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2683 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2684 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2685 bool &FoundMultipleCalleeChains) {
2692 FoundCalleeChain.push_back({Callsite,
F});
2707 bool FoundSingleCalleeChain =
false;
2708 for (
auto &BB : *CalleeFunc) {
2709 for (
auto &
I : BB) {
2711 if (!CB || !CB->isTailCall())
2713 auto *CalledValue = CB->getCalledOperand();
2714 auto *CalledFunction = CB->getCalledFunction();
2715 if (CalledValue && !CalledFunction) {
2716 CalledValue = CalledValue->stripPointerCasts();
2723 assert(!CalledFunction &&
2724 "Expected null called function in callsite for alias");
2727 if (!CalledFunction)
2729 if (CalledFunction == ProfiledCallee) {
2730 if (FoundSingleCalleeChain) {
2731 FoundMultipleCalleeChains =
true;
2734 FoundSingleCalleeChain =
true;
2735 FoundProfiledCalleeCount++;
2736 FoundProfiledCalleeDepth +=
Depth;
2737 if (
Depth > FoundProfiledCalleeMaxDepth)
2738 FoundProfiledCalleeMaxDepth =
Depth;
2739 SaveCallsiteInfo(&
I, CalleeFunc);
2740 }
else if (findProfiledCalleeThroughTailCalls(
2741 ProfiledCallee, CalledFunction,
Depth + 1,
2742 FoundCalleeChain, FoundMultipleCalleeChains)) {
2745 assert(!FoundMultipleCalleeChains);
2746 if (FoundSingleCalleeChain) {
2747 FoundMultipleCalleeChains =
true;
2750 FoundSingleCalleeChain =
true;
2751 SaveCallsiteInfo(&
I, CalleeFunc);
2752 }
else if (FoundMultipleCalleeChains)
2757 return FoundSingleCalleeChain;
2760const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2762 if (!CB->getCalledOperand() || CB->isIndirectCall())
2764 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2771bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2772 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
2773 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2775 if (!CB->getCalledOperand() || CB->isIndirectCall())
2777 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2779 if (CalleeFunc == Func)
2782 if (Alias && Alias->getAliasee() == Func)
2793 bool FoundMultipleCalleeChains =
false;
2794 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2796 FoundMultipleCalleeChains)) {
2797 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2798 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2799 <<
" that actually called " << CalleeVal->getName()
2800 << (FoundMultipleCalleeChains
2801 ?
" (found multiple possible chains)"
2804 if (FoundMultipleCalleeChains)
2805 FoundProfiledCalleeNonUniquelyCount++;
2812bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
2813 Instruction *Call2) {
2815 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2817 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2820 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2822 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2824 return CalleeFunc1 == CalleeFunc2;
2827bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2828 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
2829 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2830 bool &FoundMultipleCalleeChains) {
2836 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
2839 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2840 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
2843 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
2844 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
2845 CallsiteInfo *NewCallsiteInfo =
2846 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2847 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2854 bool FoundSingleCalleeChain =
false;
2857 !isPrevailing(CurCallee.
getGUID(), S.get()))
2862 auto FSVI = CurCallee;
2865 FSVI = AS->getAliaseeVI();
2866 for (
auto &CallEdge :
FS->calls()) {
2867 if (!CallEdge.second.hasTailCall())
2869 if (CallEdge.first == ProfiledCallee) {
2870 if (FoundSingleCalleeChain) {
2871 FoundMultipleCalleeChains =
true;
2874 FoundSingleCalleeChain =
true;
2875 FoundProfiledCalleeCount++;
2876 FoundProfiledCalleeDepth +=
Depth;
2877 if (
Depth > FoundProfiledCalleeMaxDepth)
2878 FoundProfiledCalleeMaxDepth =
Depth;
2879 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2881 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2882 FSToVIMap[
FS] = FSVI;
2883 }
else if (findProfiledCalleeThroughTailCalls(
2884 ProfiledCallee, CallEdge.first,
Depth + 1,
2885 FoundCalleeChain, FoundMultipleCalleeChains)) {
2888 assert(!FoundMultipleCalleeChains);
2889 if (FoundSingleCalleeChain) {
2890 FoundMultipleCalleeChains =
true;
2893 FoundSingleCalleeChain =
true;
2894 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2896 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2897 FSToVIMap[
FS] = FSVI;
2898 }
else if (FoundMultipleCalleeChains)
2903 return FoundSingleCalleeChain;
2906const FunctionSummary *
2907IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
2909 if (
Callee.getSummaryList().empty())
2914bool IndexCallsiteContextGraph::calleeMatchesFunc(
2915 IndexCall &
Call,
const FunctionSummary *Func,
2916 const FunctionSummary *CallerFunc,
2917 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2921 AliasSummary *Alias =
2922 Callee.getSummaryList().empty()
2925 assert(FSToVIMap.count(Func));
2926 auto FuncVI = FSToVIMap[
Func];
2927 if (Callee == FuncVI ||
2942 bool FoundMultipleCalleeChains =
false;
2943 if (!findProfiledCalleeThroughTailCalls(
2944 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2945 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2946 <<
" from " << FSToVIMap[CallerFunc]
2947 <<
" that actually called " << Callee
2948 << (FoundMultipleCalleeChains
2949 ?
" (found multiple possible chains)"
2952 if (FoundMultipleCalleeChains)
2953 FoundProfiledCalleeNonUniquelyCount++;
2960bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2963 return Callee1 == Callee2;
2966template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2967void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
2973template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2974void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2975 raw_ostream &OS)
const {
2976 OS <<
"Node " <<
this <<
"\n";
2980 OS <<
" (recursive)";
2982 if (!MatchingCalls.empty()) {
2983 OS <<
"\tMatchingCalls:\n";
2984 for (
auto &MatchingCall : MatchingCalls) {
2986 MatchingCall.print(OS);
2990 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
2992 OS <<
"\tContextIds:";
2994 auto ContextIds = getContextIds();
2995 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2996 std::sort(SortedIds.begin(), SortedIds.end());
2997 for (
auto Id : SortedIds)
3000 OS <<
"\tCalleeEdges:\n";
3001 for (
auto &
Edge : CalleeEdges)
3002 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3004 OS <<
"\tCallerEdges:\n";
3005 for (
auto &
Edge : CallerEdges)
3006 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3008 if (!Clones.empty()) {
3011 for (
auto *
C : Clones) {
3015 OS <<
C <<
" NodeId: " <<
C->NodeId;
3018 }
else if (CloneOf) {
3019 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3023template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3024void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3030template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3031void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3032 raw_ostream &OS)
const {
3033 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3034 << (IsBackedge ?
" (BE)" :
"")
3036 OS <<
" ContextIds:";
3037 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3038 std::sort(SortedIds.begin(), SortedIds.end());
3039 for (
auto Id : SortedIds)
3043template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3044void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3048template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3049void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3050 raw_ostream &OS)
const {
3051 OS <<
"Callsite Context Graph:\n";
3052 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3054 if (
Node->isRemoved())
3061template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3062void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3063 raw_ostream &OS)
const {
3064 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3066 if (
Node->isRemoved())
3068 if (!
Node->IsAllocation)
3070 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3071 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3072 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3073 std::sort(SortedIds.begin(), SortedIds.end());
3074 for (
auto Id : SortedIds) {
3075 auto TypeI = ContextIdToAllocationType.find(Id);
3076 assert(TypeI != ContextIdToAllocationType.end());
3077 auto CSI = ContextIdToContextSizeInfos.find(Id);
3078 if (CSI != ContextIdToContextSizeInfos.end()) {
3079 for (
auto &
Info : CSI->second) {
3080 OS <<
"MemProf hinting: "
3082 <<
" full allocation context " <<
Info.FullStackId
3083 <<
" with total size " <<
Info.TotalSize <<
" is "
3085 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3087 <<
" due to cold byte percent";
3089 OS <<
" (context id " <<
Id <<
")";
3097template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3098void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3099 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3102 for (
auto &
Edge :
Node->CallerEdges)
3107template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3109 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3110 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3112 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3128 return G->NodeOwner.begin()->get();
3131 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3132 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3151template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3165 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3171 std::string LabelString =
3172 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3175 LabelString +=
"\n";
3176 if (
Node->hasCall()) {
3177 auto Func =
G->NodeToCallingFunc.find(
Node);
3178 assert(Func !=
G->NodeToCallingFunc.end());
3180 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3182 LabelString +=
"null call";
3183 if (
Node->Recursive)
3184 LabelString +=
" (recursive)";
3186 LabelString +=
" (external)";
3192 auto ContextIds =
Node->getContextIds();
3196 bool Highlight =
false;
3205 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3206 getContextIds(ContextIds) +
"\"")
3210 AttributeString +=
",fontsize=\"30\"";
3212 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3214 if (
Node->CloneOf) {
3215 AttributeString +=
",color=\"blue\"";
3216 AttributeString +=
",style=\"filled,bold,dashed\"";
3218 AttributeString +=
",style=\"filled\"";
3219 return AttributeString;
3224 auto &Edge = *(ChildIter.getCurrent());
3229 bool Highlight =
false;
3238 auto Color = getColor(Edge->AllocTypes, Highlight);
3239 std::string AttributeString =
3240 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3242 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3245 if (Edge->IsBackedge)
3246 AttributeString +=
",style=\"dotted\"";
3249 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3250 return AttributeString;
3256 if (
Node->isRemoved())
3269 std::string IdString =
"ContextIds:";
3270 if (ContextIds.
size() < 100) {
3271 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3272 std::sort(SortedIds.begin(), SortedIds.end());
3273 for (
auto Id : SortedIds)
3274 IdString += (
" " +
Twine(Id)).str();
3276 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3281 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3287 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3289 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3290 if (AllocTypes == (uint8_t)AllocationType::Cold)
3291 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3293 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3294 return Highlight ?
"magenta" :
"mediumorchid1";
3298 static std::string getNodeId(NodeRef Node) {
3299 std::stringstream SStream;
3300 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3301 std::string
Result = SStream.str();
3310template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3315template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3316void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3317 std::string Label)
const {
3322template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3323typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3324CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3325 const std::shared_ptr<ContextEdge> &
Edge,
3326 DenseSet<uint32_t> ContextIdsToMove) {
3328 assert(NodeToCallingFunc.count(Node));
3329 ContextNode *Clone =
3330 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3331 Node->addClone(Clone);
3332 Clone->MatchingCalls =
Node->MatchingCalls;
3333 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3338template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3339void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3340 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3341 ContextNode *NewCallee,
bool NewClone,
3342 DenseSet<uint32_t> ContextIdsToMove) {
3345 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3347 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3349 ContextNode *OldCallee =
Edge->Callee;
3353 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3357 if (ContextIdsToMove.
empty())
3358 ContextIdsToMove =
Edge->getContextIds();
3362 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3365 NewCallee->AllocTypes |=
Edge->AllocTypes;
3367 if (ExistingEdgeToNewCallee) {
3370 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3371 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3372 assert(
Edge->ContextIds == ContextIdsToMove);
3373 removeEdgeFromGraph(
Edge.get());
3376 Edge->Callee = NewCallee;
3377 NewCallee->CallerEdges.push_back(
Edge);
3379 OldCallee->eraseCallerEdge(
Edge.get());
3386 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3387 if (ExistingEdgeToNewCallee) {
3390 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3391 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3394 auto NewEdge = std::make_shared<ContextEdge>(
3395 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3396 Edge->Caller->CalleeEdges.push_back(NewEdge);
3397 NewCallee->CallerEdges.push_back(NewEdge);
3401 NewCallee->AllocTypes |= CallerEdgeAllocType;
3403 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3408 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3409 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3413 if (CalleeToUse == OldCallee) {
3417 if (EdgeIsRecursive) {
3421 CalleeToUse = NewCallee;
3425 DenseSet<uint32_t> EdgeContextIdsToMove =
3427 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3428 OldCalleeEdge->AllocTypes =
3429 computeAllocType(OldCalleeEdge->getContextIds());
3436 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3437 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3438 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3442 auto NewEdge = std::make_shared<ContextEdge>(
3443 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3444 EdgeContextIdsToMove);
3445 NewCallee->CalleeEdges.push_back(NewEdge);
3446 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3450 OldCallee->AllocTypes = OldCallee->computeAllocType();
3452 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3453 OldCallee->emptyContextIds());
3457 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3460 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3466template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3467void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3468 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3469 ContextNode *NewCaller) {
3470 auto *OldCallee =
Edge->Callee;
3471 auto *NewCallee = OldCallee;
3474 bool Recursive =
Edge->Caller ==
Edge->Callee;
3476 NewCallee = NewCaller;
3478 ContextNode *OldCaller =
Edge->Caller;
3479 OldCaller->eraseCalleeEdge(
Edge.get());
3483 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3485 if (ExistingEdgeToNewCaller) {
3488 ExistingEdgeToNewCaller->getContextIds().insert_range(
3489 Edge->getContextIds());
3490 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3491 Edge->ContextIds.clear();
3492 Edge->AllocTypes = (uint8_t)AllocationType::None;
3493 OldCallee->eraseCallerEdge(
Edge.get());
3496 Edge->Caller = NewCaller;
3497 NewCaller->CalleeEdges.push_back(
Edge);
3499 assert(NewCallee == NewCaller);
3502 Edge->Callee = NewCallee;
3503 NewCallee->CallerEdges.push_back(
Edge);
3504 OldCallee->eraseCallerEdge(
Edge.get());
3510 NewCaller->AllocTypes |=
Edge->AllocTypes;
3517 bool IsNewNode = NewCaller->CallerEdges.empty();
3526 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3527 auto OldCallerCaller = OldCallerEdge->Caller;
3531 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3532 if (OldCaller == OldCallerCaller) {
3533 OldCallerCaller = NewCaller;
3539 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3540 OldCallerEdge->AllocTypes =
3541 computeAllocType(OldCallerEdge->getContextIds());
3546 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3550 if (ExistingCallerEdge) {
3551 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3552 ExistingCallerEdge->AllocTypes |=
3553 computeAllocType(EdgeContextIdsToMove);
3556 auto NewEdge = std::make_shared<ContextEdge>(
3557 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3558 EdgeContextIdsToMove);
3559 NewCaller->CallerEdges.push_back(NewEdge);
3560 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3565 OldCaller->AllocTypes = OldCaller->computeAllocType();
3567 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3568 OldCaller->emptyContextIds());
3572 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3575 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3581template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3582void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3583 recursivelyRemoveNoneTypeCalleeEdges(
3584 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3589 removeNoneTypeCalleeEdges(Node);
3591 for (
auto *Clone :
Node->Clones)
3592 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3596 auto CallerEdges =
Node->CallerEdges;
3597 for (
auto &
Edge : CallerEdges) {
3599 if (
Edge->isRemoved()) {
3603 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3608template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3609void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3614 DenseSet<const ContextNode *> Visited;
3615 DenseSet<const ContextNode *> CurrentStack;
3616 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3618 if (
Node->isRemoved())
3621 if (!
Node->CallerEdges.empty())
3623 markBackedges(Node, Visited, CurrentStack);
3629template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3630void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3631 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3632 DenseSet<const ContextNode *> &CurrentStack) {
3633 auto I = Visited.
insert(Node);
3637 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3638 auto *
Callee = CalleeEdge->Callee;
3639 if (Visited.
count(Callee)) {
3642 if (CurrentStack.
count(Callee))
3643 CalleeEdge->IsBackedge =
true;
3646 CurrentStack.
insert(Callee);
3647 markBackedges(Callee, Visited, CurrentStack);
3648 CurrentStack.
erase(Callee);
3652template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3653void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3654 DenseSet<const ContextNode *> Visited;
3655 for (
auto &Entry : AllocationCallToContextNodeMap) {
3657 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3660 for (
auto &Entry : AllocationCallToContextNodeMap)
3661 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3674template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3675void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3676 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3677 const DenseSet<uint32_t> &AllocContextIds) {
3687 if (!
Node->hasCall())
3706 auto CallerEdges =
Node->CallerEdges;
3707 for (
auto &
Edge : CallerEdges) {
3709 if (
Edge->isRemoved()) {
3715 if (
Edge->IsBackedge) {
3722 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3723 identifyClones(
Edge->Caller, Visited, AllocContextIds);
3746 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3750 [&](
const std::shared_ptr<ContextEdge> &
A,
3751 const std::shared_ptr<ContextEdge> &
B) {
3754 if (A->ContextIds.empty())
3760 if (B->ContextIds.empty())
3763 if (A->AllocTypes == B->AllocTypes)
3766 return *A->ContextIds.begin() < *B->ContextIds.begin();
3767 return AllocTypeCloningPriority[A->AllocTypes] <
3768 AllocTypeCloningPriority[B->AllocTypes];
3771 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3773 DenseSet<uint32_t> RecursiveContextIds;
3778 DenseSet<uint32_t> AllCallerContextIds;
3779 for (
auto &CE :
Node->CallerEdges) {
3782 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3783 for (
auto Id :
CE->getContextIds())
3784 if (!AllCallerContextIds.
insert(Id).second)
3785 RecursiveContextIds.
insert(Id);
3795 auto CallerEdges =
Node->CallerEdges;
3796 for (
auto &CallerEdge : CallerEdges) {
3798 if (CallerEdge->isRemoved()) {
3802 assert(CallerEdge->Callee == Node);
3811 if (!CallerEdge->Caller->hasCall())
3816 auto CallerEdgeContextsForAlloc =
3818 if (!RecursiveContextIds.
empty())
3819 CallerEdgeContextsForAlloc =
3821 if (CallerEdgeContextsForAlloc.empty())
3824 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3828 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3829 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3830 for (
auto &CalleeEdge :
Node->CalleeEdges)
3831 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3832 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3848 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
3849 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3850 if (!CallerEdge->IsBackedge &&
3851 allocTypeToUse(CallerAllocTypeForAlloc) ==
3852 allocTypeToUse(
Node->AllocTypes) &&
3853 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3854 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
3858 if (CallerEdge->IsBackedge) {
3862 DeferredBackedges++;
3875 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
3876 !Visited.
count(CallerEdge->Caller)) {
3877 const auto OrigIdCount = CallerEdge->getContextIds().size();
3880 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
3881 removeNoneTypeCalleeEdges(CallerEdge->Caller);
3885 bool UpdatedEdge =
false;
3886 if (OrigIdCount > CallerEdge->getContextIds().size()) {
3887 for (
auto E :
Node->CallerEdges) {
3889 if (
E->Caller->CloneOf != CallerEdge->Caller)
3893 auto CallerEdgeContextsForAllocNew =
3895 if (CallerEdgeContextsForAllocNew.empty())
3905 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
3915 if (CallerEdge->isRemoved())
3925 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
3926 if (CallerEdgeContextsForAlloc.empty())
3931 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3932 CalleeEdgeAllocTypesForCallerEdge.clear();
3933 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3934 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3935 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3941 ContextNode *Clone =
nullptr;
3942 for (
auto *CurClone :
Node->Clones) {
3943 if (allocTypeToUse(CurClone->AllocTypes) !=
3944 allocTypeToUse(CallerAllocTypeForAlloc))
3951 assert(!BothSingleAlloc ||
3952 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3958 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3959 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3967 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
3968 CallerEdgeContextsForAlloc);
3970 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
3973 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
3980 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3986void ModuleCallsiteContextGraph::updateAllocationCall(
3991 "memprof", AllocTypeString);
3994 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
3995 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3997 <<
" marked with memprof allocation attribute "
3998 <<
ore::NV(
"Attribute", AllocTypeString));
4001void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4005 assert(AI->Versions.size() >
Call.cloneNo());
4010ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4012 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4013 return AllocationType::None;
4014 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4015 ? AllocationType::Cold
4016 : AllocationType::NotCold;
4020IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4022 assert(AI->Versions.size() >
Call.cloneNo());
4026void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4027 FuncInfo CalleeFunc) {
4028 auto *CurF = getCalleeFunc(CallerCall.call());
4029 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4036 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4038 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4040 MismatchedCloneAssignments++;
4043 if (NewCalleeCloneNo > 0)
4044 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4045 OREGetter(CallerCall.call()->getFunction())
4046 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4047 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4048 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4049 <<
" assigned to call function clone "
4050 <<
ore::NV(
"Callee", CalleeFunc.func()));
4053void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4054 FuncInfo CalleeFunc) {
4057 "Caller cannot be an allocation which should not have profiled calls");
4058 assert(CI->Clones.size() > CallerCall.cloneNo());
4059 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4060 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4065 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4067 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4069 MismatchedCloneAssignments++;
4071 CurCalleeCloneNo = NewCalleeCloneNo;
4083 SP->replaceLinkageName(MDName);
4087 TempDISubprogram NewDecl = Decl->
clone();
4088 NewDecl->replaceLinkageName(MDName);
4092CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4094ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4095 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4096 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4101 assert(!
Func.func()->getParent()->getFunction(Name));
4102 NewFunc->setName(Name);
4104 for (
auto &Inst : CallsWithMetadataInFunc) {
4106 assert(Inst.cloneNo() == 0);
4109 OREGetter(
Func.func())
4110 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4111 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4112 return {NewFunc, CloneNo};
4115CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4116 IndexCall>::FuncInfo
4117IndexCallsiteContextGraph::cloneFunctionForCallsite(
4118 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4119 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4133 for (
auto &Inst : CallsWithMetadataInFunc) {
4135 assert(Inst.cloneNo() == 0);
4137 assert(AI->Versions.size() == CloneNo);
4140 AI->Versions.push_back(0);
4143 assert(CI && CI->Clones.size() == CloneNo);
4146 CI->Clones.push_back(0);
4148 CallMap[Inst] = {Inst.call(), CloneNo};
4150 return {
Func.func(), CloneNo};
4167template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4168void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4174 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4175 for (
auto &Entry : AllocationCallToContextNodeMap) {
4177 for (
auto Id :
Node->getContextIds())
4178 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4179 for (
auto *Clone :
Node->Clones) {
4180 for (
auto Id : Clone->getContextIds())
4181 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4188 DenseSet<const ContextNode *> Visited;
4189 for (
auto &Entry : AllocationCallToContextNodeMap) {
4192 mergeClones(Node, Visited, ContextIdToAllocationNode);
4198 auto Clones =
Node->Clones;
4199 for (
auto *Clone : Clones)
4200 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4204 dbgs() <<
"CCG after merging:\n";
4208 exportToDot(
"aftermerge");
4216template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4217void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4218 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4219 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4229 bool FoundUnvisited =
true;
4231 while (FoundUnvisited) {
4233 FoundUnvisited =
false;
4236 auto CallerEdges =
Node->CallerEdges;
4237 for (
auto CallerEdge : CallerEdges) {
4239 if (CallerEdge->Callee != Node)
4244 FoundUnvisited =
true;
4245 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4249 TotalMergeInvokes++;
4250 TotalMergeIters += Iters;
4251 if (Iters > MaxMergeIters)
4252 MaxMergeIters = Iters;
4255 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4258template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4259void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4260 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4261 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4263 if (
Node->emptyContextIds())
4268 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4269 OrigNodeToCloneEdges;
4270 for (
const auto &
E :
Node->CalleeEdges) {
4275 OrigNodeToCloneEdges[
Base].push_back(
E);
4281 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4282 const std::shared_ptr<ContextEdge> &
B) {
4283 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4284 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4285 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4287 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4291 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4296 for (
auto Entry : OrigNodeToCloneEdges) {
4299 auto &CalleeEdges =
Entry.second;
4300 auto NumCalleeClones = CalleeEdges.size();
4302 if (NumCalleeClones == 1)
4313 DenseSet<ContextNode *> OtherCallersToShareMerge;
4314 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4315 OtherCallersToShareMerge);
4320 ContextNode *MergeNode =
nullptr;
4321 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4322 for (
auto CalleeEdge : CalleeEdges) {
4323 auto *OrigCallee = CalleeEdge->Callee;
4329 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4330 MergeNode = OrigCallee;
4331 NonNewMergedNodes++;
4338 if (!OtherCallersToShareMerge.
empty()) {
4339 bool MoveAllCallerEdges =
true;
4340 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4341 if (CalleeCallerE == CalleeEdge)
4343 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4344 MoveAllCallerEdges =
false;
4350 if (MoveAllCallerEdges) {
4351 MergeNode = OrigCallee;
4352 NonNewMergedNodes++;
4359 assert(MergeNode != OrigCallee);
4360 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4363 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4368 if (!OtherCallersToShareMerge.
empty()) {
4372 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4373 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4374 if (CalleeCallerE == CalleeEdge)
4376 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4378 CallerToMoveCount[CalleeCallerE->Caller]++;
4379 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4383 removeNoneTypeCalleeEdges(OrigCallee);
4384 removeNoneTypeCalleeEdges(MergeNode);
4402template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4403void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4404 findOtherCallersToShareMerge(
4406 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4407 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4408 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4409 auto NumCalleeClones = CalleeEdges.size();
4412 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4415 unsigned PossibleOtherCallerNodes = 0;
4419 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4425 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4426 for (
auto CalleeEdge : CalleeEdges) {
4427 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4430 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4431 if (CalleeCallerEdges->Caller == Node) {
4432 assert(CalleeCallerEdges == CalleeEdge);
4435 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4438 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4440 PossibleOtherCallerNodes++;
4444 for (
auto Id : CalleeEdge->getContextIds()) {
4445 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4449 MissingAllocForContextId++;
4452 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4459 for (
auto CalleeEdge : CalleeEdges) {
4460 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4462 if (!PossibleOtherCallerNodes)
4464 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4466 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4468 if (CalleeCallerE == CalleeEdge)
4472 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4477 for (
auto Id : CalleeCallerE->getContextIds()) {
4478 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4483 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4484 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4485 PossibleOtherCallerNodes--;
4492 if (!PossibleOtherCallerNodes)
4497 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4498 if (
Count != NumCalleeClones)
4500 OtherCallersToShareMerge.
insert(OtherCaller);
4545template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4546bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4553 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4557 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4558 const FuncInfo &CalleeFunc) {
4560 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4564 struct FuncCloneInfo {
4569 DenseMap<CallInfo, CallInfo> CallMap;
4597 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4598 UnassignedCallClones;
4602 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4603 FuncInfo OrigFunc(Func);
4608 std::vector<FuncCloneInfo> FuncCloneInfos;
4609 for (
auto &
Call : CallsWithMetadata) {
4610 ContextNode *
Node = getNodeForInst(
Call);
4614 if (!Node ||
Node->Clones.empty())
4617 "Not having a call should have prevented cloning");
4621 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4625 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4627 ContextNode *CallsiteClone,
4630 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4632 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4633 DenseMap<CallInfo, CallInfo> &CallMap =
4634 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4635 CallInfo CallClone(
Call);
4636 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4637 CallClone = It->second;
4638 CallsiteClone->setCall(CallClone);
4640 for (
auto &MatchingCall :
Node->MatchingCalls) {
4641 CallInfo CallClone(MatchingCall);
4642 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4643 CallClone = It->second;
4645 MatchingCall = CallClone;
4653 auto MoveEdgeToNewCalleeCloneAndSetUp =
4654 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4655 ContextNode *OrigCallee =
Edge->Callee;
4656 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4657 removeNoneTypeCalleeEdges(NewClone);
4658 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4662 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4663 RecordCalleeFuncOfCallsite(
4664 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4671 std::deque<ContextNode *> ClonesWorklist;
4673 if (!
Node->emptyContextIds())
4674 ClonesWorklist.push_back(Node);
4680 unsigned NodeCloneCount = 0;
4681 while (!ClonesWorklist.empty()) {
4682 ContextNode *Clone = ClonesWorklist.front();
4683 ClonesWorklist.pop_front();
4692 if (FuncCloneInfos.size() < NodeCloneCount) {
4694 if (NodeCloneCount == 1) {
4699 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4700 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4704 FuncCloneInfos.push_back(
4705 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4706 AssignCallsiteCloneToFuncClone(
4707 OrigFunc,
Call, Clone,
4708 AllocationCallToContextNodeMap.count(
Call));
4709 for (
auto &CE : Clone->CallerEdges) {
4711 if (!
CE->Caller->hasCall())
4713 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
4723 FuncInfo PreviousAssignedFuncClone;
4725 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4726 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4728 bool CallerAssignedToCloneOfFunc =
false;
4729 if (EI != Clone->CallerEdges.end()) {
4730 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4731 PreviousAssignedFuncClone =
4732 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4733 CallerAssignedToCloneOfFunc =
true;
4738 DenseMap<CallInfo, CallInfo> NewCallMap;
4739 unsigned CloneNo = FuncCloneInfos.size();
4740 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4741 "should already exist in the map");
4742 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4743 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4744 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4745 FunctionClonesAnalysis++;
4751 if (!CallerAssignedToCloneOfFunc) {
4752 AssignCallsiteCloneToFuncClone(
4753 NewFuncClone,
Call, Clone,
4754 AllocationCallToContextNodeMap.count(
Call));
4755 for (
auto &CE : Clone->CallerEdges) {
4757 if (!
CE->Caller->hasCall())
4759 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4771 auto CallerEdges = Clone->CallerEdges;
4772 for (
auto CE : CallerEdges) {
4774 if (
CE->isRemoved()) {
4780 if (!
CE->Caller->hasCall())
4783 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
4787 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
4788 PreviousAssignedFuncClone)
4791 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4804 auto CalleeEdges =
CE->Caller->CalleeEdges;
4805 for (
auto CalleeEdge : CalleeEdges) {
4808 if (CalleeEdge->isRemoved()) {
4813 ContextNode *
Callee = CalleeEdge->Callee;
4817 if (Callee == Clone || !
Callee->hasCall())
4822 if (Callee == CalleeEdge->Caller)
4824 ContextNode *NewClone =
4825 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
4828 removeNoneTypeCalleeEdges(Callee);
4836 CallInfo OrigCall(
Callee->getOrigNode()->Call);
4837 OrigCall.setCloneNo(0);
4838 DenseMap<CallInfo, CallInfo> &CallMap =
4839 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
4841 CallInfo NewCall(CallMap[OrigCall]);
4843 NewClone->setCall(NewCall);
4845 for (
auto &MatchingCall : NewClone->MatchingCalls) {
4846 CallInfo OrigMatchingCall(MatchingCall);
4847 OrigMatchingCall.setCloneNo(0);
4849 CallInfo NewCall(CallMap[OrigMatchingCall]);
4852 MatchingCall = NewCall;
4861 auto FindFirstAvailFuncClone = [&]() {
4866 for (
auto &CF : FuncCloneInfos) {
4867 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
4868 return CF.FuncClone;
4871 "Expected an available func clone for this callsite clone");
4888 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
4889 FuncInfo FuncCloneAssignedToCurCallsiteClone;
4893 auto CloneCallerEdges = Clone->CallerEdges;
4894 for (
auto &
Edge : CloneCallerEdges) {
4898 if (
Edge->isRemoved())
4901 if (!
Edge->Caller->hasCall())
4905 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
4906 FuncInfo FuncCloneCalledByCaller =
4907 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4917 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
4918 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
4926 (FuncCloneAssignedToCurCallsiteClone &&
4927 FuncCloneAssignedToCurCallsiteClone !=
4928 FuncCloneCalledByCaller)) {
4943 if (FuncCloneToNewCallsiteCloneMap.count(
4944 FuncCloneCalledByCaller)) {
4945 ContextNode *NewClone =
4946 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4947 moveEdgeToExistingCalleeClone(
Edge, NewClone);
4949 removeNoneTypeCalleeEdges(NewClone);
4952 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
4953 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4956 ClonesWorklist.push_back(NewClone);
4960 removeNoneTypeCalleeEdges(Clone);
4968 if (!FuncCloneAssignedToCurCallsiteClone) {
4969 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4971 AssignCallsiteCloneToFuncClone(
4972 FuncCloneCalledByCaller,
Call, Clone,
4973 AllocationCallToContextNodeMap.count(
Call));
4977 assert(FuncCloneAssignedToCurCallsiteClone ==
4978 FuncCloneCalledByCaller);
4987 if (!FuncCloneAssignedToCurCallsiteClone) {
4988 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4989 assert(FuncCloneAssignedToCurCallsiteClone);
4991 AssignCallsiteCloneToFuncClone(
4992 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
4993 AllocationCallToContextNodeMap.count(
Call));
4995 assert(FuncCloneToCurNodeCloneMap
4996 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4998 RecordCalleeFuncOfCallsite(
Edge->Caller,
4999 FuncCloneAssignedToCurCallsiteClone);
5019 if (!FuncCloneAssignedToCurCallsiteClone) {
5020 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5021 assert(FuncCloneAssignedToCurCallsiteClone &&
5022 "No available func clone for this callsite clone");
5023 AssignCallsiteCloneToFuncClone(
5024 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5025 AllocationCallToContextNodeMap.contains(
Call));
5030 for (
const auto &PE :
Node->CalleeEdges)
5032 for (
const auto &CE :
Node->CallerEdges)
5034 for (
auto *Clone :
Node->Clones) {
5036 for (
const auto &PE : Clone->CalleeEdges)
5038 for (
const auto &CE : Clone->CallerEdges)
5044 if (FuncCloneInfos.size() < 2)
5050 for (
auto &
Call : CallsWithMetadata) {
5051 ContextNode *
Node = getNodeForInst(
Call);
5052 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5058 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5062 DenseSet<unsigned> NodeCallClones;
5063 for (
auto *
C :
Node->Clones)
5064 NodeCallClones.
insert(
C->Call.cloneNo());
5067 for (
auto &FC : FuncCloneInfos) {
5072 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5077 auto &CallVector = UnassignedCallClones[
Node][
I];
5078 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5079 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5080 CallInfo CallClone = It->second;
5081 CallVector.push_back(CallClone);
5085 assert(
false &&
"Expected to find call in CallMap");
5088 for (
auto &MatchingCall :
Node->MatchingCalls) {
5089 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5090 CallInfo CallClone = It->second;
5091 CallVector.push_back(CallClone);
5095 assert(
false &&
"Expected to find call in CallMap");
5103 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5105 auto UpdateCalls = [&](ContextNode *
Node,
5106 DenseSet<const ContextNode *> &Visited,
5107 auto &&UpdateCalls) {
5108 auto Inserted = Visited.insert(Node);
5112 for (
auto *Clone :
Node->Clones)
5113 UpdateCalls(Clone, Visited, UpdateCalls);
5115 for (
auto &
Edge :
Node->CallerEdges)
5116 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5120 if (!
Node->hasCall() ||
Node->emptyContextIds())
5123 if (
Node->IsAllocation) {
5124 auto AT = allocTypeToUse(
Node->AllocTypes);
5130 !ContextIdToContextSizeInfos.empty()) {
5131 uint64_t TotalCold = 0;
5133 for (
auto Id :
Node->getContextIds()) {
5134 auto TypeI = ContextIdToAllocationType.find(Id);
5135 assert(TypeI != ContextIdToAllocationType.end());
5136 auto CSI = ContextIdToContextSizeInfos.find(Id);
5137 if (CSI != ContextIdToContextSizeInfos.end()) {
5138 for (
auto &
Info : CSI->second) {
5140 if (TypeI->second == AllocationType::Cold)
5141 TotalCold +=
Info.TotalSize;
5146 AT = AllocationType::Cold;
5148 updateAllocationCall(
Node->Call, AT);
5153 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5156 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5157 updateCall(
Node->Call, CalleeFunc);
5159 for (
auto &
Call :
Node->MatchingCalls)
5160 updateCall(
Call, CalleeFunc);
5164 if (!UnassignedCallClones.
contains(Node))
5166 DenseSet<unsigned> NodeCallClones;
5167 for (
auto *
C :
Node->Clones)
5168 NodeCallClones.
insert(
C->Call.cloneNo());
5170 auto &ClonedCalls = UnassignedCallClones[
Node];
5171 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5175 if (NodeCallClones.
contains(CloneNo))
5178 for (
auto &
Call : CallVector)
5179 updateCall(
Call, CalleeFunc);
5188 DenseSet<const ContextNode *> Visited;
5189 for (
auto &Entry : AllocationCallToContextNodeMap)
5190 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5201 for (
auto &SN : FS->callsites()) {
5206 SN.Clones.size() >
I &&
5207 "Callsite summary has fewer entries than other summaries in function");
5208 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5215 for (
auto &AN : FS->allocs()) {
5219 assert(AN.Versions.size() >
I &&
5220 "Alloc summary has fewer entries than other summaries in function");
5221 if (AN.Versions.size() <=
I ||
5238 NewGV->takeName(DeclGV);
5245 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5246 if (!FuncToAliasMap.count(&
F))
5248 for (
auto *
A : FuncToAliasMap[&
F]) {
5250 auto *PrevA = M.getNamedAlias(AliasName);
5252 A->getType()->getPointerAddressSpace(),
5253 A->getLinkage(), AliasName, NewF);
5254 NewA->copyAttributesFrom(
A);
5256 TakeDeclNameAndReplace(PrevA, NewA);
5265 FunctionsClonedThinBackend++;
5282 for (
unsigned I = 1;
I < NumClones;
I++) {
5283 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5290 FunctionCloneDuplicatesThinBackend++;
5291 auto *Func = HashToFunc[Hash];
5292 if (Func->hasAvailableExternallyLinkage()) {
5298 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5300 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5303 auto *PrevF = M.getFunction(Name);
5306 TakeDeclNameAndReplace(PrevF, Alias);
5308 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5311 CloneFuncAliases(Func,
I);
5315 HashToFunc[Hash] = NewF;
5316 FunctionClonesThinBackend++;
5319 for (
auto &BB : *NewF) {
5320 for (
auto &Inst : BB) {
5321 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5322 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5327 TakeDeclNameAndReplace(PrevF, NewF);
5329 NewF->setName(Name);
5332 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5335 CloneFuncAliases(NewF,
I);
5344 const Function *CallingFunc =
nullptr) {
5363 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5369 if (!SrcFileMD &&
F.isDeclaration()) {
5373 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5378 assert(SrcFileMD || OrigName ==
F.getName());
5380 StringRef SrcFile = M.getSourceFileName();
5392 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5393 F.getName().contains(
'.')) {
5394 OrigName =
F.getName().rsplit(
'.').first;
5403 assert(TheFnVI ||
F.isDeclaration());
5407bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5409 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5410 Symtab = std::make_unique<InstrProfSymtab>();
5421 if (
Error E = Symtab->create(M,
true,
false)) {
5422 std::string SymtabFailure =
toString(std::move(
E));
5423 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5436 auto MIBIter = AllocNode.
MIBs.begin();
5437 for (
auto &MDOp : MemProfMD->
operands()) {
5439 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5444 auto ContextIterBegin =
5448 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5450 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5455 if (LastStackContextId == *ContextIter)
5457 LastStackContextId = *ContextIter;
5458 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5468bool MemProfContextDisambiguation::applyImport(
Module &M) {
5475 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5477 for (
auto &
A :
M.aliases()) {
5478 auto *Aliasee =
A.getAliaseeObject();
5480 FuncToAliasMap[
F].insert(&
A);
5483 if (!initializeIndirectCallPromotionInfo(M))
5490 OptimizationRemarkEmitter ORE(&
F);
5493 bool ClonesCreated =
false;
5494 unsigned NumClonesCreated = 0;
5495 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5505 if (ClonesCreated) {
5506 assert(NumClonesCreated == NumClones);
5513 ClonesCreated =
true;
5514 NumClonesCreated = NumClones;
5517 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5518 Function *CalledFunction, FunctionSummary *
FS) {
5520 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5532 if (CalledFunction != CB->getCalledOperand() &&
5533 (!GA || CalledFunction != GA->getAliaseeObject())) {
5534 SkippedCallsCloning++;
5540 auto CalleeOrigName = CalledFunction->getName();
5541 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5544 if (J > 0 && VMaps[J - 1]->
empty())
5548 if (!StackNode.
Clones[J])
5550 auto NewF =
M.getOrInsertFunction(
5552 CalledFunction->getFunctionType());
5566 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5567 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5569 <<
" assigned to call function clone "
5570 <<
ore::NV(
"Callee", NewF.getCallee()));
5584 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5588 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5590 "enable-import-metadata is needed to emit thinlto_src_module");
5591 StringRef SrcModule =
5594 if (GVS->modulePath() == SrcModule) {
5595 GVSummary = GVS.get();
5599 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5609 if (
FS->allocs().empty() &&
FS->callsites().empty())
5612 auto SI =
FS->callsites().begin();
5613 auto AI =
FS->allocs().begin();
5618 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5621 for (
auto CallsiteIt =
FS->callsites().rbegin();
5622 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5623 auto &Callsite = *CallsiteIt;
5627 if (!Callsite.StackIdIndices.empty())
5629 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5638 for (
auto &BB :
F) {
5639 for (
auto &
I : BB) {
5645 auto *CalledValue = CB->getCalledOperand();
5646 auto *CalledFunction = CB->getCalledFunction();
5647 if (CalledValue && !CalledFunction) {
5648 CalledValue = CalledValue->stripPointerCasts();
5655 assert(!CalledFunction &&
5656 "Expected null called function in callsite for alias");
5660 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5661 I.getMetadata(LLVMContext::MD_callsite));
5662 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5668 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5669 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5670 ? AllocTypeColdThinBackend++
5671 : AllocTypeNotColdThinBackend++;
5672 OrigAllocsThinBackend++;
5673 AllocVersionsThinBackend++;
5674 if (!MaxAllocVersionsThinBackend)
5675 MaxAllocVersionsThinBackend = 1;
5682 auto &AllocNode = *(AI++);
5690 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5692 OrigAllocsThinBackend++;
5693 AllocVersionsThinBackend += AllocNode.Versions.size();
5694 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5695 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5705 if (AllocNode.Versions.size() == 1 &&
5708 AllocationType::NotCold ||
5710 AllocationType::None);
5711 UnclonableAllocsThinBackend++;
5717 return Type == ((uint8_t)AllocationType::NotCold |
5718 (uint8_t)AllocationType::Cold);
5722 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5725 if (J > 0 && VMaps[J - 1]->
empty())
5728 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5731 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5732 : AllocTypeNotColdThinBackend++;
5747 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5748 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5750 <<
" marked with memprof allocation attribute "
5751 <<
ore::NV(
"Attribute", AllocTypeString));
5753 }
else if (!CallsiteContext.empty()) {
5754 if (!CalledFunction) {
5758 assert(!CI || !CI->isInlineAsm());
5768 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
5774 CloneFuncIfNeeded(NumClones, FS);
5779 assert(SI !=
FS->callsites().end());
5780 auto &StackNode = *(
SI++);
5786 for (
auto StackId : CallsiteContext) {
5788 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
5794 CloneCallsite(StackNode, CB, CalledFunction, FS);
5796 }
else if (CB->isTailCall() && CalledFunction) {
5799 ValueInfo CalleeVI =
5801 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
5802 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
5803 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
5804 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
5811 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
5821 for (
auto &BB :
F) {
5822 for (
auto &
I : BB) {
5825 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
5826 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
5834unsigned MemProfContextDisambiguation::recordICPInfo(
5839 uint32_t NumCandidates;
5840 uint64_t TotalCount;
5841 auto CandidateProfileData =
5842 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
5844 if (CandidateProfileData.empty())
5850 bool ICPNeeded =
false;
5851 unsigned NumClones = 0;
5852 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
5853 for (
const auto &Candidate : CandidateProfileData) {
5855 auto CalleeValueInfo =
5857 ImportSummary->getValueInfo(Candidate.Value);
5860 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
5862 auto &StackNode = *(
SI++);
5867 [](
unsigned CloneNo) { return CloneNo != 0; });
5877 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
5878 TotalCount, CallsiteInfoStartIndex});
5882void MemProfContextDisambiguation::performICP(
5884 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
5886 OptimizationRemarkEmitter &ORE) {
5893 for (
auto &
Info : ICallAnalysisInfo) {
5896 auto TotalCount =
Info.TotalCount;
5897 unsigned NumPromoted = 0;
5898 unsigned NumClones = 0;
5900 for (
auto &Candidate :
Info.CandidateProfileData) {
5911 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
5912 if (TargetFunction ==
nullptr ||
5920 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
5921 <<
"Memprof cannot promote indirect call: target with md5sum "
5922 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
5931 const char *Reason =
nullptr;
5934 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
5935 <<
"Memprof cannot promote indirect call to "
5936 <<
ore::NV(
"TargetFunction", TargetFunction)
5937 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
5948 CallBase *CBClone = CB;
5949 for (
unsigned J = 0; J < NumClones; J++) {
5952 if (J > 0 && VMaps[J - 1]->
empty())
5962 TotalCount, isSamplePGO, &ORE);
5963 auto *TargetToUse = TargetFunction;
5966 if (StackNode.
Clones[J]) {
5985 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5987 <<
" promoted and assigned to call function clone "
5988 <<
ore::NV(
"Callee", TargetToUse));
5992 TotalCount -= Candidate.Count;
5997 CallBase *CBClone = CB;
5998 for (
unsigned J = 0; J < NumClones; J++) {
6001 if (J > 0 && VMaps[J - 1]->
empty())
6007 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6010 if (TotalCount != 0)
6012 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6013 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6018template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6019bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
6021 dbgs() <<
"CCG before cloning:\n";
6025 exportToDot(
"postbuild");
6038 dbgs() <<
"CCG after cloning:\n";
6042 exportToDot(
"cloned");
6044 bool Changed = assignFunctions();
6047 dbgs() <<
"CCG after assigning function clones:\n";
6051 exportToDot(
"clonefuncassign");
6054 printTotalSizes(
errs());
6059bool MemProfContextDisambiguation::processModule(
6061 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6066 return applyImport(M);
6079 ModuleCallsiteContextGraph CCG(M, OREGetter);
6080 return CCG.process();
6085 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6090 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6094 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6098 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6099 "-memprof-dot-context-id");
6100 if (ImportSummary) {
6110 auto ReadSummaryFile =
6112 if (!ReadSummaryFile) {
6119 if (!ImportSummaryForTestingOrErr) {
6125 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6126 ImportSummary = ImportSummaryForTesting.get();
6135 if (!processModule(M, OREGetter))
6152 IndexCallsiteContextGraph CCG(Index, isPrevailing);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...