52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
114 cl::desc(
"Specify the path prefix of the MemProf dot files."));
118 cl::desc(
"Export graph to dot files."));
123 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
133 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
138 "Export only nodes with contexts feeding given "
139 "-memprof-dot-alloc-id"),
141 "Export only nodes with given -memprof-dot-context-id")));
145 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
146 "or to highlight if -memprof-dot-scope=all"));
150 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
151 "highlight otherwise"));
155 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
159 cl::desc(
"Perform verification checks on CallingContextGraph."));
163 cl::desc(
"Perform frequent verification checks on nodes."));
166 "memprof-import-summary",
167 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
173 cl::desc(
"Max depth to recursively search for missing "
174 "frames through tail calls."));
179 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
183 cl::desc(
"Allow cloning of contexts through recursive cycles"));
190 cl::desc(
"Merge clones before assigning functions"));
199 cl::desc(
"Allow cloning of contexts having recursive cycles"));
205 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
216 cl::desc(
"Linking with hot/cold operator new interfaces"));
221 "Require target function definition when promoting indirect calls"));
243template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
244class CallsiteContextGraph {
246 CallsiteContextGraph() =
default;
247 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
248 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
255 void identifyClones();
262 bool assignFunctions();
269 const CallsiteContextGraph &CCG) {
275 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
277 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
279 void exportToDot(std::string Label)
const;
282 struct FuncInfo final
283 :
public std::pair<FuncTy *, unsigned > {
284 using Base = std::pair<FuncTy *, unsigned>;
286 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
287 explicit operator bool()
const {
return this->first !=
nullptr; }
288 FuncTy *
func()
const {
return this->first; }
289 unsigned cloneNo()
const {
return this->second; }
293 struct CallInfo final :
public std::pair<CallTy, unsigned > {
294 using Base = std::pair<CallTy, unsigned>;
296 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
298 explicit operator bool()
const {
return (
bool)this->first; }
299 CallTy call()
const {
return this->first; }
300 unsigned cloneNo()
const {
return this->second; }
301 void setCloneNo(
unsigned N) { this->second =
N; }
303 if (!
operator bool()) {
309 OS <<
"\t(clone " << cloneNo() <<
")";
335 bool Recursive =
false;
362 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
366 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
370 bool useCallerEdgesForContextInfo()
const {
375 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
393 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
394 Count += Edge->getContextIds().size();
398 CalleeEdges, useCallerEdgesForContextInfo()
400 : std::vector<std::shared_ptr<ContextEdge>>());
401 for (
const auto &Edge : Edges)
408 uint8_t computeAllocType()
const {
413 CalleeEdges, useCallerEdgesForContextInfo()
415 : std::vector<std::shared_ptr<ContextEdge>>());
416 for (
const auto &Edge : Edges) {
427 bool emptyContextIds()
const {
429 CalleeEdges, useCallerEdgesForContextInfo()
431 : std::vector<std::shared_ptr<ContextEdge>>());
432 for (
const auto &Edge : Edges) {
433 if (!Edge->getContextIds().empty())
440 std::vector<ContextNode *> Clones;
443 ContextNode *CloneOf =
nullptr;
445 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
447 ContextNode(
bool IsAllocation, CallInfo
C)
448 : IsAllocation(IsAllocation),
Call(
C) {}
450 void addClone(ContextNode *Clone) {
452 CloneOf->Clones.push_back(Clone);
453 Clone->CloneOf = CloneOf;
455 Clones.push_back(Clone);
457 Clone->CloneOf =
this;
461 ContextNode *getOrigNode() {
468 unsigned int ContextId);
470 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
471 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
472 void eraseCalleeEdge(
const ContextEdge *Edge);
473 void eraseCallerEdge(
const ContextEdge *Edge);
475 void setCall(CallInfo
C) {
Call =
C; }
477 bool hasCall()
const {
return (
bool)
Call.call(); }
483 bool isRemoved()
const {
519 bool IsBackedge =
false;
526 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
527 ContextIds(std::move(ContextIds)) {}
533 inline void clear() {
543 inline bool isRemoved()
const {
544 if (Callee || Caller)
565 void removeNoneTypeCalleeEdges(ContextNode *
Node);
566 void removeNoneTypeCallerEdges(ContextNode *
Node);
568 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
574 template <
class NodeT,
class IteratorT>
575 std::vector<uint64_t>
580 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
583 template <
class NodeT,
class IteratorT>
584 void addStackNodesForMIB(ContextNode *AllocNode,
593 void updateStackNodes();
597 void handleCallsitesWithMultipleTargets();
600 void markBackedges();
610 bool partitionCallsByCallee(
612 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
619 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
626 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
631 struct CallContextInfo {
635 std::vector<uint64_t> StackIds;
649 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
650 bool CalleeIter =
true);
658 void assignStackNodesPostOrder(
671 void propagateDuplicateContextIds(
677 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
685 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
695 calleesMatch(CallTy
Call, EdgeIter &EI,
700 const FuncTy *getCalleeFunc(CallTy
Call) {
701 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
707 bool calleeMatchesFunc(
708 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
709 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
710 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
711 Call, Func, CallerFunc, FoundCalleeChain);
715 bool sameCallee(CallTy Call1, CallTy Call2) {
716 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
721 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
722 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
728 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
734 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
739 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
744 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
745 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
751 FuncInfo cloneFunctionForCallsite(
753 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
754 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
755 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
760 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
761 unsigned CloneNo)
const {
762 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
766 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
767 CallInfo
C = CallInfo()) {
768 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
769 auto *NewNode = NodeOwner.back().get();
771 NodeToCallingFunc[NewNode] =
F;
772 NewNode->NodeId = NodeOwner.size();
777 ContextNode *getNodeForInst(
const CallInfo &
C);
778 ContextNode *getNodeForAlloc(
const CallInfo &
C);
779 ContextNode *getNodeForStackId(
uint64_t StackId);
801 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
808 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
809 ContextNode *NewCallee,
810 bool NewClone =
false,
818 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
819 ContextNode *NewCaller);
830 void mergeNodeCalleeClones(
835 void findOtherCallersToShareMerge(
836 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
867 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
873 unsigned int LastContextId = 0;
876template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
878 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
879template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
881 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
882template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
884 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
885template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
887 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
890class ModuleCallsiteContextGraph
891 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
894 ModuleCallsiteContextGraph(
896 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
899 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
902 uint64_t getStackId(uint64_t IdOrIndex)
const;
904 bool calleeMatchesFunc(
905 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
906 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
907 bool sameCallee(Instruction *Call1, Instruction *Call2);
908 bool findProfiledCalleeThroughTailCalls(
909 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
910 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
911 bool &FoundMultipleCalleeChains);
912 uint64_t getLastStackId(Instruction *
Call);
913 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
916 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
917 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
919 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
920 DenseMap<CallInfo, CallInfo> &CallMap,
921 std::vector<CallInfo> &CallsWithMetadataInFunc,
923 std::string getLabel(
const Function *Func,
const Instruction *
Call,
924 unsigned CloneNo)
const;
927 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
933struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
934 IndexCall() : PointerUnion() {}
935 IndexCall(std::nullptr_t) : IndexCall() {}
936 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
937 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
938 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
940 IndexCall *operator->() {
return this; }
942 void print(raw_ostream &OS)
const {
943 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
968class IndexCallsiteContextGraph
969 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
972 IndexCallsiteContextGraph(
973 ModuleSummaryIndex &Index,
977 ~IndexCallsiteContextGraph() {
982 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
984 for (
auto &Callsite :
I.second)
985 FS->addCallsite(*Callsite.second);
990 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
993 uint64_t getStackId(uint64_t IdOrIndex)
const;
994 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
995 bool calleeMatchesFunc(
996 IndexCall &
Call,
const FunctionSummary *Func,
997 const FunctionSummary *CallerFunc,
998 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
999 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1000 bool findProfiledCalleeThroughTailCalls(
1001 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1002 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1003 bool &FoundMultipleCalleeChains);
1004 uint64_t getLastStackId(IndexCall &
Call);
1005 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1008 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1009 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1010 IndexCall>::FuncInfo
1011 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1012 DenseMap<CallInfo, CallInfo> &CallMap,
1013 std::vector<CallInfo> &CallsWithMetadataInFunc,
1015 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1016 unsigned CloneNo)
const;
1020 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1022 const ModuleSummaryIndex &Index;
1030 std::unordered_map<FunctionSummary *,
1031 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1032 FunctionCalleesToSynthesizedCallsiteInfos;
1044 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1047 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1069template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1070bool allocTypesMatch(
1071 const std::vector<uint8_t> &InAllocTypes,
1072 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1076 assert(InAllocTypes.size() == Edges.size());
1078 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1080 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1084 if (l == (uint8_t)AllocationType::None ||
1085 r->AllocTypes == (uint8_t)AllocationType::None)
1087 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1096template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1097bool allocTypesMatchClone(
1098 const std::vector<uint8_t> &InAllocTypes,
1099 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1100 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1104 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1108 for (
const auto &
E : Clone->CalleeEdges) {
1110 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1114 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1115 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1117 if (Iter == EdgeCalleeMap.
end())
1125 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1133template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1134typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1135CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1136 const CallInfo &
C) {
1137 ContextNode *
Node = getNodeForAlloc(
C);
1141 return NonAllocationCallToContextNodeMap.lookup(
C);
1144template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1145typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1146CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1147 const CallInfo &
C) {
1148 return AllocationCallToContextNodeMap.lookup(
C);
1151template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1152typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1153CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1155 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1156 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1157 return StackEntryNode->second;
1161template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1162void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1164 unsigned int ContextId) {
1165 for (
auto &
Edge : CallerEdges) {
1166 if (
Edge->Caller == Caller) {
1168 Edge->getContextIds().insert(ContextId);
1172 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1173 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1174 CallerEdges.push_back(
Edge);
1178template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1179void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1180 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1196 auto CalleeCallerCount =
Callee->CallerEdges.size();
1197 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1202 }
else if (CalleeIter) {
1204 *EI =
Caller->CalleeEdges.erase(*EI);
1207 *EI =
Callee->CallerEdges.erase(*EI);
1209 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1210 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1213template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1214void CallsiteContextGraph<
1215 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1216 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1218 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1220 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1226template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1227void CallsiteContextGraph<
1228 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1229 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1231 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1233 Edge->Caller->eraseCalleeEdge(
Edge.get());
1234 EI =
Node->CallerEdges.erase(EI);
1240template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1241typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1242CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1243 findEdgeFromCallee(
const ContextNode *Callee) {
1244 for (
const auto &
Edge : CalleeEdges)
1245 if (
Edge->Callee == Callee)
1250template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1251typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1252CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1253 findEdgeFromCaller(
const ContextNode *Caller) {
1254 for (
const auto &
Edge : CallerEdges)
1255 if (
Edge->Caller == Caller)
1260template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1261void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1262 eraseCalleeEdge(
const ContextEdge *
Edge) {
1264 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1265 return CalleeEdge.get() ==
Edge;
1267 assert(EI != CalleeEdges.end());
1268 CalleeEdges.erase(EI);
1271template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1272void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1273 eraseCallerEdge(
const ContextEdge *
Edge) {
1275 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1276 return CallerEdge.get() ==
Edge;
1278 assert(EI != CallerEdges.end());
1279 CallerEdges.erase(EI);
1282template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1283uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1284 DenseSet<uint32_t> &ContextIds)
const {
1286 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1287 uint8_t
AllocType = (uint8_t)AllocationType::None;
1288 for (
auto Id : ContextIds) {
1289 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1297template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1299CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1300 const DenseSet<uint32_t> &Node1Ids,
1301 const DenseSet<uint32_t> &Node2Ids)
const {
1303 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1304 uint8_t
AllocType = (uint8_t)AllocationType::None;
1305 for (
auto Id : Node1Ids) {
1306 if (!Node2Ids.
count(Id))
1308 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1316template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1317uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1318 const DenseSet<uint32_t> &Node1Ids,
1319 const DenseSet<uint32_t> &Node2Ids)
const {
1320 if (Node1Ids.
size() < Node2Ids.
size())
1321 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1323 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1326template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1327typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1328CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1329 CallInfo
Call,
const FuncTy *
F) {
1331 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1332 AllocationCallToContextNodeMap[
Call] = AllocNode;
1334 AllocNode->OrigStackOrAllocId = LastContextId;
1337 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1353template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1354template <
class NodeT,
class IteratorT>
1355void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1356 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1364 ContextIdToAllocationType[++LastContextId] =
AllocType;
1366 if (!ContextSizeInfo.
empty()) {
1367 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1372 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1377 ContextNode *PrevNode = AllocNode;
1381 SmallSet<uint64_t, 8> StackIdSet;
1384 ContextIter != StackContext.
end(); ++ContextIter) {
1385 auto StackId = getStackId(*ContextIter);
1386 ContextNode *StackNode = getNodeForStackId(StackId);
1388 StackNode = createNewNode(
false);
1389 StackEntryIdToContextNodeMap[StackId] = StackNode;
1390 StackNode->OrigStackOrAllocId = StackId;
1397 StackNode->Recursive =
true;
1399 StackNode->AllocTypes |= (uint8_t)
AllocType;
1400 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1401 PrevNode = StackNode;
1405template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1407CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1408 const DenseSet<uint32_t> &StackSequenceContextIds,
1409 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1410 DenseSet<uint32_t> NewContextIds;
1411 for (
auto OldId : StackSequenceContextIds) {
1412 NewContextIds.
insert(++LastContextId);
1413 OldToNewContextIds[OldId].insert(LastContextId);
1414 assert(ContextIdToAllocationType.count(OldId));
1416 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1417 if (DotAllocContextIds.
contains(OldId))
1418 DotAllocContextIds.
insert(LastContextId);
1420 return NewContextIds;
1423template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1424void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1425 propagateDuplicateContextIds(
1426 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1428 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1429 DenseSet<uint32_t> NewIds;
1430 for (
auto Id : ContextIds)
1431 if (
auto NewId = OldToNewContextIds.find(Id);
1432 NewId != OldToNewContextIds.end())
1438 auto UpdateCallers = [&](ContextNode *
Node,
1439 DenseSet<const ContextEdge *> &Visited,
1440 auto &&UpdateCallers) ->
void {
1441 for (
const auto &
Edge :
Node->CallerEdges) {
1445 ContextNode *NextNode =
Edge->Caller;
1446 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1449 if (!NewIdsToAdd.
empty()) {
1450 Edge->getContextIds().insert_range(NewIdsToAdd);
1451 UpdateCallers(NextNode, Visited, UpdateCallers);
1456 DenseSet<const ContextEdge *> Visited;
1457 for (
auto &Entry : AllocationCallToContextNodeMap) {
1459 UpdateCallers(Node, Visited, UpdateCallers);
1463template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1464void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1465 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1468 DenseSet<uint32_t> RemainingContextIds) {
1470 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1471 DenseSet<uint32_t> RecursiveContextIds;
1472 DenseSet<uint32_t> AllCallerContextIds;
1477 for (
auto &CE : OrigEdges) {
1478 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1479 for (
auto Id :
CE->getContextIds())
1480 if (!AllCallerContextIds.
insert(Id).second)
1481 RecursiveContextIds.
insert(Id);
1485 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1487 DenseSet<uint32_t> NewEdgeContextIds;
1488 DenseSet<uint32_t> NotFoundContextIds;
1492 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1493 NotFoundContextIds);
1496 if (RecursiveContextIds.
empty()) {
1499 RemainingContextIds.
swap(NotFoundContextIds);
1509 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1511 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1514 if (NewEdgeContextIds.
empty()) {
1518 if (TowardsCallee) {
1519 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1520 auto NewEdge = std::make_shared<ContextEdge>(
1521 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1522 NewNode->CalleeEdges.push_back(NewEdge);
1523 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1525 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1526 auto NewEdge = std::make_shared<ContextEdge>(
1527 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1528 NewNode->CallerEdges.push_back(NewEdge);
1529 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1532 if (
Edge->getContextIds().empty()) {
1533 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1540template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1542 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1546 assert(!Edge->ContextIds.empty());
1549template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1551 bool CheckEdges =
true) {
1552 if (
Node->isRemoved())
1556 auto NodeContextIds =
Node->getContextIds();
1560 if (
Node->CallerEdges.size()) {
1562 Node->CallerEdges.front()->ContextIds);
1566 set_union(CallerEdgeContextIds, Edge->ContextIds);
1573 NodeContextIds == CallerEdgeContextIds ||
1576 if (
Node->CalleeEdges.size()) {
1578 Node->CalleeEdges.front()->ContextIds);
1582 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1588 NodeContextIds == CalleeEdgeContextIds);
1597 for (
const auto &
E :
Node->CalleeEdges)
1603template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1604void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1605 assignStackNodesPostOrder(
1606 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
1607 DenseMap<uint64_t, std::vector<CallContextInfo>>
1608 &StackIdToMatchingCalls,
1609 DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
1617 auto CallerEdges =
Node->CallerEdges;
1618 for (
auto &
Edge : CallerEdges) {
1620 if (
Edge->isRemoved()) {
1624 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1625 CallToMatchingCall);
1634 if (
Node->IsAllocation ||
1635 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1638 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1642 if (Calls.size() == 1) {
1643 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1644 if (Ids.size() == 1) {
1645 assert(SavedContextIds.empty());
1647 assert(Node == getNodeForStackId(Ids[0]));
1648 if (
Node->Recursive)
1651 NonAllocationCallToContextNodeMap[
Call] =
Node;
1660 uint64_t LastId =
Node->OrigStackOrAllocId;
1661 ContextNode *LastNode = getNodeForStackId(LastId);
1664 assert(LastNode == Node);
1666 ContextNode *LastNode =
Node;
1671 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1673 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1674 bool CreatedNode =
false;
1675 for (
unsigned I = 0;
I < Calls.size();
1676 I++, PrevIterCreatedNode = CreatedNode) {
1677 CreatedNode =
false;
1678 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1681 if (SavedContextIds.empty()) {
1688 auto MatchingCall = CallToMatchingCall[
Call];
1689 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1693 assert(
I > 0 && !PrevIterCreatedNode);
1696 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1701 assert(LastId == Ids.back());
1710 ContextNode *PrevNode = LastNode;
1714 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1716 ContextNode *CurNode = getNodeForStackId(Id);
1720 assert(!CurNode->Recursive);
1722 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1734 if (SavedContextIds.empty()) {
1743 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1744 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1746 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1748 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1754 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1759 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1764 for (
auto Id : Ids) {
1765 ContextNode *CurNode = getNodeForStackId(Id);
1772 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1779 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1780 if (PrevEdge->getContextIds().empty())
1781 removeEdgeFromGraph(PrevEdge);
1786 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1787 ? (uint8_t)AllocationType::None
1788 : CurNode->computeAllocType();
1793 for (
auto Id : Ids) {
1794 ContextNode *CurNode = getNodeForStackId(Id);
1803template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1804void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1812 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
1813 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1814 for (
auto &
Call : CallsWithMetadata) {
1816 if (AllocationCallToContextNodeMap.count(
Call))
1818 auto StackIdsWithContextNodes =
1819 getStackIdsWithContextNodesForCall(
Call.call());
1822 if (StackIdsWithContextNodes.empty())
1826 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1827 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
1837 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
1841 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
1842 for (
auto &It : StackIdToMatchingCalls) {
1843 auto &Calls = It.getSecond();
1845 if (Calls.size() == 1) {
1846 auto &Ids = Calls[0].StackIds;
1847 if (Ids.size() == 1)
1860 DenseMap<const FuncTy *, unsigned> FuncToIndex;
1861 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
1862 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
1865 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1866 return A.StackIds.size() >
B.StackIds.size() ||
1867 (
A.StackIds.size() ==
B.StackIds.size() &&
1868 (
A.StackIds <
B.StackIds ||
1869 (
A.StackIds ==
B.StackIds &&
1870 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
1876 uint64_t LastId = It.getFirst();
1877 ContextNode *LastNode = getNodeForStackId(LastId);
1881 if (LastNode->Recursive)
1886 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1894 DenseSet<const FuncTy *> MatchingIdsFuncSet;
1897 for (
unsigned I = 0;
I < Calls.size();
I++) {
1898 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1899 assert(SavedContextIds.empty());
1900 assert(LastId == Ids.back());
1905 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1906 MatchingIdsFuncSet.
clear();
1913 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
1915 ContextNode *PrevNode = LastNode;
1916 ContextNode *CurNode = LastNode;
1921 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1923 CurNode = getNodeForStackId(Id);
1927 if (CurNode->Recursive) {
1932 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1953 if (StackSequenceContextIds.
empty()) {
1966 if (Ids.back() != getLastStackId(
Call)) {
1967 for (
const auto &PE : LastNode->CallerEdges) {
1968 set_subtract(StackSequenceContextIds, PE->getContextIds());
1969 if (StackSequenceContextIds.
empty())
1973 if (StackSequenceContextIds.
empty())
1985 MatchingIdsFuncSet.
insert(Func);
1992 bool DuplicateContextIds =
false;
1993 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1994 auto &CallCtxInfo = Calls[J];
1995 auto &NextIds = CallCtxInfo.StackIds;
1998 auto *NextFunc = CallCtxInfo.Func;
1999 if (NextFunc != Func) {
2002 DuplicateContextIds =
true;
2005 auto &NextCall = CallCtxInfo.Call;
2006 CallToMatchingCall[NextCall] =
Call;
2017 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2018 StackSequenceContextIds.
size());
2021 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2022 : StackSequenceContextIds;
2023 assert(!SavedContextIds.empty());
2025 if (!DuplicateContextIds) {
2029 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2030 if (LastNodeContextIds.
empty())
2037 propagateDuplicateContextIds(OldToNewContextIds);
2047 DenseSet<const ContextNode *> Visited;
2048 for (
auto &Entry : AllocationCallToContextNodeMap)
2049 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2050 CallToMatchingCall);
2055uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2056 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2058 return CallsiteContext.
back();
2061uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2063 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2066 return Index.getStackIdAtIndex(CallsiteContext.
back());
2088 auto Pos =
F.getName().find_last_of(
'.');
2091 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2097std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2098 const Instruction *
Call,
2099 unsigned CloneNo)
const {
2105std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2106 const IndexCall &
Call,
2107 unsigned CloneNo)
const {
2108 auto VI = FSToVIMap.find(Func);
2109 assert(VI != FSToVIMap.end());
2112 return CallerName +
" -> alloc";
2115 return CallerName +
" -> " +
2117 Callsite->Clones[CloneNo]);
2121std::vector<uint64_t>
2122ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2123 Instruction *
Call) {
2124 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2126 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2130std::vector<uint64_t>
2131IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2133 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2135 return getStackIdsWithContextNodes<CallsiteInfo,
2136 SmallVector<unsigned>::const_iterator>(
2140template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2141template <
class NodeT,
class IteratorT>
2142std::vector<uint64_t>
2143CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2144 CallStack<NodeT, IteratorT> &CallsiteContext) {
2145 std::vector<uint64_t> StackIds;
2146 for (
auto IdOrIndex : CallsiteContext) {
2147 auto StackId = getStackId(IdOrIndex);
2148 ContextNode *
Node = getNodeForStackId(StackId);
2151 StackIds.push_back(StackId);
2156ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2158 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2159 :
Mod(
M), OREGetter(OREGetter) {
2161 std::vector<CallInfo> CallsWithMetadata;
2162 for (
auto &BB :
F) {
2163 for (
auto &
I : BB) {
2166 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2167 CallsWithMetadata.push_back(&
I);
2168 auto *AllocNode = addAllocNode(&
I, &
F);
2169 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2173 for (
auto &MDOp : MemProfMD->operands()) {
2175 std::vector<ContextTotalSize> ContextSizeInfo;
2177 if (MIBMD->getNumOperands() > 2) {
2178 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2179 MDNode *ContextSizePair =
2188 ContextSizeInfo.push_back({FullStackId, TotalSize});
2194 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2195 AllocNode, StackContext, CallsiteContext,
2201 DotAllocContextIds = AllocNode->getContextIds();
2205 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2206 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2209 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2210 CallsWithMetadata.push_back(&
I);
2214 if (!CallsWithMetadata.empty())
2215 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2219 dbgs() <<
"CCG before updating call stack chains:\n";
2224 exportToDot(
"prestackupdate");
2229 exportToDot(
"poststackupdate");
2231 handleCallsitesWithMultipleTargets();
2236 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2237 for (
auto &
Call : FuncEntry.second)
2238 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2241IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2245 : Index(Index), isPrevailing(isPrevailing) {
2246 for (
auto &
I : Index) {
2247 auto VI = Index.getValueInfo(
I);
2248 for (
auto &S : VI.getSummaryList()) {
2257 !isPrevailing(VI.getGUID(), S.get()))
2262 std::vector<CallInfo> CallsWithMetadata;
2263 if (!
FS->allocs().empty()) {
2264 for (
auto &AN :
FS->mutableAllocs()) {
2269 if (AN.MIBs.empty())
2271 IndexCall AllocCall(&AN);
2272 CallsWithMetadata.push_back(AllocCall);
2273 auto *AllocNode = addAllocNode(AllocCall, FS);
2281 AN.ContextSizeInfos.size() == AN.MIBs.size());
2283 for (
auto &MIB : AN.MIBs) {
2286 std::vector<ContextTotalSize> ContextSizeInfo;
2287 if (!AN.ContextSizeInfos.empty()) {
2288 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2289 ContextSizeInfo.push_back({FullStackId, TotalSize});
2291 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2292 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2299 DotAllocContextIds = AllocNode->getContextIds();
2305 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2309 if (!
FS->callsites().empty())
2310 for (
auto &SN :
FS->mutableCallsites()) {
2311 IndexCall StackNodeCall(&SN);
2312 CallsWithMetadata.push_back(StackNodeCall);
2315 if (!CallsWithMetadata.empty())
2316 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2318 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2324 dbgs() <<
"CCG before updating call stack chains:\n";
2329 exportToDot(
"prestackupdate");
2334 exportToDot(
"poststackupdate");
2336 handleCallsitesWithMultipleTargets();
2341template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2342void CallsiteContextGraph<DerivedCCG, FuncTy,
2343 CallTy>::handleCallsitesWithMultipleTargets() {
2358 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2359 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2360 auto *
Node = Entry.second;
2369 std::vector<CallInfo> AllCalls;
2370 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2371 AllCalls.push_back(
Node->Call);
2385 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2388 auto It = AllCalls.begin();
2390 for (; It != AllCalls.end(); ++It) {
2393 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2396 if (!Edge->Callee->hasCall())
2398 assert(NodeToCallingFunc.count(Edge->Callee));
2400 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2409 if (
Node->Call != ThisCall) {
2410 Node->setCall(ThisCall);
2421 Node->MatchingCalls.clear();
2424 if (It == AllCalls.end()) {
2425 RemovedEdgesWithMismatchedCallees++;
2429 Node->setCall(CallInfo());
2434 for (++It; It != AllCalls.end(); ++It) {
2438 Node->MatchingCalls.push_back(ThisCall);
2447 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2448 return !it.second->hasCall() || it.second->Call != it.first;
2452 for (
auto &[
Call,
Node] : NewCallToNode)
2453 NonAllocationCallToContextNodeMap[
Call] =
Node;
2457 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2458 NonAllocationCallToContextNodeMap[
Call] =
Node;
2461template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2462bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2464 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2468 struct CallsWithSameCallee {
2469 std::vector<CallInfo> Calls;
2470 ContextNode *
Node =
nullptr;
2476 for (
auto ThisCall : AllCalls) {
2477 auto *
F = getCalleeFunc(
ThisCall.call());
2479 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2488 for (
const auto &Edge :
Node->CalleeEdges) {
2489 if (!Edge->Callee->hasCall())
2491 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2492 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2493 CalleeNodeToCallInfo[Edge->Callee] =
2494 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2500 if (CalleeNodeToCallInfo.
empty())
2512 ContextNode *UnmatchedCalleesNode =
nullptr;
2514 bool UsedOrigNode =
false;
2519 auto CalleeEdges =
Node->CalleeEdges;
2520 for (
auto &Edge : CalleeEdges) {
2521 if (!Edge->Callee->hasCall())
2526 ContextNode *CallerNodeToUse =
nullptr;
2530 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2531 if (!UnmatchedCalleesNode)
2532 UnmatchedCalleesNode =
2533 createNewNode(
false, NodeToCallingFunc[
Node]);
2534 CallerNodeToUse = UnmatchedCalleesNode;
2538 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2541 if (!UsedOrigNode) {
2544 Node->MatchingCalls.clear();
2545 UsedOrigNode =
true;
2548 createNewNode(
false, NodeToCallingFunc[
Node]);
2552 Info->Node->setCall(
Info->Calls.front());
2558 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2560 CallerNodeToUse =
Info->Node;
2564 if (CallerNodeToUse ==
Node)
2567 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2574 for (
auto &
I : CalleeNodeToCallInfo)
2575 removeNoneTypeCallerEdges(
I.second->Node);
2576 if (UnmatchedCalleesNode)
2577 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2578 removeNoneTypeCallerEdges(
Node);
2588uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2591 return Index.getStackIdAtIndex(IdOrIndex);
2594template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2595bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2596 CallTy
Call, EdgeIter &EI,
2597 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2599 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2600 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2603 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2604 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2609 if (FoundCalleeChain.empty())
2613 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2617 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2618 CurEdge->AllocTypes |=
Edge->AllocTypes;
2623 auto NewEdge = std::make_shared<ContextEdge>(
2624 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2625 Callee->CallerEdges.push_back(NewEdge);
2626 if (Caller ==
Edge->Caller) {
2630 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2633 "Iterator position not restored after insert and increment");
2635 Caller->CalleeEdges.push_back(NewEdge);
2640 auto *CurCalleeNode =
Edge->Callee;
2641 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2642 ContextNode *NewNode =
nullptr;
2644 if (TailCallToContextNodeMap.
count(NewCall)) {
2645 NewNode = TailCallToContextNodeMap[NewCall];
2646 NewNode->AllocTypes |=
Edge->AllocTypes;
2648 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2650 NewNode = createNewNode(
false, Func, NewCall);
2651 TailCallToContextNodeMap[NewCall] = NewNode;
2652 NewNode->AllocTypes =
Edge->AllocTypes;
2656 AddEdge(NewNode, CurCalleeNode);
2658 CurCalleeNode = NewNode;
2662 AddEdge(
Edge->Caller, CurCalleeNode);
2670 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2682bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2683 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2684 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2685 bool &FoundMultipleCalleeChains) {
2692 FoundCalleeChain.push_back({Callsite,
F});
2707 bool FoundSingleCalleeChain =
false;
2708 for (
auto &BB : *CalleeFunc) {
2709 for (
auto &
I : BB) {
2711 if (!CB || !CB->isTailCall())
2713 auto *CalledValue = CB->getCalledOperand();
2714 auto *CalledFunction = CB->getCalledFunction();
2715 if (CalledValue && !CalledFunction) {
2716 CalledValue = CalledValue->stripPointerCasts();
2723 assert(!CalledFunction &&
2724 "Expected null called function in callsite for alias");
2727 if (!CalledFunction)
2729 if (CalledFunction == ProfiledCallee) {
2730 if (FoundSingleCalleeChain) {
2731 FoundMultipleCalleeChains =
true;
2734 FoundSingleCalleeChain =
true;
2735 FoundProfiledCalleeCount++;
2736 FoundProfiledCalleeDepth +=
Depth;
2737 if (
Depth > FoundProfiledCalleeMaxDepth)
2738 FoundProfiledCalleeMaxDepth =
Depth;
2739 SaveCallsiteInfo(&
I, CalleeFunc);
2740 }
else if (findProfiledCalleeThroughTailCalls(
2741 ProfiledCallee, CalledFunction,
Depth + 1,
2742 FoundCalleeChain, FoundMultipleCalleeChains)) {
2745 assert(!FoundMultipleCalleeChains);
2746 if (FoundSingleCalleeChain) {
2747 FoundMultipleCalleeChains =
true;
2750 FoundSingleCalleeChain =
true;
2751 SaveCallsiteInfo(&
I, CalleeFunc);
2752 }
else if (FoundMultipleCalleeChains)
2757 return FoundSingleCalleeChain;
2760const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2762 if (!CB->getCalledOperand() || CB->isIndirectCall())
2764 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2771bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2772 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
2773 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2775 if (!CB->getCalledOperand() || CB->isIndirectCall())
2777 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2779 if (CalleeFunc == Func)
2782 if (Alias && Alias->getAliasee() == Func)
2793 bool FoundMultipleCalleeChains =
false;
2794 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2796 FoundMultipleCalleeChains)) {
2797 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2798 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2799 <<
" that actually called " << CalleeVal->getName()
2800 << (FoundMultipleCalleeChains
2801 ?
" (found multiple possible chains)"
2804 if (FoundMultipleCalleeChains)
2805 FoundProfiledCalleeNonUniquelyCount++;
2812bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
2813 Instruction *Call2) {
2815 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2817 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2820 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2822 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2824 return CalleeFunc1 == CalleeFunc2;
2827bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2828 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
2829 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2830 bool &FoundMultipleCalleeChains) {
2836 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
2839 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2840 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
2843 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
2844 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
2845 CallsiteInfo *NewCallsiteInfo =
2846 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2847 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2854 bool FoundSingleCalleeChain =
false;
2857 !isPrevailing(CurCallee.
getGUID(), S.get()))
2862 auto FSVI = CurCallee;
2865 FSVI = AS->getAliaseeVI();
2866 for (
auto &CallEdge :
FS->calls()) {
2867 if (!CallEdge.second.hasTailCall())
2869 if (CallEdge.first == ProfiledCallee) {
2870 if (FoundSingleCalleeChain) {
2871 FoundMultipleCalleeChains =
true;
2874 FoundSingleCalleeChain =
true;
2875 FoundProfiledCalleeCount++;
2876 FoundProfiledCalleeDepth +=
Depth;
2877 if (
Depth > FoundProfiledCalleeMaxDepth)
2878 FoundProfiledCalleeMaxDepth =
Depth;
2879 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2881 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2882 FSToVIMap[
FS] = FSVI;
2883 }
else if (findProfiledCalleeThroughTailCalls(
2884 ProfiledCallee, CallEdge.first,
Depth + 1,
2885 FoundCalleeChain, FoundMultipleCalleeChains)) {
2888 assert(!FoundMultipleCalleeChains);
2889 if (FoundSingleCalleeChain) {
2890 FoundMultipleCalleeChains =
true;
2893 FoundSingleCalleeChain =
true;
2894 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2896 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2897 FSToVIMap[
FS] = FSVI;
2898 }
else if (FoundMultipleCalleeChains)
2903 return FoundSingleCalleeChain;
2906const FunctionSummary *
2907IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
2909 if (
Callee.getSummaryList().empty())
2914bool IndexCallsiteContextGraph::calleeMatchesFunc(
2915 IndexCall &
Call,
const FunctionSummary *Func,
2916 const FunctionSummary *CallerFunc,
2917 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2921 AliasSummary *Alias =
2922 Callee.getSummaryList().empty()
2925 assert(FSToVIMap.count(Func));
2926 auto FuncVI = FSToVIMap[
Func];
2927 if (Callee == FuncVI ||
2942 bool FoundMultipleCalleeChains =
false;
2943 if (!findProfiledCalleeThroughTailCalls(
2944 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2945 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2946 <<
" from " << FSToVIMap[CallerFunc]
2947 <<
" that actually called " << Callee
2948 << (FoundMultipleCalleeChains
2949 ?
" (found multiple possible chains)"
2952 if (FoundMultipleCalleeChains)
2953 FoundProfiledCalleeNonUniquelyCount++;
2960bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2963 return Callee1 == Callee2;
2966template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2967void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
2973template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2974void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2975 raw_ostream &OS)
const {
2976 OS <<
"Node " <<
this <<
"\n";
2980 OS <<
" (recursive)";
2982 if (!MatchingCalls.empty()) {
2983 OS <<
"\tMatchingCalls:\n";
2984 for (
auto &MatchingCall : MatchingCalls) {
2986 MatchingCall.print(OS);
2990 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
2992 OS <<
"\tContextIds:";
2994 auto ContextIds = getContextIds();
2995 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2996 std::sort(SortedIds.begin(), SortedIds.end());
2997 for (
auto Id : SortedIds)
3000 OS <<
"\tCalleeEdges:\n";
3001 for (
auto &
Edge : CalleeEdges)
3002 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3004 OS <<
"\tCallerEdges:\n";
3005 for (
auto &
Edge : CallerEdges)
3006 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3008 if (!Clones.empty()) {
3011 for (
auto *
C : Clones) {
3015 OS <<
C <<
" NodeId: " <<
C->NodeId;
3018 }
else if (CloneOf) {
3019 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3023template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3024void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3030template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3031void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3032 raw_ostream &OS)
const {
3033 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3034 << (IsBackedge ?
" (BE)" :
"")
3036 OS <<
" ContextIds:";
3037 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3038 std::sort(SortedIds.begin(), SortedIds.end());
3039 for (
auto Id : SortedIds)
3043template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3044void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3048template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3049void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3050 raw_ostream &OS)
const {
3051 OS <<
"Callsite Context Graph:\n";
3052 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3054 if (
Node->isRemoved())
3061template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3062void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3063 raw_ostream &OS)
const {
3064 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3066 if (
Node->isRemoved())
3068 if (!
Node->IsAllocation)
3070 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3071 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3072 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3073 std::sort(SortedIds.begin(), SortedIds.end());
3074 for (
auto Id : SortedIds) {
3075 auto TypeI = ContextIdToAllocationType.find(Id);
3076 assert(TypeI != ContextIdToAllocationType.end());
3077 auto CSI = ContextIdToContextSizeInfos.find(Id);
3078 if (CSI != ContextIdToContextSizeInfos.end()) {
3079 for (
auto &
Info : CSI->second) {
3080 OS <<
"MemProf hinting: "
3082 <<
" full allocation context " <<
Info.FullStackId
3083 <<
" with total size " <<
Info.TotalSize <<
" is "
3085 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3087 <<
" due to cold byte percent";
3089 OS <<
" (context id " <<
Id <<
")";
3097template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3098void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3099 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3102 for (
auto &
Edge :
Node->CallerEdges)
3107template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3109 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3110 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3112 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3128 return G->NodeOwner.begin()->get();
3131 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3132 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3151template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3165 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3171 std::string LabelString =
3172 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3175 LabelString +=
"\n";
3176 if (
Node->hasCall()) {
3177 auto Func =
G->NodeToCallingFunc.find(
Node);
3178 assert(Func !=
G->NodeToCallingFunc.end());
3180 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3182 LabelString +=
"null call";
3183 if (
Node->Recursive)
3184 LabelString +=
" (recursive)";
3186 LabelString +=
" (external)";
3192 auto ContextIds =
Node->getContextIds();
3196 bool Highlight =
false;
3205 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3206 getContextIds(ContextIds) +
"\"")
3210 AttributeString +=
",fontsize=\"30\"";
3212 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3214 if (
Node->CloneOf) {
3215 AttributeString +=
",color=\"blue\"";
3216 AttributeString +=
",style=\"filled,bold,dashed\"";
3218 AttributeString +=
",style=\"filled\"";
3219 return AttributeString;
3224 auto &Edge = *(ChildIter.getCurrent());
3229 bool Highlight =
false;
3238 auto Color = getColor(Edge->AllocTypes, Highlight);
3239 std::string AttributeString =
3240 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3242 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3245 if (Edge->IsBackedge)
3246 AttributeString +=
",style=\"dotted\"";
3249 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3250 return AttributeString;
3256 if (
Node->isRemoved())
3269 std::string IdString =
"ContextIds:";
3270 if (ContextIds.
size() < 100) {
3271 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3272 std::sort(SortedIds.begin(), SortedIds.end());
3273 for (
auto Id : SortedIds)
3274 IdString += (
" " +
Twine(Id)).str();
3276 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3281 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3287 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3289 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3290 if (AllocTypes == (uint8_t)AllocationType::Cold)
3291 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3293 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3294 return Highlight ?
"magenta" :
"mediumorchid1";
3298 static std::string getNodeId(NodeRef Node) {
3299 std::stringstream SStream;
3300 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3301 std::string
Result = SStream.str();
3310template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3315template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3316void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3317 std::string Label)
const {
3322template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3323typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3324CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3325 const std::shared_ptr<ContextEdge> &
Edge,
3326 DenseSet<uint32_t> ContextIdsToMove) {
3328 assert(NodeToCallingFunc.count(Node));
3329 ContextNode *Clone =
3330 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3331 Node->addClone(Clone);
3332 Clone->MatchingCalls =
Node->MatchingCalls;
3333 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3338template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3339void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3340 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3341 ContextNode *NewCallee,
bool NewClone,
3342 DenseSet<uint32_t> ContextIdsToMove) {
3345 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3347 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3349 ContextNode *OldCallee =
Edge->Callee;
3353 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3357 if (ContextIdsToMove.
empty())
3358 ContextIdsToMove =
Edge->getContextIds();
3362 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3365 NewCallee->AllocTypes |=
Edge->AllocTypes;
3367 if (ExistingEdgeToNewCallee) {
3370 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3371 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3372 assert(
Edge->ContextIds == ContextIdsToMove);
3373 removeEdgeFromGraph(
Edge.get());
3376 Edge->Callee = NewCallee;
3377 NewCallee->CallerEdges.push_back(
Edge);
3379 OldCallee->eraseCallerEdge(
Edge.get());
3386 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3387 if (ExistingEdgeToNewCallee) {
3390 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3391 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3394 auto NewEdge = std::make_shared<ContextEdge>(
3395 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3396 Edge->Caller->CalleeEdges.push_back(NewEdge);
3397 NewCallee->CallerEdges.push_back(NewEdge);
3401 NewCallee->AllocTypes |= CallerEdgeAllocType;
3403 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3408 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3409 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3413 if (CalleeToUse == OldCallee) {
3417 if (EdgeIsRecursive) {
3421 CalleeToUse = NewCallee;
3425 DenseSet<uint32_t> EdgeContextIdsToMove =
3427 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3428 OldCalleeEdge->AllocTypes =
3429 computeAllocType(OldCalleeEdge->getContextIds());
3436 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3437 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3438 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3442 auto NewEdge = std::make_shared<ContextEdge>(
3443 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3444 EdgeContextIdsToMove);
3445 NewCallee->CalleeEdges.push_back(NewEdge);
3446 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3450 OldCallee->AllocTypes = OldCallee->computeAllocType();
3452 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3453 OldCallee->emptyContextIds());
3457 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3460 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3466template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3467void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3468 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3469 ContextNode *NewCaller) {
3470 auto *OldCallee =
Edge->Callee;
3471 auto *NewCallee = OldCallee;
3474 bool Recursive =
Edge->Caller ==
Edge->Callee;
3476 NewCallee = NewCaller;
3478 ContextNode *OldCaller =
Edge->Caller;
3479 OldCaller->eraseCalleeEdge(
Edge.get());
3483 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3485 if (ExistingEdgeToNewCaller) {
3488 ExistingEdgeToNewCaller->getContextIds().insert_range(
3489 Edge->getContextIds());
3490 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3491 Edge->ContextIds.clear();
3492 Edge->AllocTypes = (uint8_t)AllocationType::None;
3493 OldCallee->eraseCallerEdge(
Edge.get());
3496 Edge->Caller = NewCaller;
3497 NewCaller->CalleeEdges.push_back(
Edge);
3499 assert(NewCallee == NewCaller);
3502 Edge->Callee = NewCallee;
3503 NewCallee->CallerEdges.push_back(
Edge);
3504 OldCallee->eraseCallerEdge(
Edge.get());
3510 NewCaller->AllocTypes |=
Edge->AllocTypes;
3517 bool IsNewNode = NewCaller->CallerEdges.empty();
3526 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3527 auto OldCallerCaller = OldCallerEdge->Caller;
3531 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3532 if (OldCaller == OldCallerCaller) {
3533 OldCallerCaller = NewCaller;
3539 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3540 OldCallerEdge->AllocTypes =
3541 computeAllocType(OldCallerEdge->getContextIds());
3546 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3550 if (ExistingCallerEdge) {
3551 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3552 ExistingCallerEdge->AllocTypes |=
3553 computeAllocType(EdgeContextIdsToMove);
3556 auto NewEdge = std::make_shared<ContextEdge>(
3557 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3558 EdgeContextIdsToMove);
3559 NewCaller->CallerEdges.push_back(NewEdge);
3560 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3565 OldCaller->AllocTypes = OldCaller->computeAllocType();
3567 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3568 OldCaller->emptyContextIds());
3572 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3575 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3581template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3582void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3583 recursivelyRemoveNoneTypeCalleeEdges(
3584 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3589 removeNoneTypeCalleeEdges(Node);
3591 for (
auto *Clone :
Node->Clones)
3592 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3596 auto CallerEdges =
Node->CallerEdges;
3597 for (
auto &
Edge : CallerEdges) {
3599 if (
Edge->isRemoved()) {
3603 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3608template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3609void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3614 DenseSet<const ContextNode *> Visited;
3615 DenseSet<const ContextNode *> CurrentStack;
3616 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3618 if (
Node->isRemoved())
3621 if (!
Node->CallerEdges.empty())
3623 markBackedges(Node, Visited, CurrentStack);
3629template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3630void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3631 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3632 DenseSet<const ContextNode *> &CurrentStack) {
3633 auto I = Visited.
insert(Node);
3637 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3638 auto *
Callee = CalleeEdge->Callee;
3639 if (Visited.
count(Callee)) {
3642 if (CurrentStack.
count(Callee))
3643 CalleeEdge->IsBackedge =
true;
3646 CurrentStack.
insert(Callee);
3647 markBackedges(Callee, Visited, CurrentStack);
3648 CurrentStack.
erase(Callee);
3652template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3653void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3654 DenseSet<const ContextNode *> Visited;
3655 for (
auto &Entry : AllocationCallToContextNodeMap) {
3657 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3660 for (
auto &Entry : AllocationCallToContextNodeMap)
3661 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3674template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3675void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3676 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3677 const DenseSet<uint32_t> &AllocContextIds) {
3687 if (!
Node->hasCall())
3706 auto CallerEdges =
Node->CallerEdges;
3707 for (
auto &
Edge : CallerEdges) {
3709 if (
Edge->isRemoved()) {
3715 if (
Edge->IsBackedge) {
3722 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3723 identifyClones(
Edge->Caller, Visited, AllocContextIds);
3746 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3750 [&](
const std::shared_ptr<ContextEdge> &
A,
3751 const std::shared_ptr<ContextEdge> &
B) {
3754 if (A->ContextIds.empty())
3760 if (B->ContextIds.empty())
3763 if (A->AllocTypes == B->AllocTypes)
3766 return *A->ContextIds.begin() < *B->ContextIds.begin();
3767 return AllocTypeCloningPriority[A->AllocTypes] <
3768 AllocTypeCloningPriority[B->AllocTypes];
3771 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3773 DenseSet<uint32_t> RecursiveContextIds;
3778 DenseSet<uint32_t> AllCallerContextIds;
3779 for (
auto &CE :
Node->CallerEdges) {
3782 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3783 for (
auto Id :
CE->getContextIds())
3784 if (!AllCallerContextIds.
insert(Id).second)
3785 RecursiveContextIds.
insert(Id);
3795 auto CallerEdges =
Node->CallerEdges;
3796 for (
auto &CallerEdge : CallerEdges) {
3798 if (CallerEdge->isRemoved()) {
3802 assert(CallerEdge->Callee == Node);
3811 if (!CallerEdge->Caller->hasCall())
3816 auto CallerEdgeContextsForAlloc =
3818 if (!RecursiveContextIds.
empty())
3819 CallerEdgeContextsForAlloc =
3821 if (CallerEdgeContextsForAlloc.empty())
3824 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3828 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3829 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3830 for (
auto &CalleeEdge :
Node->CalleeEdges)
3831 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3832 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3848 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
3849 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3850 if (!CallerEdge->IsBackedge &&
3851 allocTypeToUse(CallerAllocTypeForAlloc) ==
3852 allocTypeToUse(
Node->AllocTypes) &&
3853 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3854 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
3858 if (CallerEdge->IsBackedge) {
3862 DeferredBackedges++;
3875 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
3876 !Visited.
count(CallerEdge->Caller)) {
3877 const auto OrigIdCount = CallerEdge->getContextIds().size();
3880 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
3881 removeNoneTypeCalleeEdges(CallerEdge->Caller);
3885 bool UpdatedEdge =
false;
3886 if (OrigIdCount > CallerEdge->getContextIds().size()) {
3887 for (
auto E :
Node->CallerEdges) {
3889 if (
E->Caller->CloneOf != CallerEdge->Caller)
3893 auto CallerEdgeContextsForAllocNew =
3895 if (CallerEdgeContextsForAllocNew.empty())
3905 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
3915 if (CallerEdge->isRemoved())
3925 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
3926 if (CallerEdgeContextsForAlloc.empty())
3931 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3932 CalleeEdgeAllocTypesForCallerEdge.clear();
3933 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3934 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3935 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3941 ContextNode *Clone =
nullptr;
3942 for (
auto *CurClone :
Node->Clones) {
3943 if (allocTypeToUse(CurClone->AllocTypes) !=
3944 allocTypeToUse(CallerAllocTypeForAlloc))
3951 assert(!BothSingleAlloc ||
3952 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3958 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3959 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3967 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
3968 CallerEdgeContextsForAlloc);
3970 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
3973 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
3980 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
3986void ModuleCallsiteContextGraph::updateAllocationCall(
3990 "memprof", AllocTypeString);
3993 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
3994 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3996 <<
" marked with memprof allocation attribute "
3997 <<
ore::NV(
"Attribute", AllocTypeString));
4000void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4004 assert(AI->Versions.size() >
Call.cloneNo());
4009ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4011 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4012 return AllocationType::None;
4013 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4014 ? AllocationType::Cold
4015 : AllocationType::NotCold;
4019IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4021 assert(AI->Versions.size() >
Call.cloneNo());
4025void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4026 FuncInfo CalleeFunc) {
4027 auto *CurF = getCalleeFunc(CallerCall.call());
4028 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4035 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4037 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4039 MismatchedCloneAssignments++;
4042 if (NewCalleeCloneNo > 0)
4043 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4044 OREGetter(CallerCall.call()->getFunction())
4045 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4046 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4047 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4048 <<
" assigned to call function clone "
4049 <<
ore::NV(
"Callee", CalleeFunc.func()));
4052void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4053 FuncInfo CalleeFunc) {
4056 "Caller cannot be an allocation which should not have profiled calls");
4057 assert(CI->Clones.size() > CallerCall.cloneNo());
4058 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4059 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4064 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4066 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4068 MismatchedCloneAssignments++;
4070 CurCalleeCloneNo = NewCalleeCloneNo;
4082 SP->replaceLinkageName(MDName);
4086 TempDISubprogram NewDecl = Decl->
clone();
4087 NewDecl->replaceLinkageName(MDName);
4091CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4093ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4094 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4095 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4100 assert(!
Func.func()->getParent()->getFunction(Name));
4101 NewFunc->setName(Name);
4103 for (
auto &Inst : CallsWithMetadataInFunc) {
4105 assert(Inst.cloneNo() == 0);
4108 OREGetter(
Func.func())
4109 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4110 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4111 return {NewFunc, CloneNo};
4114CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4115 IndexCall>::FuncInfo
4116IndexCallsiteContextGraph::cloneFunctionForCallsite(
4117 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4118 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4132 for (
auto &Inst : CallsWithMetadataInFunc) {
4134 assert(Inst.cloneNo() == 0);
4136 assert(AI->Versions.size() == CloneNo);
4139 AI->Versions.push_back(0);
4142 assert(CI && CI->Clones.size() == CloneNo);
4145 CI->Clones.push_back(0);
4147 CallMap[Inst] = {Inst.call(), CloneNo};
4149 return {
Func.func(), CloneNo};
4166template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4167void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4173 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4174 for (
auto &Entry : AllocationCallToContextNodeMap) {
4176 for (
auto Id :
Node->getContextIds())
4177 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4178 for (
auto *Clone :
Node->Clones) {
4179 for (
auto Id : Clone->getContextIds())
4180 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4187 DenseSet<const ContextNode *> Visited;
4188 for (
auto &Entry : AllocationCallToContextNodeMap) {
4191 mergeClones(Node, Visited, ContextIdToAllocationNode);
4197 auto Clones =
Node->Clones;
4198 for (
auto *Clone : Clones)
4199 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4203 dbgs() <<
"CCG after merging:\n";
4207 exportToDot(
"aftermerge");
4215template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4216void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4217 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4218 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4228 bool FoundUnvisited =
true;
4230 while (FoundUnvisited) {
4232 FoundUnvisited =
false;
4235 auto CallerEdges =
Node->CallerEdges;
4236 for (
auto CallerEdge : CallerEdges) {
4238 if (CallerEdge->Callee != Node)
4243 FoundUnvisited =
true;
4244 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4248 TotalMergeInvokes++;
4249 TotalMergeIters += Iters;
4250 if (Iters > MaxMergeIters)
4251 MaxMergeIters = Iters;
4254 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4257template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4258void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4259 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4260 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4262 if (
Node->emptyContextIds())
4267 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4268 OrigNodeToCloneEdges;
4269 for (
const auto &
E :
Node->CalleeEdges) {
4274 OrigNodeToCloneEdges[
Base].push_back(
E);
4280 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4281 const std::shared_ptr<ContextEdge> &
B) {
4282 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4283 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4284 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4286 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4290 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4295 for (
auto Entry : OrigNodeToCloneEdges) {
4298 auto &CalleeEdges =
Entry.second;
4299 auto NumCalleeClones = CalleeEdges.size();
4301 if (NumCalleeClones == 1)
4312 DenseSet<ContextNode *> OtherCallersToShareMerge;
4313 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4314 OtherCallersToShareMerge);
4319 ContextNode *MergeNode =
nullptr;
4320 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4321 for (
auto CalleeEdge : CalleeEdges) {
4322 auto *OrigCallee = CalleeEdge->Callee;
4328 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4329 MergeNode = OrigCallee;
4330 NonNewMergedNodes++;
4337 if (!OtherCallersToShareMerge.
empty()) {
4338 bool MoveAllCallerEdges =
true;
4339 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4340 if (CalleeCallerE == CalleeEdge)
4342 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4343 MoveAllCallerEdges =
false;
4349 if (MoveAllCallerEdges) {
4350 MergeNode = OrigCallee;
4351 NonNewMergedNodes++;
4358 assert(MergeNode != OrigCallee);
4359 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4362 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4367 if (!OtherCallersToShareMerge.
empty()) {
4371 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4372 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4373 if (CalleeCallerE == CalleeEdge)
4375 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4377 CallerToMoveCount[CalleeCallerE->Caller]++;
4378 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4382 removeNoneTypeCalleeEdges(OrigCallee);
4383 removeNoneTypeCalleeEdges(MergeNode);
4401template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4402void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4403 findOtherCallersToShareMerge(
4405 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4406 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4407 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4408 auto NumCalleeClones = CalleeEdges.size();
4411 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4414 unsigned PossibleOtherCallerNodes = 0;
4418 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4424 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4425 for (
auto CalleeEdge : CalleeEdges) {
4426 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4429 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4430 if (CalleeCallerEdges->Caller == Node) {
4431 assert(CalleeCallerEdges == CalleeEdge);
4434 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4437 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4439 PossibleOtherCallerNodes++;
4443 for (
auto Id : CalleeEdge->getContextIds()) {
4444 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4448 MissingAllocForContextId++;
4451 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4458 for (
auto CalleeEdge : CalleeEdges) {
4459 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4461 if (!PossibleOtherCallerNodes)
4463 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4465 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4467 if (CalleeCallerE == CalleeEdge)
4471 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4476 for (
auto Id : CalleeCallerE->getContextIds()) {
4477 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4482 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4483 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4484 PossibleOtherCallerNodes--;
4491 if (!PossibleOtherCallerNodes)
4496 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4497 if (
Count != NumCalleeClones)
4499 OtherCallersToShareMerge.
insert(OtherCaller);
4544template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4545bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4552 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4556 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4557 const FuncInfo &CalleeFunc) {
4559 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4563 struct FuncCloneInfo {
4568 DenseMap<CallInfo, CallInfo> CallMap;
4596 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4597 UnassignedCallClones;
4601 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4602 FuncInfo OrigFunc(Func);
4607 std::vector<FuncCloneInfo> FuncCloneInfos;
4608 for (
auto &
Call : CallsWithMetadata) {
4609 ContextNode *
Node = getNodeForInst(
Call);
4613 if (!Node ||
Node->Clones.empty())
4616 "Not having a call should have prevented cloning");
4620 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4624 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4626 ContextNode *CallsiteClone,
4629 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4631 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4632 DenseMap<CallInfo, CallInfo> &CallMap =
4633 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4634 CallInfo CallClone(
Call);
4635 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4636 CallClone = It->second;
4637 CallsiteClone->setCall(CallClone);
4639 for (
auto &MatchingCall :
Node->MatchingCalls) {
4640 CallInfo CallClone(MatchingCall);
4641 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4642 CallClone = It->second;
4644 MatchingCall = CallClone;
4652 auto MoveEdgeToNewCalleeCloneAndSetUp =
4653 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4654 ContextNode *OrigCallee =
Edge->Callee;
4655 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4656 removeNoneTypeCalleeEdges(NewClone);
4657 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4661 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4662 RecordCalleeFuncOfCallsite(
4663 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4670 std::deque<ContextNode *> ClonesWorklist;
4672 if (!
Node->emptyContextIds())
4673 ClonesWorklist.push_back(Node);
4679 unsigned NodeCloneCount = 0;
4680 while (!ClonesWorklist.empty()) {
4681 ContextNode *Clone = ClonesWorklist.front();
4682 ClonesWorklist.pop_front();
4691 if (FuncCloneInfos.size() < NodeCloneCount) {
4693 if (NodeCloneCount == 1) {
4698 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4699 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4703 FuncCloneInfos.push_back(
4704 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4705 AssignCallsiteCloneToFuncClone(
4706 OrigFunc,
Call, Clone,
4707 AllocationCallToContextNodeMap.count(
Call));
4708 for (
auto &CE : Clone->CallerEdges) {
4710 if (!
CE->Caller->hasCall())
4712 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
4722 FuncInfo PreviousAssignedFuncClone;
4724 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4725 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4727 bool CallerAssignedToCloneOfFunc =
false;
4728 if (EI != Clone->CallerEdges.end()) {
4729 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4730 PreviousAssignedFuncClone =
4731 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4732 CallerAssignedToCloneOfFunc =
true;
4737 DenseMap<CallInfo, CallInfo> NewCallMap;
4738 unsigned CloneNo = FuncCloneInfos.size();
4739 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4740 "should already exist in the map");
4741 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4742 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4743 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4744 FunctionClonesAnalysis++;
4750 if (!CallerAssignedToCloneOfFunc) {
4751 AssignCallsiteCloneToFuncClone(
4752 NewFuncClone,
Call, Clone,
4753 AllocationCallToContextNodeMap.count(
Call));
4754 for (
auto &CE : Clone->CallerEdges) {
4756 if (!
CE->Caller->hasCall())
4758 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4770 auto CallerEdges = Clone->CallerEdges;
4771 for (
auto CE : CallerEdges) {
4773 if (
CE->isRemoved()) {
4779 if (!
CE->Caller->hasCall())
4782 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
4786 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
4787 PreviousAssignedFuncClone)
4790 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
4803 auto CalleeEdges =
CE->Caller->CalleeEdges;
4804 for (
auto CalleeEdge : CalleeEdges) {
4807 if (CalleeEdge->isRemoved()) {
4812 ContextNode *
Callee = CalleeEdge->Callee;
4816 if (Callee == Clone || !
Callee->hasCall())
4821 if (Callee == CalleeEdge->Caller)
4823 ContextNode *NewClone =
4824 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
4827 removeNoneTypeCalleeEdges(Callee);
4835 CallInfo OrigCall(
Callee->getOrigNode()->Call);
4836 OrigCall.setCloneNo(0);
4837 DenseMap<CallInfo, CallInfo> &CallMap =
4838 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
4840 CallInfo NewCall(CallMap[OrigCall]);
4842 NewClone->setCall(NewCall);
4844 for (
auto &MatchingCall : NewClone->MatchingCalls) {
4845 CallInfo OrigMatchingCall(MatchingCall);
4846 OrigMatchingCall.setCloneNo(0);
4848 CallInfo NewCall(CallMap[OrigMatchingCall]);
4851 MatchingCall = NewCall;
4860 auto FindFirstAvailFuncClone = [&]() {
4865 for (
auto &CF : FuncCloneInfos) {
4866 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
4867 return CF.FuncClone;
4870 "Expected an available func clone for this callsite clone");
4887 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
4888 FuncInfo FuncCloneAssignedToCurCallsiteClone;
4892 auto CloneCallerEdges = Clone->CallerEdges;
4893 for (
auto &
Edge : CloneCallerEdges) {
4897 if (
Edge->isRemoved())
4900 if (!
Edge->Caller->hasCall())
4904 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
4905 FuncInfo FuncCloneCalledByCaller =
4906 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4916 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
4917 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
4925 (FuncCloneAssignedToCurCallsiteClone &&
4926 FuncCloneAssignedToCurCallsiteClone !=
4927 FuncCloneCalledByCaller)) {
4942 if (FuncCloneToNewCallsiteCloneMap.count(
4943 FuncCloneCalledByCaller)) {
4944 ContextNode *NewClone =
4945 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4946 moveEdgeToExistingCalleeClone(
Edge, NewClone);
4948 removeNoneTypeCalleeEdges(NewClone);
4951 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
4952 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4955 ClonesWorklist.push_back(NewClone);
4959 removeNoneTypeCalleeEdges(Clone);
4967 if (!FuncCloneAssignedToCurCallsiteClone) {
4968 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4970 AssignCallsiteCloneToFuncClone(
4971 FuncCloneCalledByCaller,
Call, Clone,
4972 AllocationCallToContextNodeMap.count(
Call));
4976 assert(FuncCloneAssignedToCurCallsiteClone ==
4977 FuncCloneCalledByCaller);
4986 if (!FuncCloneAssignedToCurCallsiteClone) {
4987 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4988 assert(FuncCloneAssignedToCurCallsiteClone);
4990 AssignCallsiteCloneToFuncClone(
4991 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
4992 AllocationCallToContextNodeMap.count(
Call));
4994 assert(FuncCloneToCurNodeCloneMap
4995 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4997 RecordCalleeFuncOfCallsite(
Edge->Caller,
4998 FuncCloneAssignedToCurCallsiteClone);
5018 if (!FuncCloneAssignedToCurCallsiteClone) {
5019 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5020 assert(FuncCloneAssignedToCurCallsiteClone &&
5021 "No available func clone for this callsite clone");
5022 AssignCallsiteCloneToFuncClone(
5023 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5024 AllocationCallToContextNodeMap.contains(
Call));
5029 for (
const auto &PE :
Node->CalleeEdges)
5031 for (
const auto &CE :
Node->CallerEdges)
5033 for (
auto *Clone :
Node->Clones) {
5035 for (
const auto &PE : Clone->CalleeEdges)
5037 for (
const auto &CE : Clone->CallerEdges)
5043 if (FuncCloneInfos.size() < 2)
5049 for (
auto &
Call : CallsWithMetadata) {
5050 ContextNode *
Node = getNodeForInst(
Call);
5051 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5057 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5061 DenseSet<unsigned> NodeCallClones;
5062 for (
auto *
C :
Node->Clones)
5063 NodeCallClones.
insert(
C->Call.cloneNo());
5066 for (
auto &FC : FuncCloneInfos) {
5071 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5076 auto &CallVector = UnassignedCallClones[
Node][
I];
5077 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5078 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5079 CallInfo CallClone = It->second;
5080 CallVector.push_back(CallClone);
5084 assert(
false &&
"Expected to find call in CallMap");
5087 for (
auto &MatchingCall :
Node->MatchingCalls) {
5088 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5089 CallInfo CallClone = It->second;
5090 CallVector.push_back(CallClone);
5094 assert(
false &&
"Expected to find call in CallMap");
5102 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5104 auto UpdateCalls = [&](ContextNode *
Node,
5105 DenseSet<const ContextNode *> &Visited,
5106 auto &&UpdateCalls) {
5107 auto Inserted = Visited.insert(Node);
5111 for (
auto *Clone :
Node->Clones)
5112 UpdateCalls(Clone, Visited, UpdateCalls);
5114 for (
auto &
Edge :
Node->CallerEdges)
5115 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5119 if (!
Node->hasCall() ||
Node->emptyContextIds())
5122 if (
Node->IsAllocation) {
5123 auto AT = allocTypeToUse(
Node->AllocTypes);
5129 !ContextIdToContextSizeInfos.empty()) {
5130 uint64_t TotalCold = 0;
5132 for (
auto Id :
Node->getContextIds()) {
5133 auto TypeI = ContextIdToAllocationType.find(Id);
5134 assert(TypeI != ContextIdToAllocationType.end());
5135 auto CSI = ContextIdToContextSizeInfos.find(Id);
5136 if (CSI != ContextIdToContextSizeInfos.end()) {
5137 for (
auto &
Info : CSI->second) {
5139 if (TypeI->second == AllocationType::Cold)
5140 TotalCold +=
Info.TotalSize;
5145 AT = AllocationType::Cold;
5147 updateAllocationCall(
Node->Call, AT);
5152 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5155 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5156 updateCall(
Node->Call, CalleeFunc);
5158 for (
auto &
Call :
Node->MatchingCalls)
5159 updateCall(
Call, CalleeFunc);
5163 if (!UnassignedCallClones.
contains(Node))
5165 DenseSet<unsigned> NodeCallClones;
5166 for (
auto *
C :
Node->Clones)
5167 NodeCallClones.
insert(
C->Call.cloneNo());
5169 auto &ClonedCalls = UnassignedCallClones[
Node];
5170 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5174 if (NodeCallClones.
contains(CloneNo))
5177 for (
auto &
Call : CallVector)
5178 updateCall(
Call, CalleeFunc);
5187 DenseSet<const ContextNode *> Visited;
5188 for (
auto &Entry : AllocationCallToContextNodeMap)
5189 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5200 for (
auto &SN : FS->callsites()) {
5205 SN.Clones.size() >
I &&
5206 "Callsite summary has fewer entries than other summaries in function");
5207 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5214 for (
auto &AN : FS->allocs()) {
5218 assert(AN.Versions.size() >
I &&
5219 "Alloc summary has fewer entries than other summaries in function");
5220 if (AN.Versions.size() <=
I ||
5237 NewGV->takeName(DeclGV);
5244 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5245 if (!FuncToAliasMap.count(&
F))
5247 for (
auto *
A : FuncToAliasMap[&
F]) {
5249 auto *PrevA = M.getNamedAlias(AliasName);
5251 A->getType()->getPointerAddressSpace(),
5252 A->getLinkage(), AliasName, NewF);
5253 NewA->copyAttributesFrom(
A);
5255 TakeDeclNameAndReplace(PrevA, NewA);
5264 FunctionsClonedThinBackend++;
5281 for (
unsigned I = 1;
I < NumClones;
I++) {
5282 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5289 FunctionCloneDuplicatesThinBackend++;
5290 auto *Func = HashToFunc[Hash];
5291 if (Func->hasAvailableExternallyLinkage()) {
5297 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5299 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5302 auto *PrevF = M.getFunction(Name);
5305 TakeDeclNameAndReplace(PrevF, Alias);
5307 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5310 CloneFuncAliases(Func,
I);
5314 HashToFunc[Hash] = NewF;
5315 FunctionClonesThinBackend++;
5318 for (
auto &BB : *NewF) {
5319 for (
auto &Inst : BB) {
5320 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5321 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5326 TakeDeclNameAndReplace(PrevF, NewF);
5328 NewF->setName(Name);
5331 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5334 CloneFuncAliases(NewF,
I);
5343 const Function *CallingFunc =
nullptr) {
5362 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5368 if (!SrcFileMD &&
F.isDeclaration()) {
5372 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5377 assert(SrcFileMD || OrigName ==
F.getName());
5379 StringRef SrcFile = M.getSourceFileName();
5391 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5392 F.getName().contains(
'.')) {
5393 OrigName =
F.getName().rsplit(
'.').first;
5402 assert(TheFnVI ||
F.isDeclaration());
5406bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5408 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5409 Symtab = std::make_unique<InstrProfSymtab>();
5420 if (
Error E = Symtab->create(M,
true,
false)) {
5421 std::string SymtabFailure =
toString(std::move(
E));
5422 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5435 auto MIBIter = AllocNode.
MIBs.begin();
5436 for (
auto &MDOp : MemProfMD->
operands()) {
5438 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5443 auto ContextIterBegin =
5447 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5449 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5454 if (LastStackContextId == *ContextIter)
5456 LastStackContextId = *ContextIter;
5457 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5467bool MemProfContextDisambiguation::applyImport(
Module &M) {
5474 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5476 for (
auto &
A :
M.aliases()) {
5477 auto *Aliasee =
A.getAliaseeObject();
5479 FuncToAliasMap[
F].insert(&
A);
5482 if (!initializeIndirectCallPromotionInfo(M))
5489 OptimizationRemarkEmitter ORE(&
F);
5492 bool ClonesCreated =
false;
5493 unsigned NumClonesCreated = 0;
5494 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5504 if (ClonesCreated) {
5505 assert(NumClonesCreated == NumClones);
5512 ClonesCreated =
true;
5513 NumClonesCreated = NumClones;
5516 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5517 Function *CalledFunction, FunctionSummary *
FS) {
5519 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5531 if (CalledFunction != CB->getCalledOperand() &&
5532 (!GA || CalledFunction != GA->getAliaseeObject())) {
5533 SkippedCallsCloning++;
5539 auto CalleeOrigName = CalledFunction->getName();
5540 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5543 if (J > 0 && VMaps[J - 1]->
empty())
5547 if (!StackNode.
Clones[J])
5549 auto NewF =
M.getOrInsertFunction(
5551 CalledFunction->getFunctionType());
5565 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5566 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5568 <<
" assigned to call function clone "
5569 <<
ore::NV(
"Callee", NewF.getCallee()));
5583 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5587 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5589 "enable-import-metadata is needed to emit thinlto_src_module");
5590 StringRef SrcModule =
5593 if (GVS->modulePath() == SrcModule) {
5594 GVSummary = GVS.get();
5598 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5608 if (
FS->allocs().empty() &&
FS->callsites().empty())
5611 auto SI =
FS->callsites().begin();
5612 auto AI =
FS->allocs().begin();
5617 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5620 for (
auto CallsiteIt =
FS->callsites().rbegin();
5621 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5622 auto &Callsite = *CallsiteIt;
5626 if (!Callsite.StackIdIndices.empty())
5628 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5637 for (
auto &BB :
F) {
5638 for (
auto &
I : BB) {
5644 auto *CalledValue = CB->getCalledOperand();
5645 auto *CalledFunction = CB->getCalledFunction();
5646 if (CalledValue && !CalledFunction) {
5647 CalledValue = CalledValue->stripPointerCasts();
5654 assert(!CalledFunction &&
5655 "Expected null called function in callsite for alias");
5659 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5660 I.getMetadata(LLVMContext::MD_callsite));
5661 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5665 if (CB->getAttributes().hasFnAttr(
"memprof")) {
5667 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5668 ? AllocTypeColdThinBackend++
5669 : AllocTypeNotColdThinBackend++;
5670 OrigAllocsThinBackend++;
5671 AllocVersionsThinBackend++;
5672 if (!MaxAllocVersionsThinBackend)
5673 MaxAllocVersionsThinBackend = 1;
5680 auto &AllocNode = *(AI++);
5688 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5690 OrigAllocsThinBackend++;
5691 AllocVersionsThinBackend += AllocNode.Versions.size();
5692 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5693 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5703 if (AllocNode.Versions.size() == 1 &&
5706 AllocationType::NotCold ||
5708 AllocationType::None);
5709 UnclonableAllocsThinBackend++;
5715 return Type == ((uint8_t)AllocationType::NotCold |
5716 (uint8_t)AllocationType::Cold);
5720 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5723 if (J > 0 && VMaps[J - 1]->
empty())
5726 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5729 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5730 : AllocTypeNotColdThinBackend++;
5744 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5745 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5747 <<
" marked with memprof allocation attribute "
5748 <<
ore::NV(
"Attribute", AllocTypeString));
5750 }
else if (!CallsiteContext.empty()) {
5751 if (!CalledFunction) {
5755 assert(!CI || !CI->isInlineAsm());
5765 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
5771 CloneFuncIfNeeded(NumClones, FS);
5776 assert(SI !=
FS->callsites().end());
5777 auto &StackNode = *(
SI++);
5783 for (
auto StackId : CallsiteContext) {
5785 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
5791 CloneCallsite(StackNode, CB, CalledFunction, FS);
5793 }
else if (CB->isTailCall() && CalledFunction) {
5796 ValueInfo CalleeVI =
5798 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
5799 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
5800 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
5801 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
5808 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
5818 for (
auto &BB :
F) {
5819 for (
auto &
I : BB) {
5822 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
5823 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
5831unsigned MemProfContextDisambiguation::recordICPInfo(
5836 uint32_t NumCandidates;
5837 uint64_t TotalCount;
5838 auto CandidateProfileData =
5839 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
5841 if (CandidateProfileData.empty())
5847 bool ICPNeeded =
false;
5848 unsigned NumClones = 0;
5849 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
5850 for (
const auto &Candidate : CandidateProfileData) {
5852 auto CalleeValueInfo =
5854 ImportSummary->getValueInfo(Candidate.Value);
5857 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
5859 auto &StackNode = *(
SI++);
5864 [](
unsigned CloneNo) { return CloneNo != 0; });
5874 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
5875 TotalCount, CallsiteInfoStartIndex});
5879void MemProfContextDisambiguation::performICP(
5881 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
5883 OptimizationRemarkEmitter &ORE) {
5890 for (
auto &
Info : ICallAnalysisInfo) {
5893 auto TotalCount =
Info.TotalCount;
5894 unsigned NumPromoted = 0;
5895 unsigned NumClones = 0;
5897 for (
auto &Candidate :
Info.CandidateProfileData) {
5908 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
5909 if (TargetFunction ==
nullptr ||
5917 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
5918 <<
"Memprof cannot promote indirect call: target with md5sum "
5919 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
5928 const char *Reason =
nullptr;
5931 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
5932 <<
"Memprof cannot promote indirect call to "
5933 <<
ore::NV(
"TargetFunction", TargetFunction)
5934 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
5945 CallBase *CBClone = CB;
5946 for (
unsigned J = 0; J < NumClones; J++) {
5949 if (J > 0 && VMaps[J - 1]->
empty())
5959 TotalCount, isSamplePGO, &ORE);
5960 auto *TargetToUse = TargetFunction;
5963 if (StackNode.
Clones[J]) {
5982 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5984 <<
" promoted and assigned to call function clone "
5985 <<
ore::NV(
"Callee", TargetToUse));
5989 TotalCount -= Candidate.Count;
5994 CallBase *CBClone = CB;
5995 for (
unsigned J = 0; J < NumClones; J++) {
5998 if (J > 0 && VMaps[J - 1]->
empty())
6004 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6007 if (TotalCount != 0)
6009 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6010 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6015template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6016bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
6018 dbgs() <<
"CCG before cloning:\n";
6022 exportToDot(
"postbuild");
6035 dbgs() <<
"CCG after cloning:\n";
6039 exportToDot(
"cloned");
6041 bool Changed = assignFunctions();
6044 dbgs() <<
"CCG after assigning function clones:\n";
6048 exportToDot(
"clonefuncassign");
6051 printTotalSizes(
errs());
6056bool MemProfContextDisambiguation::processModule(
6058 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6063 return applyImport(M);
6076 ModuleCallsiteContextGraph CCG(M, OREGetter);
6077 return CCG.process();
6082 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6087 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6091 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6095 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6096 "-memprof-dot-context-id");
6097 if (ImportSummary) {
6107 auto ReadSummaryFile =
6109 if (!ReadSummaryFile) {
6116 if (!ImportSummaryForTestingOrErr) {
6122 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6123 ImportSummary = ImportSummaryForTesting.get();
6132 if (!processModule(M, OREGetter))
6149 IndexCallsiteContextGraph CCG(Index, isPrevailing);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...