// NOTE(review): this excerpt is a line-sampled extraction of a larger file;
// many lines (including most STATISTIC(...) and cl::opt<...> opening lines)
// are not visible here. Only comments were added; visible tokens are kept.
#include <unordered_map>

// Debug type string used by LLVM_DEBUG/STATISTIC for this pass.
#define DEBUG_TYPE "memprof-context-disambiguation"

// Pass statistics: counters for clones created, allocation types assigned,
// and tail-call discovery during whole-program analysis and ThinLTO backend.
    "Number of function clones created during whole program analysis");
    "Number of function clones created during ThinLTO backend");
    "Number of functions that had clones created during ThinLTO backend");
STATISTIC(AllocTypeNotCold,
          "Number of not cold static allocations (possibly "
          "cloned) during whole program analysis");
STATISTIC(AllocTypeCold,
          "Number of cold static allocations (possibly cloned) "
          "during whole program analysis");
    "Number of not cold static allocations (possibly cloned) during "
STATISTIC(AllocTypeColdThinBackend,
          "Number of cold static allocations "
          "(possibly cloned) during ThinLTO backend");
    "Number of original (not cloned) allocations with memprof profiles "
    "during ThinLTO backend");
    AllocVersionsThinBackend,
    "Number of allocation versions (including clones) during ThinLTO backend");
    "Maximum number of allocation versions created for an original "
    "allocation during ThinLTO backend");
    // NOTE(review): "ambigous" is a typo in this runtime string; left as-is
    // because changing it would alter program output.
    "Number of unclonable ambigous allocations during ThinLTO backend");
    "Number of edges removed due to mismatched callees (profiled vs IR)");
    "Number of profiled callees found via tail calls");
    "Aggregate depth of profiled callees found via tail calls");
    "Maximum depth of profiled callees found via tail calls");
    "Number of profiled callees found via multiple tail call chains");

// Command-line options controlling dot-file export, dumping/verification of
// the context graph, testing hooks, tail-call search depth, and cloning of
// recursive callsites/contexts. Option names/defaults are on elided lines.
    cl::desc("Specify the path prefix of the MemProf dot files."));
    cl::desc("Export graph to dot files."));
    cl::desc("Dump CallingContextGraph to stdout after each stage."));
    cl::desc("Perform verification checks on CallingContextGraph."));
    cl::desc("Perform frequent verification checks on nodes."));
    "memprof-import-summary",
    cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
    cl::desc("Max depth to recursively search for missing "
             "frames through tail calls."));
    cl::desc("Allow cloning of callsites involved in recursive cycles"));
    cl::desc("Allow cloning of contexts through recursive cycles"));
    cl::desc("Linking with hot/cold operator new interfaces"));
    "Require target function definition when promoting indirect calls"));
// CRTP base class holding the callsite context graph built from memprof
// profile metadata (IR) or summary (index). Nodes represent allocations and
// stack-frame callsites; edges carry the set of profiled context ids flowing
// between caller and callee. DerivedCCG supplies the IR/index-specific hooks
// invoked via static_cast below.
// NOTE(review): this excerpt is line-sampled; many declaration lines are
// elided. Visible tokens are preserved; only comments were added.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
class CallsiteContextGraph {
  CallsiteContextGraph() = default;
  CallsiteContextGraph(const CallsiteContextGraph &) = default;
  CallsiteContextGraph(CallsiteContextGraph &&) = default;

  // Main passes: duplicate nodes reached by contexts needing different
  // allocation behavior, then materialize function clones / assign calls.
  void identifyClones();
  bool assignFunctions();

  // [fragment: stream-output operator / DOT graph-traits declarations;
  //  opening lines elided in this excerpt]
      const CallsiteContextGraph &CCG) {
      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
  void exportToDot(std::string Label) const;

  // Pair of (function, clone number); clone 0 is the original function.
  struct FuncInfo final : public std::pair<FuncTy *, unsigned> {
    using Base = std::pair<FuncTy *, unsigned>;
    FuncInfo(const Base &B) : Base(B) {}
    FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
    explicit operator bool() const { return this->first != nullptr; }
    FuncTy *func() const { return this->first; }
    unsigned cloneNo() const { return this->second; }

  // Pair of (call, clone number) identifying a call in a particular clone.
  struct CallInfo final : public std::pair<CallTy, unsigned> {
    using Base = std::pair<CallTy, unsigned>;
    CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
    explicit operator bool() const { return (bool)this->first; }
    CallTy call() const { return this->first; }
    unsigned cloneNo() const { return this->second; }
    void setCloneNo(unsigned N) { this->second = N; }
    // [fragment of a print helper]
    if (!operator bool()) {
    OS << "\t(clone " << cloneNo() << ")";

  // ContextNode members (node declaration line elided in this excerpt).
  bool Recursive = false;
  // Edges to callees (towards allocations) and callers (towards roots).
  std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
  std::vector<std::shared_ptr<ContextEdge>> CallerEdges;

  // Returns whichever edge list carries context-id info for this node:
  // callee edges normally, caller edges for allocation leaf nodes.
  const std::vector<std::shared_ptr<ContextEdge>> *
  getEdgesWithAllocInfo() const {
    if (!CalleeEdges.empty())
    assert(CallerEdges.empty() || IsAllocation ||
    if (!CallerEdges.empty() && IsAllocation)

  // [fragment: union of context ids across the relevant edge list]
  auto *Edges = getEdgesWithAllocInfo();
  for (auto &Edge : *Edges)
    Count += Edge->getContextIds().size();
  for (auto &Edge : *Edges)
    ContextIds.insert(Edge->getContextIds().begin(),
                      Edge->getContextIds().end());

  // OR of the alloc types across this node's info-carrying edges.
  uint8_t computeAllocType() const {
    auto *Edges = getEdgesWithAllocInfo();
    return (uint8_t)AllocationType::None;
    for (auto &Edge : *Edges) {

  // True iff no info-carrying edge has any context ids left.
  bool emptyContextIds() const {
    auto *Edges = getEdgesWithAllocInfo();
    for (auto &Edge : *Edges) {
      if (!Edge->getContextIds().empty())

  // Clone bookkeeping: all clones hang off the original node via CloneOf.
  std::vector<ContextNode *> Clones;
  ContextNode *CloneOf = nullptr;

  ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
  ContextNode(bool IsAllocation, CallInfo C)
      : IsAllocation(IsAllocation), Call(C) {}

  // Record Clone as a clone of this node's original (either this node's
  // CloneOf, or this node itself when it is the original).
  void addClone(ContextNode *Clone) {
      CloneOf->Clones.push_back(Clone);
      Clone->CloneOf = CloneOf;
      Clones.push_back(Clone);
      Clone->CloneOf = this;

  ContextNode *getOrigNode() {

  // [declaration fragments: edge lookup/removal helpers defined below]
    unsigned int ContextId);
  ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
  ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
  void eraseCalleeEdge(const ContextEdge *Edge);
  void eraseCallerEdge(const ContextEdge *Edge);

  bool hasCall() const { return (bool)Call.call(); }

  // A node is considered removed when its alloc type is None (asserted to
  // agree with its context ids being empty on an elided line).
  bool isRemoved() const {
    (AllocTypes == (uint8_t)AllocationType::None) ==
    return AllocTypes == (uint8_t)AllocationType::None;

  // ContextEdge fragments: constructor tail, clear(), isRemoved().
  ContextIds(std::move(ContextIds)) {}
  inline void clear() {
    AllocTypes = (uint8_t)AllocationType::None;
  inline bool isRemoved() const {
    if (Callee || Caller)

  // Graph-wide cleanup of None-typed (empty) edges.
  void removeNoneTypeCalleeEdges(ContextNode *Node);
  void removeNoneTypeCallerEdges(ContextNode *Node);
  recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,

  // Graph construction from profiled contexts.
  template <class NodeT, class IteratorT>
  std::vector<uint64_t>
  ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);
  template <class NodeT, class IteratorT>
  void addStackNodesForMIB(ContextNode *AllocNode,
  void updateStackNodes();
  void handleCallsitesWithMultipleTargets();
  bool partitionCallsByCallee(
      std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);

  // Map from node to the function containing its call.
  std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;

  using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator;

  // Per-call info collected while matching callsites to stack nodes.
  struct CallContextInfo {
    std::vector<uint64_t> StackIds;

  void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI = nullptr,
                           bool CalleeIter = true);
  void assignStackNodesPostOrder(
  void propagateDuplicateContextIds(
  void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,

  // CRTP dispatch helpers: forward to the IR- or index-specific derived
  // class implementation.
  return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
  calleesMatch(CallTy Call, EdgeIter &EI,
  const FuncTy *getCalleeFunc(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getCalleeFunc(Call);
  bool calleeMatchesFunc(
      CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc,
      std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
    return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(
        Call, Func, CallerFunc, FoundCalleeChain);
  bool sameCallee(CallTy Call1, CallTy Call2) {
    return static_cast<DerivedCCG *>(this)->sameCallee(Call1, Call2);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
  uint64_t getLastStackId(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
  // Bump the matching statistic, then delegate the actual call update.
  AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
  static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
  return static_cast<const DerivedCCG *>(this)->getAllocationCallType(Call);
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
    static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
  FuncInfo cloneFunctionForCallsite(
      FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
      std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
    return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
        Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
  std::string getLabel(const FuncTy *Func, const CallTy Call,
                       unsigned CloneNo) const {
    return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);

  // Create a node owned by NodeOwner and record its containing function.
  ContextNode *createNewNode(bool IsAllocation, const FuncTy *F = nullptr,
    NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation, C));
    auto *NewNode = NodeOwner.back().get();
    NodeToCallingFunc[NewNode] = F;

  // Node lookup by call or stack id.
  ContextNode *getNodeForInst(const CallInfo &C);
  ContextNode *getNodeForAlloc(const CallInfo &C);
  ContextNode *getNodeForStackId(uint64_t StackId);

  // Edge-moving primitives used during cloning.
  moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
  void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                     ContextNode *NewCallee,
                                     bool NewClone = false,
  void moveCalleeEdgeToNewCaller(const std::shared_ptr<ContextEdge> &Edge,
                                 ContextNode *NewCaller);

  // Owns all nodes; raw ContextNode* elsewhere are non-owning.
  std::vector<std::unique_ptr<ContextNode>> NodeOwner;
  // Monotonically increasing id assigned to each profiled context.
  unsigned int LastContextId = 0;
// Convenience aliases for the nested graph types (the `using X =` left-hand
// sides are on elided lines in this excerpt — presumably
// `using ContextNode = ...` etc.; confirm against the full file).
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
// IR (regular LTO / in-module) instantiation of the context graph: calls are
// Instructions, functions are llvm::Function. Implements the CRTP hooks the
// base class dispatches to. Many declaration lines are elided in this excerpt.
class ModuleCallsiteContextGraph
    : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
  ModuleCallsiteContextGraph(
  friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
  bool calleeMatchesFunc(
      std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
  // Walks through (possibly multiple) tail-call frames to locate the
  // profiled callee; sets FoundMultipleCalleeChains when ambiguous.
  bool findProfiledCalleeThroughTailCalls(
      std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
      bool &FoundMultipleCalleeChains);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           std::map<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
                       unsigned CloneNo) const;
// Thin wrapper around a PointerUnion of summary callsite/alloc records so an
// "index call" can be used like a call pointer by the templated graph code.
struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
  IndexCall(std::nullptr_t) : IndexCall() {}
  // operator-> returns self so member syntax works on the wrapper.
  IndexCall *operator->() { return this; }
  // [fragment of a print/dispatch helper over the union members]
  if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(Base)) {
  auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(Base);
// Summary-index (ThinLTO whole-program) instantiation of the context graph:
// calls are IndexCall wrappers, functions are FunctionSummary. Many
// declaration lines are elided in this excerpt.
class IndexCallsiteContextGraph
    : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
  IndexCallsiteContextGraph(
  // On destruction, flush callsite records synthesized for tail-call frames
  // into their owning function summaries.
  ~IndexCallsiteContextGraph() {
    for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) {
      for (auto &Callsite : I.second)
        FS->addCallsite(*Callsite.second);
  friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
  bool calleeMatchesFunc(
      std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
  bool sameCallee(IndexCall &Call1, IndexCall &Call2);
  bool findProfiledCalleeThroughTailCalls(
      std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
      bool &FoundMultipleCalleeChains);
  uint64_t getLastStackId(IndexCall &Call);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           std::map<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
  std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
                       unsigned CloneNo) const;
  // Reverse map from summary to its ValueInfo, used e.g. by getLabel.
  std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
  // Callsite records synthesized for tail-call chains, keyed per function,
  // flushed to summaries in the destructor above.
      std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
      FunctionCalleesToSynthesizedCallsiteInfos;
// DenseMapInfo specializations so CallInfo-over-IndexCall and IndexCall
// itself can be used as DenseMap keys (specialization headers elided).
    : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
    : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};

// [fragment: helper collapsing a both-cold-and-notcold type to NotCold]
    ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
  return AllocationType::NotCold;

// Checks that the alloc types recorded per-context (InAllocTypes) are
// compatible with the alloc types on the corresponding edges, treating None
// (removed) entries as trivially matching. Function name line elided —
// presumably allocTypesMatch; confirm against the full file.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    const std::vector<uint8_t> &InAllocTypes,
    const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
  assert(InAllocTypes.size() == Edges.size());
      InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
         const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
        // None on either side means the edge/context was removed: match.
        if (l == (uint8_t)AllocationType::None ||
            r->AllocTypes == (uint8_t)AllocationType::None)
        return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
// Like allocTypesMatch, but compares InAllocTypes (which parallels the
// ORIGINAL node's callee edges) against an existing Clone's callee edges,
// which may be a subset; a callee missing from the clone is a mismatch
// (handled on an elided line).
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool allocTypesMatchClone(
    const std::vector<uint8_t> &InAllocTypes,
    const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
  const ContextNode<DerivedCCG, FuncTy, CallTy> *Node = Clone->CloneOf;
  // InAllocTypes is indexed in parallel with the original node's edges.
  assert(InAllocTypes.size() == Node->CalleeEdges.size());
  // Index the clone's callee edges by callee node for O(1) lookup.
  for (const auto &E : Clone->CalleeEdges) {
    EdgeCalleeMap[E->Callee] = E->AllocTypes;
  for (unsigned I = 0; I < Node->CalleeEdges.size(); I++) {
    auto Iter = EdgeCalleeMap.find(Node->CalleeEdges[I]->Callee);
    if (Iter == EdgeCalleeMap.end())
    // None on either side means removed context: treat as matching.
    if (InAllocTypes[I] == (uint8_t)AllocationType::None ||
        Iter->second == (uint8_t)AllocationType::None)
    if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[I]))
// Looks up the graph node for a call: first as an allocation, then falling
// back to the non-allocation callsite map (returns null via lookup() when
// absent). Early-return on a found alloc node is on an elided line.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
  ContextNode *Node = getNodeForAlloc(C);
  return NonAllocationCallToContextNodeMap.lookup(C);
1029template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1030typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1031CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1033 return AllocationCallToContextNodeMap.lookup(
C);
1036template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1037typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1038CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1040 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1041 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1042 return StackEntryNode->second;
// ContextNode member adding a context id to the edge from the given caller,
// creating the edge (in both endpoint lists) if none exists yet. The member
// name line is elided in this excerpt — presumably addOrUpdateCallerEdge;
// confirm against the full file.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    unsigned int ContextId) {
  // Reuse an existing edge from this caller when present.
  for (auto &Edge : CallerEdges) {
    if (Edge->Caller == Caller) {
      Edge->getContextIds().insert(ContextId);
  // Otherwise create a new shared edge and link it from both endpoints.
  std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
  CallerEdges.push_back(Edge);
  Caller->CalleeEdges.push_back(Edge);
// Unlinks an edge from both of its endpoints. When an iterator EI into one
// endpoint's edge list is supplied, that list is erased via the iterator
// (CalleeIter selects which list EI walks) so the caller's loop stays valid.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
    ContextEdge *Edge, EdgeIter *EI, bool CalleeIter) {
  assert(!EI || (*EI)->get() == Edge);
  assert(!Edge->isRemoved());
  auto *Callee = Edge->Callee;
  auto *Caller = Edge->Caller;
  // Sizes captured for the shrink assertions below.
  auto CalleeCallerCount = Callee->CallerEdges.size();
  auto CallerCalleeCount = Caller->CalleeEdges.size();
  // [branch condition elided: no-iterator case erases from both lists]
    Callee->eraseCallerEdge(Edge);
    Caller->eraseCalleeEdge(Edge);
  } else if (CalleeIter) {
    // EI walks Caller->CalleeEdges; erase there via EI to keep it valid.
    Callee->eraseCallerEdge(Edge);
    *EI = Caller->CalleeEdges.erase(*EI);
    // [else branch: EI walks Callee->CallerEdges]
    Caller->eraseCalleeEdge(Edge);
    *EI = Callee->CallerEdges.erase(*EI);
  assert(Callee->CallerEdges.size() < CalleeCallerCount);
  assert(Caller->CalleeEdges.size() < CallerCalleeCount);
// Removes all callee edges of Node whose alloc type is None (i.e. whose
// context-id set has been fully drained); such edges must have empty ids.
// The `auto &Edge = *EI;` binding and iterator-advance else-branch are on
// elided lines.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<
    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
  for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
      assert(Edge->ContextIds.empty());
      // removeEdgeFromGraph advances EI for us (CalleeIter = true).
      removeEdgeFromGraph(Edge.get(), &EI, true);
// Caller-side counterpart of removeNoneTypeCalleeEdges: drops None-typed
// caller edges, unlinking the caller's callee list manually and erasing via
// the local iterator.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<
    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
  for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
      assert(Edge->ContextIds.empty());
      Edge->Caller->eraseCalleeEdge(Edge.get());
      EI = Node->CallerEdges.erase(EI);
1125template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1126typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1127CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1128 findEdgeFromCallee(
const ContextNode *Callee) {
1129 for (
const auto &Edge : CalleeEdges)
1130 if (Edge->Callee == Callee)
1135template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1136typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1137CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1138 findEdgeFromCaller(
const ContextNode *Caller) {
1139 for (
const auto &Edge : CallerEdges)
1140 if (Edge->Caller == Caller)
1145template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1146void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1147 eraseCalleeEdge(
const ContextEdge *Edge) {
1149 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1150 return CalleeEdge.get() == Edge;
1152 assert(EI != CalleeEdges.end());
1153 CalleeEdges.erase(EI);
1156template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1157void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1158 eraseCallerEdge(
const ContextEdge *Edge) {
1160 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1161 return CallerEdge.get() == Edge;
1163 assert(EI != CallerEdges.end());
1164 CallerEdges.erase(EI);
// Computes the combined alloc type (bitwise OR of each context's recorded
// type) over a set of context ids; the accumulation and early-exit when both
// bits are set are on elided lines.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
  // Mask with both type bits set, used for the saturation early-exit.
  (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
  for (auto Id : ContextIds) {
// Computes the combined alloc type over the intersection of two context-id
// sets by iterating the first set and skipping ids absent from the second;
// the accumulation lines are elided. Callers pass the smaller set first.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
  // Mask with both type bits set, used for the saturation early-exit.
  (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
  for (auto Id : Node1Ids) {
    if (!Node2Ids.count(Id))
// Dispatches to intersectAllocTypesImpl with the smaller id set first, so
// the linear scan runs over fewer elements. Parameter declarations are on
// elided lines (two context-id sets).
template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
  if (Node1Ids.size() < Node2Ids.size())
    return intersectAllocTypesImpl(Node1Ids, Node2Ids);
  return intersectAllocTypesImpl(Node2Ids, Node1Ids);
// Creates and registers the allocation node for a profiled allocation call.
// Each allocation may be registered only once (asserted). AllocTypes starts
// as None and is accumulated later as MIB contexts are added.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
  assert(!getNodeForAlloc(Call));
  ContextNode *AllocNode = createNewNode(true, F, Call);
  AllocationCallToContextNodeMap[Call] = AllocNode;
  // For allocations, OrigStackOrAllocId holds the context id counter value.
  AllocNode->OrigStackOrAllocId = LastContextId;
  AllocNode->AllocTypes = (uint8_t)AllocationType::None;

// [fragment of a separate helper testing the NotCold/Cold bits of an
//  AllocTypes mask — enclosing function header elided in this excerpt]
  if (AllocTypes & (uint8_t)AllocationType::NotCold)
  if (AllocTypes & (uint8_t)AllocationType::Cold)
// Adds the chain of stack-frame nodes for one MIB (memory info block)
// context of an allocation: assigns a fresh context id, records its alloc
// type and optional size info, then walks the stack frames creating/linking
// a node per frame (edge creation lines are elided).
template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
  // Fresh id for this profiled context, mapped to its allocation type.
  ContextIdToAllocationType[++LastContextId] = AllocType;
  if (!ContextSizeInfo.empty()) {
    auto &Entry = ContextIdToContextSizeInfos[LastContextId];
  // Walk frames from the allocation outward, linking each to the previous.
  ContextNode *PrevNode = AllocNode;
  ContextIter != StackContext.end(); ++ContextIter) {
    auto StackId = getStackId(*ContextIter);
    ContextNode *StackNode = getNodeForStackId(StackId);
    // [node-creation branch partially elided]
    StackEntryIdToContextNodeMap[StackId] = StackNode;
    StackNode->OrigStackOrAllocId = StackId;
// Allocates a fresh context id for every id in StackSequenceContextIds,
// recording the old->new mapping and copying the allocation type so the
// duplicate behaves like the original context. Returns the new id set.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
  for (auto OldId : StackSequenceContextIds) {
    NewContextIds.insert(++LastContextId);
    OldToNewContextIds[OldId].insert(LastContextId);
    assert(ContextIdToAllocationType.count(OldId));
    ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
  return NewContextIds;
// After duplicateContextIds has minted new ids, walks caller edges upward
// from every allocation node and adds the corresponding new ids wherever an
// old id appears, so duplicated contexts stay represented along their whole
// call chain. Uses an explicit self-referencing lambda for the recursion.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    propagateDuplicateContextIds(
  // [helper fragment: collect the new ids mapped from a given id set]
  for (auto Id : ContextIds)
    if (auto NewId = OldToNewContextIds.find(Id);
        NewId != OldToNewContextIds.end())
      NewIds.insert(NewId->second.begin(), NewId->second.end());

  // Recursive caller walk; the lambda receives itself to allow recursion.
  auto UpdateCallers = [&](ContextNode *Node,
                           auto &&UpdateCallers) -> void {
    for (const auto &Edge : Node->CallerEdges) {
      // Visit each edge once.
      auto Inserted = Visited.insert(Edge.get());
      ContextNode *NextNode = Edge->Caller;
      if (!NewIdsToAdd.empty()) {
        Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
        // Only recurse when this edge actually carried an old id.
        UpdateCallers(NextNode, Visited, UpdateCallers);

  // Start the propagation from every allocation node.
  for (auto &Entry : AllocationCallToContextNodeMap) {
    UpdateCallers(Node, Visited, UpdateCallers);
// Connects NewNode into the graph alongside OrigNode by splitting the
// context ids in RemainingContextIds (elided parameter) off OrigNode's
// callee or caller edges (direction chosen by TowardsCallee) onto fresh
// edges from/to NewNode. Ids appearing on multiple edges are recursive and
// handled specially; fully drained original edges are removed.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
    ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
      TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
  // First pass: ids seen on more than one original edge are recursive.
  for (auto &CE : OrigEdges) {
    AllCallerContextIds.reserve(CE->getContextIds().size());
    for (auto Id : CE->getContextIds())
      if (!AllCallerContextIds.insert(Id).second)
        RecursiveContextIds.insert(Id);
  // Second pass: move the requested ids off each original edge.
  for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
    // Splits Edge's ids into moved (NewEdgeContextIds) vs not-found.
    set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
                 NotFoundContextIds);
    if (RecursiveContextIds.empty()) {
      // Without recursion each id lives on one edge; drop found ids from
      // the remaining work set by swapping in the not-found remainder.
      RemainingContextIds.swap(NotFoundContextIds);
    // [recursive-id handling partially elided]
    set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
    if (NewEdgeContextIds.empty()) {
    if (TowardsCallee) {
      uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
      auto NewEdge = std::make_shared<ContextEdge>(
          Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
      NewNode->CalleeEdges.push_back(NewEdge);
      NewEdge->Callee->CallerEdges.push_back(NewEdge);
      // [else branch: mirror construction towards callers]
      uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
      auto NewEdge = std::make_shared<ContextEdge>(
          NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
      NewNode->CallerEdges.push_back(NewEdge);
      NewEdge->Caller->CalleeEdges.push_back(NewEdge);
    // Remove original edges that lost all of their context ids.
    if (Edge->getContextIds().empty()) {
      removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
// Sanity check for a single edge (function name line elided — invoked below
// as checkEdge<...>): a live edge must carry at least one context id.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
  assert(!Edge->ContextIds.empty());
// Sanity check for a node (name line elided — invoked elsewhere as
// checkNode<...>): verifies the node's context ids equal the union of the
// ids on its caller edges and on its callee edges, optionally checking each
// edge too. Removed nodes are skipped.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
    bool CheckEdges = true) {
  if (Node->isRemoved())
  auto NodeContextIds = Node->getContextIds();
  // Caller-side union must reproduce the node's id set.
  if (Node->CallerEdges.size()) {
    Node->CallerEdges.front()->ContextIds);
      checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
      set_union(CallerEdgeContextIds, Edge->ContextIds);
    NodeContextIds == CallerEdgeContextIds ||
  // Callee-side union must also reproduce the node's id set.
  if (Node->CalleeEdges.size()) {
    Node->CalleeEdges.front()->ContextIds);
      checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
      set_union(CalleeEdgeContextIds, Edge->getContextIds());
    NodeContextIds == CalleeEdgeContextIds);
  for (const auto &E : Node->CalleeEdges)
// Post-order DFS over caller edges that, at each stack node whose id has
// matching profiled callsites (collected in updateStackNodes), either reuses
// the node for a single trivial match or creates a new node per call and
// re-links the affected context ids through it.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    assignStackNodesPostOrder(
    &StackIdToMatchingCalls,
  // Iterate over a copy: recursion below may mutate Node->CallerEdges.
  auto CallerEdges = Node->CallerEdges;
  for (auto &Edge : CallerEdges) {
    if (Edge->isRemoved()) {
    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
                              CallToMatchingCall);
  // Only process stack nodes that have matching calls recorded.
  if (Node->IsAllocation ||
      !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
  auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
  // Fast path: a single matching call covering one stack id can simply
  // claim this node (unless the node is recursive).
  if (Calls.size() == 1) {
    auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
    if (Ids.size() == 1) {
      assert(SavedContextIds.empty());
      if (Node->Recursive)
      Node->setCall(Call);
      NonAllocationCallToContextNodeMap[Call] = Node;
  // [two alternative initializations of LastNode; surrounding branch lines
  //  elided in this excerpt]
  ContextNode *LastNode = getNodeForStackId(LastId);
  ContextNode *LastNode = Node;
  // General path: one new node per matching call, in sorted order.
  bool PrevIterCreatedNode = false;
  bool CreatedNode = false;
  for (unsigned I = 0; I < Calls.size();
       I++, PrevIterCreatedNode = CreatedNode) {
    CreatedNode = false;
    auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
    // No saved ids: this call was folded into an earlier matching call.
    if (SavedContextIds.empty()) {
      if (!CallToMatchingCall.contains(Call))
      auto MatchingCall = CallToMatchingCall[Call];
      if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
        // The matching call's node must have been created by a prior
        // iteration of this loop.
        assert(I > 0 && !PrevIterCreatedNode);
      NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
    assert(LastId == Ids.back());
    // Walk the stack-id sequence from the last id towards the first.
    ContextNode *PrevNode = LastNode;
    for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
      ContextNode *CurNode = getNodeForStackId(Id);
      assert(!CurNode->Recursive);
      auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
    if (SavedContextIds.empty()) {
    // Create the callsite node and splice the saved ids through it.
    ContextNode *NewNode = createNewNode(false, Func, Call);
    NonAllocationCallToContextNodeMap[Call] = NewNode;
    NewNode->AllocTypes = computeAllocType(SavedContextIds);
    ContextNode *FirstNode = getNodeForStackId(Ids[0]);
    // Link the new node below the first frame and above the last frame.
    connectNewNode(NewNode, FirstNode, true, SavedContextIds);
    connectNewNode(NewNode, LastNode, false, SavedContextIds);
    // Remove the moved ids from the interior frame-to-frame edges.
    for (auto Id : Ids) {
      ContextNode *CurNode = getNodeForStackId(Id);
      auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
      set_subtract(PrevEdge->getContextIds(), SavedContextIds);
      if (PrevEdge->getContextIds().empty())
        removeEdgeFromGraph(PrevEdge);
      // Recompute the interior node's alloc type from what remains.
      CurNode->AllocTypes = CurNode->CalleeEdges.empty()
                                ? (uint8_t)AllocationType::None
                                : CurNode->computeAllocType();
    // Optional verification of the new and touched nodes.
    checkNode<DerivedCCG, FuncTy, CallTy>(NewNode, true);
    for (auto Id : Ids) {
      ContextNode *CurNode = getNodeForStackId(Id);
      checkNode<DerivedCCG, FuncTy, CallTy>(CurNode, true);
// Matches profiled non-allocation callsites (calls with callsite metadata)
// to sequences of stack nodes: groups calls by their last stack id, sorts
// them, computes the context ids each call's stack sequence covers,
// duplicates ids shared across different functions, then hands the result
// to assignStackNodesPostOrder.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
  // Bucket every callsite-with-metadata by the last stack id it covers.
  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
    for (auto &Call : CallsWithMetadata) {
      // Allocations were already given nodes during construction.
      if (AllocationCallToContextNodeMap.count(Call))
      auto StackIdsWithContextNodes =
          getStackIdsWithContextNodesForCall(Call.call());
      if (StackIdsWithContextNodes.empty())
      StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
          {Call.call(), StackIdsWithContextNodes, Func, {}});
  for (auto &It : StackIdToMatchingCalls) {
    auto &Calls = It.getSecond();
    // Single trivial match: handled directly in the post-order pass.
    if (Calls.size() == 1) {
      auto &Ids = Calls[0].StackIds;
      if (Ids.size() == 1)
    // Sort calls by decreasing stack depth, then by ids, then by original
    // function order (via FuncToIndex) for deterministic processing.
    for (const auto &[Idx, CallCtxInfo] : enumerate(Calls))
      FuncToIndex.insert({CallCtxInfo.Func, Idx});
        Calls.begin(), Calls.end(),
        [&FuncToIndex](const CallContextInfo &A, const CallContextInfo &B) {
          return A.StackIds.size() > B.StackIds.size() ||
                 (A.StackIds.size() == B.StackIds.size() &&
                  (A.StackIds < B.StackIds ||
                   (A.StackIds == B.StackIds &&
                    FuncToIndex[A.Func] < FuncToIndex[B.Func])));
    ContextNode *LastNode = getNodeForStackId(LastId);
    // Recursive last frames are not handled here.
    if (LastNode->Recursive)
    for (unsigned I = 0; I < Calls.size(); I++) {
      auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
      assert(SavedContextIds.empty());
      assert(LastId == Ids.back());
      // Reset the per-id-sequence function set when the sequence changes.
      if (I > 0 && Ids != Calls[I - 1].StackIds)
        MatchingIdsFuncSet.clear();
      // Intersect context ids along the call's whole stack sequence.
      ContextNode *PrevNode = LastNode;
      ContextNode *CurNode = LastNode;
      for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
        CurNode = getNodeForStackId(Id);
        if (CurNode->Recursive) {
        auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
        set_intersect(StackSequenceContextIds, Edge->getContextIds());
        if (StackSequenceContextIds.empty()) {
      // If this call doesn't end at the deepest frame, exclude contexts
      // that continue past the last node (they belong to longer sequences).
      if (Ids.back() != getLastStackId(Call)) {
        for (const auto &PE : LastNode->CallerEdges) {
          set_subtract(StackSequenceContextIds, PE->getContextIds());
          if (StackSequenceContextIds.empty())
        if (StackSequenceContextIds.empty())
      MatchingIdsFuncSet.insert(Func);
      // Decide whether later calls with the same sequence live in other
      // functions (ids must then be duplicated) or can share this node.
      bool DuplicateContextIds = false;
      for (unsigned J = I + 1; J < Calls.size(); J++) {
        auto &CallCtxInfo = Calls[J];
        auto &NextIds = CallCtxInfo.StackIds;
        auto *NextFunc = CallCtxInfo.Func;
        if (NextFunc != Func) {
          DuplicateContextIds = true;
        auto &NextCall = CallCtxInfo.Call;
        // Same function: fold the later call into this one.
        CallToMatchingCall[NextCall] = Call;
      OldToNewContextIds.reserve(OldToNewContextIds.size() +
                                 StackSequenceContextIds.size());
      // Save either duplicated ids or the originals for the later pass.
          ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
          : StackSequenceContextIds;
      assert(!SavedContextIds.empty());
      if (!DuplicateContextIds) {
        // Claimed ids are no longer available to later calls in the bucket.
        set_subtract(LastNodeContextIds, StackSequenceContextIds);
        if (LastNodeContextIds.empty())
  // Propagate duplicated ids up the graph, then build callsite nodes.
  propagateDuplicateContextIds(OldToNewContextIds);
  for (auto &Entry : AllocationCallToContextNodeMap)
    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
                              CallToMatchingCall);
// [tail of ModuleCallsiteContextGraph::getLastStackId — reads the callsite
//  metadata chain and returns its innermost (last) stack id]
  Call->getMetadata(LLVMContext::MD_callsite));
  return CallsiteContext.back();

// Index variant: resolves the last stack-id *index* stored in the summary
// callsite record through the index's stack-id table.
uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
  assert(isa<CallsiteInfo *>(Call));
  CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
  return Index.getStackIdAtIndex(CallsiteContext.back());
// DOT label for an IR call: "caller -> callee" function names. Note: Func
// and CloneNo are unused in the visible lines; the final .str() conversion
// is on an elided line.
std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
                                                 unsigned CloneNo) const {
  return (Twine(Call->getFunction()->getName()) + " -> " +
          cast<CallBase>(Call)->getCalledFunction()->getName())
// DOT label for an index call: resolves the containing function's name via
// FSToVIMap; allocations label as "name -> alloc", callsites as
// "name -> <callee of this clone>" (callee resolution partially elided).
std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
                                                const IndexCall &Call,
                                                unsigned CloneNo) const {
  auto VI = FSToVIMap.find(Func);
  assert(VI != FSToVIMap.end());
  if (isa<AllocInfo *>(Call))
    return (VI->second.name() + " -> alloc").str();
  auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call);
  return (VI->second.name() + " -> " +
      Callsite->Clones[CloneNo]))
// IR variant: reads the call's callsite metadata and delegates to the
// templated getStackIdsWithContextNodes walker (call expression partially
// elided).
std::vector<uint64_t>
ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
  Call->getMetadata(LLVMContext::MD_callsite));
  return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(

// Index variant: wraps the summary callsite record's stack-id indices
// (delegation line elided).
std::vector<uint64_t>
IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
  assert(isa<CallsiteInfo *>(Call));
  CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
// Walks a callsite context (innermost first), collecting each frame's stack
// id for as long as a context node exists for it; stops at the first frame
// with no node (break is on an elided line).
template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
std::vector<uint64_t>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
  std::vector<uint64_t> StackIds;
  for (auto IdOrIndex : CallsiteContext) {
    auto StackId = getStackId(IdOrIndex);
    ContextNode *Node = getNodeForStackId(StackId);
    StackIds.push_back(StackId);
2020ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2023 :
Mod(
M), OREGetter(OREGetter) {
2025 std::vector<CallInfo> CallsWithMetadata;
2026 for (
auto &BB :
F) {
2027 for (
auto &
I : BB) {
2028 if (!isa<CallBase>(
I))
2030 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2031 CallsWithMetadata.push_back(&
I);
2032 auto *AllocNode = addAllocNode(&
I, &
F);
2033 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2037 for (
auto &MDOp : MemProfMD->operands()) {
2038 auto *MIBMD = cast<const MDNode>(MDOp);
2039 std::vector<ContextTotalSize> ContextSizeInfo;
2041 if (MIBMD->getNumOperands() > 2) {
2042 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2043 MDNode *ContextSizePair =
2044 dyn_cast<MDNode>(MIBMD->getOperand(
I));
2046 uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
2049 uint64_t TotalSize = mdconst::dyn_extract<ConstantInt>(
2052 ContextSizeInfo.push_back({FullStackId, TotalSize});
2058 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2059 AllocNode, StackContext, CallsiteContext,
2062 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
2065 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2066 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2069 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2070 CallsWithMetadata.push_back(&
I);
2074 if (!CallsWithMetadata.empty())
2075 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2079 dbgs() <<
"CCG before updating call stack chains:\n";
2084 exportToDot(
"prestackupdate");
2088 handleCallsitesWithMultipleTargets();
2091 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2092 for (
auto &Call : FuncEntry.second)
2093 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2096IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2101 for (
auto &
I : Index) {
2103 for (
auto &S :
VI.getSummaryList()) {
2112 !isPrevailing(
VI.getGUID(), S.get()))
2114 auto *
FS = dyn_cast<FunctionSummary>(S.get());
2117 std::vector<CallInfo> CallsWithMetadata;
2118 if (!
FS->allocs().empty()) {
2119 for (
auto &AN :
FS->mutableAllocs()) {
2124 if (AN.MIBs.empty())
2126 IndexCall AllocCall(&AN);
2127 CallsWithMetadata.push_back(AllocCall);
2128 auto *AllocNode = addAllocNode(AllocCall, FS);
2137 AN.ContextSizeInfos.size() == AN.MIBs.size());
2139 for (
auto &MIB : AN.MIBs) {
2142 std::vector<ContextTotalSize> ContextSizeInfo;
2143 if (!AN.ContextSizeInfos.empty()) {
2144 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2145 ContextSizeInfo.push_back({FullStackId, TotalSize});
2147 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2148 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2152 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
2157 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2161 if (!
FS->callsites().empty())
2162 for (
auto &SN :
FS->mutableCallsites()) {
2163 IndexCall StackNodeCall(&SN);
2164 CallsWithMetadata.push_back(StackNodeCall);
2167 if (!CallsWithMetadata.empty())
2168 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2170 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2176 dbgs() <<
"CCG before updating call stack chains:\n";
2181 exportToDot(
"prestackupdate");
2185 handleCallsitesWithMultipleTargets();
2188template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2189void CallsiteContextGraph<DerivedCCG, FuncTy,
2190 CallTy>::handleCallsitesWithMultipleTargets() {
2205 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2206 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2216 std::vector<CallInfo> AllCalls;
2217 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2218 AllCalls.push_back(
Node->Call);
2219 AllCalls.insert(AllCalls.end(),
Node->MatchingCalls.begin(),
2220 Node->MatchingCalls.end());
2233 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2236 auto It = AllCalls.begin();
2238 for (; It != AllCalls.end(); ++It) {
2241 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2244 if (!Edge->Callee->hasCall())
2246 assert(NodeToCallingFunc.count(Edge->Callee));
2248 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2257 if (
Node->Call != ThisCall) {
2258 Node->setCall(ThisCall);
2269 Node->MatchingCalls.clear();
2272 if (It == AllCalls.end()) {
2273 RemovedEdgesWithMismatchedCallees++;
2282 for (++It; It != AllCalls.end(); ++It) {
2286 Node->MatchingCalls.push_back(ThisCall);
2295 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2296 return !it.second->hasCall() || it.second->Call != it.first;
2300 for (
auto &[Call,
Node] : NewCallToNode)
2301 NonAllocationCallToContextNodeMap[
Call] =
Node;
2305 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
2306 NonAllocationCallToContextNodeMap[
Call] =
Node;
2309template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2310bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2312 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2316 struct CallsWithSameCallee {
2317 std::vector<CallInfo> Calls;
2318 ContextNode *
Node =
nullptr;
2324 for (
auto ThisCall : AllCalls) {
2325 auto *
F = getCalleeFunc(
ThisCall.call());
2327 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2336 for (
const auto &Edge :
Node->CalleeEdges) {
2337 if (!Edge->Callee->hasCall())
2339 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2340 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2341 CalleeNodeToCallInfo[Edge->Callee] =
2342 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2348 if (CalleeNodeToCallInfo.
empty())
2360 ContextNode *UnmatchedCalleesNode =
nullptr;
2362 bool UsedOrigNode =
false;
2367 auto CalleeEdges =
Node->CalleeEdges;
2368 for (
auto &Edge : CalleeEdges) {
2369 if (!Edge->Callee->hasCall())
2374 ContextNode *CallerNodeToUse =
nullptr;
2378 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2379 if (!UnmatchedCalleesNode)
2380 UnmatchedCalleesNode =
2381 createNewNode(
false, NodeToCallingFunc[
Node]);
2382 CallerNodeToUse = UnmatchedCalleesNode;
2386 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2389 if (!UsedOrigNode) {
2392 Node->MatchingCalls.clear();
2393 UsedOrigNode =
true;
2396 createNewNode(
false, NodeToCallingFunc[
Node]);
2400 Info->Node->setCall(
Info->Calls.front());
2401 Info->Node->MatchingCalls.insert(
Info->Node->MatchingCalls.end(),
2402 Info->Calls.begin() + 1,
2407 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2409 CallerNodeToUse =
Info->Node;
2413 if (CallerNodeToUse ==
Node)
2416 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2423 for (
auto &
I : CalleeNodeToCallInfo)
2424 removeNoneTypeCallerEdges(
I.second->Node);
2425 if (UnmatchedCalleesNode)
2426 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2427 removeNoneTypeCallerEdges(
Node);
2440 return Index.getStackIdAtIndex(IdOrIndex);
2443template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2444bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2445 CallTy Call, EdgeIter &EI,
2448 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2449 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
2452 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2453 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
2458 if (FoundCalleeChain.empty())
2461 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
2462 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2466 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
2467 Edge->ContextIds.end());
2468 CurEdge->AllocTypes |= Edge->AllocTypes;
2473 auto NewEdge = std::make_shared<ContextEdge>(
2474 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
2475 Callee->CallerEdges.push_back(NewEdge);
2476 if (Caller == Edge->Caller) {
2480 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2483 "Iterator position not restored after insert and increment");
2485 Caller->CalleeEdges.push_back(NewEdge);
2490 auto *CurCalleeNode = Edge->Callee;
2491 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2492 ContextNode *NewNode =
nullptr;
2494 if (TailCallToContextNodeMap.
count(NewCall)) {
2495 NewNode = TailCallToContextNodeMap[NewCall];
2496 NewNode->AllocTypes |= Edge->AllocTypes;
2498 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2500 NewNode = createNewNode(
false, Func, NewCall);
2501 TailCallToContextNodeMap[NewCall] = NewNode;
2502 NewNode->AllocTypes = Edge->AllocTypes;
2506 AddEdge(NewNode, CurCalleeNode);
2508 CurCalleeNode = NewNode;
2512 AddEdge(Edge->Caller, CurCalleeNode);
2516 auto *
Caller = Edge->Caller;
2520 removeEdgeFromGraph(Edge.get(), &EI,
true);
2532bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2534 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2535 bool &FoundMultipleCalleeChains) {
2542 FoundCalleeChain.push_back({Callsite,
F});
2545 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
2547 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
2549 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2557 bool FoundSingleCalleeChain =
false;
2558 for (
auto &BB : *CalleeFunc) {
2559 for (
auto &
I : BB) {
2560 auto *CB = dyn_cast<CallBase>(&
I);
2561 if (!CB || !CB->isTailCall())
2563 auto *CalledValue = CB->getCalledOperand();
2564 auto *CalledFunction = CB->getCalledFunction();
2565 if (CalledValue && !CalledFunction) {
2566 CalledValue = CalledValue->stripPointerCasts();
2568 CalledFunction = dyn_cast<Function>(CalledValue);
2572 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2573 assert(!CalledFunction &&
2574 "Expected null called function in callsite for alias");
2575 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2577 if (!CalledFunction)
2579 if (CalledFunction == ProfiledCallee) {
2580 if (FoundSingleCalleeChain) {
2581 FoundMultipleCalleeChains =
true;
2584 FoundSingleCalleeChain =
true;
2585 FoundProfiledCalleeCount++;
2586 FoundProfiledCalleeDepth +=
Depth;
2587 if (
Depth > FoundProfiledCalleeMaxDepth)
2588 FoundProfiledCalleeMaxDepth =
Depth;
2589 SaveCallsiteInfo(&
I, CalleeFunc);
2590 }
else if (findProfiledCalleeThroughTailCalls(
2591 ProfiledCallee, CalledFunction,
Depth + 1,
2592 FoundCalleeChain, FoundMultipleCalleeChains)) {
2595 assert(!FoundMultipleCalleeChains);
2596 if (FoundSingleCalleeChain) {
2597 FoundMultipleCalleeChains =
true;
2600 FoundSingleCalleeChain =
true;
2601 SaveCallsiteInfo(&
I, CalleeFunc);
2602 }
else if (FoundMultipleCalleeChains)
2607 return FoundSingleCalleeChain;
2611 auto *CB = dyn_cast<CallBase>(Call);
2612 if (!CB->getCalledOperand() || CB->isIndirectCall())
2614 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2615 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2617 return dyn_cast<Function>(Alias->getAliasee());
2618 return dyn_cast<Function>(CalleeVal);
2621bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2623 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2624 auto *CB = dyn_cast<CallBase>(Call);
2625 if (!CB->getCalledOperand() || CB->isIndirectCall())
2627 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2628 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2629 if (CalleeFunc == Func)
2631 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2632 if (Alias && Alias->getAliasee() == Func)
2643 bool FoundMultipleCalleeChains =
false;
2644 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2646 FoundMultipleCalleeChains)) {
2647 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2648 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2649 <<
" that actually called " << CalleeVal->getName()
2650 << (FoundMultipleCalleeChains
2651 ?
" (found multiple possible chains)"
2654 if (FoundMultipleCalleeChains)
2655 FoundProfiledCalleeNonUniquelyCount++;
2662bool ModuleCallsiteContextGraph::sameCallee(
Instruction *Call1,
2664 auto *CB1 = cast<CallBase>(Call1);
2665 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2667 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2668 auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
2669 auto *CB2 = cast<CallBase>(Call2);
2670 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2672 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2673 auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
2674 return CalleeFunc1 == CalleeFunc2;
2677bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2679 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2680 bool &FoundMultipleCalleeChains) {
2689 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2690 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2693 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2696 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2697 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2704 bool FoundSingleCalleeChain =
false;
2707 !isPrevailing(CurCallee.
getGUID(), S.get()))
2709 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2712 auto FSVI = CurCallee;
2713 auto *AS = dyn_cast<AliasSummary>(S.get());
2715 FSVI = AS->getAliaseeVI();
2716 for (
auto &CallEdge :
FS->calls()) {
2717 if (!CallEdge.second.hasTailCall())
2719 if (CallEdge.first == ProfiledCallee) {
2720 if (FoundSingleCalleeChain) {
2721 FoundMultipleCalleeChains =
true;
2724 FoundSingleCalleeChain =
true;
2725 FoundProfiledCalleeCount++;
2726 FoundProfiledCalleeDepth +=
Depth;
2727 if (
Depth > FoundProfiledCalleeMaxDepth)
2728 FoundProfiledCalleeMaxDepth =
Depth;
2729 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2731 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2732 FSToVIMap[
FS] = FSVI;
2733 }
else if (findProfiledCalleeThroughTailCalls(
2734 ProfiledCallee, CallEdge.first,
Depth + 1,
2735 FoundCalleeChain, FoundMultipleCalleeChains)) {
2738 assert(!FoundMultipleCalleeChains);
2739 if (FoundSingleCalleeChain) {
2740 FoundMultipleCalleeChains =
true;
2743 FoundSingleCalleeChain =
true;
2744 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2746 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2747 FSToVIMap[
FS] = FSVI;
2748 }
else if (FoundMultipleCalleeChains)
2753 return FoundSingleCalleeChain;
2757IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
2759 if (
Callee.getSummaryList().empty())
2761 return dyn_cast<FunctionSummary>(
Callee.getSummaryList()[0]->getBaseObject());
2764bool IndexCallsiteContextGraph::calleeMatchesFunc(
2767 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2772 Callee.getSummaryList().empty()
2774 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2775 assert(FSToVIMap.count(Func));
2776 auto FuncVI = FSToVIMap[
Func];
2777 if (Callee == FuncVI ||
2792 bool FoundMultipleCalleeChains =
false;
2793 if (!findProfiledCalleeThroughTailCalls(
2794 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2795 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2796 <<
" from " << FSToVIMap[CallerFunc]
2797 <<
" that actually called " << Callee
2798 << (FoundMultipleCalleeChains
2799 ?
" (found multiple possible chains)"
2802 if (FoundMultipleCalleeChains)
2803 FoundProfiledCalleeNonUniquelyCount++;
2810bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2811 ValueInfo Callee1 = dyn_cast_if_present<CallsiteInfo *>(Call1)->Callee;
2812 ValueInfo Callee2 = dyn_cast_if_present<CallsiteInfo *>(Call2)->Callee;
2813 return Callee1 == Callee2;
2816template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2817void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
2823template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2824void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2826 OS <<
"Node " <<
this <<
"\n";
2830 OS <<
" (recursive)";
2832 if (!MatchingCalls.
empty()) {
2833 OS <<
"\tMatchingCalls:\n";
2834 for (
auto &MatchingCall : MatchingCalls) {
2836 MatchingCall.print(
OS);
2841 OS <<
"\tContextIds:";
2843 auto ContextIds = getContextIds();
2844 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2845 std::sort(SortedIds.begin(), SortedIds.end());
2846 for (
auto Id : SortedIds)
2849 OS <<
"\tCalleeEdges:\n";
2850 for (
auto &Edge : CalleeEdges)
2851 OS <<
"\t\t" << *Edge <<
"\n";
2852 OS <<
"\tCallerEdges:\n";
2853 for (
auto &Edge : CallerEdges)
2854 OS <<
"\t\t" << *Edge <<
"\n";
2855 if (!Clones.empty()) {
2858 for (
auto *Clone : Clones)
2861 }
else if (CloneOf) {
2862 OS <<
"\tClone of " << CloneOf <<
"\n";
2866template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2867void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
2873template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2874void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2878 OS <<
" ContextIds:";
2879 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2880 std::sort(SortedIds.begin(), SortedIds.end());
2881 for (
auto Id : SortedIds)
2885template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2886void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
2890template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2891void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2893 OS <<
"Callsite Context Graph:\n";
2894 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2895 for (
const auto Node : nodes<GraphType>(
this)) {
2896 if (
Node->isRemoved())
2903template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2904void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2906 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2907 for (
const auto Node : nodes<GraphType>(
this)) {
2908 if (
Node->isRemoved())
2910 if (!
Node->IsAllocation)
2913 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
2914 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2915 std::sort(SortedIds.begin(), SortedIds.end());
2916 for (
auto Id : SortedIds) {
2917 auto TypeI = ContextIdToAllocationType.find(Id);
2918 assert(TypeI != ContextIdToAllocationType.end());
2919 auto CSI = ContextIdToContextSizeInfos.find(Id);
2920 if (CSI != ContextIdToContextSizeInfos.end()) {
2921 for (
auto &Info : CSI->second) {
2922 OS <<
"MemProf hinting: "
2924 <<
" full allocation context " <<
Info.FullStackId
2925 <<
" with total size " <<
Info.TotalSize <<
" is "
2927 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
2929 <<
" due to cold byte percent";
2937template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2938void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2939 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2940 for (
const auto Node : nodes<GraphType>(
this)) {
2941 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2942 for (
auto &Edge :
Node->CallerEdges)
2943 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
2947template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2949 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2950 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2952 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
2968 return G->NodeOwner.begin()->get();
2971 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2972 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2980 decltype(&GetCallee)>;
2991template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2996 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3002 std::string LabelString =
3003 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
3004 Twine(Node->OrigStackOrAllocId))
3006 LabelString +=
"\n";
3007 if (Node->hasCall()) {
3008 auto Func =
G->NodeToCallingFunc.find(Node);
3009 assert(Func !=
G->NodeToCallingFunc.end());
3011 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
3013 LabelString +=
"null call";
3014 if (Node->Recursive)
3015 LabelString +=
" (recursive)";
3017 LabelString +=
" (external)";
3023 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(Node) +
" " +
3024 getContextIds(Node->getContextIds()) +
"\"")
3027 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
3028 AttributeString +=
",style=\"filled\"";
3029 if (Node->CloneOf) {
3030 AttributeString +=
",color=\"blue\"";
3031 AttributeString +=
",style=\"filled,bold,dashed\"";
3033 AttributeString +=
",style=\"filled\"";
3034 return AttributeString;
3039 auto &Edge = *(ChildIter.getCurrent());
3040 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3041 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
3048 return Node->isRemoved();
3053 std::string IdString =
"ContextIds:";
3054 if (ContextIds.
size() < 100) {
3055 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3056 std::sort(SortedIds.begin(), SortedIds.end());
3057 for (
auto Id : SortedIds)
3058 IdString += (
" " +
Twine(Id)).str();
3060 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
3065 static std::string getColor(
uint8_t AllocTypes) {
3074 return "mediumorchid1";
3078 static std::string getNodeId(NodeRef
Node) {
3079 std::stringstream SStream;
3080 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
3081 std::string
Result = SStream.str();
3086template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3087void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3088 std::string Label)
const {
3093template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3094typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3095CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3096 const std::shared_ptr<ContextEdge> &Edge,
3098 ContextNode *
Node = Edge->Callee;
3100 ContextNode *Clone =
3101 createNewNode(
Node->IsAllocation, NodeToCallingFunc[
Node],
Node->Call);
3102 Node->addClone(Clone);
3103 Clone->MatchingCalls =
Node->MatchingCalls;
3104 moveEdgeToExistingCalleeClone(Edge, Clone,
true,
3109template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3110void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3111 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
3112 ContextNode *NewCallee,
bool NewClone,
3116 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
3118 ContextNode *OldCallee = Edge->Callee;
3122 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
3126 if (ContextIdsToMove.
empty())
3127 ContextIdsToMove = Edge->getContextIds();
3131 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3134 NewCallee->AllocTypes |= Edge->AllocTypes;
3136 if (ExistingEdgeToNewCallee) {
3139 ExistingEdgeToNewCallee->getContextIds().
insert(ContextIdsToMove.
begin(),
3140 ContextIdsToMove.
end());
3141 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
3142 assert(Edge->ContextIds == ContextIdsToMove);
3143 removeEdgeFromGraph(Edge.get());
3146 Edge->Callee = NewCallee;
3147 NewCallee->CallerEdges.push_back(Edge);
3149 OldCallee->eraseCallerEdge(Edge.get());
3156 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3157 if (ExistingEdgeToNewCallee) {
3160 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
3161 ContextIdsToMove.
end());
3162 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3165 auto NewEdge = std::make_shared<ContextEdge>(
3166 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3167 Edge->Caller->CalleeEdges.push_back(NewEdge);
3168 NewCallee->CallerEdges.push_back(NewEdge);
3172 NewCallee->AllocTypes |= CallerEdgeAllocType;
3174 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
3179 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3180 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3184 if (CalleeToUse == OldCallee)
3185 CalleeToUse = NewCallee;
3190 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3191 OldCalleeEdge->AllocTypes =
3192 computeAllocType(OldCalleeEdge->getContextIds());
3199 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3200 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3201 EdgeContextIdsToMove.
end());
3202 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3206 auto NewEdge = std::make_shared<ContextEdge>(
3207 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3208 EdgeContextIdsToMove);
3209 NewCallee->CalleeEdges.push_back(NewEdge);
3210 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3214 OldCallee->AllocTypes = OldCallee->computeAllocType();
3217 OldCallee->emptyContextIds());
3219 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
3220 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
3221 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3222 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
3224 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3225 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
3230template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3231void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3232 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
3233 ContextNode *NewCaller) {
3234 auto *OldCallee = Edge->Callee;
3235 auto *NewCallee = OldCallee;
3238 bool Recursive = Edge->Caller == Edge->Callee;
3240 NewCallee = NewCaller;
3242 ContextNode *OldCaller = Edge->Caller;
3243 OldCaller->eraseCalleeEdge(Edge.get());
3247 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3249 if (ExistingEdgeToNewCaller) {
3252 ExistingEdgeToNewCaller->getContextIds().insert(
3253 Edge->getContextIds().begin(), Edge->getContextIds().end());
3254 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
3255 Edge->ContextIds.clear();
3257 OldCallee->eraseCallerEdge(Edge.get());
3260 Edge->Caller = NewCaller;
3261 NewCaller->CalleeEdges.push_back(Edge);
3263 assert(NewCallee == NewCaller);
3266 Edge->Callee = NewCallee;
3267 NewCallee->CallerEdges.push_back(Edge);
3268 OldCallee->eraseCallerEdge(Edge.get());
3274 NewCaller->AllocTypes |= Edge->AllocTypes;
3281 bool IsNewNode = NewCaller->CallerEdges.empty();
3290 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3291 auto OldCallerCaller = OldCallerEdge->Caller;
3295 OldCallerEdge->getContextIds(), Edge->getContextIds());
3296 if (OldCaller == OldCallerCaller) {
3297 OldCallerCaller = NewCaller;
3303 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3304 OldCallerEdge->AllocTypes =
3305 computeAllocType(OldCallerEdge->getContextIds());
3310 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3314 if (ExistingCallerEdge) {
3315 ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3316 EdgeContextIdsToMove.
end());
3317 ExistingCallerEdge->AllocTypes |=
3318 computeAllocType(EdgeContextIdsToMove);
3321 auto NewEdge = std::make_shared<ContextEdge>(
3322 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3323 EdgeContextIdsToMove);
3324 NewCaller->CallerEdges.push_back(NewEdge);
3325 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3330 OldCaller->AllocTypes = OldCaller->computeAllocType();
3333 OldCaller->emptyContextIds());
3335 checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller,
false);
3336 checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller,
false);
3337 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3338 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
3340 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3341 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
3346template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3347void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3348 recursivelyRemoveNoneTypeCalleeEdges(
3354 removeNoneTypeCalleeEdges(
Node);
3356 for (
auto *Clone :
Node->Clones)
3357 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3361 auto CallerEdges =
Node->CallerEdges;
3362 for (
auto &Edge : CallerEdges) {
3364 if (Edge->isRemoved()) {
3368 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
3372template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3373void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3375 for (
auto &Entry : AllocationCallToContextNodeMap) {
3377 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3380 for (
auto &Entry : AllocationCallToContextNodeMap)
3381 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3394template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3395void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3399 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
3407 if (!
Node->hasCall())
3426 auto CallerEdges =
Node->CallerEdges;
3427 for (
auto &Edge : CallerEdges) {
3429 if (Edge->isRemoved()) {
3434 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
3435 identifyClones(Edge->Caller, Visited, AllocContextIds);
3458 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3461 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
3462 [&](
const std::shared_ptr<ContextEdge> &
A,
3463 const std::shared_ptr<ContextEdge> &
B) {
3466 if (A->ContextIds.empty())
3472 if (B->ContextIds.empty())
3475 if (A->AllocTypes == B->AllocTypes)
3478 return *A->ContextIds.begin() < *B->ContextIds.begin();
3479 return AllocTypeCloningPriority[A->AllocTypes] <
3480 AllocTypeCloningPriority[B->AllocTypes];
3490 for (
auto &CE :
Node->CallerEdges) {
3493 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3494 for (
auto Id :
CE->getContextIds())
3495 if (!AllCallerContextIds.
insert(Id).second)
3496 RecursiveContextIds.
insert(Id);
3506 auto CallerEdges =
Node->CallerEdges;
3507 for (
auto &CallerEdge : CallerEdges) {
3515 if (!CallerEdge->Caller->hasCall())
3520 auto CallerEdgeContextsForAlloc =
3522 if (!RecursiveContextIds.
empty())
3523 CallerEdgeContextsForAlloc =
3525 if (CallerEdgeContextsForAlloc.empty())
3528 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3532 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3533 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3534 for (
auto &CalleeEdge :
Node->CalleeEdges)
3535 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3536 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3551 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
3552 allocTypeToUse(
Node->AllocTypes) &&
3553 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3554 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges))
3559 ContextNode *Clone =
nullptr;
3560 for (
auto *CurClone :
Node->Clones) {
3561 if (allocTypeToUse(CurClone->AllocTypes) !=
3562 allocTypeToUse(CallerAllocTypeForAlloc))
3569 assert(!BothSingleAlloc ||
3570 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3576 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3577 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3585 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
3586 CallerEdgeContextsForAlloc);
3588 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
3601 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
3604void ModuleCallsiteContextGraph::updateAllocationCall(
3608 "memprof", AllocTypeString);
3609 cast<CallBase>(
Call.call())->addFnAttr(
A);
3610 OREGetter(
Call.call()->getFunction())
3612 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3614 <<
" marked with memprof allocation attribute "
3615 <<
ore::NV(
"Attribute", AllocTypeString));
3618void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
3620 auto *AI = cast<AllocInfo *>(
Call.call());
3622 assert(AI->Versions.size() >
Call.cloneNo());
3627ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3628 const auto *CB = cast<CallBase>(
Call.call());
3629 if (!CB->getAttributes().hasFnAttr(
"memprof"))
3631 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
3637IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3638 const auto *AI = cast<AllocInfo *>(
Call.call());
3639 assert(AI->Versions.size() >
Call.cloneNo());
3643void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3644 FuncInfo CalleeFunc) {
3645 if (CalleeFunc.cloneNo() > 0)
3646 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
3647 OREGetter(CallerCall.call()->getFunction())
3649 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
3650 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
3651 <<
" assigned to call function clone "
3652 <<
ore::NV(
"Callee", CalleeFunc.func()));
3655void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3656 FuncInfo CalleeFunc) {
3657 auto *CI = cast<CallsiteInfo *>(CallerCall.call());
3659 "Caller cannot be an allocation which should not have profiled calls");
3660 assert(CI->Clones.size() > CallerCall.cloneNo());
3661 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
3664CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
3666ModuleCallsiteContextGraph::cloneFunctionForCallsite(
3667 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3668 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3674 NewFunc->setName(
Name);
3675 for (
auto &Inst : CallsWithMetadataInFunc) {
3677 assert(Inst.cloneNo() == 0);
3678 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
3680 OREGetter(
Func.func())
3682 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
3683 return {NewFunc, CloneNo};
3687 IndexCall>::FuncInfo
3688IndexCallsiteContextGraph::cloneFunctionForCallsite(
3689 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3690 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3695 assert(CloneNo == (isa<AllocInfo *>(
Call.call())
3696 ? cast<AllocInfo *>(
Call.call())->Versions.size()
3697 : cast<CallsiteInfo *>(
Call.call())->Clones.size()));
3704 for (
auto &Inst : CallsWithMetadataInFunc) {
3706 assert(Inst.cloneNo() == 0);
3707 if (
auto *AI = dyn_cast<AllocInfo *>(Inst.call())) {
3708 assert(AI->Versions.size() == CloneNo);
3711 AI->Versions.push_back(0);
3713 auto *CI = cast<CallsiteInfo *>(Inst.call());
3714 assert(CI && CI->Clones.size() == CloneNo);
3717 CI->Clones.push_back(0);
3719 CallMap[Inst] = {Inst.call(), CloneNo};
3721 return {
Func.func(), CloneNo};
3755template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3756bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3757 bool Changed =
false;
3765 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
3766 const FuncInfo &CalleeFunc) {
3768 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
3773 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
3774 FuncInfo OrigFunc(Func);
3778 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
3779 for (
auto &Call : CallsWithMetadata) {
3780 ContextNode *
Node = getNodeForInst(Call);
3787 "Not having a call should have prevented cloning");
3791 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
3795 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
3797 ContextNode *CallsiteClone,
3800 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
3802 assert(FuncClonesToCallMap.count(FuncClone));
3803 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
3805 if (CallMap.count(Call))
3806 CallClone = CallMap[
Call];
3807 CallsiteClone->setCall(CallClone);
3809 for (
auto &MatchingCall :
Node->MatchingCalls) {
3811 if (CallMap.count(MatchingCall))
3812 CallClone = CallMap[MatchingCall];
3814 MatchingCall = CallClone;
3821 std::deque<ContextNode *> ClonesWorklist;
3823 if (!
Node->emptyContextIds())
3824 ClonesWorklist.push_back(
Node);
3825 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
3826 Node->Clones.end());
3831 unsigned NodeCloneCount = 0;
3832 while (!ClonesWorklist.empty()) {
3833 ContextNode *Clone = ClonesWorklist.front();
3834 ClonesWorklist.pop_front();
3837 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3843 if (FuncClonesToCallMap.size() < NodeCloneCount) {
3845 if (NodeCloneCount == 1) {
3850 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3851 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3855 FuncClonesToCallMap[OrigFunc] = {};
3856 AssignCallsiteCloneToFuncClone(
3857 OrigFunc, Call, Clone,
3858 AllocationCallToContextNodeMap.count(Call));
3859 for (
auto &CE : Clone->CallerEdges) {
3861 if (!
CE->Caller->hasCall())
3863 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
3873 FuncInfo PreviousAssignedFuncClone;
3875 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3876 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3878 bool CallerAssignedToCloneOfFunc =
false;
3879 if (EI != Clone->CallerEdges.end()) {
3880 const std::shared_ptr<ContextEdge> &Edge = *EI;
3881 PreviousAssignedFuncClone =
3882 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3883 CallerAssignedToCloneOfFunc =
true;
3888 std::map<CallInfo, CallInfo> NewCallMap;
3889 unsigned CloneNo = FuncClonesToCallMap.size();
3890 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
3891 "should already exist in the map");
3892 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3893 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3894 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3895 FunctionClonesAnalysis++;
3901 if (!CallerAssignedToCloneOfFunc) {
3902 AssignCallsiteCloneToFuncClone(
3903 NewFuncClone, Call, Clone,
3904 AllocationCallToContextNodeMap.count(Call));
3905 for (
auto &CE : Clone->CallerEdges) {
3907 if (!
CE->Caller->hasCall())
3909 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3921 auto CallerEdges = Clone->CallerEdges;
3922 for (
auto CE : CallerEdges) {
3924 if (
CE->isRemoved()) {
3930 if (!
CE->Caller->hasCall())
3933 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3937 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3938 PreviousAssignedFuncClone)
3941 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3954 auto CalleeEdges =
CE->Caller->CalleeEdges;
3955 for (
auto CalleeEdge : CalleeEdges) {
3958 if (CalleeEdge->isRemoved()) {
3963 ContextNode *
Callee = CalleeEdge->Callee;
3967 if (Callee == Clone || !
Callee->hasCall())
3969 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3970 removeNoneTypeCalleeEdges(NewClone);
3973 removeNoneTypeCalleeEdges(Callee);
3978 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3979 RecordCalleeFuncOfCallsite(
3980 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
3989 OrigCall.setCloneNo(0);
3990 std::map<CallInfo, CallInfo> &CallMap =
3991 FuncClonesToCallMap[NewFuncClone];
3992 assert(CallMap.count(OrigCall));
3993 CallInfo NewCall(CallMap[OrigCall]);
3995 NewClone->setCall(NewCall);
3997 for (
auto &MatchingCall : NewClone->MatchingCalls) {
3998 CallInfo OrigMatchingCall(MatchingCall);
3999 OrigMatchingCall.setCloneNo(0);
4000 assert(CallMap.count(OrigMatchingCall));
4001 CallInfo NewCall(CallMap[OrigMatchingCall]);
4004 MatchingCall = NewCall;
4027 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
4028 FuncInfo FuncCloneAssignedToCurCallsiteClone;
4032 auto CloneCallerEdges = Clone->CallerEdges;
4033 for (
auto &Edge : CloneCallerEdges) {
4037 if (Edge->isRemoved())
4040 if (!Edge->Caller->hasCall())
4044 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
4045 FuncInfo FuncCloneCalledByCaller =
4046 CallsiteToCalleeFuncCloneMap[Edge->Caller];
4056 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
4057 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
4065 (FuncCloneAssignedToCurCallsiteClone &&
4066 FuncCloneAssignedToCurCallsiteClone !=
4067 FuncCloneCalledByCaller)) {
4082 if (FuncCloneToNewCallsiteCloneMap.count(
4083 FuncCloneCalledByCaller)) {
4084 ContextNode *NewClone =
4085 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4086 moveEdgeToExistingCalleeClone(Edge, NewClone);
4088 removeNoneTypeCalleeEdges(NewClone);
4091 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge);
4092 removeNoneTypeCalleeEdges(NewClone);
4093 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4096 ClonesWorklist.push_back(NewClone);
4101 removeNoneTypeCalleeEdges(Clone);
4109 if (!FuncCloneAssignedToCurCallsiteClone) {
4110 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4112 AssignCallsiteCloneToFuncClone(
4113 FuncCloneCalledByCaller, Call, Clone,
4114 AllocationCallToContextNodeMap.count(Call));
4118 assert(FuncCloneAssignedToCurCallsiteClone ==
4119 FuncCloneCalledByCaller);
4128 if (!FuncCloneAssignedToCurCallsiteClone) {
4133 for (
auto &CF : FuncClonesToCallMap) {
4134 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
4135 FuncCloneAssignedToCurCallsiteClone = CF.first;
4139 assert(FuncCloneAssignedToCurCallsiteClone);
4141 AssignCallsiteCloneToFuncClone(
4142 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4143 AllocationCallToContextNodeMap.count(Call));
4145 assert(FuncCloneToCurNodeCloneMap
4146 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4148 RecordCalleeFuncOfCallsite(Edge->Caller,
4149 FuncCloneAssignedToCurCallsiteClone);
4154 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
4155 for (
const auto &PE :
Node->CalleeEdges)
4156 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4157 for (
const auto &CE :
Node->CallerEdges)
4158 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4159 for (
auto *Clone :
Node->Clones) {
4160 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
4161 for (
const auto &PE : Clone->CalleeEdges)
4162 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4163 for (
const auto &CE : Clone->CallerEdges)
4164 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4173 auto UpdateCalls = [&](ContextNode *
Node,
4175 auto &&UpdateCalls) {
4180 for (
auto *Clone :
Node->Clones)
4181 UpdateCalls(Clone, Visited, UpdateCalls);
4183 for (
auto &Edge :
Node->CallerEdges)
4184 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
4188 if (!
Node->hasCall() ||
Node->emptyContextIds())
4191 if (
Node->IsAllocation) {
4192 auto AT = allocTypeToUse(
Node->AllocTypes);
4198 !ContextIdToContextSizeInfos.empty()) {
4201 for (
auto Id :
Node->getContextIds()) {
4202 auto TypeI = ContextIdToAllocationType.find(Id);
4203 assert(TypeI != ContextIdToAllocationType.end());
4204 auto CSI = ContextIdToContextSizeInfos.find(Id);
4205 if (CSI != ContextIdToContextSizeInfos.end()) {
4206 for (
auto &Info : CSI->second) {
4209 TotalCold +=
Info.TotalSize;
4216 updateAllocationCall(
Node->Call, AT);
4221 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
4224 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
4225 updateCall(
Node->Call, CalleeFunc);
4227 for (
auto &Call :
Node->MatchingCalls)
4228 updateCall(Call, CalleeFunc);
4237 for (
auto &Entry : AllocationCallToContextNodeMap)
4238 UpdateCalls(
Entry.second, Visited, UpdateCalls);
4252 FunctionsClonedThinBackend++;
4253 for (
unsigned I = 1;
I < NumClones;
I++) {
4254 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
4256 FunctionClonesThinBackend++;
4259 for (
auto &BB : *NewF) {
4260 for (
auto &Inst : BB) {
4261 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
4262 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
4266 auto *PrevF = M.getFunction(
Name);
4270 assert(PrevF->isDeclaration());
4271 NewF->takeName(PrevF);
4272 PrevF->replaceAllUsesWith(NewF);
4273 PrevF->eraseFromParent();
4275 NewF->setName(
Name);
4277 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
4280 if (!FuncToAliasMap.count(&
F))
4282 for (
auto *
A : FuncToAliasMap[&
F]) {
4284 auto *PrevA = M.getNamedAlias(
Name);
4286 A->getType()->getPointerAddressSpace(),
4287 A->getLinkage(),
Name, NewF);
4288 NewA->copyAttributesFrom(
A);
4292 assert(PrevA->isDeclaration());
4293 NewA->takeName(PrevA);
4294 PrevA->replaceAllUsesWith(NewA);
4295 PrevA->eraseFromParent();
4306 const Function *CallingFunc =
nullptr) {
4324 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
4330 if (!SrcFileMD &&
F.isDeclaration()) {
4334 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
4339 assert(SrcFileMD || OrigName ==
F.getName());
4341 StringRef SrcFile = M.getSourceFileName();
4343 SrcFile = dyn_cast<MDString>(SrcFileMD->getOperand(0))->getString();
4352 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
4353 F.getName().contains(
'.')) {
4354 OrigName =
F.getName().rsplit(
'.').first;
4362 assert(TheFnVI ||
F.isDeclaration());
4366bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
4368 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
4369 Symtab = std::make_unique<InstrProfSymtab>();
4380 if (
Error E = Symtab->create(M,
true,
false)) {
4381 std::string SymtabFailure =
toString(std::move(
E));
4382 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
4388bool MemProfContextDisambiguation::applyImport(
Module &M) {
4390 bool Changed =
false;
4395 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
4397 for (
auto &
A :
M.aliases()) {
4398 auto *Aliasee =
A.getAliaseeObject();
4399 if (
auto *
F = dyn_cast<Function>(Aliasee))
4400 FuncToAliasMap[
F].insert(&
A);
4403 if (!initializeIndirectCallPromotionInfo(M))
4413 bool ClonesCreated =
false;
4414 unsigned NumClonesCreated = 0;
4415 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
4425 if (ClonesCreated) {
4426 assert(NumClonesCreated == NumClones);
4433 ClonesCreated =
true;
4434 NumClonesCreated = NumClones;
4440 CloneFuncIfNeeded(
StackNode.Clones.size());
4447 auto CalleeOrigName = CalledFunction->getName();
4448 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
4453 auto NewF =
M.getOrInsertFunction(
4455 CalledFunction->getFunctionType());
4461 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4464 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4466 <<
" assigned to call function clone "
4467 <<
ore::NV(
"Callee", NewF.getCallee()));
4485 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
4487 "enable-import-metadata is needed to emit thinlto_src_module");
4489 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
4491 if (GVS->modulePath() == SrcModule) {
4492 GVSummary = GVS.get();
4496 assert(GVSummary && GVSummary->modulePath() == SrcModule);
4501 if (isa<AliasSummary>(GVSummary))
4504 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
4506 if (
FS->allocs().empty() &&
FS->callsites().empty())
4509 auto SI =
FS->callsites().begin();
4510 auto AI =
FS->allocs().begin();
4518 for (
auto CallsiteIt =
FS->callsites().rbegin();
4519 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
4520 auto &Callsite = *CallsiteIt;
4524 if (!Callsite.StackIdIndices.empty())
4526 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
4535 for (
auto &BB :
F) {
4536 for (
auto &
I : BB) {
4537 auto *CB = dyn_cast<CallBase>(&
I);
4542 auto *CalledValue = CB->getCalledOperand();
4543 auto *CalledFunction = CB->getCalledFunction();
4544 if (CalledValue && !CalledFunction) {
4545 CalledValue = CalledValue->stripPointerCasts();
4547 CalledFunction = dyn_cast<Function>(CalledValue);
4551 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
4552 assert(!CalledFunction &&
4553 "Expected null called function in callsite for alias");
4554 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
4558 I.getMetadata(LLVMContext::MD_callsite));
4559 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
4563 if (CB->getAttributes().hasFnAttr(
"memprof")) {
4565 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4566 ? AllocTypeColdThinBackend++
4567 : AllocTypeNotColdThinBackend++;
4568 OrigAllocsThinBackend++;
4569 AllocVersionsThinBackend++;
4570 if (!MaxAllocVersionsThinBackend)
4571 MaxAllocVersionsThinBackend = 1;
4578 auto &AllocNode = *(AI++);
4582 auto MIBIter = AllocNode.MIBs.begin();
4583 for (
auto &MDOp : MemProfMD->operands()) {
4584 assert(MIBIter != AllocNode.MIBs.end());
4586 MIBIter->StackIdIndices.begin();
4587 auto *MIBMD = cast<const MDNode>(MDOp);
4591 auto ContextIterBegin =
4595 (ContextIterBegin != StackContext.
end() &&
4596 *ContextIterBegin == 0)
4599 for (
auto ContextIter = ContextIterBegin;
4600 ContextIter != StackContext.
end(); ++ContextIter) {
4604 if (LastStackContextId == *ContextIter)
4606 LastStackContextId = *ContextIter;
4607 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
4616 CloneFuncIfNeeded(AllocNode.Versions.size());
4618 OrigAllocsThinBackend++;
4619 AllocVersionsThinBackend += AllocNode.Versions.size();
4620 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
4621 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
4631 if (AllocNode.Versions.size() == 1 &&
4637 UnclonableAllocsThinBackend++;
4643 return Type == ((uint8_t)AllocationType::NotCold |
4644 (uint8_t)AllocationType::Cold);
4648 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
4654 : AllocTypeNotColdThinBackend++;
4666 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4669 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
4671 <<
" marked with memprof allocation attribute "
4672 <<
ore::NV(
"Attribute", AllocTypeString));
4674 }
else if (!CallsiteContext.empty()) {
4675 if (!CalledFunction) {
4678 auto *CI = dyn_cast<CallInst>(CB);
4679 assert(!CI || !CI->isInlineAsm());
4682 assert(CalledValue && !isa<Constant>(CalledValue));
4689 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
4695 CloneFuncIfNeeded(NumClones);
4700 assert(SI !=
FS->callsites().end());
4706 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
4707 for (
auto StackId : CallsiteContext) {
4715 CloneCallsite(
StackNode, CB, CalledFunction);
4717 }
else if (CB->isTailCall() && CalledFunction) {
4722 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
4723 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
4724 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
4725 CloneCallsite(Callsite->second, CB, CalledFunction);
4732 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
4742 for (
auto &BB :
F) {
4743 for (
auto &
I : BB) {
4744 if (!isa<CallBase>(
I))
4746 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
4747 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
4755unsigned MemProfContextDisambiguation::recordICPInfo(
4762 auto CandidateProfileData =
4763 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
4765 if (CandidateProfileData.empty())
4771 bool ICPNeeded =
false;
4772 unsigned NumClones = 0;
4773 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
4774 for (
const auto &Candidate : CandidateProfileData) {
4776 auto CalleeValueInfo =
4781 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
4788 [](
unsigned CloneNo) { return CloneNo != 0; });
4798 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
4799 TotalCount, CallsiteInfoStartIndex});
4803void MemProfContextDisambiguation::performICP(
4805 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
4814 for (
auto &
Info : ICallAnalysisInfo) {
4816 auto CallsiteIndex =
Info.CallsiteInfoStartIndex;
4817 auto TotalCount =
Info.TotalCount;
4818 unsigned NumPromoted = 0;
4819 unsigned NumClones = 0;
4821 for (
auto &Candidate :
Info.CandidateProfileData) {
4822 auto &
StackNode = AllCallsites[CallsiteIndex++];
4832 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
4833 if (TargetFunction ==
nullptr ||
4842 <<
"Memprof cannot promote indirect call: target with md5sum "
4843 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
4852 const char *Reason =
nullptr;
4856 <<
"Memprof cannot promote indirect call to "
4857 <<
ore::NV(
"TargetFunction", TargetFunction)
4858 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
4870 for (
unsigned J = 0; J < NumClones; J++) {
4873 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4879 TotalCount, isSamplePGO, &ORE);
4880 auto *TargetToUse = TargetFunction;
4885 cast<Function>(
M.getOrInsertFunction(
4893 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4895 <<
" promoted and assigned to call function clone "
4896 <<
ore::NV(
"Callee", TargetToUse));
4900 TotalCount -= Candidate.Count;
4906 for (
unsigned J = 0; J < NumClones; J++) {
4909 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4911 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
4914 if (TotalCount != 0)
4917 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
4922template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4923bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
4925 dbgs() <<
"CCG before cloning:\n";
4929 exportToDot(
"postbuild");
4942 dbgs() <<
"CCG after cloning:\n";
4946 exportToDot(
"cloned");
4948 bool Changed = assignFunctions();
4951 dbgs() <<
"CCG after assigning function clones:\n";
4955 exportToDot(
"clonefuncassign");
4958 printTotalSizes(
errs());
4963bool MemProfContextDisambiguation::processModule(
4970 return applyImport(M);
4983 ModuleCallsiteContextGraph CCG(M, OREGetter);
4984 return CCG.process();
4989 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
4990 if (ImportSummary) {
5000 auto ReadSummaryFile =
5002 if (!ReadSummaryFile) {
5009 if (!ImportSummaryForTestingOrErr) {
5015 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
5016 ImportSummary = ImportSummaryForTesting.get();
5025 if (!processModule(M, OREGetter))
5042 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static bool isMemProfClone(const Function &F)
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
cl::opt< unsigned > MinClonedColdBytePercent
static std::string getAllocTypeString(uint8_t AllocTypes)
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Lightweight error class with error context and mandatory checking.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
A Module instance is used to store all the information related to an LLVM module.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
const char * toString(DWARFSectionKind Kind)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
Summary of memprof callsite metadata.
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
static SimpleType getSimplifiedValue(IndexCall &Val)
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...