LLVM 21.0.0git
DependenceGraphBuilder.cpp
Go to the documentation of this file.
1//===- DependenceGraphBuilder.cpp ------------------------------------------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file implements common steps of the build algorithm for construction
9// of dependence graphs such as DDG and PDG.
10//===----------------------------------------------------------------------===//
11
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/DDG.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "dgb"
23
24STATISTIC(TotalGraphs, "Number of dependence graphs created.");
25STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
26STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
27STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
28STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
29STATISTIC(TotalConfusedEdges,
30 "Number of confused memory dependencies between two nodes.");
31STATISTIC(TotalEdgeReversals,
32 "Number of times the source and sink of dependence was reversed to "
33 "expose cycles in the graph.");
34
36
37//===--------------------------------------------------------------------===//
38// AbstractDependenceGraphBuilder implementation
39//===--------------------------------------------------------------------===//
40
41template <class G>
43 // The BBList is expected to be in program order.
44 size_t NextOrdinal = 1;
45 for (auto *BB : BBList)
46 for (auto &I : *BB)
47 InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
48}
49
50template <class G>
52 ++TotalGraphs;
53 assert(IMap.empty() && "Expected empty instruction map at start");
54 for (BasicBlock *BB : BBList)
55 for (Instruction &I : *BB) {
56 auto &NewNode = createFineGrainedNode(I);
57 IMap.insert(std::make_pair(&I, &NewNode));
58 NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
59 ++TotalFineGrainedNodes;
60 }
61}
62
63template <class G>
65 // Create a root node that connects to every connected component of the graph.
66 // This is done to allow graph iterators to visit all the disjoint components
67 // of the graph, in a single walk.
68 //
69 // This algorithm works by going through each node of the graph and for each
70 // node N, do a DFS starting from N. A rooted edge is established between the
71 // root node and N (if N is not yet visited). All the nodes reachable from N
72 // are marked as visited and are skipped in the DFS of subsequent nodes.
73 //
74 // Note: This algorithm tries to limit the number of edges out of the root
75 // node to some extent, but there may be redundant edges created depending on
76 // the iteration order. For example for a graph {A -> B}, an edge from the
77 // root node is added to both nodes if B is visited before A. While it does
78 // not result in minimal number of edges, this approach saves compile-time
79 // while keeping the number of edges in check.
80 auto &RootNode = createRootNode();
82 for (auto *N : Graph) {
83 if (*N == RootNode)
84 continue;
85 for (auto I : depth_first_ext(N, Visited))
86 if (I == N)
87 createRootedEdge(RootNode, *N);
88 }
89}
92 if (!shouldCreatePiBlocks())
93 return;
94
95 LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
96
97 // The overall algorithm is as follows:
98 // 1. Identify SCCs and for each SCC create a pi-block node containing all
99 // the nodes in that SCC.
100 // 2. Identify incoming edges incident to the nodes inside of the SCC and
101 // reconnect them to the pi-block node.
102 // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
103 // outside of it and reconnect them so that the edges are coming out of the
104 // SCC node instead.
105
106 // Adding nodes as we iterate through the SCCs cause the SCC
107 // iterators to get invalidated. To prevent this invalidation, we first
108 // collect a list of nodes that are part of an SCC, and then iterate over
109 // those lists to create the pi-block nodes. Each element of the list is a
110 // list of nodes in an SCC. Note: trivial SCCs containing a single node are
111 // ignored.
113 for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
114 if (SCC.size() > 1)
115 ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
116 }
117
118 for (NodeListType &NL : ListOfSCCs) {
119 LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
120 << " nodes in it.\n");
121
122 // SCC iterator may put the nodes in an order that's different from the
123 // program order. To preserve original program order, we sort the list of
124 // nodes based on ordinal numbers computed earlier.
125 llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
126 return getOrdinal(*LHS) < getOrdinal(*RHS);
127 });
128
129 NodeType &PiNode = createPiBlock(NL);
130 ++TotalPiBlockNodes;
131
132 // Build a set to speed up the lookup for edges whose targets
133 // are inside the SCC.
134 SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
135
136 // We have the set of nodes in the SCC. We go through the set of nodes
137 // that are outside of the SCC and look for edges that cross the two sets.
138 for (NodeType *N : Graph) {
139
140 // Skip the SCC node and all the nodes inside of it.
141 if (*N == PiNode || NodesInSCC.count(N))
142 continue;
143
144 enum Direction {
145 Incoming, // Incoming edges to the SCC
146 Outgoing, // Edges going ot of the SCC
147 DirectionCount // To make the enum usable as an array index.
148 };
149
150 // Use these flags to help us avoid creating redundant edges. If there
151 // are more than one edges from an outside node to inside nodes, we only
152 // keep one edge from that node to the pi-block node. Similarly, if
153 // there are more than one edges from inside nodes to an outside node,
154 // we only keep one edge from the pi-block node to the outside node.
155 // There is a flag defined for each direction (incoming vs outgoing) and
156 // for each type of edge supported, using a two-dimensional boolean
157 // array.
158 using EdgeKind = typename EdgeType::EdgeKind;
159 EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false,
160 false};
161
162 auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
163 const EdgeKind K) {
164 switch (K) {
165 case EdgeKind::RegisterDefUse:
166 createDefUseEdge(Src, Dst);
167 break;
168 case EdgeKind::MemoryDependence:
169 createMemoryEdge(Src, Dst);
170 break;
171 case EdgeKind::Rooted:
172 createRootedEdge(Src, Dst);
173 break;
174 default:
175 llvm_unreachable("Unsupported type of edge.");
176 }
177 };
178
179 auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
180 const Direction Dir) {
181 if (!Src->hasEdgeTo(*Dst))
182 return;
184 dbgs() << "reconnecting("
185 << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
186 << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New
187 << "\n");
188 assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
189 "Invalid direction.");
190
192 Src->findEdgesTo(*Dst, EL);
193 for (EdgeType *OldEdge : EL) {
194 EdgeKind Kind = OldEdge->getKind();
195 if (!EdgeAlreadyCreated[Dir][Kind]) {
196 if (Dir == Direction::Incoming) {
197 createEdgeOfKind(*Src, *New, Kind);
198 LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
199 } else if (Dir == Direction::Outgoing) {
200 createEdgeOfKind(*New, *Dst, Kind);
201 LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
202 }
203 EdgeAlreadyCreated[Dir][Kind] = true;
204 }
205 Src->removeEdge(*OldEdge);
206 destroyEdge(*OldEdge);
207 LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
208 }
209 };
210
211 for (NodeType *SCCNode : NL) {
212 // Process incoming edges incident to the pi-block node.
213 reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
214
215 // Process edges that are coming out of the pi-block node.
216 reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
217 }
218 }
219 }
220
221 // Ordinal maps are no longer needed.
222 InstOrdinalMap.clear();
223 NodeOrdinalMap.clear();
224
225 LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
226}
227
229 for (NodeType *N : Graph) {
230 InstructionListType SrcIList;
231 N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
232
233 // Use a set to mark the targets that we link to N, so we don't add
234 // duplicate def-use edges when more than one instruction in a target node
235 // use results of instructions that are contained in N.
236 SmallPtrSet<NodeType *, 4> VisitedTargets;
237
238 for (Instruction *II : SrcIList) {
239 for (User *U : II->users()) {
240 Instruction *UI = dyn_cast<Instruction>(U);
241 if (!UI)
242 continue;
243 NodeType *DstNode = IMap.lookup(UI);
244
245 // In the case of loops, the scope of the subgraph is all the
246 // basic blocks (and instructions within them) belonging to the loop. We
247 // simply ignore all the edges coming from (or going into) instructions
248 // or basic blocks outside of this range.
249 if (!DstNode) {
251 dbgs()
252 << "skipped def-use edge since the sink" << *UI
253 << " is outside the range of instructions being considered.\n");
254 continue;
255 }
256
257 // Self dependencies are ignored because they are redundant and
258 // uninteresting.
259 if (DstNode == N) {
261 << "skipped def-use edge since the sink and the source ("
262 << N << ") are the same.\n");
263 continue;
264 }
265
266 if (VisitedTargets.insert(DstNode).second) {
267 createDefUseEdge(*N, *DstNode);
268 ++TotalDefUseEdges;
269 }
270 }
271 }
272 }
273}
274
275template <class G>
277 using DGIterator = typename G::iterator;
278 auto isMemoryAccess = [](const Instruction *I) {
279 return I->mayReadOrWriteMemory();
280 };
281 for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
282 InstructionListType SrcIList;
283 (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
284 if (SrcIList.empty())
285 continue;
286
287 for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
288 if (**SrcIt == **DstIt)
289 continue;
290 InstructionListType DstIList;
291 (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
292 if (DstIList.empty())
293 continue;
294 bool ForwardEdgeCreated = false;
295 bool BackwardEdgeCreated = false;
296 for (Instruction *ISrc : SrcIList) {
297 for (Instruction *IDst : DstIList) {
298 auto D = DI.depends(ISrc, IDst);
299 if (!D)
300 continue;
301
302 // If we have a dependence with its left-most non-'=' direction
303 // being '>' we need to reverse the direction of the edge, because
304 // the source of the dependence cannot occur after the sink. For
305 // confused dependencies, we will create edges in both directions to
306 // represent the possibility of a cycle.
307
308 auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
309 if (!ForwardEdgeCreated) {
310 createMemoryEdge(Src, Dst);
311 ++TotalMemoryEdges;
312 }
313 if (!BackwardEdgeCreated) {
314 createMemoryEdge(Dst, Src);
315 ++TotalMemoryEdges;
316 }
317 ForwardEdgeCreated = BackwardEdgeCreated = true;
318 ++TotalConfusedEdges;
319 };
320
321 auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
322 if (!ForwardEdgeCreated) {
323 createMemoryEdge(Src, Dst);
324 ++TotalMemoryEdges;
325 }
326 ForwardEdgeCreated = true;
327 };
328
329 auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
330 if (!BackwardEdgeCreated) {
331 createMemoryEdge(Dst, Src);
332 ++TotalMemoryEdges;
333 }
334 BackwardEdgeCreated = true;
335 };
336
337 if (D->isConfused())
338 createConfusedEdges(**SrcIt, **DstIt);
339 else if (D->isOrdered() && !D->isLoopIndependent()) {
340 bool ReversedEdge = false;
341 for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
342 if (D->getDirection(Level) == Dependence::DVEntry::EQ)
343 continue;
344 else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
345 createBackwardEdge(**SrcIt, **DstIt);
346 ReversedEdge = true;
347 ++TotalEdgeReversals;
348 break;
349 } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
350 break;
351 else {
352 createConfusedEdges(**SrcIt, **DstIt);
353 break;
354 }
355 }
356 if (!ReversedEdge)
357 createForwardEdge(**SrcIt, **DstIt);
358 } else
359 createForwardEdge(**SrcIt, **DstIt);
360
361 // Avoid creating duplicate edges.
362 if (ForwardEdgeCreated && BackwardEdgeCreated)
363 break;
364 }
365
366 // If we've created edges in both directions, there is no more
367 // unique edge that we can create between these two nodes, so we
368 // can exit early.
369 if (ForwardEdgeCreated && BackwardEdgeCreated)
370 break;
371 }
372 }
373 }
374}
375
377 if (!shouldSimplify())
378 return;
379 LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
380
381 // This algorithm works by first collecting a set of candidate nodes that have
382 // an out-degree of one (in terms of def-use edges), and then ignoring those
383 // whose targets have an in-degree more than one. Each node in the resulting
384 // set can then be merged with its corresponding target and put back into the
385 // worklist until no further merge candidates are available.
386 SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
387
388 // A mapping between nodes and their in-degree. To save space, this map
389 // only contains nodes that are targets of nodes in the CandidateSourceNodes.
390 DenseMap<NodeType *, unsigned> TargetInDegreeMap;
391
392 for (NodeType *N : Graph) {
393 if (N->getEdges().size() != 1)
394 continue;
395 EdgeType &Edge = N->back();
396 if (!Edge.isDefUse())
397 continue;
398 CandidateSourceNodes.insert(N);
399
400 // Insert an element into the in-degree map and initialize to zero. The
401 // count will get updated in the next step.
402 TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
403 }
404
405 LLVM_DEBUG({
406 dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
407 << "\nNode with single outgoing def-use edge:\n";
408 for (NodeType *N : CandidateSourceNodes) {
409 dbgs() << N << "\n";
410 }
411 });
412
413 for (NodeType *N : Graph) {
414 for (EdgeType *E : *N) {
415 NodeType *Tgt = &E->getTargetNode();
416 auto TgtIT = TargetInDegreeMap.find(Tgt);
417 if (TgtIT != TargetInDegreeMap.end())
418 ++(TgtIT->second);
419 }
420 }
421
422 LLVM_DEBUG({
423 dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
424 << "\nContent of in-degree map:\n";
425 for (auto &I : TargetInDegreeMap) {
426 dbgs() << I.first << " --> " << I.second << "\n";
427 }
428 });
429
430 SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
431 CandidateSourceNodes.end());
432 while (!Worklist.empty()) {
433 NodeType &Src = *Worklist.pop_back_val();
434 // As nodes get merged, we need to skip any node that has been removed from
435 // the candidate set (see below).
436 if (!CandidateSourceNodes.erase(&Src))
437 continue;
438
439 assert(Src.getEdges().size() == 1 &&
440 "Expected a single edge from the candidate src node.");
441 NodeType &Tgt = Src.back().getTargetNode();
442 assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
443 "Expected target to be in the in-degree map.");
444
445 if (TargetInDegreeMap[&Tgt] != 1)
446 continue;
447
448 if (!areNodesMergeable(Src, Tgt))
449 continue;
450
451 // Do not merge if there is also an edge from target to src (immediate
452 // cycle).
453 if (Tgt.hasEdgeTo(Src))
454 continue;
455
456 LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
457
458 mergeNodes(Src, Tgt);
459
460 // If the target node is in the candidate set itself, we need to put the
461 // src node back into the worklist again so it gives the target a chance
462 // to get merged into it. For example if we have:
463 // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
464 // then after merging (a) and (b) together, we need to put (a,b) back in
465 // the worklist so that (c) can get merged in as well resulting in
466 // {(a,b,c) -> d}
467 // We also need to remove the old target (b), from the worklist. We first
468 // remove it from the candidate set here, and skip any item from the
469 // worklist that is not in the set.
470 if (CandidateSourceNodes.erase(&Tgt)) {
471 Worklist.push_back(&Src);
472 CandidateSourceNodes.insert(&Src);
473 LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
474 }
475 }
476 LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
477}
478
479template <class G>
481
482 // If we don't create pi-blocks, then we may not have a DAG.
483 if (!shouldCreatePiBlocks())
484 return;
485
487 using NodeKind = typename NodeType::NodeKind;
488 for (NodeType *N : post_order(&Graph)) {
489 if (N->getKind() == NodeKind::PiBlock) {
490 // Put members of the pi-block right after the pi-block itself, for
491 // convenience.
492 const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
493 llvm::append_range(NodesInPO, PiBlockMembers);
494 }
495 NodesInPO.push_back(N);
496 }
497
498 size_t OldSize = Graph.Nodes.size();
499 Graph.Nodes.clear();
500 append_range(Graph.Nodes, reverse(NodesInPO));
501 if (Graph.Nodes.size() != OldSize)
502 assert(false &&
503 "Expected the number of nodes to stay the same after the sort");
504}
505
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
This file defines an array type that can be indexed using scoped enum values.
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This abstract builder class defines a set of high-level steps for creating DDG-like graphs.
void createAndConnectRootNode()
Create a root node and add edges such that each node in the graph is reachable from the root.
void computeInstructionOrdinals()
Compute ordinal numbers for each instruction and store them in a map for future look up.
void sortNodesTopologically()
Topologically sort the graph nodes.
void createFineGrainedNodes()
Create fine grained nodes.
void createMemoryDependencyEdges()
Analyze data dependencies that exist between memory loads or stores, in the graph nodes and create ed...
void simplify()
Go through all the nodes in the graph and collapse any two nodes 'a' and 'b' if all of the following ...
void createDefUseEdges()
Analyze the def-use chains and create edges from the nodes containing definitions to the nodes contai...
void createPiBlocks()
Apply graph abstraction to groups of nodes that belong to a strongly connected component of the graph...
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Encapsulate some common data and functionality needed for different variations of data dependence gra...
Definition: DDG.h:255
size_type size() const
Definition: SmallPtrSet.h:94
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
iterator end() const
Definition: SmallPtrSet.h:477
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
iterator begin() const
Definition: SmallPtrSet.h:472
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:233
iterator_range< po_iterator< T > > post_order(const T &G)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
scc_iterator< T > scc_end(const T &G)
Construct the end iterator for a deduced graph type T.
Definition: SCCIterator.h:238
#define N
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:215