LLVM 22.0.0git
GenericUniformityImpl.h
Go to the documentation of this file.
1//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This template implementation resides in a separate file so that it
10// does not get injected into every .cpp file that includes the
11// generic header.
12//
13// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
14//
15// This file should only be included by files that implement a
16// specialization of the relvant templates. Currently these are:
17// - UniformityAnalysis.cpp
18//
19// Note: The DEBUG_TYPE macro should be defined before using this
20// file so that any use of LLVM_DEBUG is associated with the
21// including file rather than this file.
22//
23//===----------------------------------------------------------------------===//
24///
25/// \file
26/// \brief Implementation of uniformity analysis.
27///
28/// The algorithm is a fixed point iteration that starts with the assumption
29/// that all control flow and all values are uniform. Starting from sources of
30/// divergence (whose discovery must be implemented by a CFG- or even
31/// target-specific derived class), divergence of values is propagated from
32/// definition to uses in a straight-forward way. The main complexity lies in
33/// the propagation of the impact of divergent control flow on the divergence of
34/// values (sync dependencies).
35///
36/// NOTE: In general, no interface exists for a transform to update
37/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
38/// transitive dependence, but it also does not provide an interface for
39/// updating itself. Given that, transforms should not preserve uniformity in
40/// their getAnalysisUsage() callback.
41///
42//===----------------------------------------------------------------------===//
43
44#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
45#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
46
48
49#include "llvm/ADT/DenseSet.h"
50#include "llvm/ADT/STLExtras.h"
55
56#define DEBUG_TYPE "uniformity"
57
58namespace llvm {
59
60// Forward decl from llvm/CodeGen/MachineInstr.h
61class MachineInstr;
62
63/// Construct a specially modified post-order traversal of cycles.
64///
65/// The ModifiedPO is contructed using a virtually modified CFG as follows:
66///
67/// 1. The successors of pre-entry nodes (predecessors of an cycle
68/// entry that are outside the cycle) are replaced by the
69/// successors of the successors of the header.
70/// 2. Successors of the cycle header are replaced by the exit blocks
71/// of the cycle.
72///
73/// Effectively, we produce a depth-first numbering with the following
74/// properties:
75///
76/// 1. Nodes after a cycle are numbered earlier than the cycle header.
77/// 2. The header is numbered earlier than the nodes in the cycle.
78/// 3. The numbering of the nodes within the cycle forms an interval
79/// starting with the header.
80///
81/// Effectively, the virtual modification arranges the nodes in a
82/// cycle as a DAG with the header as the sole leaf, and successors of
83/// the header as the roots. A reverse traversal of this numbering has
84/// the following invariant on the unmodified original CFG:
85///
86/// Each node is visited after all its predecessors, except if that
87/// predecessor is the cycle header.
88///
89template <typename ContextT> class ModifiedPostOrder {
90public:
91 using BlockT = typename ContextT::BlockT;
92 using FunctionT = typename ContextT::FunctionT;
93 using DominatorTreeT = typename ContextT::DominatorTreeT;
94
96 using CycleT = typename CycleInfoT::CycleT;
97 using const_iterator = typename std::vector<BlockT *>::const_iterator;
98
99 ModifiedPostOrder(const ContextT &C) : Context(C) {}
100
101 bool empty() const { return m_order.empty(); }
102 size_t size() const { return m_order.size(); }
103
104 void clear() { m_order.clear(); }
105 void compute(const CycleInfoT &CI);
106
107 unsigned count(BlockT *BB) const { return POIndex.count(BB); }
108 const BlockT *operator[](size_t idx) const { return m_order[idx]; }
109
110 void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
111 POIndex[&BB] = m_order.size();
112 m_order.push_back(&BB);
113 LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
114 << "): " << Context.print(&BB) << "\n");
116 ReducibleCycleHeaders.insert(&BB);
117 }
118
119 unsigned getIndex(const BlockT *BB) const {
120 assert(POIndex.count(BB));
121 return POIndex.lookup(BB);
122 }
123
124 bool isReducibleCycleHeader(const BlockT *BB) const {
125 return ReducibleCycleHeaders.contains(BB);
126 }
127
128private:
131 SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
132 const ContextT &Context;
133
134 void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
136
137 void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
138 const CycleInfoT &CI, const CycleT *Cycle,
140};
141
142template <typename> class DivergencePropagator;
143
144/// \class GenericSyncDependenceAnalysis
145///
146/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
147///
148/// An analysis per divergent branch that returns the set of basic
149/// blocks whose phi nodes become divergent due to divergent control.
150/// These are the blocks that are reachable by two disjoint paths from
151/// the branch, or cycle exits reachable along a path that is disjoint
152/// from a path to the cycle latch.
153
154// --- Above line is not a doxygen comment; intentionally left blank ---
155//
156// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
157//
158// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
159// control-induced divergence in phi nodes.
160//
161// -- Reference --
162// The algorithm is an extension of Section 5 of
163//
164// An abstract interpretation for SPMD divergence
165// on reducible control flow graphs.
166// Julian Rosemann, Simon Moll and Sebastian Hack
167// POPL '21
168//
169//
170// -- Sync dependence --
171// Sync dependence characterizes the control flow aspect of the
172// propagation of branch divergence. For example,
173//
174// %cond = icmp slt i32 %tid, 10
175// br i1 %cond, label %then, label %else
176// then:
177// br label %merge
178// else:
179// br label %merge
180// merge:
181// %a = phi i32 [ 0, %then ], [ 1, %else ]
182//
183// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
184// because %tid is not on its use-def chains, %a is sync dependent on %tid
185// because the branch "br i1 %cond" depends on %tid and affects which value %a
186// is assigned to.
187//
188//
189// -- Reduction to SSA construction --
190// There are two disjoint paths from A to X, if a certain variant of SSA
191// construction places a phi node in X under the following set-up scheme.
192//
193// This variant of SSA construction ignores incoming undef values.
194// That is paths from the entry without a definition do not result in
195// phi nodes.
196//
197// entry
198// / \
199// A \
200// / \ Y
201// B C /
202// \ / \ /
203// D E
204// \ /
205// F
206//
207// Assume that A contains a divergent branch. We are interested
208// in the set of all blocks where each block is reachable from A
209// via two disjoint paths. This would be the set {D, F} in this
210// case.
211// To generally reduce this query to SSA construction we introduce
212// a virtual variable x and assign to x different values in each
213// successor block of A.
214//
215// entry
216// / \
217// A \
218// / \ Y
219// x = 0 x = 1 /
220// \ / \ /
221// D E
222// \ /
223// F
224//
225// Our flavor of SSA construction for x will construct the following
226//
227// entry
228// / \
229// A \
230// / \ Y
231// x0 = 0 x1 = 1 /
232// \ / \ /
233// x2 = phi E
234// \ /
235// x3 = phi
236//
237// The blocks D and F contain phi nodes and are thus each reachable
238// by two disjoins paths from A.
239//
240// -- Remarks --
241// * In case of cycle exits we need to check for temporal divergence.
242// To this end, we check whether the definition of x differs between the
243// cycle exit and the cycle header (_after_ SSA construction).
244//
245// * In the presence of irreducible control flow, the fixed point is
246// reached only after multiple iterations. This is because labels
247// reaching the header of a cycle must be repropagated through the
248// cycle. This is true even in a reducible cycle, since the labels
249// may have been produced by a nested irreducible cycle.
250//
251// * Note that SyncDependenceAnalysis is not concerned with the points
252// of convergence in an irreducible cycle. It's only purpose is to
253// identify join blocks. The "diverged entry" criterion is
254// separately applied on join blocks to determine if an entire
255// irreducible cycle is assumed to be divergent.
256//
257// * Relevant related work:
258// A simple algorithm for global data flow analysis problems.
259// Matthew S. Hecht and Jeffrey D. Ullman.
260// SIAM Journal on Computing, 4(4):519–532, December 1975.
261//
262template <typename ContextT> class GenericSyncDependenceAnalysis {
263public:
264 using BlockT = typename ContextT::BlockT;
265 using DominatorTreeT = typename ContextT::DominatorTreeT;
266 using FunctionT = typename ContextT::FunctionT;
267 using ValueRefT = typename ContextT::ValueRefT;
268 using InstructionT = typename ContextT::InstructionT;
269
271 using CycleT = typename CycleInfoT::CycleT;
272
275
276 // * if BlockLabels[B] == C then C is the dominating definition at
277 // block B
278 // * if BlockLabels[B] == nullptr then we haven't seen B yet
279 // * if BlockLabels[B] == B then:
280 // - B is a join point of disjoint paths from X, or,
281 // - B is an immediate successor of X (initial value), or,
282 // - B is X
284
285 /// Information discovered by the sync dependence analysis for each
286 /// divergent branch.
288 // Join points of diverged paths.
290 // Divergent cycle exits
292 // Labels assigned to blocks on diverged paths.
294 };
295
297
298 GenericSyncDependenceAnalysis(const ContextT &Context,
299 const DominatorTreeT &DT, const CycleInfoT &CI);
300
301 /// \brief Computes divergent join points and cycle exits caused by branch
302 /// divergence in \p Term.
303 ///
304 /// This returns a pair of sets:
305 /// * The set of blocks which are reachable by disjoint paths from
306 /// \p Term.
307 /// * The set also contains cycle exits if there two disjoint paths:
308 /// one from \p Term to the cycle exit and another from \p Term to
309 /// the cycle header.
310 const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
311
312private:
313 static DivergenceDescriptor EmptyDivergenceDesc;
314
315 ModifiedPO CyclePO;
316
317 const DominatorTreeT &DT;
318 const CycleInfoT &CI;
319
321 CachedControlDivDescs;
322};
323
324/// \brief Analysis that identifies uniform values in a data-parallel
325/// execution.
326///
327/// This analysis propagates divergence in a data-parallel context
328/// from sources of divergence to all users. It can be instantiated
329/// for an IR that provides a suitable SSAContext.
330template <typename ContextT> class GenericUniformityAnalysisImpl {
331public:
332 using BlockT = typename ContextT::BlockT;
333 using FunctionT = typename ContextT::FunctionT;
334 using ValueRefT = typename ContextT::ValueRefT;
335 using ConstValueRefT = typename ContextT::ConstValueRefT;
336 using UseT = typename ContextT::UseT;
337 using InstructionT = typename ContextT::InstructionT;
338 using DominatorTreeT = typename ContextT::DominatorTreeT;
339
341 using CycleT = typename CycleInfoT::CycleT;
342
345 typename SyncDependenceAnalysisT::DivergenceDescriptor;
346 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
347
349 std::tuple<ConstValueRefT, InstructionT *, const CycleT *>;
350
353 : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
354 TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
355
357
358 const FunctionT &getFunction() const { return F; }
359
360 /// \brief Mark \p UniVal as a value that is always uniform.
361 void addUniformOverride(const InstructionT &Instr);
362
363 /// \brief Examine \p I for divergent outputs and add to the worklist.
364 void markDivergent(const InstructionT &I);
365
366 /// \brief Mark \p DivVal as a divergent value.
367 /// \returns Whether the tracked divergence state of \p DivVal changed.
368 bool markDivergent(ConstValueRefT DivVal);
369
370 /// \brief Mark outputs of \p Instr as divergent.
371 /// \returns Whether the tracked divergence state of any output has changed.
372 bool markDefsDivergent(const InstructionT &Instr);
373
374 /// \brief Propagate divergence to all instructions in the region.
375 /// Divergence is seeded by calls to \p markDivergent.
376 void compute();
377
378 /// \brief Whether any value was marked or analyzed to be divergent.
379 bool hasDivergence() const { return !DivergentValues.empty(); }
380
381 /// \brief Whether \p Val will always return a uniform value regardless of its
382 /// operands
383 bool isAlwaysUniform(const InstructionT &Instr) const;
384
385 bool hasDivergentDefs(const InstructionT &I) const;
386
387 bool isDivergent(const InstructionT &I) const {
388 if (I.isTerminator()) {
389 return DivergentTermBlocks.contains(I.getParent());
390 }
391 return hasDivergentDefs(I);
392 };
393
394 /// \brief Whether \p Val is divergent at its definition.
395 bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
396
397 bool isDivergentUse(const UseT &U) const;
398
399 bool hasDivergentTerminator(const BlockT &B) const {
400 return DivergentTermBlocks.contains(&B);
401 }
402
403 void print(raw_ostream &out) const;
404
406
408 const CycleT *);
409
410protected:
411 /// \brief Value/block pair representing a single phi input.
419
420 const ContextT &Context;
421 const FunctionT &F;
423 const TargetTransformInfo *TTI = nullptr;
424
425 // Detected/marked divergent values.
428
429 // Internal worklist for divergence propagation.
430 std::vector<const InstructionT *> Worklist;
431
432 /// \brief Mark \p Term as divergent and push all Instructions that become
433 /// divergent as a result on the worklist.
434 void analyzeControlDivergence(const InstructionT &Term);
435
436private:
437 const DominatorTreeT &DT;
438
439 // Recognized cycles with divergent exits.
440 SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
441
442 // Cycles assumed to be divergent.
443 //
444 // We don't use a set here because every insertion needs an explicit
445 // traversal of all existing members.
446 SmallVector<const CycleT *> AssumedDivergent;
447
448 // The SDA links divergent branches to divergent control-flow joins.
450
451 // Set of known-uniform values.
453
454 /// \brief Mark all nodes in \p JoinBlock as divergent and push them on
455 /// the worklist.
456 void taintAndPushAllDefs(const BlockT &JoinBlock);
457
458 /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
459 /// the worklist.
460 void taintAndPushPhiNodes(const BlockT &JoinBlock);
461
462 /// \brief Identify all Instructions that become divergent because \p DivExit
463 /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
464 /// divergent and push them on the worklist.
465 void propagateCycleExitDivergence(const BlockT &DivExit,
466 const CycleT &DivCycle);
467
468 /// Mark as divergent all external uses of values defined in \p DefCycle.
469 void analyzeCycleExitDivergence(const CycleT &DefCycle);
470
471 /// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
472 void propagateTemporalDivergence(const InstructionT &I,
473 const CycleT &DefCycle);
474
475 /// \brief Push all users of \p Val (in the region) to the worklist.
476 void pushUsers(const InstructionT &I);
477 void pushUsers(ConstValueRefT V);
478
479 bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
480
481 /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
482 bool isTemporalDivergent(const BlockT &ObservingBlock,
483 const InstructionT &Def) const;
484};
485
486template <typename ImplT>
488 delete Impl;
489}
490
491/// Compute divergence starting with a divergent branch.
492template <typename ContextT> class DivergencePropagator {
493public:
494 using BlockT = typename ContextT::BlockT;
495 using DominatorTreeT = typename ContextT::DominatorTreeT;
496 using FunctionT = typename ContextT::FunctionT;
497 using ValueRefT = typename ContextT::ValueRefT;
498
500 using CycleT = typename CycleInfoT::CycleT;
501
505 typename SyncDependenceAnalysisT::DivergenceDescriptor;
506 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
507
512 const ContextT &Context;
513
514 // Track blocks that receive a new label. Every time we relabel a
515 // cycle header, we another pass over the modified post-order in
516 // order to propagate the header label. The bit vector also allows
517 // us to skip labels that have not changed.
519
520 // divergent join and cycle exit descriptor.
521 std::unique_ptr<DivergenceDescriptorT> DivDesc;
523
529
531 Out << "Propagator::BlockLabels {\n";
532 for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
533 const auto *Block = CyclePOT[BlockIdx];
534 const auto *Label = BlockLabels[Block];
535 Out << Context.print(Block) << "(" << BlockIdx << ") : ";
536 if (!Label) {
537 Out << "<null>\n";
538 } else {
539 Out << Context.print(Label) << "\n";
540 }
541 }
542 Out << "}\n";
543 }
544
545 // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
546 // causes a divergent join.
547 bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
548 const auto *OldLabel = BlockLabels[&SuccBlock];
549
550 LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
551 << "\tpushed label: " << Context.print(&PushedLabel)
552 << "\n"
553 << "\told label: " << Context.print(OldLabel) << "\n");
554
555 // Early exit if there is no change in the label.
556 if (OldLabel == &PushedLabel)
557 return false;
558
559 if (OldLabel != &SuccBlock) {
560 auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
561 // Assigning a new label, mark this in FreshLabels.
562 LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
563 FreshLabels.set(SuccIdx);
564 }
565
566 // This is not a join if the succ was previously unlabeled.
567 if (!OldLabel) {
568 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
569 << "\n");
570 BlockLabels[&SuccBlock] = &PushedLabel;
571 return false;
572 }
573
574 // This is a new join. Label the join block as itself, and not as
575 // the pushed label.
576 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
577 BlockLabels[&SuccBlock] = &SuccBlock;
578
579 return true;
580 }
581
582 // visiting a virtual cycle exit edge from the cycle header --> temporal
583 // divergence on join
584 bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
585 if (!computeJoin(ExitBlock, Label))
586 return false;
587
588 // Identified a divergent cycle exit
589 DivDesc->CycleDivBlocks.insert(&ExitBlock);
590 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
591 << "\n");
592 return true;
593 }
594
595 // process \p SuccBlock with reaching definition \p Label
596 bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
597 if (!computeJoin(SuccBlock, Label))
598 return false;
599
600 // Divergent, disjoint paths join.
601 DivDesc->JoinDivBlocks.insert(&SuccBlock);
602 LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
603 << "\n");
604 return true;
605 }
606
607 std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
609
610 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
611 << Context.print(&DivTermBlock) << "\n");
612
613 int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
614
615 // Bootstrap with branch targets
616 auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
617
618 // Locate the largest ancestor cycle that is not reducible and does not
619 // contain a reducible ancestor. This is done with a lambda that is defined
620 // and invoked in the same statement.
621 const CycleT *IrreducibleAncestor = [](const CycleT *C) -> const CycleT * {
622 if (!C)
623 return nullptr;
624 if (C->isReducible())
625 return nullptr;
626 while (const CycleT *P = C->getParentCycle()) {
627 if (P->isReducible())
628 return C;
629 C = P;
630 }
631 assert(!C->getParentCycle());
632 assert(!C->isReducible());
633 return C;
634 }(DivTermCycle);
635
636 for (const auto *SuccBlock : successors(&DivTermBlock)) {
637 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
638 // If DivTerm exits the cycle immediately, computeJoin() might
639 // not reach SuccBlock with a different label. We need to
640 // check for this exit now.
641 DivDesc->CycleDivBlocks.insert(SuccBlock);
642 LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
643 << Context.print(SuccBlock) << "\n");
644 }
645 visitEdge(*SuccBlock, *SuccBlock);
646 }
647
648 // Technically propagation can continue until it reaches the last node.
649 //
650 // For efficiency, propagation can stop if FreshLabels.count()==1. But
651 // For irreducible cycles, let propagation continue until it reaches
652 // out of irreducible cycles (see code for details.)
653 while (true) {
654 auto BlockIdx = FreshLabels.find_last();
655 if (BlockIdx == -1)
656 break;
657
658 const auto *Block = CyclePOT[BlockIdx];
659 // If no irreducible cycle, stop if freshLable.count() = 1 and Block
660 // is the IPD. If it is in any irreducible cycle, continue propagation.
661 if (FreshLabels.count() == 1 &&
662 (!IrreducibleAncestor || !IrreducibleAncestor->contains(Block)))
663 break;
664
665 LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
666
667 FreshLabels.reset(BlockIdx);
668 if (BlockIdx == DivTermIdx) {
669 LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
670 continue;
671 }
672
673 LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
674 << BlockIdx << "\n");
675
676 const auto *Label = BlockLabels[Block];
677 assert(Label);
678
679 // If the current block is the header of a reducible cycle that
680 // contains the divergent branch, then the label should be
681 // propagated to the cycle exits. Such a header is the "last
682 // possible join" of any disjoint paths within this cycle. This
683 // prevents detection of spurious joins at the entries of any
684 // irreducible child cycles.
685 //
686 // This conclusion about the header is true for any choice of DFS:
687 //
688 // If some DFS has a reducible cycle C with header H, then for
689 // any other DFS, H is the header of a cycle C' that is a
690 // superset of C. For a divergent branch inside the subgraph
691 // C, any join node inside C is either H, or some node
692 // encountered without passing through H.
693 //
694 auto getReducibleParent = [&](const BlockT *Block) -> const CycleT * {
695 if (!CyclePOT.isReducibleCycleHeader(Block))
696 return nullptr;
697 const auto *BlockCycle = CI.getCycle(Block);
698 if (BlockCycle->contains(&DivTermBlock))
699 return BlockCycle;
700 return nullptr;
701 };
702
703 if (const auto *BlockCycle = getReducibleParent(Block)) {
704 SmallVector<BlockT *, 4> BlockCycleExits;
705 BlockCycle->getExitBlocks(BlockCycleExits);
706 for (auto *BlockCycleExit : BlockCycleExits)
707 visitCycleExitEdge(*BlockCycleExit, *Label);
708 } else {
709 for (const auto *SuccBlock : successors(Block))
710 visitEdge(*SuccBlock, *Label);
711 }
712 }
713
714 LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
715
716 // Check every cycle containing DivTermBlock for exit divergence.
717 // A cycle has exit divergence if the label of an exit block does
718 // not match the label of its header.
719 for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
720 Cycle = Cycle->getParentCycle()) {
721 if (Cycle->isReducible()) {
722 // The exit divergence of a reducible cycle is recorded while
723 // propagating labels.
724 continue;
725 }
727 Cycle->getExitBlocks(Exits);
728 auto *Header = Cycle->getHeader();
729 auto *HeaderLabel = BlockLabels[Header];
730 for (const auto *Exit : Exits) {
731 if (BlockLabels[Exit] != HeaderLabel) {
732 // Identified a divergent cycle exit
733 DivDesc->CycleDivBlocks.insert(Exit);
734 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
735 << "\n");
736 }
737 }
738 }
739
740 return std::move(DivDesc);
741 }
742};
743
744template <typename ContextT>
746 llvm::GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc;
747
748template <typename ContextT>
750 const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
751 : CyclePO(Context), DT(DT), CI(CI) {
752 CyclePO.compute(CI);
753}
754
755template <typename ContextT>
757 const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
758 // trivial case
759 if (succ_size(DivTermBlock) <= 1) {
760 return EmptyDivergenceDesc;
761 }
762
763 // already available in cache?
764 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
765 if (ItCached != CachedControlDivDescs.end())
766 return *ItCached->second;
767
768 // compute all join points
769 DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
770 auto DivDesc = Propagator.computeJoinPoints();
771
772 auto printBlockSet = [&](ConstBlockSet &Blocks) {
773 return Printable([&](raw_ostream &Out) {
774 Out << "[";
775 ListSeparator LS;
776 for (const auto *BB : Blocks) {
777 Out << LS << CI.getSSAContext().print(BB);
778 }
779 Out << "]\n";
780 });
781 };
782
784 dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
785 << "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
786 << " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
787 << "\n");
788 (void)printBlockSet;
789
790 auto ItInserted =
791 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
792 assert(ItInserted.second);
793 return *ItInserted.first->second;
794}
795
796template <typename ContextT>
798 const InstructionT &I) {
799 if (isAlwaysUniform(I))
800 return;
801 bool Marked = false;
802 if (I.isTerminator()) {
803 Marked = DivergentTermBlocks.insert(I.getParent()).second;
804 if (Marked) {
805 LLVM_DEBUG(dbgs() << "marked divergent term block: "
806 << Context.print(I.getParent()) << "\n");
807 }
808 } else {
809 Marked = markDefsDivergent(I);
810 }
811
812 if (Marked)
813 Worklist.push_back(&I);
814}
815
816template <typename ContextT>
818 ConstValueRefT Val) {
819 if (DivergentValues.insert(Val).second) {
820 LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
821 return true;
822 }
823 return false;
824}
825
826template <typename ContextT>
828 const InstructionT &Instr) {
829 UniformOverrides.insert(&Instr);
830}
831
832// Mark as divergent all external uses of values defined in \p DefCycle.
833//
834// A value V defined by a block B inside \p DefCycle may be used outside the
835// cycle only if the use is a PHI in some exit block, or B dominates some exit
836// block. Thus, we check uses as follows:
837//
838// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
839// - For every block B inside \p DefCycle that dominates at least one exit
840// block, check all uses outside \p DefCycle.
841//
842// FIXME: This function does not distinguish between divergent and uniform
843// exits. For each divergent exit, only the values that are live at that exit
844// need to be propagated as divergent at their use outside the cycle.
845template <typename ContextT>
846void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
847 const CycleT &DefCycle) {
849 DefCycle.getExitBlocks(Exits);
850 for (auto *Exit : Exits) {
851 for (auto &Phi : Exit->phis()) {
852 if (usesValueFromCycle(Phi, DefCycle)) {
853 markDivergent(Phi);
854 }
855 }
856 }
857
858 for (auto *BB : DefCycle.blocks()) {
859 if (!llvm::any_of(Exits,
860 [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
861 continue;
862 for (auto &II : *BB) {
863 propagateTemporalDivergence(II, DefCycle);
864 }
865 }
866}
867
868template <typename ContextT>
869void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
870 const BlockT &DivExit, const CycleT &InnerDivCycle) {
871 LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
872 << "\n");
873 auto *DivCycle = &InnerDivCycle;
874 auto *OuterDivCycle = DivCycle;
875 auto *ExitLevelCycle = CI.getCycle(&DivExit);
876 const unsigned CycleExitDepth =
877 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
878
879 // Find outer-most cycle that does not contain \p DivExit
880 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
881 LLVM_DEBUG(dbgs() << " Found exiting cycle: "
882 << Context.print(DivCycle->getHeader()) << "\n");
883 OuterDivCycle = DivCycle;
884 DivCycle = DivCycle->getParentCycle();
885 }
886 LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
887 << Context.print(OuterDivCycle->getHeader()) << "\n");
888
889 if (!DivergentExitCycles.insert(OuterDivCycle).second)
890 return;
891
892 // Exit divergence does not matter if the cycle itself is assumed to
893 // be divergent.
894 for (const auto *C : AssumedDivergent) {
895 if (C->contains(OuterDivCycle))
896 return;
897 }
898
899 analyzeCycleExitDivergence(*OuterDivCycle);
900}
901
902template <typename ContextT>
903void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
904 const BlockT &BB) {
905 LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
906 for (const auto &I : instrs(BB)) {
907 // Terminators do not produce values; they are divergent only if
908 // the condition is divergent. That is handled when the divergent
909 // condition is placed in the worklist.
910 if (I.isTerminator())
911 break;
912
913 markDivergent(I);
914 }
915}
916
917/// Mark divergent phi nodes in a join block
918template <typename ContextT>
919void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
920 const BlockT &JoinBlock) {
921 LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
922 << "\n");
923 for (const auto &Phi : JoinBlock.phis()) {
924 // FIXME: The non-undef value is not constant per se; it just happens to be
925 // uniform and may not dominate this PHI. So assuming that the same value
926 // reaches along all incoming edges may itself be undefined behaviour. This
927 // particular interpretation of the undef value was added to
928 // DivergenceAnalysis in the following review:
929 //
930 // https://reviews.llvm.org/D19013
931 if (ContextT::isConstantOrUndefValuePhi(Phi))
932 continue;
933 markDivergent(Phi);
934 }
935}
936
937/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
938///
939/// \return true iff \p Candidate was added to \p Cycles.
940template <typename CycleT>
942 CycleT *Candidate) {
943 if (llvm::any_of(Cycles,
944 [Candidate](CycleT *C) { return C->contains(Candidate); }))
945 return false;
946 Cycles.push_back(Candidate);
947 return true;
948}
949
950/// Return the outermost cycle made divergent by branch outside it.
951///
952/// If two paths that diverged outside an irreducible cycle join
953/// inside that cycle, then that whole cycle is assumed to be
954/// divergent. This does not apply if the cycle is reducible.
955template <typename CycleT, typename BlockT>
956static const CycleT *getExtDivCycle(const CycleT *Cycle,
957 const BlockT *DivTermBlock,
958 const BlockT *JoinBlock) {
959 assert(Cycle);
960 assert(Cycle->contains(JoinBlock));
961
962 if (Cycle->contains(DivTermBlock))
963 return nullptr;
964
965 const auto *OriginalCycle = Cycle;
966 const auto *Parent = Cycle->getParentCycle();
967 while (Parent && !Parent->contains(DivTermBlock)) {
968 Cycle = Parent;
969 Parent = Cycle->getParentCycle();
970 }
971
972 // If the original cycle is not the outermost cycle, then the outermost cycle
973 // is irreducible. If the outermost cycle were reducible, then external
974 // diverged paths would not reach the original inner cycle.
975 (void)OriginalCycle;
976 assert(Cycle == OriginalCycle || !Cycle->isReducible());
977
978 if (Cycle->isReducible()) {
979 assert(Cycle->getHeader() == JoinBlock);
980 return nullptr;
981 }
982
983 LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
984 return Cycle;
985}
986
987/// Return the outermost cycle made divergent by branch inside it.
988///
989/// This checks the "diverged entry" criterion defined in the
990/// docs/ConvergenceAnalysis.html.
991template <typename ContextT, typename CycleT, typename BlockT,
992 typename DominatorTreeT>
993static const CycleT *
994getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
995 const BlockT *JoinBlock, const DominatorTreeT &DT,
996 ContextT &Context) {
997 LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
998 << " for internal branch " << Context.print(DivTermBlock)
999 << "\n");
1000 if (DT.properlyDominates(DivTermBlock, JoinBlock))
1001 return nullptr;
1002
1003 // Find the smallest common cycle, if one exists.
1004 assert(Cycle && Cycle->contains(JoinBlock));
1005 while (Cycle && !Cycle->contains(DivTermBlock)) {
1006 Cycle = Cycle->getParentCycle();
1007 }
1008 if (!Cycle || Cycle->isReducible())
1009 return nullptr;
1010
1011 if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
1012 return nullptr;
1013
1014 LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
1015 << " does not dominate join\n");
1016
1017 const auto *Parent = Cycle->getParentCycle();
1018 while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1019 LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
1020 << " does not dominate join\n");
1021 Cycle = Parent;
1022 Parent = Parent->getParentCycle();
1023 }
1024
1025 LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
1026 return Cycle;
1027}
1028
1029template <typename ContextT, typename CycleT, typename BlockT,
1030 typename DominatorTreeT>
1031static const CycleT *
1032getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1033 const BlockT *JoinBlock, const DominatorTreeT &DT,
1034 ContextT &Context) {
1035 if (!Cycle)
1036 return nullptr;
1037
1038 // First try to expand Cycle to the largest that contains JoinBlock
1039 // but not DivTermBlock.
1040 const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
1041
1042 // Continue expanding to the largest cycle that contains both.
1043 const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
1044
1045 if (Int)
1046 return Int;
1047 return Ext;
1048}
1049
1050template <typename ContextT>
1051bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1052 const BlockT &ObservingBlock, const InstructionT &Def) const {
1053 const BlockT *DefBlock = Def.getParent();
1054 for (const CycleT *Cycle = CI.getCycle(DefBlock);
1055 Cycle && !Cycle->contains(&ObservingBlock);
1056 Cycle = Cycle->getParentCycle()) {
1057 if (DivergentExitCycles.contains(Cycle)) {
1058 return true;
1059 }
1060 }
1061 return false;
1062}
1063
1064template <typename ContextT>
1066 const InstructionT &Term) {
1067 const auto *DivTermBlock = Term.getParent();
1068 DivergentTermBlocks.insert(DivTermBlock);
1069 LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1070 << "\n");
1071
1072 // Don't propagate divergence from unreachable blocks.
1073 if (!DT.isReachableFromEntry(DivTermBlock))
1074 return;
1075
1076 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1078
1079 // Iterate over all blocks now reachable by a disjoint path join
1080 for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1081 const auto *Cycle = CI.getCycle(JoinBlock);
1082 LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1083 << "\n");
1084 if (const auto *Outermost = getOutermostDivergentCycle(
1085 Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1086 LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1087 DivCycles.push_back(Outermost);
1088 continue;
1089 }
1090 taintAndPushPhiNodes(*JoinBlock);
1091 }
1092
1093 // Sort by order of decreasing depth. This allows later cycles to be skipped
1094 // because they are already contained in earlier ones.
1095 llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1096 return A->getDepth() > B->getDepth();
1097 });
1098
1099 // Cycles that are assumed divergent due to the diverged entry
1100 // criterion potentially contain temporal divergence depending on
1101 // the DFS chosen. Conservatively, all values produced in such a
1102 // cycle are assumed divergent. "Cycle invariant" values may be
1103 // assumed uniform, but that requires further analysis.
1104 for (auto *C : DivCycles) {
1105 if (!insertIfNotContained(AssumedDivergent, C))
1106 continue;
1107 LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1108 for (const BlockT *BB : C->blocks()) {
1109 taintAndPushAllDefs(*BB);
1110 }
1111 }
1112
1113 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1114 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1115 for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1116 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1117 }
1118}
1119
1120template <typename ContextT>
1122 // Initialize worklist.
1123 auto DivValuesCopy = DivergentValues;
1124 for (const auto DivVal : DivValuesCopy) {
1125 assert(isDivergent(DivVal) && "Worklist invariant violated!");
1126 pushUsers(DivVal);
1127 }
1128
1129 // All values on the Worklist are divergent.
1130 // Their users may not have been updated yet.
1131 while (!Worklist.empty()) {
1132 const InstructionT *I = Worklist.back();
1133 Worklist.pop_back();
1134
1135 LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1136
1137 if (I->isTerminator()) {
1139 continue;
1140 }
1141
1142 // propagate value divergence to users
1143 assert(isDivergent(*I) && "Worklist invariant violated!");
1144 pushUsers(*I);
1145 }
1146}
1147
1148template <typename ContextT>
1154
1155template <typename ContextT>
1157 const InstructionT &Instr) const {
1158 return UniformOverrides.contains(&Instr);
1159}
1160
1161template <typename ContextT>
1163 const DominatorTreeT &DT, const CycleInfoT &CI,
1164 const TargetTransformInfo *TTI) {
1165 DA.reset(new ImplT{DT, CI, TTI});
1166}
1167
1168template <typename ContextT>
1170 bool haveDivergentArgs = false;
1171
1172 // When we print Value, LLVM IR instruction, we want to print extra new line.
1173 // In LLVM IR print function for Value does not print new line at the end.
1174 // In MIR print for MachineInstr prints new line at the end.
1175 constexpr bool IsMIR = std::is_same<InstructionT, MachineInstr>::value;
1176 std::string NewLine = IsMIR ? "" : "\n";
1177
1178 // Control flow instructions may be divergent even if their inputs are
1179 // uniform. Thus, although exceedingly rare, it is possible to have a program
1180 // with no divergent values but with divergent control structures.
1181 if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
1182 DivergentExitCycles.empty()) {
1183 OS << "ALL VALUES UNIFORM\n";
1184 return;
1185 }
1186
1187 for (const auto &entry : DivergentValues) {
1188 const BlockT *parent = Context.getDefBlock(entry);
1189 if (!parent) {
1190 if (!haveDivergentArgs) {
1191 OS << "DIVERGENT ARGUMENTS:\n";
1192 haveDivergentArgs = true;
1193 }
1194 OS << " DIVERGENT: " << Context.print(entry) << '\n';
1195 }
1196 }
1197
1198 if (!AssumedDivergent.empty()) {
1199 OS << "CYCLES ASSUMED DIVERGENT:\n";
1200 for (const CycleT *cycle : AssumedDivergent) {
1201 OS << " " << cycle->print(Context) << '\n';
1202 }
1203 }
1204
1205 if (!DivergentExitCycles.empty()) {
1206 OS << "CYCLES WITH DIVERGENT EXIT:\n";
1207 for (const CycleT *cycle : DivergentExitCycles) {
1208 OS << " " << cycle->print(Context) << '\n';
1209 }
1210 }
1211
1212 if (!TemporalDivergenceList.empty()) {
1213 OS << "\nTEMPORAL DIVERGENCE LIST:\n";
1214
1215 for (auto [Val, UseInst, Cycle] : TemporalDivergenceList) {
1216 OS << "Value :" << Context.print(Val) << NewLine
1217 << "Used by :" << Context.print(UseInst) << NewLine
1218 << "Outside cycle :" << Cycle->print(Context) << "\n\n";
1219 }
1220 }
1221
1222 for (auto &block : F) {
1223 OS << "\nBLOCK " << Context.print(&block) << '\n';
1224
1225 OS << "DEFINITIONS\n";
1227 Context.appendBlockDefs(defs, block);
1228 for (auto value : defs) {
1229 if (isDivergent(value))
1230 OS << " DIVERGENT: ";
1231 else
1232 OS << " ";
1233 OS << Context.print(value) << NewLine;
1234 }
1235
1236 OS << "TERMINATORS\n";
1238 Context.appendBlockTerms(terms, block);
1239 bool divergentTerminators = hasDivergentTerminator(block);
1240 for (auto *T : terms) {
1241 if (divergentTerminators)
1242 OS << " DIVERGENT: ";
1243 else
1244 OS << " ";
1245 OS << Context.print(T) << NewLine;
1246 }
1247
1248 OS << "END BLOCK\n";
1249 }
1250}
1251
1252template <typename ContextT>
1256 return make_range(DA->TemporalDivergenceList.begin(),
1257 DA->TemporalDivergenceList.end());
1258}
1259
1260template <typename ContextT>
1262 return DA->hasDivergence();
1263}
1264
1265template <typename ContextT>
1266const typename ContextT::FunctionT &
1268 return DA->getFunction();
1269}
1270
1271/// Whether \p V is divergent at its definition.
1272template <typename ContextT>
1274 return DA->isDivergent(V);
1275}
1276
1277template <typename ContextT>
1279 return DA->isDivergent(*I);
1280}
1281
1282template <typename ContextT>
1284 return DA->isDivergentUse(U);
1285}
1286
1287template <typename ContextT>
1289 return DA->hasDivergentTerminator(B);
1290}
1291
1292/// \brief T helper function for printing.
1293template <typename ContextT>
1295 DA->print(out);
1296}
1297
1298template <typename ContextT>
1299void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
1300 SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
1301 const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
1302 LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
1303 while (!Stack.empty()) {
1304 auto *NextBB = Stack.back();
1305 if (Finalized.count(NextBB)) {
1306 Stack.pop_back();
1307 continue;
1308 }
1309 LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
1310 << "\n");
1311 auto *NestedCycle = CI.getCycle(NextBB);
1312 if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
1313 LLVM_DEBUG(dbgs() << " found a cycle\n");
1314 while (NestedCycle->getParentCycle() != Cycle)
1315 NestedCycle = NestedCycle->getParentCycle();
1316
1317 SmallVector<BlockT *, 3> NestedExits;
1318 NestedCycle->getExitBlocks(NestedExits);
1319 bool PushedNodes = false;
1320 for (auto *NestedExitBB : NestedExits) {
1321 LLVM_DEBUG(dbgs() << " examine exit: "
1322 << CI.getSSAContext().print(NestedExitBB) << "\n");
1323 if (Cycle && !Cycle->contains(NestedExitBB))
1324 continue;
1325 if (Finalized.count(NestedExitBB))
1326 continue;
1327 PushedNodes = true;
1328 Stack.push_back(NestedExitBB);
1329 LLVM_DEBUG(dbgs() << " pushed exit: "
1330 << CI.getSSAContext().print(NestedExitBB) << "\n");
1331 }
1332 if (!PushedNodes) {
1333 // All loop exits finalized -> finish this node
1334 Stack.pop_back();
1335 computeCyclePO(CI, NestedCycle, Finalized);
1336 }
1337 continue;
1338 }
1339
1340 LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
1341 // DAG-style
1342 bool PushedNodes = false;
1343 for (auto *SuccBB : successors(NextBB)) {
1344 LLVM_DEBUG(dbgs() << " examine succ: "
1345 << CI.getSSAContext().print(SuccBB) << "\n");
1346 if (Cycle && !Cycle->contains(SuccBB))
1347 continue;
1348 if (Finalized.count(SuccBB))
1349 continue;
1350 PushedNodes = true;
1351 Stack.push_back(SuccBB);
1352 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
1353 << "\n");
1354 }
1355 if (!PushedNodes) {
1356 // Never push nodes twice
1357 LLVM_DEBUG(dbgs() << " finishing node: "
1358 << CI.getSSAContext().print(NextBB) << "\n");
1359 Stack.pop_back();
1360 Finalized.insert(NextBB);
1361 appendBlock(*NextBB);
1362 }
1363 }
1364 LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
1365}
1366
1367template <typename ContextT>
1368void ModifiedPostOrder<ContextT>::computeCyclePO(
1369 const CycleInfoT &CI, const CycleT *Cycle,
1371 LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1373 auto *CycleHeader = Cycle->getHeader();
1374
1375 LLVM_DEBUG(dbgs() << " noted header: "
1376 << CI.getSSAContext().print(CycleHeader) << "\n");
1377 assert(!Finalized.count(CycleHeader));
1378 Finalized.insert(CycleHeader);
1379
1380 // Visit the header last
1381 LLVM_DEBUG(dbgs() << " finishing header: "
1382 << CI.getSSAContext().print(CycleHeader) << "\n");
1383 appendBlock(*CycleHeader, Cycle->isReducible());
1384
1385 // Initialize with immediate successors
1386 for (auto *BB : successors(CycleHeader)) {
1387 LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
1388 << "\n");
1389 if (!Cycle->contains(BB))
1390 continue;
1391 if (BB == CycleHeader)
1392 continue;
1393 if (!Finalized.count(BB)) {
1394 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
1395 << "\n");
1396 Stack.push_back(BB);
1397 }
1398 }
1399
1400 // Compute PO inside region
1401 computeStackPO(Stack, CI, Cycle, Finalized);
1402
1403 LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1404}
1405
1406/// \brief Generically compute the modified post order.
1407template <typename ContextT>
1411 auto *F = CI.getFunction();
1412 Stack.reserve(24); // FIXME made-up number
1413 Stack.push_back(&F->front());
1414 computeStackPO(Stack, CI, nullptr, Finalized);
1415}
1416
1417} // namespace llvm
1418
1419#undef DEBUG_TYPE
1420
1421#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseSet and SmallDenseSet classes.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define T
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SparseBitVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
unify loop Fixup each natural loop to have a single exit block
Implements a dense probed hash-table based set.
Definition DenseSet.h:261
Compute divergence starting with a divergent branch.
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
typename ContextT::DominatorTreeT DominatorTreeT
bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel)
std::unique_ptr< DivergenceDescriptorT > DivDesc
void printDefs(raw_ostream &Out)
typename ContextT::FunctionT FunctionT
GenericCycleInfo< ContextT > CycleInfoT
ModifiedPostOrder< ContextT > ModifiedPO
std::unique_ptr< DivergenceDescriptorT > computeJoinPoints()
bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label)
typename ContextT::ValueRefT ValueRefT
typename ContextT::BlockT BlockT
typename CycleInfoT::CycleT CycleT
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
bool visitEdge(const BlockT &SuccBlock, const BlockT &Label)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
Cycle information for a function.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Locate join blocks for disjoint paths starting at a divergent branch.
GenericSyncDependenceAnalysis(const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
ModifiedPostOrder< ContextT > ModifiedPO
DivergencePropagator< ContextT > DivergencePropagatorT
DenseMap< const BlockT *, const BlockT * > BlockLabelMap
SmallPtrSet< const BlockT *, 4 > ConstBlockSet
typename ContextT::DominatorTreeT DominatorTreeT
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::FunctionT FunctionT
typename ContextT::InstructionT InstructionT
typename ContextT::ValueRefT ValueRefT
const DivergenceDescriptor & getJoinBlocks(const BlockT *DivTermBlock)
Computes divergent join points and cycle exits caused by branch divergence in Term.
SmallVector< TemporalDivergenceTuple, 8 > TemporalDivergenceList
bool isAlwaysUniform(const InstructionT &Instr) const
Whether Val will always return a uniform value regardless of its operands.
typename ContextT::ValueRefT ValueRefT
void analyzeControlDivergence(const InstructionT &Term)
Mark Term as divergent and push all Instructions that become divergent as a result on the worklist.
bool isDivergent(ConstValueRefT V) const
Whether Val is divergent at its definition.
bool isDivergentUse(const UseT &U) const
bool hasDivergentDefs(const InstructionT &I) const
typename ContextT::InstructionT InstructionT
DenseSet< ConstValueRefT > DivergentValues
typename ContextT::ConstValueRefT ConstValueRefT
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
bool hasDivergence() const
Whether any value was marked or analyzed to be divergent.
typename ContextT::DominatorTreeT DominatorTreeT
void compute()
Propagate divergence to all instructions in the region.
bool hasDivergentTerminator(const BlockT &B) const
GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI, const TargetTransformInfo *TTI)
bool markDefsDivergent(const InstructionT &Instr)
Mark outputs of Instr as divergent.
typename ContextT::FunctionT FunctionT
bool isDivergent(const InstructionT &I) const
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
std::vector< const InstructionT * > Worklist
void markDivergent(const InstructionT &I)
Examine I for divergent outputs and add to the worklist.
SmallPtrSet< const BlockT *, 32 > DivergentTermBlocks
GenericCycleInfo< ContextT > CycleInfoT
void addUniformOverride(const InstructionT &Instr)
Mark UniVal as a value that is always uniform.
void recordTemporalDivergence(ConstValueRefT, const InstructionT *, const CycleT *)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
bool hasDivergentTerminator(const BlockT &B)
bool isDivergentUse(const UseT &U) const
Whether U is divergent.
bool isDivergent(ConstValueRefT V) const
Whether V is divergent at its definition.
void print(raw_ostream &Out) const
T helper function for printing.
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
typename ContextT::ConstValueRefT ConstValueRefT
typename ContextT::BlockT BlockT
typename ContextT::InstructionT InstructionT
bool hasDivergence() const
Whether any divergence was detected.
const FunctionT & getFunction() const
The GPU kernel this analysis result is for.
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::DominatorTreeT DominatorTreeT
iterator_range< TemporalDivergenceTuple * > getTemporalDivergenceList() const
A helper class to return the specified delimiter string after the first invocation of operator String...
Construct a specially modified post-order traversal of cycles.
typename ContextT::FunctionT FunctionT
const BlockT * operator[](size_t idx) const
typename CycleInfoT::CycleT CycleT
bool isReducibleCycleHeader(const BlockT *BB) const
ModifiedPostOrder(const ContextT &C)
unsigned count(BlockT *BB) const
void compute(const CycleInfoT &CI)
Generically compute the modified post order.
GenericCycleInfo< ContextT > CycleInfoT
void appendBlock(const BlockT &BB, bool isReducibleCycleHeader=false)
unsigned getIndex(const BlockT *BB) const
typename std::vector< BlockT * >::const_iterator const_iterator
typename ContextT::DominatorTreeT DominatorTreeT
typename ContextT::BlockT BlockT
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
static const CycleT * getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Return the outermost cycle made divergent by branch inside it.
static const CycleT * getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock)
Return the outermost cycle made divergent by branch outside it.
auto successors(const MachineBasicBlock *BB)
static bool insertIfNotContained(SmallVector< CycleT * > &Cycles, CycleT *Candidate)
Add Candidate to Cycles if it is not already contained in Cycles.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1652
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
TargetTransformInfo TTI
auto instrs(const MachineBasicBlock &BB)
static const CycleT * getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Information discovered by the sync dependence analysis for each divergent branch.
PhiInput(ConstValueRefT value, BlockT *predBlock)