LLVM 22.0.0git
GenericUniformityImpl.h
Go to the documentation of this file.
1//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This template implementation resides in a separate file so that it
10// does not get injected into every .cpp file that includes the
11// generic header.
12//
13// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
14//
15// This file should only be included by files that implement a
16// specialization of the relvant templates. Currently these are:
17// - UniformityAnalysis.cpp
18//
19// Note: The DEBUG_TYPE macro should be defined before using this
20// file so that any use of LLVM_DEBUG is associated with the
21// including file rather than this file.
22//
23//===----------------------------------------------------------------------===//
24///
25/// \file
26/// \brief Implementation of uniformity analysis.
27///
28/// The algorithm is a fixed point iteration that starts with the assumption
29/// that all control flow and all values are uniform. Starting from sources of
30/// divergence (whose discovery must be implemented by a CFG- or even
31/// target-specific derived class), divergence of values is propagated from
32/// definition to uses in a straight-forward way. The main complexity lies in
33/// the propagation of the impact of divergent control flow on the divergence of
34/// values (sync dependencies).
35///
36/// NOTE: In general, no interface exists for a transform to update
37/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
38/// transitive dependence, but it also does not provide an interface for
39/// updating itself. Given that, transforms should not preserve uniformity in
40/// their getAnalysisUsage() callback.
41///
42//===----------------------------------------------------------------------===//
43
44#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
45#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
48
49#include "llvm/ADT/DenseSet.h"
50#include "llvm/ADT/STLExtras.h"
55
56#define DEBUG_TYPE "uniformity"
57
58namespace llvm {
59
60// Forward decl from llvm/CodeGen/MachineInstr.h
62
63/// Construct a specially modified post-order traversal of cycles.
64///
65/// The ModifiedPO is contructed using a virtually modified CFG as follows:
66///
67/// 1. The successors of pre-entry nodes (predecessors of an cycle
68/// entry that are outside the cycle) are replaced by the
69/// successors of the successors of the header.
70/// 2. Successors of the cycle header are replaced by the exit blocks
71/// of the cycle.
72///
73/// Effectively, we produce a depth-first numbering with the following
74/// properties:
75///
76/// 1. Nodes after a cycle are numbered earlier than the cycle header.
77/// 2. The header is numbered earlier than the nodes in the cycle.
78/// 3. The numbering of the nodes within the cycle forms an interval
79/// starting with the header.
80///
81/// Effectively, the virtual modification arranges the nodes in a
82/// cycle as a DAG with the header as the sole leaf, and successors of
83/// the header as the roots. A reverse traversal of this numbering has
84/// the following invariant on the unmodified original CFG:
85///
86/// Each node is visited after all its predecessors, except if that
87/// predecessor is the cycle header.
88///
89template <typename ContextT> class ModifiedPostOrder {
90public:
91 using BlockT = typename ContextT::BlockT;
92 using FunctionT = typename ContextT::FunctionT;
93 using DominatorTreeT = typename ContextT::DominatorTreeT;
94
96 using CycleT = typename CycleInfoT::CycleT;
97 using const_iterator = typename std::vector<BlockT *>::const_iterator;
98
99 ModifiedPostOrder(const ContextT &C) : Context(C) {}
100
101 bool empty() const { return m_order.empty(); }
102 size_t size() const { return m_order.size(); }
103
104 void clear() { m_order.clear(); }
105 void compute(const CycleInfoT &CI);
106
107 unsigned count(BlockT *BB) const { return POIndex.count(BB); }
108 const BlockT *operator[](size_t idx) const { return m_order[idx]; }
109
110 void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
111 POIndex[&BB] = m_order.size();
112 m_order.push_back(&BB);
113 LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
114 << "): " << Context.print(&BB) << "\n");
116 ReducibleCycleHeaders.insert(&BB);
117 }
118
119 unsigned getIndex(const BlockT *BB) const {
120 assert(POIndex.count(BB));
121 return POIndex.lookup(BB);
122 }
123
124 bool isReducibleCycleHeader(const BlockT *BB) const {
125 return ReducibleCycleHeaders.contains(BB);
126 }
127
128private:
131 SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
132 const ContextT &Context;
133
134 void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
136
137 void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
138 const CycleInfoT &CI, const CycleT *Cycle,
140};
141
142template <typename> class DivergencePropagator;
143
144/// \class GenericSyncDependenceAnalysis
145///
146/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
147///
148/// An analysis per divergent branch that returns the set of basic
149/// blocks whose phi nodes become divergent due to divergent control.
150/// These are the blocks that are reachable by two disjoint paths from
151/// the branch, or cycle exits reachable along a path that is disjoint
152/// from a path to the cycle latch.
153
154// --- Above line is not a doxygen comment; intentionally left blank ---
155//
156// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
157//
158// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
159// control-induced divergence in phi nodes.
160//
161// -- Reference --
162// The algorithm is an extension of Section 5 of
163//
164// An abstract interpretation for SPMD divergence
165// on reducible control flow graphs.
166// Julian Rosemann, Simon Moll and Sebastian Hack
167// POPL '21
168//
169//
170// -- Sync dependence --
171// Sync dependence characterizes the control flow aspect of the
172// propagation of branch divergence. For example,
173//
174// %cond = icmp slt i32 %tid, 10
175// br i1 %cond, label %then, label %else
176// then:
177// br label %merge
178// else:
179// br label %merge
180// merge:
181// %a = phi i32 [ 0, %then ], [ 1, %else ]
182//
183// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
184// because %tid is not on its use-def chains, %a is sync dependent on %tid
185// because the branch "br i1 %cond" depends on %tid and affects which value %a
186// is assigned to.
187//
188//
189// -- Reduction to SSA construction --
190// There are two disjoint paths from A to X, if a certain variant of SSA
191// construction places a phi node in X under the following set-up scheme.
192//
193// This variant of SSA construction ignores incoming undef values.
194// That is paths from the entry without a definition do not result in
195// phi nodes.
196//
197// entry
198// / \
199// A \
200// / \ Y
201// B C /
202// \ / \ /
203// D E
204// \ /
205// F
206//
207// Assume that A contains a divergent branch. We are interested
208// in the set of all blocks where each block is reachable from A
209// via two disjoint paths. This would be the set {D, F} in this
210// case.
211// To generally reduce this query to SSA construction we introduce
212// a virtual variable x and assign to x different values in each
213// successor block of A.
214//
215// entry
216// / \
217// A \
218// / \ Y
219// x = 0 x = 1 /
220// \ / \ /
221// D E
222// \ /
223// F
224//
225// Our flavor of SSA construction for x will construct the following
226//
227// entry
228// / \
229// A \
230// / \ Y
231// x0 = 0 x1 = 1 /
232// \ / \ /
233// x2 = phi E
234// \ /
235// x3 = phi
236//
237// The blocks D and F contain phi nodes and are thus each reachable
238// by two disjoins paths from A.
239//
240// -- Remarks --
241// * In case of cycle exits we need to check for temporal divergence.
242// To this end, we check whether the definition of x differs between the
243// cycle exit and the cycle header (_after_ SSA construction).
244//
245// * In the presence of irreducible control flow, the fixed point is
246// reached only after multiple iterations. This is because labels
247// reaching the header of a cycle must be repropagated through the
248// cycle. This is true even in a reducible cycle, since the labels
249// may have been produced by a nested irreducible cycle.
250//
251// * Note that SyncDependenceAnalysis is not concerned with the points
252// of convergence in an irreducible cycle. It's only purpose is to
253// identify join blocks. The "diverged entry" criterion is
254// separately applied on join blocks to determine if an entire
255// irreducible cycle is assumed to be divergent.
256//
257// * Relevant related work:
258// A simple algorithm for global data flow analysis problems.
259// Matthew S. Hecht and Jeffrey D. Ullman.
260// SIAM Journal on Computing, 4(4):519–532, December 1975.
261//
262template <typename ContextT> class GenericSyncDependenceAnalysis {
263public:
264 using BlockT = typename ContextT::BlockT;
265 using DominatorTreeT = typename ContextT::DominatorTreeT;
266 using FunctionT = typename ContextT::FunctionT;
267 using ValueRefT = typename ContextT::ValueRefT;
268 using InstructionT = typename ContextT::InstructionT;
269
271 using CycleT = typename CycleInfoT::CycleT;
272
275
276 // * if BlockLabels[B] == C then C is the dominating definition at
277 // block B
278 // * if BlockLabels[B] == nullptr then we haven't seen B yet
279 // * if BlockLabels[B] == B then:
280 // - B is a join point of disjoint paths from X, or,
281 // - B is an immediate successor of X (initial value), or,
282 // - B is X
284
285 /// Information discovered by the sync dependence analysis for each
286 /// divergent branch.
288 // Join points of diverged paths.
290 // Divergent cycle exits
292 // Labels assigned to blocks on diverged paths.
294 };
295
297
298 GenericSyncDependenceAnalysis(const ContextT &Context,
299 const DominatorTreeT &DT, const CycleInfoT &CI);
300
301 /// \brief Computes divergent join points and cycle exits caused by branch
302 /// divergence in \p Term.
303 ///
304 /// This returns a pair of sets:
305 /// * The set of blocks which are reachable by disjoint paths from
306 /// \p Term.
307 /// * The set also contains cycle exits if there two disjoint paths:
308 /// one from \p Term to the cycle exit and another from \p Term to
309 /// the cycle header.
310 const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
311
312private:
313 static DivergenceDescriptor EmptyDivergenceDesc;
314
315 ModifiedPO CyclePO;
316
317 const DominatorTreeT &DT;
318 const CycleInfoT &CI;
319
321 CachedControlDivDescs;
322};
323
324/// \brief Analysis that identifies uniform values in a data-parallel
325/// execution.
326///
327/// This analysis propagates divergence in a data-parallel context
328/// from sources of divergence to all users. It can be instantiated
329/// for an IR that provides a suitable SSAContext.
330template <typename ContextT> class GenericUniformityAnalysisImpl {
331public:
332 using BlockT = typename ContextT::BlockT;
333 using FunctionT = typename ContextT::FunctionT;
334 using ValueRefT = typename ContextT::ValueRefT;
335 using ConstValueRefT = typename ContextT::ConstValueRefT;
336 using UseT = typename ContextT::UseT;
337 using InstructionT = typename ContextT::InstructionT;
338 using DominatorTreeT = typename ContextT::DominatorTreeT;
339
341 using CycleT = typename CycleInfoT::CycleT;
342
345 typename SyncDependenceAnalysisT::DivergenceDescriptor;
346 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
347
349 std::tuple<ConstValueRefT, InstructionT *, const CycleT *>;
350
353 : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
354 TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
355
357
358 const FunctionT &getFunction() const { return F; }
359
360 /// \brief Mark \p UniVal as a value that is always uniform.
361 void addUniformOverride(const InstructionT &Instr);
362
363 /// \brief Examine \p I for divergent outputs and add to the worklist.
364 void markDivergent(const InstructionT &I);
365
366 /// \brief Mark \p DivVal as a divergent value.
367 /// \returns Whether the tracked divergence state of \p DivVal changed.
368 bool markDivergent(ConstValueRefT DivVal);
369
370 /// \brief Mark outputs of \p Instr as divergent.
371 /// \returns Whether the tracked divergence state of any output has changed.
372 bool markDefsDivergent(const InstructionT &Instr);
373
374 /// \brief Propagate divergence to all instructions in the region.
375 /// Divergence is seeded by calls to \p markDivergent.
376 void compute();
377
378 /// \brief Whether any value was marked or analyzed to be divergent.
379 bool hasDivergence() const { return !DivergentValues.empty(); }
380
381 /// \brief Whether \p Val will always return a uniform value regardless of its
382 /// operands
383 bool isAlwaysUniform(const InstructionT &Instr) const;
384
385 bool hasDivergentDefs(const InstructionT &I) const;
386
387 bool isDivergent(const InstructionT &I) const {
388 if (I.isTerminator()) {
389 return DivergentTermBlocks.contains(I.getParent());
390 }
391 return hasDivergentDefs(I);
392 };
393
394 /// \brief Whether \p Val is divergent at its definition.
395 bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
396
397 bool isDivergentUse(const UseT &U) const;
398
399 bool hasDivergentTerminator(const BlockT &B) const {
401 }
402
403 void print(raw_ostream &out) const;
404
406
408 const CycleT *);
409
410protected:
411 /// \brief Value/block pair representing a single phi input.
412 struct PhiInput {
415
418 };
419
420 const ContextT &Context;
421 const FunctionT &F;
423 const TargetTransformInfo *TTI = nullptr;
424
425 // Detected/marked divergent values.
428
429 // Internal worklist for divergence propagation.
430 std::vector<const InstructionT *> Worklist;
431
432 /// \brief Mark \p Term as divergent and push all Instructions that become
433 /// divergent as a result on the worklist.
434 void analyzeControlDivergence(const InstructionT &Term);
435
436private:
437 const DominatorTreeT &DT;
438
439 // Recognized cycles with divergent exits.
440 SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
441
442 // Cycles assumed to be divergent.
443 //
444 // We don't use a set here because every insertion needs an explicit
445 // traversal of all existing members.
446 SmallVector<const CycleT *> AssumedDivergent;
447
448 // The SDA links divergent branches to divergent control-flow joins.
450
451 // Set of known-uniform values.
453
454 /// \brief Mark all nodes in \p JoinBlock as divergent and push them on
455 /// the worklist.
456 void taintAndPushAllDefs(const BlockT &JoinBlock);
457
458 /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
459 /// the worklist.
460 void taintAndPushPhiNodes(const BlockT &JoinBlock);
461
462 /// \brief Identify all Instructions that become divergent because \p DivExit
463 /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
464 /// divergent and push them on the worklist.
465 void propagateCycleExitDivergence(const BlockT &DivExit,
466 const CycleT &DivCycle);
467
468 /// Mark as divergent all external uses of values defined in \p DefCycle.
469 void analyzeCycleExitDivergence(const CycleT &DefCycle);
470
471 /// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
472 void propagateTemporalDivergence(const InstructionT &I,
473 const CycleT &DefCycle);
474
475 /// \brief Push all users of \p Val (in the region) to the worklist.
476 void pushUsers(const InstructionT &I);
477 void pushUsers(ConstValueRefT V);
478
479 bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
480
481 /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
482 bool isTemporalDivergent(const BlockT &ObservingBlock,
483 const InstructionT &Def) const;
484};
485
486template <typename ImplT>
488 delete Impl;
489}
490
491/// Compute divergence starting with a divergent branch.
492template <typename ContextT> class DivergencePropagator {
493public:
494 using BlockT = typename ContextT::BlockT;
495 using DominatorTreeT = typename ContextT::DominatorTreeT;
496 using FunctionT = typename ContextT::FunctionT;
497 using ValueRefT = typename ContextT::ValueRefT;
498
500 using CycleT = typename CycleInfoT::CycleT;
501
505 typename SyncDependenceAnalysisT::DivergenceDescriptor;
506 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
507
512 const ContextT &Context;
513
514 // Track blocks that receive a new label. Every time we relabel a
515 // cycle header, we another pass over the modified post-order in
516 // order to propagate the header label. The bit vector also allows
517 // us to skip labels that have not changed.
519
520 // divergent join and cycle exit descriptor.
521 std::unique_ptr<DivergenceDescriptorT> DivDesc;
523
525 const CycleInfoT &CI, const BlockT &DivTermBlock)
527 Context(CI.getSSAContext()), DivDesc(new DivergenceDescriptorT),
529
531 Out << "Propagator::BlockLabels {\n";
532 for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
533 const auto *Block = CyclePOT[BlockIdx];
534 const auto *Label = BlockLabels[Block];
535 Out << Context.print(Block) << "(" << BlockIdx << ") : ";
536 if (!Label) {
537 Out << "<null>\n";
538 } else {
539 Out << Context.print(Label) << "\n";
540 }
541 }
542 Out << "}\n";
543 }
544
545 // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
546 // causes a divergent join.
547 bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
548 const auto *OldLabel = BlockLabels[&SuccBlock];
549
550 LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
551 << "\tpushed label: " << Context.print(&PushedLabel)
552 << "\n"
553 << "\told label: " << Context.print(OldLabel) << "\n");
554
555 // Early exit if there is no change in the label.
556 if (OldLabel == &PushedLabel)
557 return false;
558
559 if (OldLabel != &SuccBlock) {
560 auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
561 // Assigning a new label, mark this in FreshLabels.
562 LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
563 FreshLabels.set(SuccIdx);
564 }
565
566 // This is not a join if the succ was previously unlabeled.
567 if (!OldLabel) {
568 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
569 << "\n");
570 BlockLabels[&SuccBlock] = &PushedLabel;
571 return false;
572 }
573
574 // This is a new join. Label the join block as itself, and not as
575 // the pushed label.
576 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
577 BlockLabels[&SuccBlock] = &SuccBlock;
578
579 return true;
580 }
581
582 // visiting a virtual cycle exit edge from the cycle header --> temporal
583 // divergence on join
584 bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
585 if (!computeJoin(ExitBlock, Label))
586 return false;
587
588 // Identified a divergent cycle exit
589 DivDesc->CycleDivBlocks.insert(&ExitBlock);
590 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
591 << "\n");
592 return true;
593 }
594
595 // process \p SuccBlock with reaching definition \p Label
596 bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
597 if (!computeJoin(SuccBlock, Label))
598 return false;
599
600 // Divergent, disjoint paths join.
601 DivDesc->JoinDivBlocks.insert(&SuccBlock);
602 LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
603 << "\n");
604 return true;
605 }
606
607 std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
609
610 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
611 << Context.print(&DivTermBlock) << "\n");
612
613 int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
614
615 // Bootstrap with branch targets
616 auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
617
618 // Locate the largest ancestor cycle that is not reducible and does not
619 // contain a reducible ancestor. This is done with a lambda that is defined
620 // and invoked in the same statement.
621 const CycleT *IrreducibleAncestor = [](const CycleT *C) -> const CycleT * {
622 if (!C)
623 return nullptr;
624 if (C->isReducible())
625 return nullptr;
626 while (const CycleT *P = C->getParentCycle()) {
627 if (P->isReducible())
628 return C;
629 C = P;
630 }
631 assert(!C->getParentCycle());
632 assert(!C->isReducible());
633 return C;
634 }(DivTermCycle);
635
636 for (const auto *SuccBlock : successors(&DivTermBlock)) {
637 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
638 // If DivTerm exits the cycle immediately, computeJoin() might
639 // not reach SuccBlock with a different label. We need to
640 // check for this exit now.
641 DivDesc->CycleDivBlocks.insert(SuccBlock);
642 LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
643 << Context.print(SuccBlock) << "\n");
644 }
645 visitEdge(*SuccBlock, *SuccBlock);
646 }
647
648 // Technically propagation can continue until it reaches the last node.
649 //
650 // For efficiency, propagation can stop if FreshLabels.count()==1. But
651 // For irreducible cycles, let propagation continue until it reaches
652 // out of irreducible cycles (see code for details.)
653 while (true) {
654 auto BlockIdx = FreshLabels.find_last();
655 if (BlockIdx == -1)
656 break;
657
658 const auto *Block = CyclePOT[BlockIdx];
659 // If no irreducible cycle, stop if freshLable.count() = 1 and Block
660 // is the IPD. If it is in any irreducible cycle, continue propagation.
661 if (FreshLabels.count() == 1 &&
662 (!IrreducibleAncestor || !IrreducibleAncestor->contains(Block)))
663 break;
664
665 LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
666
667 FreshLabels.reset(BlockIdx);
668 if (BlockIdx == DivTermIdx) {
669 LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
670 continue;
671 }
672
673 LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
674 << BlockIdx << "\n");
675
676 const auto *Label = BlockLabels[Block];
677 assert(Label);
678
679 // If the current block is the header of a reducible cycle that
680 // contains the divergent branch, then the label should be
681 // propagated to the cycle exits. Such a header is the "last
682 // possible join" of any disjoint paths within this cycle. This
683 // prevents detection of spurious joins at the entries of any
684 // irreducible child cycles.
685 //
686 // This conclusion about the header is true for any choice of DFS:
687 //
688 // If some DFS has a reducible cycle C with header H, then for
689 // any other DFS, H is the header of a cycle C' that is a
690 // superset of C. For a divergent branch inside the subgraph
691 // C, any join node inside C is either H, or some node
692 // encountered without passing through H.
693 //
694 auto getReducibleParent = [&](const BlockT *Block) -> const CycleT * {
695 if (!CyclePOT.isReducibleCycleHeader(Block))
696 return nullptr;
697 const auto *BlockCycle = CI.getCycle(Block);
698 if (BlockCycle->contains(&DivTermBlock))
699 return BlockCycle;
700 return nullptr;
701 };
702
703 if (const auto *BlockCycle = getReducibleParent(Block)) {
704 SmallVector<BlockT *, 4> BlockCycleExits;
705 BlockCycle->getExitBlocks(BlockCycleExits);
706 for (auto *BlockCycleExit : BlockCycleExits)
707 visitCycleExitEdge(*BlockCycleExit, *Label);
708 } else {
709 for (const auto *SuccBlock : successors(Block))
710 visitEdge(*SuccBlock, *Label);
711 }
712 }
713
714 LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
715
716 // Check every cycle containing DivTermBlock for exit divergence.
717 // A cycle has exit divergence if the label of an exit block does
718 // not match the label of its header.
719 for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
721 if (Cycle->isReducible()) {
722 // The exit divergence of a reducible cycle is recorded while
723 // propagating labels.
724 continue;
725 }
727 Cycle->getExitBlocks(Exits);
728 auto *Header = Cycle->getHeader();
729 auto *HeaderLabel = BlockLabels[Header];
730 for (const auto *Exit : Exits) {
731 if (BlockLabels[Exit] != HeaderLabel) {
732 // Identified a divergent cycle exit
733 DivDesc->CycleDivBlocks.insert(Exit);
734 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
735 << "\n");
736 }
737 }
738 }
739
740 return std::move(DivDesc);
741 }
742};
743
744template <typename ContextT>
747
748template <typename ContextT>
750 const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
751 : CyclePO(Context), DT(DT), CI(CI) {
752 CyclePO.compute(CI);
753}
754
755template <typename ContextT>
757 const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
758 // trivial case
759 if (succ_size(DivTermBlock) <= 1) {
760 return EmptyDivergenceDesc;
761 }
762
763 // already available in cache?
764 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
765 if (ItCached != CachedControlDivDescs.end())
766 return *ItCached->second;
767
768 // compute all join points
769 DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
770 auto DivDesc = Propagator.computeJoinPoints();
771
772 auto printBlockSet = [&](ConstBlockSet &Blocks) {
773 return Printable([&](raw_ostream &Out) {
774 Out << "[";
775 ListSeparator LS;
776 for (const auto *BB : Blocks) {
777 Out << LS << CI.getSSAContext().print(BB);
778 }
779 Out << "]\n";
780 });
781 };
782
784 dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
785 << "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
786 << " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
787 << "\n");
788 (void)printBlockSet;
789
790 auto ItInserted =
791 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
792 assert(ItInserted.second);
793 return *ItInserted.first->second;
794}
795
796template <typename ContextT>
798 const InstructionT &I) {
799 if (isAlwaysUniform(I))
800 return;
801 bool Marked = false;
802 if (I.isTerminator()) {
803 Marked = DivergentTermBlocks.insert(I.getParent()).second;
804 if (Marked) {
805 LLVM_DEBUG(dbgs() << "marked divergent term block: "
806 << Context.print(I.getParent()) << "\n");
807 }
808 } else {
809 Marked = markDefsDivergent(I);
810 }
811
812 if (Marked)
813 Worklist.push_back(&I);
814}
815
816template <typename ContextT>
818 ConstValueRefT Val) {
819 if (DivergentValues.insert(Val).second) {
820 LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
821 return true;
822 }
823 return false;
824}
825
826template <typename ContextT>
828 const InstructionT &Instr) {
829 UniformOverrides.insert(&Instr);
830}
831
832// Mark as divergent all external uses of values defined in \p DefCycle.
833//
834// A value V defined by a block B inside \p DefCycle may be used outside the
835// cycle only if the use is a PHI in some exit block, or B dominates some exit
836// block. Thus, we check uses as follows:
837//
838// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
839// - For every block B inside \p DefCycle that dominates at least one exit
840// block, check all uses outside \p DefCycle.
841//
842// FIXME: This function does not distinguish between divergent and uniform
843// exits. For each divergent exit, only the values that are live at that exit
844// need to be propagated as divergent at their use outside the cycle.
845template <typename ContextT>
847 const CycleT &DefCycle) {
849 DefCycle.getExitBlocks(Exits);
850 for (auto *Exit : Exits) {
851 for (auto &Phi : Exit->phis()) {
852 if (usesValueFromCycle(Phi, DefCycle)) {
853 markDivergent(Phi);
854 }
855 }
856 }
857
858 for (auto *BB : DefCycle.blocks()) {
859 if (!llvm::any_of(Exits,
860 [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
861 continue;
862 for (auto &II : *BB) {
863 propagateTemporalDivergence(II, DefCycle);
864 }
865 }
866}
867
868template <typename ContextT>
869void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
870 const BlockT &DivExit, const CycleT &InnerDivCycle) {
871 LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
872 << "\n");
873 auto *DivCycle = &InnerDivCycle;
874 auto *OuterDivCycle = DivCycle;
875 auto *ExitLevelCycle = CI.getCycle(&DivExit);
876 const unsigned CycleExitDepth =
877 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
878
879 // Find outer-most cycle that does not contain \p DivExit
880 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
881 LLVM_DEBUG(dbgs() << " Found exiting cycle: "
882 << Context.print(DivCycle->getHeader()) << "\n");
883 OuterDivCycle = DivCycle;
884 DivCycle = DivCycle->getParentCycle();
885 }
886 LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
887 << Context.print(OuterDivCycle->getHeader()) << "\n");
888
889 if (!DivergentExitCycles.insert(OuterDivCycle).second)
890 return;
891
892 // Exit divergence does not matter if the cycle itself is assumed to
893 // be divergent.
894 for (const auto *C : AssumedDivergent) {
895 if (C->contains(OuterDivCycle))
896 return;
897 }
898
899 analyzeCycleExitDivergence(*OuterDivCycle);
900}
901
902template <typename ContextT>
903void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
904 const BlockT &BB) {
905 LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
906 for (const auto &I : instrs(BB)) {
907 // Terminators do not produce values; they are divergent only if
908 // the condition is divergent. That is handled when the divergent
909 // condition is placed in the worklist.
910 if (I.isTerminator())
911 break;
912
913 markDivergent(I);
914 }
915}
916
917/// Mark divergent phi nodes in a join block
918template <typename ContextT>
919void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
920 const BlockT &JoinBlock) {
921 LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
922 << "\n");
923 for (const auto &Phi : JoinBlock.phis()) {
924 // FIXME: The non-undef value is not constant per se; it just happens to be
925 // uniform and may not dominate this PHI. So assuming that the same value
926 // reaches along all incoming edges may itself be undefined behaviour. This
927 // particular interpretation of the undef value was added to
928 // DivergenceAnalysis in the following review:
929 //
930 // https://reviews.llvm.org/D19013
931 if (ContextT::isConstantOrUndefValuePhi(Phi))
932 continue;
933 markDivergent(Phi);
934 }
935}
936
937/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
938///
939/// \return true iff \p Candidate was added to \p Cycles.
940template <typename CycleT>
942 CycleT *Candidate) {
943 if (llvm::any_of(Cycles,
944 [Candidate](CycleT *C) { return C->contains(Candidate); }))
945 return false;
946 Cycles.push_back(Candidate);
947 return true;
948}
949
950/// Return the outermost cycle made divergent by branch outside it.
951///
952/// If two paths that diverged outside an irreducible cycle join
953/// inside that cycle, then that whole cycle is assumed to be
954/// divergent. This does not apply if the cycle is reducible.
955template <typename CycleT, typename BlockT>
956static const CycleT *getExtDivCycle(const CycleT *Cycle,
957 const BlockT *DivTermBlock,
958 const BlockT *JoinBlock) {
959 assert(Cycle);
960 assert(Cycle->contains(JoinBlock));
961
962 if (Cycle->contains(DivTermBlock))
963 return nullptr;
964
965 const auto *OriginalCycle = Cycle;
966 const auto *Parent = Cycle->getParentCycle();
967 while (Parent && !Parent->contains(DivTermBlock)) {
968 Cycle = Parent;
969 Parent = Cycle->getParentCycle();
970 }
971
972 // If the original cycle is not the outermost cycle, then the outermost cycle
973 // is irreducible. If the outermost cycle were reducible, then external
974 // diverged paths would not reach the original inner cycle.
975 (void)OriginalCycle;
976 assert(Cycle == OriginalCycle || !Cycle->isReducible());
977
978 if (Cycle->isReducible()) {
979 assert(Cycle->getHeader() == JoinBlock);
980 return nullptr;
981 }
982
983 LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
984 return Cycle;
985}
986
987/// Return the outermost cycle made divergent by branch inside it.
988///
989/// This checks the "diverged entry" criterion defined in the
990/// docs/ConvergenceAnalysis.html.
991template <typename ContextT, typename CycleT, typename BlockT,
992 typename DominatorTreeT>
993static const CycleT *
994getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
995 const BlockT *JoinBlock, const DominatorTreeT &DT,
996 ContextT &Context) {
997 LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
998 << " for internal branch " << Context.print(DivTermBlock)
999 << "\n");
1000 if (DT.properlyDominates(DivTermBlock, JoinBlock))
1001 return nullptr;
1002
1003 // Find the smallest common cycle, if one exists.
1004 assert(Cycle && Cycle->contains(JoinBlock));
1005 while (Cycle && !Cycle->contains(DivTermBlock)) {
1007 }
1008 if (!Cycle || Cycle->isReducible())
1009 return nullptr;
1010
1011 if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
1012 return nullptr;
1013
1014 LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
1015 << " does not dominate join\n");
1016
1017 const auto *Parent = Cycle->getParentCycle();
1018 while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1019 LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
1020 << " does not dominate join\n");
1021 Cycle = Parent;
1022 Parent = Parent->getParentCycle();
1023 }
1024
1025 LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
1026 return Cycle;
1027}
1028
1029template <typename ContextT, typename CycleT, typename BlockT,
1030 typename DominatorTreeT>
1031static const CycleT *
1032getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1033 const BlockT *JoinBlock, const DominatorTreeT &DT,
1034 ContextT &Context) {
1035 if (!Cycle)
1036 return nullptr;
1037
1038 // First try to expand Cycle to the largest that contains JoinBlock
1039 // but not DivTermBlock.
1040 const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
1041
1042 // Continue expanding to the largest cycle that contains both.
1043 const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
1044
1045 if (Int)
1046 return Int;
1047 return Ext;
1048}
1049
1050template <typename ContextT>
1051bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1052 const BlockT &ObservingBlock, const InstructionT &Def) const {
1053 const BlockT *DefBlock = Def.getParent();
1054 for (const CycleT *Cycle = CI.getCycle(DefBlock);
1055 Cycle && !Cycle->contains(&ObservingBlock);
1056 Cycle = Cycle->getParentCycle()) {
1057 if (DivergentExitCycles.contains(Cycle)) {
1058 return true;
1059 }
1060 }
1061 return false;
1062}
1063
1064template <typename ContextT>
1066 const InstructionT &Term) {
1067 const auto *DivTermBlock = Term.getParent();
1068 DivergentTermBlocks.insert(DivTermBlock);
1069 LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1070 << "\n");
1071
1072 // Don't propagate divergence from unreachable blocks.
1073 if (!DT.isReachableFromEntry(DivTermBlock))
1074 return;
1075
1076 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1078
1079 // Iterate over all blocks now reachable by a disjoint path join
1080 for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1081 const auto *Cycle = CI.getCycle(JoinBlock);
1082 LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1083 << "\n");
1084 if (const auto *Outermost = getOutermostDivergentCycle(
1085 Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1086 LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1087 DivCycles.push_back(Outermost);
1088 continue;
1089 }
1090 taintAndPushPhiNodes(*JoinBlock);
1091 }
1092
1093 // Sort by order of decreasing depth. This allows later cycles to be skipped
1094 // because they are already contained in earlier ones.
1095 llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1096 return A->getDepth() > B->getDepth();
1097 });
1098
1099 // Cycles that are assumed divergent due to the diverged entry
1100 // criterion potentially contain temporal divergence depending on
1101 // the DFS chosen. Conservatively, all values produced in such a
1102 // cycle are assumed divergent. "Cycle invariant" values may be
1103 // assumed uniform, but that requires further analysis.
1104 for (auto *C : DivCycles) {
1105 if (!insertIfNotContained(AssumedDivergent, C))
1106 continue;
1107 LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1108 for (const BlockT *BB : C->blocks()) {
1109 taintAndPushAllDefs(*BB);
1110 }
1111 }
1112
1113 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1114 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1115 for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1116 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1117 }
1118}
1119
1120template <typename ContextT>
1122 // Initialize worklist.
1123 auto DivValuesCopy = DivergentValues;
1124 for (const auto DivVal : DivValuesCopy) {
1125 assert(isDivergent(DivVal) && "Worklist invariant violated!");
1126 pushUsers(DivVal);
1127 }
1128
1129 // All values on the Worklist are divergent.
1130 // Their users may not have been updated yet.
1131 while (!Worklist.empty()) {
1132 const InstructionT *I = Worklist.back();
1133 Worklist.pop_back();
1134
1135 LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1136
1137 if (I->isTerminator()) {
1138 analyzeControlDivergence(*I);
1139 continue;
1140 }
1141
1142 // propagate value divergence to users
1143 assert(isDivergent(*I) && "Worklist invariant violated!");
1144 pushUsers(*I);
1145 }
1146}
1147
1148template <typename ContextT>
1150 ConstValueRefT Val, const InstructionT *User, const CycleT *Cycle) {
1151 TemporalDivergenceList.emplace_back(Val, const_cast<InstructionT *>(User),
1152 Cycle);
1153}
1154
1155template <typename ContextT>
1157 const InstructionT &Instr) const {
1158 return UniformOverrides.contains(&Instr);
1159}
1160
1161template <typename ContextT>
1163 const DominatorTreeT &DT, const CycleInfoT &CI,
1164 const TargetTransformInfo *TTI) {
1165 DA.reset(new ImplT{DT, CI, TTI});
1166}
1167
1168template <typename ContextT>
1170 bool haveDivergentArgs = false;
1171
1172 // When we print Value, LLVM IR instruction, we want to print extra new line.
1173 // In LLVM IR print function for Value does not print new line at the end.
1174 // In MIR print for MachineInstr prints new line at the end.
1175 constexpr bool IsMIR = std::is_same<InstructionT, MachineInstr>::value;
1176 std::string NewLine = IsMIR ? "" : "\n";
1177
1178 // Control flow instructions may be divergent even if their inputs are
1179 // uniform. Thus, although exceedingly rare, it is possible to have a program
1180 // with no divergent values but with divergent control structures.
1181 if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
1182 DivergentExitCycles.empty()) {
1183 OS << "ALL VALUES UNIFORM\n";
1184 return;
1185 }
1186
1187 for (const auto &entry : DivergentValues) {
1188 const BlockT *parent = Context.getDefBlock(entry);
1189 if (!parent) {
1190 if (!haveDivergentArgs) {
1191 OS << "DIVERGENT ARGUMENTS:\n";
1192 haveDivergentArgs = true;
1193 }
1194 OS << " DIVERGENT: " << Context.print(entry) << '\n';
1195 }
1196 }
1197
1198 if (!AssumedDivergent.empty()) {
1199 OS << "CYCLES ASSUMED DIVERGENT:\n";
1200 for (const CycleT *cycle : AssumedDivergent) {
1201 OS << " " << cycle->print(Context) << '\n';
1202 }
1203 }
1204
1205 if (!DivergentExitCycles.empty()) {
1206 OS << "CYCLES WITH DIVERGENT EXIT:\n";
1207 for (const CycleT *cycle : DivergentExitCycles) {
1208 OS << " " << cycle->print(Context) << '\n';
1209 }
1210 }
1211
1212 if (!TemporalDivergenceList.empty()) {
1213 OS << "\nTEMPORAL DIVERGENCE LIST:\n";
1214
1215 for (auto [Val, UseInst, Cycle] : TemporalDivergenceList) {
1216 OS << "Value :" << Context.print(Val) << NewLine
1217 << "Used by :" << Context.print(UseInst) << NewLine
1218 << "Outside cycle :" << Cycle->print(Context) << "\n\n";
1219 }
1220 }
1221
1222 for (auto &block : F) {
1223 OS << "\nBLOCK " << Context.print(&block) << '\n';
1224
1225 OS << "DEFINITIONS\n";
1227 Context.appendBlockDefs(defs, block);
1228 for (auto value : defs) {
1229 if (isDivergent(value))
1230 OS << " DIVERGENT: ";
1231 else
1232 OS << " ";
1233 OS << Context.print(value) << NewLine;
1234 }
1235
1236 OS << "TERMINATORS\n";
1238 Context.appendBlockTerms(terms, block);
1239 bool divergentTerminators = hasDivergentTerminator(block);
1240 for (auto *T : terms) {
1241 if (divergentTerminators)
1242 OS << " DIVERGENT: ";
1243 else
1244 OS << " ";
1245 OS << Context.print(T) << NewLine;
1246 }
1247
1248 OS << "END BLOCK\n";
1249 }
1250}
1251
1252template <typename ContextT>
1256 return make_range(DA->TemporalDivergenceList.begin(),
1257 DA->TemporalDivergenceList.end());
1258}
1259
1260template <typename ContextT>
1262 return DA->hasDivergence();
1263}
1264
1265template <typename ContextT>
1266const typename ContextT::FunctionT &
1268 return DA->getFunction();
1269}
1270
1271/// Whether \p V is divergent at its definition.
1272template <typename ContextT>
1274 return DA->isDivergent(V);
1275}
1276
1277template <typename ContextT>
1279 return DA->isDivergent(*I);
1280}
1281
1282template <typename ContextT>
1284 return DA->isDivergentUse(U);
1285}
1286
1287template <typename ContextT>
1289 return DA->hasDivergentTerminator(B);
1290}
1291
1292/// \brief T helper function for printing.
1293template <typename ContextT>
1295 DA->print(out);
1296}
1297
1298template <typename ContextT>
1300 SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
1301 const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
1302 LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
1303 while (!Stack.empty()) {
1304 auto *NextBB = Stack.back();
1305 if (Finalized.count(NextBB)) {
1306 Stack.pop_back();
1307 continue;
1308 }
1309 LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
1310 << "\n");
1311 auto *NestedCycle = CI.getCycle(NextBB);
1312 if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
1313 LLVM_DEBUG(dbgs() << " found a cycle\n");
1314 while (NestedCycle->getParentCycle() != Cycle)
1315 NestedCycle = NestedCycle->getParentCycle();
1316
1317 SmallVector<BlockT *, 3> NestedExits;
1318 NestedCycle->getExitBlocks(NestedExits);
1319 bool PushedNodes = false;
1320 for (auto *NestedExitBB : NestedExits) {
1321 LLVM_DEBUG(dbgs() << " examine exit: "
1322 << CI.getSSAContext().print(NestedExitBB) << "\n");
1323 if (Cycle && !Cycle->contains(NestedExitBB))
1324 continue;
1325 if (Finalized.count(NestedExitBB))
1326 continue;
1327 PushedNodes = true;
1328 Stack.push_back(NestedExitBB);
1329 LLVM_DEBUG(dbgs() << " pushed exit: "
1330 << CI.getSSAContext().print(NestedExitBB) << "\n");
1331 }
1332 if (!PushedNodes) {
1333 // All loop exits finalized -> finish this node
1334 Stack.pop_back();
1335 computeCyclePO(CI, NestedCycle, Finalized);
1336 }
1337 continue;
1338 }
1339
1340 LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
1341 // DAG-style
1342 bool PushedNodes = false;
1343 for (auto *SuccBB : successors(NextBB)) {
1344 LLVM_DEBUG(dbgs() << " examine succ: "
1345 << CI.getSSAContext().print(SuccBB) << "\n");
1346 if (Cycle && !Cycle->contains(SuccBB))
1347 continue;
1348 if (Finalized.count(SuccBB))
1349 continue;
1350 PushedNodes = true;
1351 Stack.push_back(SuccBB);
1352 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
1353 << "\n");
1354 }
1355 if (!PushedNodes) {
1356 // Never push nodes twice
1357 LLVM_DEBUG(dbgs() << " finishing node: "
1358 << CI.getSSAContext().print(NextBB) << "\n");
1359 Stack.pop_back();
1360 Finalized.insert(NextBB);
1361 appendBlock(*NextBB);
1362 }
1363 }
1364 LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
1365}
1366
1367template <typename ContextT>
1368void ModifiedPostOrder<ContextT>::computeCyclePO(
1369 const CycleInfoT &CI, const CycleT *Cycle,
1370 SmallPtrSetImpl<const BlockT *> &Finalized) {
1371 LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1372 SmallVector<const BlockT *> Stack;
1373 auto *CycleHeader = Cycle->getHeader();
1374
1375 LLVM_DEBUG(dbgs() << " noted header: "
1376 << CI.getSSAContext().print(CycleHeader) << "\n");
1377 assert(!Finalized.count(CycleHeader));
1378 Finalized.insert(CycleHeader);
1379
1380 // Visit the header last
1381 LLVM_DEBUG(dbgs() << " finishing header: "
1382 << CI.getSSAContext().print(CycleHeader) << "\n");
1383 appendBlock(*CycleHeader, Cycle->isReducible());
1384
1385 // Initialize with immediate successors
1386 for (auto *BB : successors(CycleHeader)) {
1387 LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
1388 << "\n");
1389 if (!Cycle->contains(BB))
1390 continue;
1391 if (BB == CycleHeader)
1392 continue;
1393 if (!Finalized.count(BB)) {
1394 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
1395 << "\n");
1396 Stack.push_back(BB);
1397 }
1398 }
1399
1400 // Compute PO inside region
1401 computeStackPO(Stack, CI, Cycle, Finalized);
1402
1403 LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1404}
1405
1406/// \brief Generically compute the modified post order.
1407template <typename ContextT>
1411 auto *F = CI.getFunction();
1412 Stack.reserve(24); // FIXME made-up number
1413 Stack.push_back(&F->front());
1414 computeStackPO(Stack, CI, nullptr, Finalized);
1415}
1416
1417} // namespace llvm
1418
1419#undef DEBUG_TYPE
1420
1421#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Given that RA is a live value
This file defines the DenseSet and SmallDenseSet classes.
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SparseBitVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
unify loop Fixup each natural loop to have a single exit block
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:203
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:173
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Compute divergence starting with a divergent branch.
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
typename ContextT::DominatorTreeT DominatorTreeT
bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel)
std::unique_ptr< DivergenceDescriptorT > DivDesc
void printDefs(raw_ostream &Out)
typename ContextT::FunctionT FunctionT
GenericCycleInfo< ContextT > CycleInfoT
ModifiedPostOrder< ContextT > ModifiedPO
std::unique_ptr< DivergenceDescriptorT > computeJoinPoints()
bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label)
typename ContextT::ValueRefT ValueRefT
typename ContextT::BlockT BlockT
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
typename CycleInfoT::CycleT CycleT
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
bool visitEdge(const BlockT &SuccBlock, const BlockT &Label)
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:334
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
Cycle information for a function.
A possibly irreducible generalization of a Loop.
BlockT * getHeader() const
bool isReducible() const
Whether the cycle is a natural loop.
Printable print(const ContextT &Ctx) const
void getExitBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all of the successor blocks of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Locate join blocks for disjoint paths starting at a divergent branch.
GenericSyncDependenceAnalysis(const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
ModifiedPostOrder< ContextT > ModifiedPO
typename ContextT::DominatorTreeT DominatorTreeT
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::FunctionT FunctionT
typename ContextT::InstructionT InstructionT
typename ContextT::ValueRefT ValueRefT
const DivergenceDescriptor & getJoinBlocks(const BlockT *DivTermBlock)
Computes divergent join points and cycle exits caused by branch divergence in Term.
Analysis that identifies uniform values in a data-parallel execution.
SmallVector< TemporalDivergenceTuple, 8 > TemporalDivergenceList
bool isAlwaysUniform(const InstructionT &Instr) const
Whether Val will always return a uniform value regardless of its operands.
typename ContextT::ValueRefT ValueRefT
void analyzeControlDivergence(const InstructionT &Term)
Mark Term as divergent and push all Instructions that become divergent as a result on the worklist.
bool isDivergent(ConstValueRefT V) const
Whether Val is divergent at its definition.
bool isDivergentUse(const UseT &U) const
bool hasDivergentDefs(const InstructionT &I) const
typename ContextT::InstructionT InstructionT
DenseSet< ConstValueRefT > DivergentValues
typename ContextT::ConstValueRefT ConstValueRefT
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
bool hasDivergence() const
Whether any value was marked or analyzed to be divergent.
typename ContextT::DominatorTreeT DominatorTreeT
void compute()
Propagate divergence to all instructions in the region.
bool hasDivergentTerminator(const BlockT &B) const
GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI, const TargetTransformInfo *TTI)
bool markDefsDivergent(const InstructionT &Instr)
Mark outputs of Instr as divergent.
typename ContextT::FunctionT FunctionT
bool isDivergent(const InstructionT &I) const
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
std::vector< const InstructionT * > Worklist
void markDivergent(const InstructionT &I)
Examine I for divergent outputs and add to the worklist.
SmallPtrSet< const BlockT *, 32 > DivergentTermBlocks
GenericCycleInfo< ContextT > CycleInfoT
void addUniformOverride(const InstructionT &Instr)
Mark UniVal as a value that is always uniform.
void recordTemporalDivergence(ConstValueRefT, const InstructionT *, const CycleT *)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
bool hasDivergentTerminator(const BlockT &B)
bool isDivergentUse(const UseT &U) const
Whether U is divergent.
bool isDivergent(ConstValueRefT V) const
Whether V is divergent at its definition.
void print(raw_ostream &Out) const
T helper function for printing.
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
typename ContextT::ConstValueRefT ConstValueRefT
typename ContextT::BlockT BlockT
typename ContextT::InstructionT InstructionT
typename ContextT::UseT UseT
bool hasDivergence() const
Whether any divergence was detected.
const FunctionT & getFunction() const
The GPU kernel this analysis result is for.
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::DominatorTreeT DominatorTreeT
iterator_range< TemporalDivergenceTuple * > getTemporalDivergenceList() const
Representation of each machine instruction.
Definition: MachineInstr.h:72
Construct a specially modified post-order traversal of cycles.
typename ContextT::FunctionT FunctionT
const BlockT * operator[](size_t idx) const
typename CycleInfoT::CycleT CycleT
bool isReducibleCycleHeader(const BlockT *BB) const
ModifiedPostOrder(const ContextT &C)
unsigned count(BlockT *BB) const
void compute(const CycleInfoT &CI)
Generically compute the modified post order.
GenericCycleInfo< ContextT > CycleInfoT
void appendBlock(const BlockT &BB, bool isReducibleCycleHeader=false)
unsigned getIndex(const BlockT *BB) const
typename std::vector< BlockT * >::const_iterator const_iterator
typename ContextT::DominatorTreeT DominatorTreeT
typename ContextT::BlockT BlockT
Simple wrapper around std::function<void(raw_ostream&)>.
Definition: Printable.h:38
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
void set(unsigned Idx)
unsigned count() const
void reset(unsigned Idx)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:174
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static const CycleT * getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Return the outermost cycle made divergent by branch inside it.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
static const CycleT * getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock)
Return the outermost cycle made divergent by branch outside it.
auto successors(const MachineBasicBlock *BB)
static bool insertIfNotContained(SmallVector< CycleT * > &Cycles, CycleT *Candidate)
Add Candidate to Cycles if it is not already contained in Cycles.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
auto instrs(const MachineBasicBlock &BB)
static const CycleT * getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Information discovered by the sync dependence analysis for each divergent branch.
Value/block pair representing a single phi input.
PhiInput(ConstValueRefT value, BlockT *predBlock)