44#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
45#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
56#define DEBUG_TYPE "uniformity"
91 using BlockT =
typename ContextT::BlockT;
96 using CycleT =
typename CycleInfoT::CycleT;
111 POIndex[&BB] = m_order.
size();
114 <<
"): " << Context.print(&BB) <<
"\n");
116 ReducibleCycleHeaders.
insert(&BB);
121 return POIndex.
lookup(BB);
125 return ReducibleCycleHeaders.
contains(BB);
132 const ContextT &Context;
142template <
typename>
class DivergencePropagator;
264 using BlockT =
typename ContextT::BlockT;
271 using CycleT =
typename CycleInfoT::CycleT;
321 CachedControlDivDescs;
332 using BlockT =
typename ContextT::BlockT;
336 using UseT =
typename ContextT::UseT;
341 using CycleT =
typename CycleInfoT::CycleT;
345 typename SyncDependenceAnalysisT::DivergenceDescriptor;
349 std::tuple<ConstValueRefT, InstructionT *, const CycleT *>;
388 if (
I.isTerminator()) {
456 void taintAndPushAllDefs(
const BlockT &JoinBlock);
460 void taintAndPushPhiNodes(
const BlockT &JoinBlock);
465 void propagateCycleExitDivergence(
const BlockT &DivExit,
469 void analyzeCycleExitDivergence(
const CycleT &DefCycle);
482 bool isTemporalDivergent(
const BlockT &ObservingBlock,
486template <
typename ImplT>
494 using BlockT =
typename ContextT::BlockT;
500 using CycleT =
typename CycleInfoT::CycleT;
505 typename SyncDependenceAnalysisT::DivergenceDescriptor;
521 std::unique_ptr<DivergenceDescriptorT>
DivDesc;
531 Out <<
"Propagator::BlockLabels {\n";
532 for (
int BlockIdx = (
int)
CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
535 Out <<
Context.print(
Block) <<
"(" << BlockIdx <<
") : ";
539 Out <<
Context.print(Label) <<
"\n";
551 <<
"\tpushed label: " <<
Context.print(&PushedLabel)
553 <<
"\told label: " <<
Context.print(OldLabel) <<
"\n");
556 if (OldLabel == &PushedLabel)
559 if (OldLabel != &SuccBlock) {
560 auto SuccIdx =
CyclePOT.getIndex(&SuccBlock);
589 DivDesc->CycleDivBlocks.insert(&ExitBlock);
601 DivDesc->JoinDivBlocks.insert(&SuccBlock);
624 if (
C->isReducible())
626 while (
const CycleT *
P =
C->getParentCycle()) {
627 if (
P->isReducible())
637 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
641 DivDesc->CycleDivBlocks.insert(SuccBlock);
643 <<
Context.print(SuccBlock) <<
"\n");
662 (!IrreducibleAncestor || !IrreducibleAncestor->contains(
Block)))
668 if (BlockIdx == DivTermIdx) {
674 << BlockIdx <<
"\n");
697 const auto *BlockCycle =
CI.getCycle(
Block);
703 if (
const auto *BlockCycle = getReducibleParent(
Block)) {
705 BlockCycle->getExitBlocks(BlockCycleExits);
706 for (
auto *BlockCycleExit : BlockCycleExits)
730 for (
const auto *Exit : Exits) {
733 DivDesc->CycleDivBlocks.insert(Exit);
744template <
typename ContextT>
748template <
typename ContextT>
751 : CyclePO(
Context), DT(DT), CI(CI) {
755template <
typename ContextT>
760 return EmptyDivergenceDesc;
764 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
765 if (ItCached != CachedControlDivDescs.end())
766 return *ItCached->second;
776 for (
const auto *BB :
Blocks) {
777 Out << LS << CI.getSSAContext().
print(BB);
784 dbgs() <<
"\nResult (" << CI.getSSAContext().print(DivTermBlock)
785 <<
"):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
786 <<
" CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
791 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
792 assert(ItInserted.second);
793 return *ItInserted.first->second;
796template <
typename ContextT>
799 if (isAlwaysUniform(
I))
802 if (
I.isTerminator()) {
803 Marked = DivergentTermBlocks.insert(
I.getParent()).second;
806 <<
Context.print(
I.getParent()) <<
"\n");
809 Marked = markDefsDivergent(
I);
813 Worklist.push_back(&
I);
816template <
typename ContextT>
819 if (DivergentValues.insert(Val).second) {
826template <
typename ContextT>
829 UniformOverrides.insert(&Instr);
845template <
typename ContextT>
847 const CycleT &DefCycle) {
849 DefCycle.getExitBlocks(Exits);
850 for (
auto *Exit : Exits) {
851 for (
auto &Phi : Exit->phis()) {
852 if (usesValueFromCycle(Phi, DefCycle)) {
858 for (
auto *BB : DefCycle.blocks()) {
860 [&](BlockT *Exit) {
return DT.
dominates(BB, Exit); }))
862 for (
auto &
II : *BB) {
863 propagateTemporalDivergence(
II, DefCycle);
868template <
typename ContextT>
869void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
870 const BlockT &DivExit,
const CycleT &InnerDivCycle) {
873 auto *DivCycle = &InnerDivCycle;
874 auto *OuterDivCycle = DivCycle;
875 auto *ExitLevelCycle = CI.getCycle(&DivExit);
876 const unsigned CycleExitDepth =
877 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
880 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
882 <<
Context.print(DivCycle->getHeader()) <<
"\n");
883 OuterDivCycle = DivCycle;
884 DivCycle = DivCycle->getParentCycle();
887 <<
Context.print(OuterDivCycle->getHeader()) <<
"\n");
889 if (!DivergentExitCycles.insert(OuterDivCycle).second)
894 for (
const auto *
C : AssumedDivergent) {
895 if (
C->contains(OuterDivCycle))
899 analyzeCycleExitDivergence(*OuterDivCycle);
902template <
typename ContextT>
903void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
906 for (
const auto &
I :
instrs(BB)) {
910 if (
I.isTerminator())
918template <
typename ContextT>
919void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
920 const BlockT &JoinBlock) {
923 for (
const auto &Phi : JoinBlock.phis()) {
931 if (ContextT::isConstantOrUndefValuePhi(Phi))
940template <
typename CycleT>
944 [Candidate](CycleT *
C) {
return C->contains(Candidate); }))
955template <
typename CycleT,
typename BlockT>
957 const BlockT *DivTermBlock,
958 const BlockT *JoinBlock) {
965 const auto *OriginalCycle =
Cycle;
967 while (Parent && !Parent->contains(DivTermBlock)) {
983 LLVM_DEBUG(
dbgs() <<
"cycle made divergent by external branch\n");
991template <
typename ContextT,
typename CycleT,
typename BlockT,
992 typename DominatorTreeT>
995 const BlockT *JoinBlock,
const DominatorTreeT &DT,
998 <<
" for internal branch " <<
Context.print(DivTermBlock)
1015 <<
" does not dominate join\n");
1020 <<
" does not dominate join\n");
1025 LLVM_DEBUG(
dbgs() <<
" cycle made divergent by internal branch\n");
1029template <
typename ContextT,
typename CycleT,
typename BlockT,
1030 typename DominatorTreeT>
1031static const CycleT *
1033 const BlockT *JoinBlock,
const DominatorTreeT &DT,
1034 ContextT &Context) {
1050template <
typename ContextT>
1051bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1052 const BlockT &ObservingBlock,
const InstructionT &Def)
const {
1053 const BlockT *DefBlock = Def.getParent();
1054 for (
const CycleT *
Cycle = CI.getCycle(DefBlock);
1057 if (DivergentExitCycles.contains(
Cycle)) {
1064template <
typename ContextT>
1067 const auto *DivTermBlock = Term.getParent();
1068 DivergentTermBlocks.insert(DivTermBlock);
1076 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1080 for (
const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1081 const auto *
Cycle = CI.getCycle(JoinBlock);
1090 taintAndPushPhiNodes(*JoinBlock);
1096 return A->getDepth() >
B->getDepth();
1104 for (
auto *
C : DivCycles) {
1108 for (
const BlockT *BB :
C->blocks()) {
1109 taintAndPushAllDefs(*BB);
1113 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1114 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1115 for (
const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1116 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1120template <
typename ContextT>
1123 auto DivValuesCopy = DivergentValues;
1124 for (
const auto DivVal : DivValuesCopy) {
1125 assert(isDivergent(DivVal) &&
"Worklist invariant violated!");
1131 while (!Worklist.empty()) {
1133 Worklist.pop_back();
1137 if (
I->isTerminator()) {
1138 analyzeControlDivergence(*
I);
1143 assert(isDivergent(*
I) &&
"Worklist invariant violated!");
1148template <
typename ContextT>
1155template <
typename ContextT>
1158 return UniformOverrides.contains(&Instr);
1161template <
typename ContextT>
1168template <
typename ContextT>
1170 bool haveDivergentArgs =
false;
1175 constexpr bool IsMIR = std::is_same<InstructionT, MachineInstr>::value;
1176 std::string NewLine = IsMIR ?
"" :
"\n";
1181 if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
1182 DivergentExitCycles.empty()) {
1183 OS <<
"ALL VALUES UNIFORM\n";
1187 for (
const auto &entry : DivergentValues) {
1190 if (!haveDivergentArgs) {
1191 OS <<
"DIVERGENT ARGUMENTS:\n";
1192 haveDivergentArgs =
true;
1194 OS <<
" DIVERGENT: " <<
Context.print(entry) <<
'\n';
1198 if (!AssumedDivergent.empty()) {
1199 OS <<
"CYCLES ASSUMED DIVERGENT:\n";
1200 for (
const CycleT *cycle : AssumedDivergent) {
1201 OS <<
" " << cycle->print(
Context) <<
'\n';
1205 if (!DivergentExitCycles.empty()) {
1206 OS <<
"CYCLES WITH DIVERGENT EXIT:\n";
1207 for (
const CycleT *cycle : DivergentExitCycles) {
1208 OS <<
" " << cycle->print(
Context) <<
'\n';
1212 if (!TemporalDivergenceList.empty()) {
1213 OS <<
"\nTEMPORAL DIVERGENCE LIST:\n";
1215 for (
auto [Val, UseInst,
Cycle] : TemporalDivergenceList) {
1216 OS <<
"Value :" <<
Context.print(Val) << NewLine
1217 <<
"Used by :" <<
Context.print(UseInst) << NewLine
1225 OS <<
"DEFINITIONS\n";
1228 for (
auto value : defs) {
1229 if (isDivergent(
value))
1230 OS <<
" DIVERGENT: ";
1236 OS <<
"TERMINATORS\n";
1239 bool divergentTerminators = hasDivergentTerminator(
block);
1240 for (
auto *
T : terms) {
1241 if (divergentTerminators)
1242 OS <<
" DIVERGENT: ";
1248 OS <<
"END BLOCK\n";
1252template <
typename ContextT>
1256 return make_range(DA->TemporalDivergenceList.begin(),
1257 DA->TemporalDivergenceList.end());
1260template <
typename ContextT>
1262 return DA->hasDivergence();
1265template <
typename ContextT>
1266const typename ContextT::FunctionT &
1268 return DA->getFunction();
1272template <
typename ContextT>
1274 return DA->isDivergent(V);
1277template <
typename ContextT>
1279 return DA->isDivergent(*
I);
1282template <
typename ContextT>
1284 return DA->isDivergentUse(U);
1287template <
typename ContextT>
1289 return DA->hasDivergentTerminator(
B);
1293template <
typename ContextT>
1298template <
typename ContextT>
1303 while (!Stack.empty()) {
1304 auto *NextBB = Stack.back();
1305 if (Finalized.
count(NextBB)) {
1309 LLVM_DEBUG(
dbgs() <<
" visiting " << CI.getSSAContext().print(NextBB)
1311 auto *NestedCycle = CI.getCycle(NextBB);
1314 while (NestedCycle->getParentCycle() !=
Cycle)
1315 NestedCycle = NestedCycle->getParentCycle();
1317 SmallVector<BlockT *, 3> NestedExits;
1318 NestedCycle->getExitBlocks(NestedExits);
1319 bool PushedNodes =
false;
1320 for (
auto *NestedExitBB : NestedExits) {
1322 << CI.getSSAContext().print(NestedExitBB) <<
"\n");
1325 if (Finalized.
count(NestedExitBB))
1328 Stack.push_back(NestedExitBB);
1330 << CI.getSSAContext().print(NestedExitBB) <<
"\n");
1335 computeCyclePO(CI, NestedCycle, Finalized);
1342 bool PushedNodes =
false;
1345 << CI.getSSAContext().print(SuccBB) <<
"\n");
1348 if (Finalized.
count(SuccBB))
1351 Stack.push_back(SuccBB);
1352 LLVM_DEBUG(
dbgs() <<
" pushed succ: " << CI.getSSAContext().print(SuccBB)
1358 << CI.getSSAContext().print(NextBB) <<
"\n");
1360 Finalized.
insert(NextBB);
1361 appendBlock(*NextBB);
1367template <
typename ContextT>
1368void ModifiedPostOrder<ContextT>::computeCyclePO(
1369 const CycleInfoT &CI,
const CycleT *
Cycle,
1370 SmallPtrSetImpl<const BlockT *> &Finalized) {
1372 SmallVector<const BlockT *>
Stack;
1376 << CI.getSSAContext().print(CycleHeader) <<
"\n");
1377 assert(!Finalized.count(CycleHeader));
1378 Finalized.insert(CycleHeader);
1382 << CI.getSSAContext().print(CycleHeader) <<
"\n");
1387 LLVM_DEBUG(
dbgs() <<
" examine succ: " << CI.getSSAContext().print(BB)
1391 if (BB == CycleHeader)
1393 if (!Finalized.count(BB)) {
1394 LLVM_DEBUG(
dbgs() <<
" pushed succ: " << CI.getSSAContext().print(BB)
1396 Stack.push_back(BB);
1401 computeStackPO(Stack, CI,
Cycle, Finalized);
1407template <
typename ContextT>
1411 auto *
F = CI.getFunction();
1413 Stack.push_back(&
F->front());
1414 computeStackPO(Stack, CI,
nullptr, Finalized);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Given that RA is a live value
This file defines the DenseSet and SmallDenseSet classes.
DenseMap< Block *, BlockRelaxAux > Blocks
uint64_t IntrinsicInst * II
This file defines the SmallPtrSet class.
This file defines the SparseBitVector class.
unify loop Fixup each natural loop to have a single exit block
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Compute divergence starting with a divergent branch.
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
const ModifiedPO & CyclePOT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
typename ContextT::DominatorTreeT DominatorTreeT
bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel)
const BlockT & DivTermBlock
std::unique_ptr< DivergenceDescriptorT > DivDesc
void printDefs(raw_ostream &Out)
typename ContextT::FunctionT FunctionT
GenericCycleInfo< ContextT > CycleInfoT
const DominatorTreeT & DT
ModifiedPostOrder< ContextT > ModifiedPO
std::unique_ptr< DivergenceDescriptorT > computeJoinPoints()
BlockLabelMapT & BlockLabels
SparseBitVector FreshLabels
bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label)
typename ContextT::ValueRefT ValueRefT
typename ContextT::BlockT BlockT
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
typename CycleInfoT::CycleT CycleT
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
bool visitEdge(const BlockT &SuccBlock, const BlockT &Label)
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Cycle information for a function.
A possibly irreducible generalization of a Loop.
BlockT * getHeader() const
bool isReducible() const
Whether the cycle is a natural loop.
Printable print(const ContextT &Ctx) const
void getExitBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all of the successor blocks of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Locate join blocks for disjoint paths starting at a divergent branch.
GenericSyncDependenceAnalysis(const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
ModifiedPostOrder< ContextT > ModifiedPO
typename ContextT::DominatorTreeT DominatorTreeT
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::FunctionT FunctionT
typename ContextT::InstructionT InstructionT
typename ContextT::BlockT BlockT
typename ContextT::ValueRefT ValueRefT
typename CycleInfoT::CycleT CycleT
const DivergenceDescriptor & getJoinBlocks(const BlockT *DivTermBlock)
Computes divergent join points and cycle exits caused by branch divergence in Term.
Representation of each machine instruction.
Construct a specially modified post-order traversal of cycles.
typename ContextT::FunctionT FunctionT
const BlockT * operator[](size_t idx) const
typename CycleInfoT::CycleT CycleT
bool isReducibleCycleHeader(const BlockT *BB) const
ModifiedPostOrder(const ContextT &C)
unsigned count(BlockT *BB) const
void compute(const CycleInfoT &CI)
Generically compute the modified post order.
GenericCycleInfo< ContextT > CycleInfoT
void appendBlock(const BlockT &BB, bool isReducibleCycleHeader=false)
unsigned getIndex(const BlockT *BB) const
typename std::vector< BlockT * >::const_iterator const_iterator
typename ContextT::DominatorTreeT DominatorTreeT
typename ContextT::BlockT BlockT
Simple wrapper around std::function<void(raw_ostream&)>.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
static const CycleT * getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Return the outermost cycle made divergent by branch inside it.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
static const CycleT * getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock)
Return the outermost cycle made divergent by branch outside it.
auto successors(const MachineBasicBlock *BB)
static bool insertIfNotContained(SmallVector< CycleT * > &Cycles, CycleT *Candidate)
Add Candidate to Cycles if it is not already contained in Cycles.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto succ_size(const MachineBasicBlock *BB)
auto instrs(const MachineBasicBlock &BB)
static const CycleT * getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Information discovered by the sync dependence analysis for each divergent branch.
ConstBlockSet CycleDivBlocks
ConstBlockSet JoinDivBlocks
BlockLabelMap BlockLabels