LLVM 22.0.0git
StructurizeCFG.cpp
Go to the documentation of this file.
1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/MapVector.h"
14#include "llvm/ADT/STLExtras.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/Metadata.h"
32#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/IR/Value.h"
38#include "llvm/IR/ValueHandle.h"
40#include "llvm/Pass.h"
43#include "llvm/Support/Debug.h"
50#include <cassert>
51#include <utility>
52
53using namespace llvm;
54using namespace llvm::PatternMatch;
55
56#define DEBUG_TYPE "structurizecfg"
57
58// The name for newly created blocks.
59const char FlowBlockName[] = "Flow";
60
61namespace {
62
63static cl::opt<bool> ForceSkipUniformRegions(
64 "structurizecfg-skip-uniform-regions",
66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
67 cl::init(false));
68
69static cl::opt<bool>
70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
71 cl::desc("Allow relaxed uniform region checks"),
72 cl::init(true));
73
74// Definition of the complex types used in this pass.
75
76using BBValuePair = std::pair<BasicBlock *, Value *>;
77
78using RNVector = SmallVector<RegionNode *, 8>;
79using BBVector = SmallVector<BasicBlock *, 8>;
80using BranchVector = SmallVector<BranchInst *, 8>;
81using BBValueVector = SmallVector<BBValuePair, 2>;
82
84
86using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
87
88using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
89
90using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
91
92class CondBranchWeights {
93 uint32_t TrueWeight;
94 uint32_t FalseWeight;
95
96 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
97
98public:
99 static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
100 assert(Br.isConditional());
101
102 uint64_t T, F;
103 if (!extractBranchWeights(Br, T, F))
104 return std::nullopt;
105
106 return CondBranchWeights(T, F);
107 }
108
109 static void setMetadata(BranchInst &Br,
110 const MaybeCondBranchWeights &Weights) {
111 assert(Br.isConditional());
112 if (!Weights)
113 return;
114 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
115 setBranchWeights(Br, Arr, false);
116 }
117
118 CondBranchWeights invert() const {
119 return CondBranchWeights{FalseWeight, TrueWeight};
120 }
121};
122
123struct PredInfo {
124 Value *Pred;
125 MaybeCondBranchWeights Weights;
126};
127
131using Val2BBMap = DenseMap<Value *, BasicBlock *>;
132
133// A traits type that is intended to be used in graph algorithms. The graph
134// traits starts at an entry node, and traverses the RegionNodes that are in
135// the Nodes set.
136struct SubGraphTraits {
137 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
138 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
139
140 // This wraps a set of Nodes into the iterator, so we know which edges to
141 // filter out.
142 class WrappedSuccIterator
143 : public iterator_adaptor_base<
144 WrappedSuccIterator, BaseSuccIterator,
145 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
146 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
148
149 public:
150 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
151 : iterator_adaptor_base(It), Nodes(Nodes) {}
152
153 NodeRef operator*() const { return {*I, Nodes}; }
154 };
155
156 static bool filterAll(const NodeRef &N) { return true; }
157 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
158
159 using ChildIteratorType =
160 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
161
162 static NodeRef getEntryNode(Region *R) {
163 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
164 }
165
166 static NodeRef getEntryNode(NodeRef N) { return N; }
167
168 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
169 auto *filter = N.second ? &filterSet : &filterAll;
170 return make_filter_range(
173 {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
174 filter);
175 }
176
177 static ChildIteratorType child_begin(const NodeRef &N) {
178 return children(N).begin();
179 }
180
181 static ChildIteratorType child_end(const NodeRef &N) {
182 return children(N).end();
183 }
184};
185
186/// Finds the nearest common dominator of a set of BasicBlocks.
187///
188/// For every BB you add to the set, you can specify whether we "remember" the
189/// block. When you get the common dominator, you can also ask whether it's one
190/// of the blocks we remembered.
191class NearestCommonDominator {
192 DominatorTree *DT;
193 BasicBlock *Result = nullptr;
194 bool ResultIsRemembered = false;
195
196 /// Add BB to the resulting dominator.
197 void addBlock(BasicBlock *BB, bool Remember) {
198 if (!Result) {
199 Result = BB;
200 ResultIsRemembered = Remember;
201 return;
202 }
203
204 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
205 if (NewResult != Result)
206 ResultIsRemembered = false;
207 if (NewResult == BB)
208 ResultIsRemembered |= Remember;
209 Result = NewResult;
210 }
211
212public:
213 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
214
215 void addBlock(BasicBlock *BB) {
216 addBlock(BB, /* Remember = */ false);
217 }
218
219 void addAndRememberBlock(BasicBlock *BB) {
220 addBlock(BB, /* Remember = */ true);
221 }
222
223 /// Get the nearest common dominator of all the BBs added via addBlock() and
224 /// addAndRememberBlock().
225 BasicBlock *result() { return Result; }
226
227 /// Is the BB returned by getResult() one of the blocks we added to the set
228 /// with addAndRememberBlock()?
229 bool resultIsRememberedBlock() { return ResultIsRemembered; }
230};
231
232/// Transforms the control flow graph on one single entry/exit region
233/// at a time.
234///
235/// After the transform all "If"/"Then"/"Else" style control flow looks like
236/// this:
237///
238/// \verbatim
239/// 1
240/// ||
241/// | |
242/// 2 |
243/// | /
244/// |/
245/// 3
246/// || Where:
247/// | | 1 = "If" block, calculates the condition
248/// 4 | 2 = "Then" subregion, runs if the condition is true
249/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
250/// |/ 4 = "Else" optional subregion, runs if the condition is false
251/// 5 5 = "End" block, also rejoins the control flow
252/// \endverbatim
253///
254/// Control flow is expressed as a branch where the true exit goes into the
255/// "Then"/"Else" region, while the false exit skips the region
256/// The condition for the optional "Else" region is expressed as a PHI node.
257/// The incoming values of the PHI node are true for the "If" edge and false
258/// for the "Then" edge.
259///
260/// Additionally to that even complicated loops look like this:
261///
262/// \verbatim
263/// 1
264/// ||
265/// | |
266/// 2 ^ Where:
267/// | / 1 = "Entry" block
268/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
269/// 3 3 = "Flow" block, with back edge to entry block
270/// |
271/// \endverbatim
272///
273/// The back edge of the "Flow" block is always on the false side of the branch
274/// while the true side continues the general flow. So the loop condition
275/// consist of a network of PHI nodes where the true incoming values expresses
276/// breaks and the false values expresses continue states.
277
278class StructurizeCFG {
279 Type *Boolean;
280 ConstantInt *BoolTrue;
281 ConstantInt *BoolFalse;
282 Value *BoolPoison;
284 Function *Func;
285 Region *ParentRegion;
286
287 UniformityInfo *UA = nullptr;
288 DominatorTree *DT;
289
291 BBSet Visited;
292 BBSet FlowSet;
293
294 SmallVector<WeakVH, 8> AffectedPhis;
295 BBPhiMap DeletedPhis;
296 BB2BBVecMap AddedPhis;
297
298 PredMap Predicates;
299 BranchVector Conditions;
300
301 BB2BBMap Loops;
302 PredMap LoopPreds;
303 BranchVector LoopConds;
304
305 Val2BBMap HoistedValues;
306
307 RegionNode *PrevNode;
308
309 void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
310
311 bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
312 BasicBlock *HoistTo);
313
314 void orderNodes();
315
316 void analyzeLoops(RegionNode *N);
317
318 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
319
320 void gatherPredicates(RegionNode *N);
321
322 void collectInfos();
323
324 void insertConditions(bool Loops);
325
326 void simplifyConditions();
327
328 void delPhiValues(BasicBlock *From, BasicBlock *To);
329
330 void addPhiValues(BasicBlock *From, BasicBlock *To);
331
332 void findUndefBlocks(BasicBlock *PHIBlock,
333 const SmallPtrSet<BasicBlock *, 8> &Incomings,
334 SmallVector<BasicBlock *> &UndefBlks) const;
335
336 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
337 PHINode *B);
338
339 void setPhiValues();
340
341 void simplifyAffectedPhis();
342
343 void simplifyHoistedPhis();
344
345 DebugLoc killTerminator(BasicBlock *BB);
346
347 void changeExit(RegionNode *Node, BasicBlock *NewExit,
348 bool IncludeDominator);
349
350 BasicBlock *getNextFlow(BasicBlock *Dominator);
351
352 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty);
353
354 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
355
356 void setPrevNode(BasicBlock *BB);
357
358 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
359
360 bool isPredictableTrue(RegionNode *Node);
361
362 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
363
364 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
365
366 void createFlow();
367
368 void rebuildSSA();
369
370public:
371 void init(Region *R);
372 bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
373 bool makeUniformRegion(Region *R, UniformityInfo &UA);
374};
375
376class StructurizeCFGLegacyPass : public RegionPass {
377 bool SkipUniformRegions;
378
379public:
380 static char ID;
381
382 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
383 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
384 if (ForceSkipUniformRegions.getNumOccurrences())
385 SkipUniformRegions = ForceSkipUniformRegions.getValue();
387 }
388
389 bool runOnRegion(Region *R, RGPassManager &RGM) override {
390 StructurizeCFG SCFG;
391 SCFG.init(R);
392 if (SkipUniformRegions) {
393 UniformityInfo &UA =
394 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
395 if (SCFG.makeUniformRegion(R, UA))
396 return false;
397 }
398 Function *F = R->getEntry()->getParent();
399 const TargetTransformInfo *TTI =
400 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
401 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
402 return SCFG.run(R, DT, TTI);
403 }
404
405 StringRef getPassName() const override { return "Structurize control flow"; }
406
407 void getAnalysisUsage(AnalysisUsage &AU) const override {
408 if (SkipUniformRegions)
413
416 }
417};
418
419} // end anonymous namespace
420
421char StructurizeCFGLegacyPass::ID = 0;
422
423INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
424 "Structurize the CFG", false, false)
428INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
429 "Structurize the CFG", false, false)
430
431/// Checks whether an instruction is zero cost instruction and checks if the
432/// operands are from different BB. If so, this instruction can be coalesced
433/// if its hoisted to predecessor block. So, this returns true.
434bool StructurizeCFG::isHoistableInstruction(Instruction *I, BasicBlock *BB,
435 BasicBlock *HoistTo) {
436 if (I->getParent() != BB || isa<PHINode>(I))
437 return false;
438
439 // If the instruction is not a zero cost instruction, return false.
440 auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
442 Cost.isValid()
443 ? Cost.getValue()
445 if (CostVal != 0)
446 return false;
447
448 // Check if all operands are available at the hoisting destination.
449 for (auto &Op : I->operands()) {
450 if (auto *OpI = dyn_cast<Instruction>(Op)) {
451 // Operand must dominate the hoisting destination.
452 if (!DT->dominates(OpI->getParent(), HoistTo))
453 return false;
454 }
455 }
456
457 return true;
458}
459
460/// Structurization can introduce unnecessary VGPR copies due to register
461/// coalescing interference. For example, if the Else block has a zero-cost
462/// instruction and the Then block modifies the VGPR value, only one value is
463/// live at a time in merge block before structurization. After structurization,
464/// the coalescer may incorrectly treat the Then value as live in the Else block
465/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
466///
467/// This function examines phi nodes whose incoming values are zero-cost
468/// instructions in the Else block. It identifies such values that can be safely
469/// hoisted and moves them to the nearest common dominator of Then and Else
470/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
471/// value to poison phi nodes along the if→flow edge, aiding register coalescing
472/// and minimizing unnecessary live ranges.
473void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
474 BasicBlock *ThenBB) {
475
476 BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
477 BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
478
479 if (!ElseSucc || !CommonDominator)
480 return;
481 Instruction *Term = CommonDominator->getTerminator();
482 for (PHINode &Phi : ElseSucc->phis()) {
483 Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
484 auto *Inst = dyn_cast<Instruction>(ElseVal);
485 if (!Inst || !isHoistableInstruction(Inst, ElseBB, CommonDominator))
486 continue;
487 Inst->removeFromParent();
488 Inst->insertInto(CommonDominator, Term->getIterator());
489 HoistedValues[Inst] = CommonDominator;
490 }
491}
492
493/// Build up the general order of nodes, by performing a topological sort of the
494/// parent region's nodes, while ensuring that there is no outer cycle node
495/// between any two inner cycle nodes.
496void StructurizeCFG::orderNodes() {
497 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
498 GraphTraits<Region *>::nodes_end(ParentRegion)));
499 if (Order.empty())
500 return;
501
502 SmallDenseSet<RegionNode *> Nodes;
503 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
504
505 // A list of range indices of SCCs in Order, to be processed.
507 unsigned I = 0, E = Order.size();
508 while (true) {
509 // Run through all the SCCs in the subgraph starting with Entry.
510 for (auto SCCI =
512 EntryNode);
513 !SCCI.isAtEnd(); ++SCCI) {
514 auto &SCC = *SCCI;
515
516 // An SCC up to the size of 2, can be reduced to an entry (the last node),
517 // and a possible additional node. Therefore, it is already in order, and
518 // there is no need to add it to the work-list.
519 unsigned Size = SCC.size();
520 if (Size > 2)
521 WorkList.emplace_back(I, I + Size);
522
523 // Add the SCC nodes to the Order array.
524 for (const auto &N : SCC) {
525 assert(I < E && "SCC size mismatch!");
526 Order[I++] = N.first;
527 }
528 }
529 assert(I == E && "SCC size mismatch!");
530
531 // If there are no more SCCs to order, then we are done.
532 if (WorkList.empty())
533 break;
534
535 std::tie(I, E) = WorkList.pop_back_val();
536
537 // Collect the set of nodes in the SCC's subgraph. These are only the
538 // possible child nodes; we do not add the entry (last node) otherwise we
539 // will have the same exact SCC all over again.
540 Nodes.clear();
541 Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
542
543 // Update the entry node.
544 EntryNode.first = Order[E - 1];
545 EntryNode.second = &Nodes;
546 }
547}
548
549/// Determine the end of the loops
550void StructurizeCFG::analyzeLoops(RegionNode *N) {
551 if (N->isSubRegion()) {
552 // Test for exit as back edge
553 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
554 if (Visited.count(Exit))
555 Loops[Exit] = N->getEntry();
556
557 } else {
558 // Test for successors as back edge
559 BasicBlock *BB = N->getNodeAs<BasicBlock>();
560 BranchInst *Term = cast<BranchInst>(BB->getTerminator());
561
562 for (BasicBlock *Succ : Term->successors())
563 if (Visited.count(Succ))
564 Loops[Succ] = BB;
565 }
566}
567
568/// Build the condition for one edge
569PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
570 bool Invert) {
571 Value *Cond = Invert ? BoolFalse : BoolTrue;
572 MaybeCondBranchWeights Weights;
573
574 if (Term->isConditional()) {
575 Cond = Term->getCondition();
576 Weights = CondBranchWeights::tryParse(*Term);
577
578 if (Idx != (unsigned)Invert) {
580 if (Weights)
581 Weights = Weights->invert();
582 }
583 }
584 return {Cond, Weights};
585}
586
587/// Analyze the predecessors of each block and build up predicates
588void StructurizeCFG::gatherPredicates(RegionNode *N) {
589 RegionInfo *RI = ParentRegion->getRegionInfo();
590 BasicBlock *BB = N->getEntry();
591 BBPredicates &Pred = Predicates[BB];
592 BBPredicates &LPred = LoopPreds[BB];
593
594 for (BasicBlock *P : predecessors(BB)) {
595 // Ignore it if it's a branch from outside into our region entry
596 if (!ParentRegion->contains(P))
597 continue;
598
599 Region *R = RI->getRegionFor(P);
600 if (R == ParentRegion) {
601 // It's a top level block in our region
602 BranchInst *Term = cast<BranchInst>(P->getTerminator());
603 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
604 BasicBlock *Succ = Term->getSuccessor(i);
605 if (Succ != BB)
606 continue;
607
608 if (Visited.count(P)) {
609 // Normal forward edge
610 if (Term->isConditional()) {
611 // Try to treat it like an ELSE block
612 BasicBlock *Other = Term->getSuccessor(!i);
613 if (Visited.count(Other) && !Loops.count(Other) &&
614 !Pred.count(Other) && !Pred.count(P)) {
615 hoistZeroCostElseBlockPhiValues(Succ, Other);
616 Pred[Other] = {BoolFalse, std::nullopt};
617 Pred[P] = {BoolTrue, std::nullopt};
618 continue;
619 }
620 }
621 Pred[P] = buildCondition(Term, i, false);
622 } else {
623 // Back edge
624 LPred[P] = buildCondition(Term, i, true);
625 }
626 }
627 } else {
628 // It's an exit from a sub region
629 while (R->getParent() != ParentRegion)
630 R = R->getParent();
631
632 // Edge from inside a subregion to its entry, ignore it
633 if (*R == *N)
634 continue;
635
636 BasicBlock *Entry = R->getEntry();
637 if (Visited.count(Entry))
638 Pred[Entry] = {BoolTrue, std::nullopt};
639 else
640 LPred[Entry] = {BoolFalse, std::nullopt};
641 }
642 }
643}
644
645/// Collect various loop and predicate infos
646void StructurizeCFG::collectInfos() {
647 // Reset predicate
648 Predicates.clear();
649
650 // and loop infos
651 Loops.clear();
652 LoopPreds.clear();
653
654 // Reset the visited nodes
655 Visited.clear();
656
657 for (RegionNode *RN : reverse(Order)) {
658 LLVM_DEBUG(dbgs() << "Visiting: "
659 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
660 << RN->getEntry()->getName() << "\n");
661
662 // Analyze all the conditions leading to a node
663 gatherPredicates(RN);
664
665 // Remember that we've seen this node
666 Visited.insert(RN->getEntry());
667
668 // Find the last back edges
669 analyzeLoops(RN);
670 }
671}
672
673/// Insert the missing branch conditions
674void StructurizeCFG::insertConditions(bool Loops) {
675 BranchVector &Conds = Loops ? LoopConds : Conditions;
676 Value *Default = Loops ? BoolTrue : BoolFalse;
677 SSAUpdater PhiInserter;
678
679 for (BranchInst *Term : Conds) {
680 assert(Term->isConditional());
681
682 BasicBlock *Parent = Term->getParent();
683 BasicBlock *SuccTrue = Term->getSuccessor(0);
684 BasicBlock *SuccFalse = Term->getSuccessor(1);
685
686 PhiInserter.Initialize(Boolean, "");
687 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
688
689 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
690
691 NearestCommonDominator Dominator(DT);
692 Dominator.addBlock(Parent);
693
694 PredInfo ParentInfo{nullptr, std::nullopt};
695 for (auto [BB, PI] : Preds) {
696 if (BB == Parent) {
697 ParentInfo = PI;
698 break;
699 }
700 PhiInserter.AddAvailableValue(BB, PI.Pred);
701 Dominator.addAndRememberBlock(BB);
702 }
703
704 if (ParentInfo.Pred) {
705 Term->setCondition(ParentInfo.Pred);
706 CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
707 } else {
708 if (!Dominator.resultIsRememberedBlock())
709 PhiInserter.AddAvailableValue(Dominator.result(), Default);
710
711 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
712 }
713 }
714}
715
716/// Simplify any inverted conditions that were built by buildConditions.
717void StructurizeCFG::simplifyConditions() {
718 SmallVector<Instruction *> InstToErase;
719 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
720 auto &Preds = I.second;
721 for (auto [BB, PI] : Preds) {
722 Instruction *Inverted;
723 if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
724 !PI.Pred->use_empty()) {
725 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
726 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
727 PI.Pred->replaceAllUsesWith(InvertedCmp);
728 InstToErase.push_back(cast<Instruction>(PI.Pred));
729 }
730 }
731 }
732 }
733 for (auto *I : InstToErase)
734 I->eraseFromParent();
735}
736
737/// Remove all PHI values coming from "From" into "To" and remember
738/// them in DeletedPhis
739void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
740 PhiMap &Map = DeletedPhis[To];
741 for (PHINode &Phi : To->phis()) {
742 bool Recorded = false;
743 while (Phi.getBasicBlockIndex(From) != -1) {
744 Value *Deleted = Phi.removeIncomingValue(From, false);
745 Map[&Phi].push_back(std::make_pair(From, Deleted));
746 if (!Recorded) {
747 AffectedPhis.push_back(&Phi);
748 Recorded = true;
749 }
750 }
751 }
752}
753
754/// Add a dummy PHI value as soon as we knew the new predecessor
755void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
756 for (PHINode &Phi : To->phis()) {
757 Value *Poison = PoisonValue::get(Phi.getType());
758 Phi.addIncoming(Poison, From);
759 }
760 AddedPhis[To].push_back(From);
761}
762
763/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
764/// from predecessors \p Incomings, we have a chance to mark the available value
765/// from some blocks as undefined. The function will find out all such blocks
766/// and return in \p UndefBlks.
767void StructurizeCFG::findUndefBlocks(
768 BasicBlock *PHIBlock, const SmallPtrSet<BasicBlock *, 8> &Incomings,
769 SmallVector<BasicBlock *> &UndefBlks) const {
770 // We may get a post-structured CFG like below:
771 //
772 // | P1
773 // |/
774 // F1
775 // |\
776 // | N
777 // |/
778 // F2
779 // |\
780 // | P2
781 // |/
782 // F3
783 // |\
784 // B
785 //
786 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
787 // of B before structurization. F1/F2/F3 are flow blocks inserted during
788 // structurization process. Block N is not a predecessor of B before
789 // structurization, but are placed between the predecessors(P1/P2) of B after
790 // structurization. This usually means that threads went to N never take the
791 // path N->F2->F3->B. For example, the threads take the branch F1->N may
792 // always take the branch F2->P2. So, when we are reconstructing a PHI
793 // originally in B, we can safely say the incoming value from N is undefined.
794 SmallPtrSet<BasicBlock *, 8> VisitedBlock;
795 SmallVector<BasicBlock *, 8> Stack;
796 if (PHIBlock == ParentRegion->getExit()) {
797 for (auto P : predecessors(PHIBlock)) {
798 if (ParentRegion->contains(P))
799 Stack.push_back(P);
800 }
801 } else {
802 append_range(Stack, predecessors(PHIBlock));
803 }
804
805 // Do a backward traversal over the CFG, and stop further searching if
806 // the block is not a Flow. If a block is neither flow block nor the
807 // incoming predecessor, then the incoming value from the block is
808 // undefined value for the PHI being reconstructed.
809 while (!Stack.empty()) {
810 BasicBlock *Current = Stack.pop_back_val();
811 if (!VisitedBlock.insert(Current).second)
812 continue;
813
814 if (FlowSet.contains(Current))
815 llvm::append_range(Stack, predecessors(Current));
816 else if (!Incomings.contains(Current))
817 UndefBlks.push_back(Current);
818 }
819}
820
821// If two phi nodes have compatible incoming values (for each
822// incoming block, either they have the same incoming value or only one phi
823// node has an incoming value), let them share the merged incoming values. The
824// merge process is guided by the equivalence information from \p PhiClasses.
825// The function will possibly update the incoming values of leader phi in
826// DeletedPhis.
827void StructurizeCFG::mergeIfCompatible(
828 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
829 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A));
830 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B));
831 // They are already in the same class, no work needed.
832 if (ItA == ItB)
833 return;
834
835 PHINode *LeaderA = *ItA;
836 PHINode *LeaderB = *ItB;
837 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
838 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
839
840 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
841 for (auto [BB, V] : IncomingB) {
842 auto BBIt = Mergeable.find(BB);
843 if (BBIt != Mergeable.end() && BBIt->second != V)
844 return;
845 // Either IncomingA does not have this value or IncomingA has the same
846 // value.
847 Mergeable.insert({BB, V});
848 }
849
850 // Update the incoming value of leaderA.
851 IncomingA.assign(Mergeable.begin(), Mergeable.end());
852 PhiClasses.unionSets(ItA, ItB);
853}
854
855/// Add the real PHI value as soon as everything is set up
856void StructurizeCFG::setPhiValues() {
857 SmallVector<PHINode *, 8> InsertedPhis;
858 SSAUpdater Updater(&InsertedPhis);
859 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
860
861 // Find phi nodes that have compatible incoming values (either they have
862 // the same value for the same block or only one phi node has an incoming
863 // value, see example below). We only search again the phi's that are
864 // referenced by another phi, which is the case we care about.
865 //
866 // For example (-- means no incoming value):
867 // phi1 : BB1:phi2 BB2:v BB3:--
868 // phi2: BB1:-- BB2:v BB3:w
869 //
870 // Then we can merge these incoming values and let phi1, phi2 use the
871 // same set of incoming values:
872 //
873 // phi1&phi2: BB1:phi2 BB2:v BB3:w
874 //
875 // By doing this, phi1 and phi2 would share more intermediate phi nodes.
876 // This would help reduce the number of phi nodes during SSA reconstruction
877 // and ultimately result in fewer COPY instructions.
878 //
879 // This should be correct, because if a phi node does not have incoming
880 // value from certain block, this means the block is not the predecessor
881 // of the parent block, so we actually don't care about its incoming value.
882 EquivalenceClasses<PHINode *> PhiClasses;
883 for (const auto &[To, From] : AddedPhis) {
884 auto OldPhiIt = DeletedPhis.find(To);
885 if (OldPhiIt == DeletedPhis.end())
886 continue;
887
888 PhiMap &BlkPhis = OldPhiIt->second;
889 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
890 SmallPtrSet<BasicBlock *, 8> Incomings;
891
892 // Get the undefined blocks shared by all the phi nodes.
893 if (!BlkPhis.empty()) {
894 Incomings.insert_range(llvm::make_first_range(BlkPhis.front().second));
895 findUndefBlocks(To, Incomings, UndefBlks);
896 }
897
898 for (const auto &[Phi, Incomings] : OldPhiIt->second) {
899 SmallVector<PHINode *> IncomingPHIs;
900 for (const auto &[BB, V] : Incomings) {
901 // First, for each phi, check whether it has incoming value which is
902 // another phi.
903 if (PHINode *P = dyn_cast<PHINode>(V))
904 IncomingPHIs.push_back(P);
905 }
906
907 for (auto *OtherPhi : IncomingPHIs) {
908 // Skip phis that are unrelated to the phi reconstruction for now.
909 if (!DeletedPhis.contains(OtherPhi->getParent()))
910 continue;
911 mergeIfCompatible(PhiClasses, Phi, OtherPhi);
912 }
913 }
914 }
915
916 for (const auto &AddedPhi : AddedPhis) {
917 BasicBlock *To = AddedPhi.first;
918 const BBVector &From = AddedPhi.second;
919
920 auto It = DeletedPhis.find(To);
921 if (It == DeletedPhis.end())
922 continue;
923
924 PhiMap &Map = It->second;
925 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
926 for (const auto &[Phi, Incoming] : Map) {
927 Value *Poison = PoisonValue::get(Phi->getType());
928 Updater.Initialize(Phi->getType(), "");
929 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
930 Updater.AddAvailableValue(To, Poison);
931
932 // Use leader phi's incoming if there is.
933 auto LeaderIt = PhiClasses.findLeader(Phi);
934 bool UseIncomingOfLeader =
935 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
936 const auto &IncomingMap =
937 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
938 : Incoming;
939
940 SmallVector<BasicBlock *> ConstantPreds;
941 for (const auto &[BB, V] : IncomingMap) {
942 Updater.AddAvailableValue(BB, V);
943 if (isa<Constant>(V))
944 ConstantPreds.push_back(BB);
945 }
946
947 for (auto UB : UndefBlks) {
948 // If this undef block is dominated by any predecessor(before
949 // structurization) of reconstructed PHI with constant incoming value,
950 // don't mark the available value as undefined. Setting undef to such
951 // block will stop us from getting optimal phi insertion.
952 if (any_of(ConstantPreds,
953 [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
954 continue;
955 // Maybe already get a value through sharing with other phi nodes.
956 if (Updater.HasValueForBlock(UB))
957 continue;
958
959 Updater.AddAvailableValue(UB, Poison);
960 }
961
962 for (BasicBlock *FI : From)
963 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
964 AffectedPhis.push_back(Phi);
965 }
966 }
967
968 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
969}
970
971/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
972/// entries on Flow nodes with the appropriate hoisted values
973void StructurizeCFG::simplifyHoistedPhis() {
974 for (WeakVH VH : AffectedPhis) {
975 PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
976 if (!Phi || Phi->getNumIncomingValues() != 2)
977 continue;
978
979 for (int i = 0; i < 2; i++) {
980 Value *V = Phi->getIncomingValue(i);
981 auto BBIt = HoistedValues.find(V);
982
983 if (BBIt == HoistedValues.end())
984 continue;
985
986 Value *OtherV = Phi->getIncomingValue(!i);
987 PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
988 if (!OtherPhi)
989 continue;
990
991 int PoisonValBBIdx = -1;
992 for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
993 if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
994 continue;
995 PoisonValBBIdx = i;
996 break;
997 }
998 if (PoisonValBBIdx == -1 ||
999 !DT->dominates(BBIt->second,
1000 OtherPhi->getIncomingBlock(PoisonValBBIdx)))
1001 continue;
1002
1003 OtherPhi->setIncomingValue(PoisonValBBIdx, V);
1004 if (DT->dominates(OtherV, Phi))
1005 Phi->setIncomingValue(i, OtherV);
1006 }
1007 }
1008}
1009
1010void StructurizeCFG::simplifyAffectedPhis() {
1011 bool Changed;
1012 do {
1013 Changed = false;
1014 SimplifyQuery Q(Func->getDataLayout());
1015 Q.DT = DT;
1016 // Setting CanUseUndef to true might extend value liveness, set it to false
1017 // to achieve better register pressure.
1018 Q.CanUseUndef = false;
1019 for (WeakVH VH : AffectedPhis) {
1020 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
1021 if (auto NewValue = simplifyInstruction(Phi, Q)) {
1022 Phi->replaceAllUsesWith(NewValue);
1023 Phi->eraseFromParent();
1024 Changed = true;
1025 }
1026 }
1027 }
1028 } while (Changed);
1029}
1030
1031/// Remove phi values from all successors and then remove the terminator.
1032DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) {
1034 if (!Term)
1035 return DebugLoc();
1036
1037 for (BasicBlock *Succ : successors(BB))
1038 delPhiValues(BB, Succ);
1039
1040 DebugLoc DL = Term->getDebugLoc();
1041 Term->eraseFromParent();
1042 return DL;
1043}
1044
1045/// Let node exit(s) point to NewExit
1046void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
1047 bool IncludeDominator) {
1048 if (Node->isSubRegion()) {
1049 Region *SubRegion = Node->getNodeAs<Region>();
1050 BasicBlock *OldExit = SubRegion->getExit();
1051 BasicBlock *Dominator = nullptr;
1052
1053 // Find all the edges from the sub region to the exit.
1054 // We use make_early_inc_range here because we modify BB's terminator.
1055 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
1056 if (!SubRegion->contains(BB))
1057 continue;
1058
1059 // Modify the edges to point to the new exit
1060 delPhiValues(BB, OldExit);
1061 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
1062 addPhiValues(BB, NewExit);
1063
1064 // Find the new dominator (if requested)
1065 if (IncludeDominator) {
1066 if (!Dominator)
1067 Dominator = BB;
1068 else
1069 Dominator = DT->findNearestCommonDominator(Dominator, BB);
1070 }
1071 }
1072
1073 // Change the dominator (if requested)
1074 if (Dominator)
1075 DT->changeImmediateDominator(NewExit, Dominator);
1076
1077 // Update the region info
1078 SubRegion->replaceExit(NewExit);
1079 } else {
1080 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
1081 DebugLoc DL = killTerminator(BB);
1082 BranchInst *Br = BranchInst::Create(NewExit, BB);
1083 Br->setDebugLoc(DL);
1084 addPhiValues(BB, NewExit);
1085 if (IncludeDominator)
1086 DT->changeImmediateDominator(NewExit, BB);
1087 }
1088}
1089
1090/// Create a new flow node and update dominator tree and region info
1091BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
1092 LLVMContext &Context = Func->getContext();
1093 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
1094 Order.back()->getEntry();
1096 Func, Insert);
1097 FlowSet.insert(Flow);
1098 DT->addNewBlock(Flow, Dominator);
1099 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
1100 return Flow;
1101}
1102
1103/// Create a new or reuse the previous node as flow node. Returns a block and a
1104/// debug location to be used for new instructions in that block.
1105std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) {
1106 BasicBlock *Entry = PrevNode->getEntry();
1107
1108 if (!PrevNode->isSubRegion()) {
1109 DebugLoc DL = killTerminator(Entry);
1110 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1111 return {Entry, DL};
1112 }
1113
1114 // create a new flow node
1115 BasicBlock *Flow = getNextFlow(Entry);
1116
1117 // and wire it up
1118 changeExit(PrevNode, Flow, true);
1119 PrevNode = ParentRegion->getBBNode(Flow);
1120 return {Flow, DebugLoc()};
1121}
1122
1123/// Returns the region exit if possible, otherwise just a new flow node
1124BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1125 bool ExitUseAllowed) {
1126 if (!Order.empty() || !ExitUseAllowed)
1127 return getNextFlow(Flow);
1128
1129 BasicBlock *Exit = ParentRegion->getExit();
1130 DT->changeImmediateDominator(Exit, Flow);
1131 addPhiValues(Flow, Exit);
1132 return Exit;
1133}
1134
1135/// Set the previous node
1136void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1137 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1138 : nullptr;
1139}
1140
1141/// Does BB dominate all the predicates of Node?
1142bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1143 BBPredicates &Preds = Predicates[Node->getEntry()];
1144 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) {
1145 return DT->dominates(BB, Pred.first);
1146 });
1147}
1148
1149/// Can we predict that this node will always be called?
1150bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1151 BBPredicates &Preds = Predicates[Node->getEntry()];
1152 bool Dominated = false;
1153
1154 // Regionentry is always true
1155 if (!PrevNode)
1156 return true;
1157
1158 for (auto [BB, PI] : Preds) {
1159 if (PI.Pred != BoolTrue)
1160 return false;
1161
1162 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
1163 Dominated = true;
1164 }
1165
1166 // TODO: The dominator check is too strict
1167 return Dominated;
1168}
1169
1170/// Take one node from the order vector and wire it up
1171void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1172 BasicBlock *LoopEnd) {
1173 RegionNode *Node = Order.pop_back_val();
1174 Visited.insert(Node->getEntry());
1175
1176 if (isPredictableTrue(Node)) {
1177 // Just a linear flow
1178 if (PrevNode) {
1179 changeExit(PrevNode, Node->getEntry(), true);
1180 }
1181 PrevNode = Node;
1182 } else {
1183 // Insert extra prefix node (or reuse last one)
1184 auto [Flow, DL] = needPrefix(false);
1185
1186 // Insert extra postfix node (or use exit instead)
1187 BasicBlock *Entry = Node->getEntry();
1188 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1189
1190 // let it point to entry and next block
1191 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
1192 Br->setDebugLoc(DL);
1193 Conditions.push_back(Br);
1194 addPhiValues(Flow, Entry);
1195 DT->changeImmediateDominator(Entry, Flow);
1196
1197 PrevNode = Node;
1198 while (!Order.empty() && !Visited.count(LoopEnd) &&
1199 dominatesPredicates(Entry, Order.back())) {
1200 handleLoops(false, LoopEnd);
1201 }
1202
1203 changeExit(PrevNode, Next, false);
1204 setPrevNode(Next);
1205 }
1206}
1207
1208void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1209 BasicBlock *LoopEnd) {
1210 RegionNode *Node = Order.back();
1211 BasicBlock *LoopStart = Node->getEntry();
1212
1213 if (!Loops.count(LoopStart)) {
1214 wireFlow(ExitUseAllowed, LoopEnd);
1215 return;
1216 }
1217
1218 if (!isPredictableTrue(Node))
1219 LoopStart = needPrefix(true).first;
1220
1221 LoopEnd = Loops[Node->getEntry()];
1222 wireFlow(false, LoopEnd);
1223 while (!Visited.count(LoopEnd)) {
1224 handleLoops(false, LoopEnd);
1225 }
1226
1227 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1228
1229 // Create an extra loop end node
1230 DebugLoc DL;
1231 std::tie(LoopEnd, DL) = needPrefix(false);
1232 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1233 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1234 Br->setDebugLoc(DL);
1235 LoopConds.push_back(Br);
1236 addPhiValues(LoopEnd, LoopStart);
1237 setPrevNode(Next);
1238}
1239
1240/// After this function control flow looks like it should be, but
1241/// branches and PHI nodes only have undefined conditions.
1242void StructurizeCFG::createFlow() {
1243 BasicBlock *Exit = ParentRegion->getExit();
1244 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1245
1246 AffectedPhis.clear();
1247 DeletedPhis.clear();
1248 AddedPhis.clear();
1249 Conditions.clear();
1250 LoopConds.clear();
1251
1252 PrevNode = nullptr;
1253 Visited.clear();
1254
1255 while (!Order.empty()) {
1256 handleLoops(EntryDominatesExit, nullptr);
1257 }
1258
1259 if (PrevNode)
1260 changeExit(PrevNode, Exit, EntryDominatesExit);
1261 else
1262 assert(EntryDominatesExit);
1263}
1264
1265/// Handle a rare case where the disintegrated nodes instructions
1266/// no longer dominate all their uses. Not sure if this is really necessary
1267void StructurizeCFG::rebuildSSA() {
1268 SSAUpdater Updater;
1269 for (BasicBlock *BB : ParentRegion->blocks())
1270 for (Instruction &I : *BB) {
1271 bool Initialized = false;
1272 // We may modify the use list as we iterate over it, so we use
1273 // make_early_inc_range.
1274 for (Use &U : llvm::make_early_inc_range(I.uses())) {
1275 Instruction *User = cast<Instruction>(U.getUser());
1276 if (User->getParent() == BB) {
1277 continue;
1278 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1279 if (UserPN->getIncomingBlock(U) == BB)
1280 continue;
1281 }
1282
1283 if (DT->dominates(&I, User))
1284 continue;
1285
1286 if (!Initialized) {
1287 Value *Poison = PoisonValue::get(I.getType());
1288 Updater.Initialize(I.getType(), "");
1289 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
1290 Updater.AddAvailableValue(BB, &I);
1291 Initialized = true;
1292 }
1293 Updater.RewriteUseAfterInsertions(U);
1294 }
1295 }
1296}
1297
1298static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1299 const UniformityInfo &UA) {
1300 // Bool for if all sub-regions are uniform.
1301 bool SubRegionsAreUniform = true;
1302 // Count of how many direct children are conditional.
1303 unsigned ConditionalDirectChildren = 0;
1304
1305 for (auto *E : R->elements()) {
1306 if (!E->isSubRegion()) {
1307 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1308 if (!Br || !Br->isConditional())
1309 continue;
1310
1311 if (!UA.isUniform(Br))
1312 return false;
1313
1314 // One of our direct children is conditional.
1315 ConditionalDirectChildren++;
1316
1317 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1318 << " has uniform terminator\n");
1319 } else {
1320 // Explicitly refuse to treat regions as uniform if they have non-uniform
1321 // subregions. We cannot rely on UniformityAnalysis for branches in
1322 // subregions because those branches may have been removed and re-created,
1323 // so we look for our metadata instead.
1324 //
1325 // Warning: It would be nice to treat regions as uniform based only on
1326 // their direct child basic blocks' terminators, regardless of whether
1327 // subregions are uniform or not. However, this requires a very careful
1328 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1329 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1330 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1331 if (!Br || !Br->isConditional())
1332 continue;
1333
1334 if (!Br->getMetadata(UniformMDKindID)) {
1335 // Early exit if we cannot have relaxed uniform regions.
1336 if (!RelaxedUniformRegions)
1337 return false;
1338
1339 SubRegionsAreUniform = false;
1340 break;
1341 }
1342 }
1343 }
1344 }
1345
1346 // Our region is uniform if:
1347 // 1. All conditional branches that are direct children are uniform (checked
1348 // above).
1349 // 2. And either:
1350 // a. All sub-regions are uniform.
1351 // b. There is one or less conditional branches among the direct children.
1352 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1353}
1354
1355void StructurizeCFG::init(Region *R) {
1356 LLVMContext &Context = R->getEntry()->getContext();
1357
1358 Boolean = Type::getInt1Ty(Context);
1359 BoolTrue = ConstantInt::getTrue(Context);
1360 BoolFalse = ConstantInt::getFalse(Context);
1361 BoolPoison = PoisonValue::get(Boolean);
1362
1363 this->UA = nullptr;
1364}
1365
1366bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1367 if (R->isTopLevelRegion())
1368 return false;
1369
1370 this->UA = &UA;
1371
1372 // TODO: We could probably be smarter here with how we handle sub-regions.
1373 // We currently rely on the fact that metadata is set by earlier invocations
1374 // of the pass on sub-regions, and that this metadata doesn't get lost --
1375 // but we shouldn't rely on metadata for correctness!
1376 unsigned UniformMDKindID =
1377 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1378
1379 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1380 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1381 << '\n');
1382
1383 // Mark all direct child block terminators as having been treated as
1384 // uniform. To account for a possible future in which non-uniform
1385 // sub-regions are treated more cleverly, indirect children are not
1386 // marked as uniform.
1387 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1388 for (RegionNode *E : R->elements()) {
1389 if (E->isSubRegion())
1390 continue;
1391
1392 if (Instruction *Term = E->getEntry()->getTerminator())
1393 Term->setMetadata(UniformMDKindID, MD);
1394 }
1395
1396 return true;
1397 }
1398 return false;
1399}
1400
1401/// Run the transformation for each region found
1402bool StructurizeCFG::run(Region *R, DominatorTree *DT,
1403 const TargetTransformInfo *TTI) {
1404 if (R->isTopLevelRegion())
1405 return false;
1406
1407 this->DT = DT;
1408 this->TTI = TTI;
1409 Func = R->getEntry()->getParent();
1410 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1411
1412 ParentRegion = R;
1413
1414 orderNodes();
1415 collectInfos();
1416 createFlow();
1417 insertConditions(false);
1418 insertConditions(true);
1419 setPhiValues();
1420 simplifyHoistedPhis();
1421 simplifyConditions();
1422 simplifyAffectedPhis();
1423 rebuildSSA();
1424
1425 // Cleanup
1426 Order.clear();
1427 Visited.clear();
1428 DeletedPhis.clear();
1429 AddedPhis.clear();
1430 Predicates.clear();
1431 Conditions.clear();
1432 Loops.clear();
1433 LoopPreds.clear();
1434 LoopConds.clear();
1435 FlowSet.clear();
1436
1437 return true;
1438}
1439
1440Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1441 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1442}
1443
1444static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1445 Regions.push_back(&R);
1446 for (const auto &E : R)
1447 addRegionIntoQueue(*E, Regions);
1448}
1449
1451 : SkipUniformRegions(SkipUniformRegions_) {
1452 if (ForceSkipUniformRegions.getNumOccurrences())
1453 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1454}
1455
1457 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1459 OS, MapClassName2PassName);
1460 if (SkipUniformRegions)
1461 OS << "<skip-uniform-regions>";
1462}
1463
1466
1467 bool Changed = false;
1469 auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1471 UniformityInfo *UI = nullptr;
1472 if (SkipUniformRegions)
1474
1475 std::vector<Region *> Regions;
1476 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1477 while (!Regions.empty()) {
1478 Region *R = Regions.back();
1479 Regions.pop_back();
1480
1481 StructurizeCFG SCFG;
1482 SCFG.init(R);
1483
1484 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1485 Changed = true; // May have added metadata.
1486 continue;
1487 }
1488
1489 Changed |= SCFG.run(R, DT, TTI);
1490 }
1491 if (!Changed)
1492 return PreservedAnalyses::all();
1495 return PA;
1496}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DenseMap< BasicBlock *, Instruction * > BBPredicates
This file defines the DenseMap class.
@ Default
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
#define T
#define P(N)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static void addRegionIntoQueue(Region &R, std::deque< Region * > &RQ)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
Annotate SI Control Flow
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
SmallDenseMap< const PHINode *, PhiInfo, 16 > PhiMap
const char FlowBlockName[]
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, const UniformityInfo &UA)
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
LLVM IR instance of the generic uniformity analysis.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:107
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:161
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
const ECValue & insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
member_iterator member_end() const
member_iterator findLeader(const ElemTy &V) const
findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
const BasicBlock & getEntryBlock() const
Definition Function.h:807
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isUniform(ConstValueRefT V) const
Whether V is uniform/non-divergent.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
The pass manager to schedule RegionPasses.
Definition RegionPass.h:88
void replaceExit(BlockT *BB)
Replace the exit basic block of the region with the new basic block.
block_range blocks()
Returns a range view of the basic blocks in the region.
Definition RegionInfo.h:620
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition RegionInfo.h:357
RegionNodeT * getBBNode(BlockT *BB) const
Get the BasicBlock RegionNode for a BasicBlock.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionInfoT * getRegionInfo() const
Return the RegionInfo object, that belongs to this Region.
Definition RegionInfo.h:424
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition RegionInfo.h:965
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
bool isSubRegion() const
Is this RegionNode a subregion?
Definition RegionInfo.h:186
BlockT * getEntry() const
Get the entry BasicBlock of this RegionNode.
Definition RegionInfo.h:172
A pass that runs on each Region in a function.
Definition RegionPass.h:33
void RewriteUseAfterInsertions(Use &U)
Rewrite a use like RewriteUse but handling in-block definitions.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:281
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
LLVM Value Representation.
Definition Value.h:75
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
CRTP base class for adapting an iterator to a different type.
Definition iterator.h:237
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static scc_iterator begin(const GraphT &G)
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Changed
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< SSAContext > UniformityInfo
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2235
LLVM_ABI bool hasOnlySimpleTerminator(const Function &F)
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1152
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:552
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
unsigned char Boolean
Definition ConvertUTF.h:132
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto predecessors(const MachineBasicBlock *BB)
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Value * invertCondition(Value *Condition)
Invert the given true/false value, possibly reusing an existing copy.
Definition Local.cpp:3926
filter_iterator_impl< WrappedIteratorT, PredicateT, detail::fwd_or_bidi_tag< WrappedIteratorT > > filter_iterator
Defines filter_iterator to a suitable specialization of filter_iterator_impl, based on the underlying...
Definition STLExtras.h:539
LLVM_ABI void initializeStructurizeCFGLegacyPassPass(PassRegistry &)
#define N
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
StructurizeCFGPass(bool SkipUniformRegions=false)