79#define DEBUG_TYPE "block-placement"
81STATISTIC(NumCondBranches,
"Number of conditional branches");
82STATISTIC(NumUncondBranches,
"Number of unconditional branches");
84 "Potential frequency of taking conditional branches");
86 "Potential frequency of taking unconditional branches");
90 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
91 "(e.g 4 means align on 16B boundaries)."),
95 "align-all-nofallthru-blocks",
96 cl::desc(
"Force the alignment of all blocks that have no fall-through "
97 "predecessors (i.e. don't add nops that are executed). In log2 "
98 "format (e.g 4 means align on 16B boundaries)."),
102 "max-bytes-for-alignment",
103 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-predecessor-limit",
109 cl::desc(
"For blocks with more predecessors, certain layout optimizations"
110 "will be disabled to prevent quadratic compile time."),
115 "block-placement-exit-block-bias",
116 cl::desc(
"Block frequency percentage a loop exit block needs "
117 "over the original exit to be considered the new exit."),
124 "loop-to-cold-block-ratio",
125 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
126 "(frequency of block) is greater than this ratio"),
131 cl::desc(
"Force outlining cold blocks from loops."),
136 cl::desc(
"Model the cost of loop rotation more "
137 "precisely by using profile data."),
142 cl::desc(
"Force the use of precise cost "
143 "loop rotation strategy."),
148 cl::desc(
"Cost that models the probabilistic risk of an instruction "
149 "misfetch due to a jump comparing to falling through, whose cost "
154 cl::desc(
"Cost of jump instructions."),
158 cl::desc(
"Perform tail duplication during placement. "
159 "Creates more fallthrough opportunities in "
160 "outline branches."),
165 cl::desc(
"Perform branch folding during placement. "
166 "Reduces code size."),
171 "tail-dup-placement-threshold",
172 cl::desc(
"Instruction cutoff for tail duplication during layout. "
173 "Tail merging during layout is forced to have a threshold "
174 "that won't conflict."),
179 "tail-dup-placement-aggressive-threshold",
180 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
181 "layout. Used at -O3. Tail merging during layout is forced to "
182 "have a threshold that won't conflict."),
187 "tail-dup-placement-penalty",
189 "Cost penalty for blocks that can avoid breaking CFG by copying. "
190 "Copying can increase fallthrough, but it also increases icache "
191 "pressure. This parameter controls the penalty to account for that. "
192 "Percent as integer."),
197 "tail-dup-profile-percent-threshold",
198 cl::desc(
"If profile count information is used in tail duplication cost "
199 "model, the gained fall through number from tail duplication "
200 "should be at least this percent of hot count."),
205 "triangle-chain-count",
206 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
207 "triangle tail duplication heuristic to kick in. 0 to disable."),
216 "renumber-blocks-before-view",
218 "If true, basic blocks are re-numbered before MBP layout is printed "
219 "into a dot graph. Only used when a function is being printed."),
223 "ext-tsp-block-placement-max-blocks",
224 cl::desc(
"Maximum number of basic blocks in a function to run ext-TSP "
231 cl::desc(
"Use ext-tsp for size-aware block placement."));
280 BlockToChainMapType &BlockToChain;
289 :
Blocks(1, BB), BlockToChain(BlockToChain) {
290 assert(BB &&
"Cannot create a chain with a null basic block");
291 BlockToChain[BB] =
this;
299 iterator begin() {
return Blocks.begin(); }
303 iterator end() {
return Blocks.end(); }
307 for (iterator i = begin(); i != end(); ++i) {
323 assert(BB &&
"Can't merge a null block.");
324 assert(!
Blocks.empty() &&
"Can't merge into an empty chain.");
328 assert(!BlockToChain[BB] &&
329 "Passed chain is null, but BB has entry in BlockToChain.");
331 BlockToChain[BB] =
this;
335 assert(BB == *Chain->
begin() &&
"Passed BB is not head of Chain.");
336 assert(Chain->begin() != Chain->end());
341 Blocks.push_back(ChainBB);
342 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
343 BlockToChain[ChainBB] =
this;
364 unsigned UnscheduledPredecessors = 0;
367class MachineBlockPlacement {
372 struct BlockAndTailDupResult {
378 struct WeightedEdge {
398 std::unique_ptr<MBFIWrapper> MBFI;
435 unsigned TailDupSize;
439 bool UseProfileCount =
false;
469 if (UseProfileCount) {
470 auto Count = MBFI->getBlockProfileCount(BB);
476 return MBFI->getBlockFreq(BB);
481 void initTailDupThreshold();
485 void markChainSuccessors(
const BlockChain &Chain,
487 const BlockFilterSet *BlockFilter =
nullptr);
493 const BlockFilterSet *BlockFilter =
nullptr);
497 const BlockFilterSet *BlockFilter,
500 BlockFilterSet *BlockFilter);
503 BlockFilterSet *BlockFilter);
504 bool repeatedlyTailDuplicateBlock(
507 BlockFilterSet *BlockFilter,
512 BlockChain &Chain, BlockFilterSet *BlockFilter,
515 bool &DuplicatedToLPred);
518 const BlockChain &SuccChain,
521 const BlockChain &Chain,
522 const BlockFilterSet *BlockFilter);
524 const BlockChain &Chain,
525 const BlockFilterSet *BlockFilter);
527 selectBestCandidateBlock(
const BlockChain &Chain,
530 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
533 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
535 const BlockFilterSet *BlockFilter);
544 const BlockFilterSet *BlockFilter);
547 BlockFilterSet *BlockFilter =
nullptr);
551 const BlockFilterSet &LoopBlockSet);
553 const BlockFilterSet &LoopBlockSet);
557 const BlockFilterSet &LoopBlockSet);
560 const BlockFilterSet &LoopBlockSet);
562 const BlockFilterSet &LoopBlockSet);
564 const BlockFilterSet &LoopBlockSet,
566 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
570 void rotateLoopWithProfile(BlockChain &LoopChain,
const MachineLoop &L,
571 const BlockFilterSet &LoopBlockSet);
572 void buildCFGChains();
573 void optimizeBranches();
583 const BlockFilterSet *BlockFilter);
588 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
591 BlockAndTailDupResult getBestTrellisSuccessor(
595 const BlockFilterSet *BlockFilter);
598 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
606 const BlockChain &Chain,
607 const BlockFilterSet *BlockFilter);
611 void precomputeTriangleChains();
614 void applyExtTsp(
bool OptForSize);
617 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
620 void createCFGChainExtTsp();
625 std::unique_ptr<MBFIWrapper> MBFI,
627 : MBPI(MBPI), MBFI(
std::
move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
628 AllowTailMerge(AllowTailMerge) {};
650 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
651 auto MBFI = std::make_unique<MBFIWrapper>(
652 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
653 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
654 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
655 ? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
658 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
659 auto *PassConfig = &getAnalysis<TargetPassConfig>();
660 bool AllowTailMerge = PassConfig->getEnableTailMerge();
661 return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
680char MachineBlockPlacementLegacy::ID = 0;
685 "Branch Probability Basic Block Placement",
false,
false)
714void MachineBlockPlacement::markChainSuccessors(
716 const BlockFilterSet *BlockFilter) {
720 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
730void MachineBlockPlacement::markBlockSuccessors(
738 if (BlockFilter && !BlockFilter->count(Succ))
740 BlockChain &SuccChain = *BlockToChain[Succ];
742 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
747 if (SuccChain.UnscheduledPredecessors == 0 ||
748 --SuccChain.UnscheduledPredecessors > 0)
751 auto *NewBB = *SuccChain.
begin();
752 if (NewBB->isEHPad())
765 const BlockFilterSet *BlockFilter,
785 bool SkipSucc =
false;
786 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
789 BlockChain *SuccChain = BlockToChain[Succ];
790 if (SuccChain == &Chain) {
792 }
else if (Succ != *SuccChain->begin()) {
794 <<
" -> Mid chain!\n");
804 return AdjustedSumProb;
815 if (SuccProbN >= SuccProbD)
830 if (Successors.
count(&BB))
833 if (!Successors.
count(Succ))
862 return (Gain / ThresholdProb) >= EntryFreq;
870bool MachineBlockPlacement::isProfitableToTailDup(
873 const BlockFilterSet *BlockFilter) {
900 auto AdjustedSuccSumProb =
901 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
903 auto BBFreq = MBFI->getBlockFreq(BB);
904 auto SuccFreq = MBFI->getBlockFreq(Succ);
910 if (SuccSuccs.
size() == 0)
917 if (Prob > BestSuccSucc)
929 if (SuccPred == Succ || SuccPred == BB ||
930 BlockToChain[SuccPred] == &Chain ||
931 (BlockFilter && !BlockFilter->count(SuccPred)))
935 if (Freq > SuccBestPred)
1003 if (UProb > AdjustedSuccSumProb / 2 &&
1004 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
1005 Chain, BlockFilter))
1008 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
1012 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
1013 std::max(Qin,
F) * UProb),
1024bool MachineBlockPlacement::isTrellis(
1027 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1047 if (Successors.count(SuccPred)) {
1050 if (!Successors.count(CheckSucc))
1054 const BlockChain *PredChain = BlockToChain[SuccPred];
1055 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1056 PredChain == &Chain || PredChain == BlockToChain[Succ])
1060 if (!SeenPreds.
insert(SuccPred).second)
1078std::pair<MachineBlockPlacement::WeightedEdge,
1079 MachineBlockPlacement::WeightedEdge>
1080MachineBlockPlacement::getBestNonConflictingEdges(
1091 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1095 auto BestA = Edges[0].begin();
1096 auto BestB = Edges[1].begin();
1099 if (BestA->Src == BestB->Src) {
1101 auto SecondBestA = std::next(BestA);
1102 auto SecondBestB = std::next(BestB);
1105 if (BestAScore < BestBScore)
1106 BestA = SecondBestA;
1108 BestB = SecondBestB;
1111 if (BestB->Src == BB)
1113 return std::make_pair(*BestA, *BestB);
1123MachineBlockPlacement::BlockAndTailDupResult
1124MachineBlockPlacement::getBestTrellisSuccessor(
1128 const BlockFilterSet *BlockFilter) {
1130 BlockAndTailDupResult
Result = {
nullptr,
false};
1137 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1143 for (
auto *Succ : ViableSuccs) {
1146 if (SuccPred != BB) {
1147 if (BlockFilter && !BlockFilter->count(SuccPred))
1149 const BlockChain *SuccPredChain = BlockToChain[SuccPred];
1150 if (SuccPredChain == &Chain || SuccPredChain == BlockToChain[Succ])
1155 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1161 WeightedEdge BestA, BestB;
1162 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1164 if (BestA.Src != BB) {
1168 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1175 if (BestA.Dest == BestB.Src) {
1181 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ2) &&
1182 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1184 Chain, BlockFilter)) {
1188 <<
", probability: " << Succ2Prob
1189 <<
" (Tail Duplicate)\n");
1191 Result.ShouldTailDup =
true;
1197 ComputedEdges[BestB.Src] = {BestB.Dest,
false};
1199 auto TrellisSucc = BestA.Dest;
1203 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1211bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1213 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1214 if (!shouldTailDuplicate(Succ))
1218 bool Duplicate =
true;
1220 unsigned int NumDup = 0;
1229 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1230 (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1280 if (
F->getFunction().hasProfileData())
1311 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1334void MachineBlockPlacement::precomputeTriangleChains() {
1335 struct TriangleChain {
1336 std::vector<MachineBasicBlock *> Edges;
1339 : Edges({src, dst}) {}
1342 assert(getKey()->isSuccessor(dst) &&
1343 "Attempting to append a block that is not a successor.");
1344 Edges.push_back(dst);
1347 unsigned count()
const {
return Edges.size() - 1; }
1372 if (PDom ==
nullptr)
1379 if (!shouldTailDuplicate(PDom))
1381 bool CanTailDuplicate =
true;
1388 CanTailDuplicate =
false;
1394 if (!CanTailDuplicate)
1401 auto Found = TriangleChainMap.
find(&BB);
1404 if (Found != TriangleChainMap.
end()) {
1405 TriangleChain Chain = std::move(Found->second);
1406 TriangleChainMap.
erase(Found);
1408 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1410 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1411 assert(InsertResult.second &&
"Block seen twice.");
1419 for (
auto &ChainPair : TriangleChainMap) {
1420 TriangleChain &Chain = ChainPair.second;
1427 Chain.Edges.pop_back();
1431 <<
" as pre-computed based on triangles.\n");
1433 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1434 assert(InsertResult.second &&
"Block seen twice.");
1476bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1480 const BlockFilterSet *BlockFilter) {
1483 if (SuccChain.UnscheduledPredecessors == 0)
1608 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1609 bool BadCFGConflict =
false;
1612 BlockChain *PredChain = BlockToChain[Pred];
1613 if (Pred == Succ || PredChain == &SuccChain ||
1614 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1615 Pred != *std::prev(PredChain->end()) ||
1636 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1637 BadCFGConflict =
true;
1642 if (BadCFGConflict) {
1644 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1661MachineBlockPlacement::BlockAndTailDupResult
1663 const BlockChain &Chain,
1664 const BlockFilterSet *BlockFilter) {
1667 BlockAndTailDupResult BestSucc = {
nullptr,
false};
1671 auto AdjustedSumProb =
1672 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1679 auto FoundEdge = ComputedEdges.
find(BB);
1680 if (FoundEdge != ComputedEdges.
end()) {
1682 ComputedEdges.
erase(FoundEdge);
1683 BlockChain *SuccChain = BlockToChain[Succ];
1684 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1685 SuccChain != &Chain && Succ == *SuccChain->begin())
1686 return FoundEdge->second;
1691 if (isTrellis(BB, Successors, Chain, BlockFilter))
1692 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1705 BlockChain &SuccChain = *BlockToChain[Succ];
1708 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1709 Chain, BlockFilter)) {
1711 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ))
1718 <<
", probability: " << SuccProb
1719 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1722 if (BestSucc.BB && BestProb >= SuccProb) {
1729 BestProb = SuccProb;
1737 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1738 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1739 return std::get<0>(L) > std::get<0>(R);
1741 for (
auto &Tup : DupCandidates) {
1744 std::tie(DupProb, Succ) = Tup;
1745 if (DupProb < BestProb)
1747 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1748 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1750 <<
", probability: " << DupProb
1751 <<
" (Tail Duplicate)\n");
1753 BestSucc.ShouldTailDup =
true;
1781 return BlockToChain.
lookup(BB) == &Chain;
1784 if (WorkList.
empty())
1787 bool IsEHPad = WorkList[0]->isEHPad();
1793 "EHPad mismatch between block and work list.");
1795 BlockChain &SuccChain = *BlockToChain[
MBB];
1796 if (&SuccChain == &Chain)
1799 assert(SuccChain.UnscheduledPredecessors == 0 &&
1800 "Found CFG-violating block");
1825 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1829 BestFreq = CandidateFreq;
1843 const BlockChain &PlacedChain,
1848 if (BlockChain *Chain = BlockToChain[&*
I]; Chain != &PlacedChain) {
1849 PrevUnplacedBlockIt =
I;
1853 return *Chain->begin();
1870 const BlockChain &PlacedChain,
1871 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1872 const BlockFilterSet *BlockFilter) {
1874 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1875 ++PrevUnplacedBlockInFilterIt) {
1876 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1877 if (
C != &PlacedChain) {
1884void MachineBlockPlacement::fillWorkLists(
1886 const BlockFilterSet *BlockFilter =
nullptr) {
1887 BlockChain &Chain = *BlockToChain[
MBB];
1888 if (!UpdatedPreds.
insert(&Chain).second)
1892 Chain.UnscheduledPredecessors == 0 &&
1893 "Attempting to place block with unscheduled predecessors in worklist.");
1895 assert(BlockToChain[ChainBB] == &Chain &&
1896 "Block in chain doesn't match BlockToChain map.");
1898 if (BlockFilter && !BlockFilter->count(Pred))
1900 if (BlockToChain[Pred] == &Chain)
1902 ++Chain.UnscheduledPredecessors;
1906 if (Chain.UnscheduledPredecessors != 0)
1918 BlockFilterSet *BlockFilter) {
1919 assert(HeadBB &&
"BB must not be null.\n");
1920 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1922 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1924 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1927 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1930 assert(BB &&
"null block found at end of chain in loop.");
1931 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1932 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1936 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1938 bool ShouldTailDup =
Result.ShouldTailDup;
1939 if (allowTailDupPlacement(*
F))
1940 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1941 BB, BestSucc, Chain, BlockFilter));
1947 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1949 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1953 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1956 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1960 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1961 "layout successor until the CFG reduces\n");
1966 if (allowTailDupPlacement(*
F) && BestSucc && ShouldTailDup) {
1967 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1968 BlockFilter, PrevUnplacedBlockIt,
1969 PrevUnplacedBlockInFilterIt);
1977 BlockChain &SuccChain = *BlockToChain[BestSucc];
1980 SuccChain.UnscheduledPredecessors = 0;
1983 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1984 Chain.merge(BestSucc, &SuccChain);
1985 BB = *std::prev(Chain.end());
2006bool MachineBlockPlacement::canMoveBottomBlockToTop(
2015 if (OtherBB == BottomBlock)
2017 if (OtherBB == OldTop)
2026 const BlockFilterSet &LoopBlockSet) {
2029 BlockChain *PredChain = BlockToChain[Pred];
2030 if (!LoopBlockSet.count(Pred) &&
2031 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2038 BlockChain *SuccChain = BlockToChain[Succ];
2041 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
2042 (!SuccChain || Succ == *SuccChain->begin())) {
2050 if (EdgeFreq > MaxFreq)
2082 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2094 if (!LoopBlockSet.count(Pred))
2096 BlockChain *PredChain = BlockToChain[Pred];
2097 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2100 if (EdgeFreq > FallThroughFromPred) {
2101 FallThroughFromPred = EdgeFreq;
2112 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2116 BlockChain *SuccChain = BlockToChain[Succ];
2117 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2118 (SuccChain == BlockToChain[BestPred]))
2122 if (EdgeFreq > NewFreq)
2127 if (NewFreq > OrigEdgeFreq) {
2139 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2169 const BlockFilterSet &LoopBlockSet) {
2173 BlockChain &HeaderChain = *BlockToChain[OldTop];
2174 if (!LoopBlockSet.count(*HeaderChain.begin()))
2176 if (OldTop != *HeaderChain.begin())
2185 if (!LoopBlockSet.count(Pred))
2187 if (Pred ==
L.getHeader())
2198 if (OtherBB == OldTop)
2202 if (!canMoveBottomBlockToTop(Pred, OldTop))
2206 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2208 (Gains > BestGains ||
2223 (*BestPred->
pred_begin())->succ_size() == 1 &&
2236MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2237 const BlockFilterSet &LoopBlockSet) {
2246 return L.getHeader();
2250 while (NewTop != OldTop) {
2252 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2253 if (NewTop != OldTop)
2254 ComputedEdges[NewTop] = {OldTop,
false};
2265MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2266 const BlockFilterSet &LoopBlockSet,
2276 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2277 if (!LoopBlockSet.count(*HeaderChain.begin()))
2281 unsigned BestExitLoopDepth = 0;
2291 BlockChain &Chain = *BlockToChain[
MBB];
2294 if (
MBB != *std::prev(Chain.end()))
2303 bool HasLoopingSucc =
false;
2309 BlockChain &SuccChain = *BlockToChain[Succ];
2311 if (&Chain == &SuccChain) {
2318 if (LoopBlockSet.count(Succ)) {
2321 HasLoopingSucc =
true;
2325 unsigned SuccLoopDepth = 0;
2327 SuccLoopDepth = ExitLoop->getLoopDepth();
2328 if (ExitLoop->contains(&L))
2335 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2342 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2343 ExitEdgeFreq > BestExitEdgeFreq ||
2345 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2346 BestExitEdgeFreq = ExitEdgeFreq;
2351 if (!HasLoopingSucc) {
2353 ExitingBB = OldExitingBB;
2354 BestExitEdgeFreq = OldBestExitEdgeFreq;
2361 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2364 if (
L.getNumBlocks() == 1) {
2365 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2372 if (!BlocksExitingToOuterLoop.
empty() &&
2373 !BlocksExitingToOuterLoop.
count(ExitingBB))
2378 ExitFreq = BestExitEdgeFreq;
2386bool MachineBlockPlacement::hasViableTopFallthrough(
2389 BlockChain *PredChain = BlockToChain[Pred];
2390 if (!LoopBlockSet.count(Pred) &&
2391 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2398 BlockChain *SuccChain = BlockToChain[Succ];
2401 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2419void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2422 const BlockFilterSet &LoopBlockSet) {
2430 if (Bottom == ExitingBB)
2437 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2442 if (ViableTopFallthrough) {
2444 BlockChain *SuccChain = BlockToChain[Succ];
2445 if (!LoopBlockSet.count(Succ) &&
2446 (!SuccChain || Succ == *SuccChain->begin()))
2452 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2453 if (FallThrough2Top >= ExitFreq)
2457 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2458 if (ExitIt == LoopChain.end())
2480 if (ViableTopFallthrough) {
2481 assert(std::next(ExitIt) != LoopChain.end() &&
2482 "Exit should not be last BB");
2491 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2507void MachineBlockPlacement::rotateLoopWithProfile(
2509 const BlockFilterSet &LoopBlockSet) {
2510 auto RotationPos = LoopChain.end();
2535 BlockChain *PredChain = BlockToChain[Pred];
2536 if (!LoopBlockSet.count(Pred) &&
2537 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2538 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2540 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2544 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2545 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2554 for (
auto *BB : LoopChain) {
2557 BlockChain *SuccChain = BlockToChain[Succ];
2558 if (!LoopBlockSet.count(Succ) &&
2559 (!SuccChain || Succ == *SuccChain->begin())) {
2561 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2565 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2573 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2574 EndIter = LoopChain.end();
2575 Iter != EndIter; Iter++, TailIter++) {
2578 if (TailIter == LoopChain.end())
2579 TailIter = LoopChain.begin();
2581 auto TailBB = *TailIter;
2589 if (Iter != LoopChain.begin())
2590 Cost += HeaderFallThroughCost;
2594 for (
auto &ExitWithFreq : ExitsWithFreq)
2595 if (TailBB != ExitWithFreq.first)
2596 Cost += ExitWithFreq.second;
2612 if (TailBB->isSuccessor(*Iter)) {
2613 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2614 if (TailBB->succ_size() == 1)
2616 else if (TailBB->succ_size() == 2) {
2618 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2620 ? TailBBFreq * TailToHeadProb.
getCompl()
2631 if (
Cost < SmallestRotationCost) {
2632 SmallestRotationCost =
Cost;
2637 if (RotationPos != LoopChain.end()) {
2639 <<
" to the top\n");
2640 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2649MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2655 return X->getNumber() <
Y->getNumber();
2658 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2671 for (
auto *LoopPred :
L.getHeader()->predecessors())
2672 if (!
L.contains(LoopPred))
2673 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2677 if (LoopBlockSet.count(LoopBB))
2682 BlockChain *Chain = BlockToChain[LoopBB];
2684 LoopBlockSet.
insert(ChainBB);
2687 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2692 BlockFilterSet
Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2702void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2706 buildLoopChains(*InnerLoop);
2709 "BlockWorkList not empty when starting to build loop chains.");
2711 "EHPadWorkList not empty when starting to build loop chains.");
2712 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2717 bool RotateLoopWithProfile =
2733 PreferredLoopExit =
nullptr;
2735 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2736 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2738 BlockChain &LoopChain = *BlockToChain[LoopTop];
2744 assert(LoopChain.UnscheduledPredecessors == 0 &&
2745 "LoopChain should not have unscheduled predecessors.");
2746 UpdatedPreds.
insert(&LoopChain);
2749 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2751 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2753 if (RotateLoopWithProfile)
2754 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2756 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2760 bool BadLoop =
false;
2761 if (LoopChain.UnscheduledPredecessors) {
2763 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2764 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n"
2765 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2769 if (!LoopBlockSet.remove(ChainBB)) {
2773 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2774 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2775 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2780 if (!LoopBlockSet.empty()) {
2783 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2784 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2785 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2788 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2791 BlockWorkList.
clear();
2792 EHPadWorkList.
clear();
2795void MachineBlockPlacement::buildCFGChains() {
2803 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2816 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2817 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2820 Chain->merge(NextBB,
nullptr);
2822 BlocksWithUnanalyzableExits.
insert(&*BB);
2830 PreferredLoopExit =
nullptr;
2832 buildLoopChains(*L);
2835 "BlockWorkList should be empty before building final chain.");
2837 "EHPadWorkList should be empty before building final chain.");
2841 fillWorkLists(&
MBB, UpdatedPreds);
2843 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2844 buildChain(&
F->front(), FunctionChain);
2851 bool BadFunc =
false;
2852 FunctionBlockSetType FunctionBlockSet;
2854 FunctionBlockSet.insert(&
MBB);
2857 if (!FunctionBlockSet.erase(ChainBB)) {
2859 dbgs() <<
"Function chain contains a block not in the function!\n"
2860 <<
" Bad block: " << getBlockName(ChainBB) <<
"\n";
2863 if (!FunctionBlockSet.empty()) {
2865 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2866 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2867 <<
" Bad block: " << getBlockName(RemainingBB) <<
"\n";
2869 assert(!BadFunc &&
"Detected problems with the block placement.");
2875 F->getNumBlockIDs());
2878 for (
auto &
MBB : *
F) {
2879 if (LastMBB !=
nullptr)
2883 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2890 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2894 F->splice(InsertPos, ChainBB);
2899 if (ChainBB == *FunctionChain.begin())
2910 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2916 "Unexpected block with un-analyzable fallthrough!");
2918 TBB = FBB =
nullptr;
2956 BlockWorkList.
clear();
2957 EHPadWorkList.
clear();
2960void MachineBlockPlacement::optimizeBranches() {
2961 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2975 if (!
TBB || !FBB ||
Cond.empty())
2993 auto Dl = ChainBB->findBranchDebugLoc();
2999void MachineBlockPlacement::alignBlocks() {
3006 if (
F->getFunction().hasMinSize() ||
3011 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
3013 if (FunctionChain.begin() == FunctionChain.end())
3020 if (ChainBB == *FunctionChain.begin())
3032 unsigned MDAlign = 1;
3033 MDNode *LoopID =
L->getLoopID();
3036 MDNode *MD = dyn_cast<MDNode>(MDO);
3042 if (S->
getString() ==
"llvm.loop.align") {
3044 "per-loop align metadata should have two operands.");
3046 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
3047 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
3053 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
3060 if (Freq < WeightedEntryFreq)
3067 if (Freq < (LoopHeaderFreq * ColdProb))
3080 auto DetermineMaxAlignmentPadding = [&]() {
3087 ChainBB->setMaxBytesForAlignment(MaxBytes);
3093 ChainBB->setAlignment(LoopAlign);
3094 DetermineMaxAlignmentPadding();
3104 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3105 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3106 ChainBB->setAlignment(LoopAlign);
3107 DetermineMaxAlignmentPadding();
3111 const bool HasMaxBytesOverride =
3117 if (HasMaxBytesOverride)
3126 for (
auto MBI = std::next(
F->begin()), MBE =
F->end(); MBI != MBE; ++MBI) {
3127 auto LayoutPred = std::prev(MBI);
3129 if (HasMaxBytesOverride)
3154bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3158 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3159 bool Removed, DuplicatedToLPred;
3160 bool DuplicatedToOriginalLPred;
3161 Removed = maybeTailDuplicateBlock(
3162 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3163 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3166 DuplicatedToOriginalLPred = DuplicatedToLPred;
3171 while (DuplicatedToLPred && Removed) {
3178 BlockChain::iterator ChainEnd = Chain.
end();
3179 DupBB = *(--ChainEnd);
3181 if (ChainEnd == Chain.begin())
3183 DupPred = *std::prev(ChainEnd);
3184 Removed = maybeTailDuplicateBlock(
3185 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3186 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3193 LPred = *std::prev(Chain.end());
3194 if (DuplicatedToOriginalLPred)
3195 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3212bool MachineBlockPlacement::maybeTailDuplicateBlock(
3215 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3216 bool &DuplicatedToLPred) {
3217 DuplicatedToLPred =
false;
3218 if (!shouldTailDuplicate(BB))
3226 bool Removed =
false;
3232 if (
auto It = BlockToChain.
find(RemBB); It != BlockToChain.
end()) {
3233 It->second->remove(RemBB);
3234 BlockToChain.
erase(It);
3238 if (&(*PrevUnplacedBlockIt) == RemBB) {
3239 PrevUnplacedBlockIt++;
3243 if (RemBB->isEHPad()) {
3254 if (It != BlockFilter->end()) {
3255 if (It < PrevUnplacedBlockInFilterIt) {
3259 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3260 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3261 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3263 }
else if (It == PrevUnplacedBlockInFilterIt)
3266 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3268 BlockFilter->erase(It);
3273 MLI->removeBlock(RemBB);
3274 if (RemBB == PreferredLoopExit)
3275 PreferredLoopExit =
nullptr;
3280 auto RemovalCallbackRef =
3287 if (
F->getFunction().hasProfileData()) {
3289 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3290 if (CandidatePreds.
size() == 0)
3293 CandidatePtr = &CandidatePreds;
3296 &RemovalCallbackRef, CandidatePtr);
3299 DuplicatedToLPred =
false;
3302 BlockChain *PredChain = BlockToChain[Pred];
3304 DuplicatedToLPred =
true;
3305 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3306 PredChain == &Chain)
3309 if (BlockFilter && !BlockFilter->count(NewSucc))
3311 BlockChain *NewChain = BlockToChain[NewSucc];
3312 if (NewChain != &Chain && NewChain != PredChain)
3313 NewChain->UnscheduledPredecessors++;
3323 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3339 BlockFilterSet *BlockFilter) {
3342 if (BlockFilter && !BlockFilter->count(Pred))
3344 BlockChain *PredChain = BlockToChain[Pred];
3345 if (PredChain && (Pred != *std::prev(PredChain->end())))
3352 if (BlockFilter && !BlockFilter->count(Succ))
3354 BlockChain *SuccChain = BlockToChain[Succ];
3355 if (SuccChain && (Succ != *SuccChain->begin()))
3358 if (SuccProb > BestProb)
3359 BestProb = SuccProb;
3363 if (BBProb <= BestProb)
3370 return Gain > scaleThreshold(BB);
3375void MachineBlockPlacement::findDuplicateCandidates(
3377 BlockFilterSet *BlockFilter) {
3389 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3394 auto SuccIt = Succs.begin();
3395 if (SuccIt != Succs.end()) {
3446 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3448 if (SuccIt != Succs.end())
3454 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3456 if (SuccIt == Succs.end()) {
3458 if (Succs.size() > 0)
3459 DupCost += PredFreq;
3462 DupCost += PredFreq;
3466 assert(OrigCost >= DupCost);
3467 OrigCost -= DupCost;
3468 if (OrigCost > BBDupThreshold) {
3470 if (SuccIt != Succs.end())
3478 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3479 Candidates[0] = Candidates.
back();
3485void MachineBlockPlacement::initTailDupThreshold() {
3487 if (
F->getFunction().hasProfileData()) {
3491 UseProfileCount =
true;
3505 UseProfileCount =
false;
3517 if (OptLevel >= CodeGenOptLevel::Aggressive) {
3529 (OptLevel < CodeGenOptLevel::Aggressive ||
3531 TailDupSize =
TII->getTailDuplicateSize(OptLevel);
3538 auto MBFI = std::make_unique<MBFIWrapper>(
3541 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
3545 .getCachedResult<ProfileSummaryAnalysis>(
3551 MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
3563 OS << MapClassName2PassName(
name());
3564 if (!AllowTailMerge)
3565 OS <<
"<no-tail-merge>";
3571 if (std::next(MF.
begin()) == MF.
end())
3575 OptLevel =
F->getTarget().getOptLevel();
3582 PreferredLoopExit =
nullptr;
3585 "BlockToChain map should be empty before starting placement.");
3587 "Computed Edge map should be empty before starting placement.");
3590 initTailDupThreshold();
3592 const bool OptForSize =
3597 bool UseExtTspForPerf =
false;
3598 bool UseExtTspForSize =
false;
3607 if (allowTailDupPlacement(*
F)) {
3610 const bool PreRegAlloc =
false;
3611 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3613 if (!UseExtTspForSize)
3614 precomputeTriangleChains();
3618 if (!UseExtTspForSize)
3628 if (EnableTailMerge) {
3638 if (!UseExtTspForSize) {
3640 BlockToChain.
clear();
3641 ComputedEdges.
clear();
3651 if (UseExtTspForPerf || UseExtTspForSize) {
3653 !(UseExtTspForPerf && UseExtTspForSize) &&
3654 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3655 applyExtTsp(UseExtTspForSize);
3656 createCFGChainExtTsp();
3662 BlockToChain.
clear();
3663 ComputedEdges.
clear();
3672 MBFI->view(
"MBP." + MF.
getName(),
false);
3680void MachineBlockPlacement::applyExtTsp(
bool OptForSize) {
3684 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3685 CurrentBlockOrder.reserve(
F->size());
3686 size_t NumBlocks = 0;
3688 BlockIndex[&
MBB] = NumBlocks++;
3689 CurrentBlockOrder.push_back(&
MBB);
3700 BlockCounts[BlockIndex[&
MBB]] = OptForSize ? 1 : BlockFreq.
getFrequency();
3709 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3710 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3726 if (FBB && FBB != FTB)
3735 JumpCounts.
push_back({BlockIndex[&
MBB], BlockIndex[Succ], Freq});
3746 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3747 <<
" with profile = " <<
F->getFunction().hasProfileData()
3748 <<
" (" <<
F->getName() <<
")" <<
"\n");
3755 std::vector<const MachineBasicBlock *> NewBlockOrder;
3756 NewBlockOrder.reserve(
F->size());
3758 NewBlockOrder.push_back(CurrentBlockOrder[
Node]);
3760 const double OptScore =
calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3764 if (OptForSize && OrgScore > OptScore)
3765 assignBlockOrder(CurrentBlockOrder);
3767 assignBlockOrder(NewBlockOrder);
3770void MachineBlockPlacement::assignBlockOrder(
3771 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3772 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3773 F->RenumberBlocks();
3780 bool HasChanges =
false;
3781 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3782 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3792 for (
auto &
MBB : *
F) {
3799 NewIndex[
MBB] = NewIndex.
size();
3802 return NewIndex[&
L] < NewIndex[&
R];
3809 for (
auto &
MBB : *
F) {
3816 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3829void MachineBlockPlacement::createCFGChainExtTsp() {
3830 BlockToChain.
clear();
3831 ComputedEdges.
clear();
3835 BlockChain *FunctionChain =
3836 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3841 FunctionChain->merge(&
MBB,
nullptr);
3853class MachineBlockPlacementStats {
3863 : MBPI(MBPI), MBFI(MBFI) {}
3878 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3879 auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
3880 return MachineBlockPlacementStats(MBPI, MBFI).run(
F);
3893char MachineBlockPlacementStatsLegacy::ID = 0;
3898 "Basic Block Placement Stats",
false,
false)
3910 MachineBlockPlacementStats(&MBPI, &MBFI).
run(MF);
3916 if (std::next(
F.begin()) ==
F.end())
3925 (
MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3927 (
MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
block placement Basic Block Placement Stats
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
Branch Probability Basic Block Placement
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > PredecessorLimit("block-placement-predecessor-limit", cl::desc("For blocks with more predecessors, certain layout optimizations" "will be disabled to prevent quadratic compile time."), cl::init(1000), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
unify loop Fixup each natural loop to have a single exit block
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Module * getParent()
Get the module that this global value is contained inside of...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
LLVM_ABI StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI void dump() const
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
StringRef - Represent a constant reference to a string, i.e.
Utility class to perform tail duplication.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
Target-Independent Code Generator Pass Configuration Options.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > ApplyExtTspWithoutProfile
constexpr from_range_t from_range
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
cl::opt< unsigned > ProfileLikelyProb
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
cl::opt< unsigned > StaticLikelyProb
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI void initializeMachineBlockPlacementLegacyPass(PassRegistry &)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
LLVM_ABI char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
LLVM_ABI char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static StringRef name()
Gets the name of the pass we are mixed into.