LLVM 22.0.0git
BranchProbabilityInfo.cpp
Go to the documentation of this file.
1//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loops should be simplified before this analysis.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/STLExtras.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/LLVMContext.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Value.h"
38#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <cstdint>
46#include <map>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "branch-prob"
52
54 "print-bpi", cl::init(false), cl::Hidden,
55 cl::desc("Print the branch probability info."));
56
58 "print-bpi-func-name", cl::Hidden,
59 cl::desc("The option to specify the name of the function "
60 "whose branch probability info is printed."));
61
63 "Branch Probability Analysis", false, true)
69 "Branch Probability Analysis", false, true)
70
72 : FunctionPass(ID) {}
73
75
76// Weights are for internal use only. They are used by heuristics to help to
77// estimate edges' probability. Example:
78//
79// Using "Loop Branch Heuristics" we predict weights of edges for the
80// block BB2.
81// ...
82// |
83// V
84// BB1<-+
85// | |
86// | | (Weight = 124)
87// V |
88// BB2--+
89// |
90// | (Weight = 4)
91// V
92// BB3
93//
94// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
95// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
96static const uint32_t LBH_TAKEN_WEIGHT = 124;
98
99/// Unreachable-terminating branch taken probability.
100///
101/// This is the probability for a branch being taken to a block that terminates
102/// (eventually) in unreachable. These are predicted as unlikely as possible.
103/// All reachable probability will proportionally share the remaining part.
105
106/// Heuristics and lookup tables for non-loop branches:
107/// Pointer Heuristics (PH)
108static const uint32_t PH_TAKEN_WEIGHT = 20;
109static const uint32_t PH_NONTAKEN_WEIGHT = 12;
110static const BranchProbability
112static const BranchProbability
114
116using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;
117
118/// Pointer comparisons:
120 {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
121 {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
122};
123
124/// Zero Heuristics (ZH)
125static const uint32_t ZH_TAKEN_WEIGHT = 20;
126static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
127static const BranchProbability
129static const BranchProbability
131
132/// Integer compares with 0:
134 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
135 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
136 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
137 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
138};
139
140/// Integer compares with -1:
142 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
143 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
144 // InstCombine canonicalizes X >= 0 into X > -1
145 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
146};
147
148/// Integer compares with 1:
150 // InstCombine canonicalizes X <= 0 into X < 1
151 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
152};
153
154/// strcmp and similar functions return zero, negative, or positive, if the
155/// first string is equal, less, or greater than the second. We consider it
156/// likely that the strings are not equal, so a comparison with zero is
157/// probably false, but also a comparison with any other number is also
158/// probably false given that what exactly is returned for nonzero values is
159/// not specified. Any kind of comparison other than equality we know
160/// nothing about.
164};
165
166// Floating-Point Heuristics (FPH)
167static const uint32_t FPH_TAKEN_WEIGHT = 20;
169
170/// This is the probability for an ordered floating point comparison.
171static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
172/// This is the probability for an unordered floating point comparison, it means
173/// one or two of the operands are NaN. Usually it is used to test for an
174/// exceptional case, so the result is unlikely.
175static const uint32_t FPH_UNO_WEIGHT = 1;
176
179static const BranchProbability
181static const BranchProbability
183static const BranchProbability
185
186/// Floating-Point compares:
188 {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
189 {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
190};
191
192/// Set of dedicated "absolute" execution weights for a block. These weights are
193/// meaningful relative to each other and their derivatives only.
194enum class BlockExecWeight : std::uint32_t {
195 /// Special weight used for cases with exact zero probability.
196 ZERO = 0x0,
197 /// Minimal possible non zero weight.
198 LOWEST_NON_ZERO = 0x1,
199 /// Weight to an 'unreachable' block.
201 /// Weight to a block containing non returning call.
203 /// Weight to 'unwind' block of an invoke instruction.
205 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked
206 /// with attribute 'cold'.
207 COLD = 0xffff,
208 /// Default weight is used in cases when there is no dedicated execution
209 /// weight set. It is not propagated through the domination line either.
210 DEFAULT = 0xfffff
211};
212
214 // Record SCC numbers of blocks in the CFG to identify irreducible loops.
215 // FIXME: We could only calculate this if the CFG is known to be irreducible
216 // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
217 int SccNum = 0;
218 for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
219 ++It, ++SccNum) {
220 // Ignore single-block SCCs since they either aren't loops or LoopInfo will
221 // catch them.
222 const std::vector<const BasicBlock *> &Scc = *It;
223 if (Scc.size() == 1)
224 continue;
225
226 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
227 for (const auto *BB : Scc) {
228 LLVM_DEBUG(dbgs() << " " << BB->getName());
229 SccNums[BB] = SccNum;
230 calculateSccBlockType(BB, SccNum);
231 }
232 LLVM_DEBUG(dbgs() << "\n");
233 }
234}
235
237 auto SccIt = SccNums.find(BB);
238 if (SccIt == SccNums.end())
239 return -1;
240 return SccIt->second;
241}
242
244 int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const {
245
246 for (auto MapIt : SccBlocks[SccNum]) {
247 const auto *BB = MapIt.first;
248 if (isSCCHeader(BB, SccNum))
249 for (const auto *Pred : predecessors(BB))
250 if (getSCCNum(Pred) != SccNum)
251 Enters.push_back(const_cast<BasicBlock *>(BB));
252 }
253}
254
256 int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const {
257 for (auto MapIt : SccBlocks[SccNum]) {
258 const auto *BB = MapIt.first;
259 if (isSCCExitingBlock(BB, SccNum))
260 for (const auto *Succ : successors(BB))
261 if (getSCCNum(Succ) != SccNum)
262 Exits.push_back(const_cast<BasicBlock *>(Succ));
263 }
264}
265
266uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB,
267 int SccNum) const {
268 assert(getSCCNum(BB) == SccNum);
269
270 assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC");
271 const auto &SccBlockTypes = SccBlocks[SccNum];
272
273 auto It = SccBlockTypes.find(BB);
274 if (It != SccBlockTypes.end()) {
275 return It->second;
276 }
277 return Inner;
278}
279
280void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB,
281 int SccNum) {
282 assert(getSCCNum(BB) == SccNum);
283 uint32_t BlockType = Inner;
284
285 if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) {
286 // Consider any block that is an entry point to the SCC as
287 // a header.
288 return getSCCNum(Pred) != SccNum;
289 }))
290 BlockType |= Header;
291
292 if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) {
293 return getSCCNum(Succ) != SccNum;
294 }))
295 BlockType |= Exiting;
296
297 // Lazily compute the set of headers for a given SCC and cache the results
298 // in the SccHeaderMap.
299 if (SccBlocks.size() <= static_cast<unsigned>(SccNum))
300 SccBlocks.resize(SccNum + 1);
301 auto &SccBlockTypes = SccBlocks[SccNum];
302
303 if (BlockType != Inner) {
304 bool IsInserted;
305 std::tie(std::ignore, IsInserted) =
306 SccBlockTypes.insert(std::make_pair(BB, BlockType));
307 assert(IsInserted && "Duplicated block in SCC");
308 }
309}
310
311BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB,
312 const LoopInfo &LI,
313 const SccInfo &SccI)
314 : BB(BB) {
315 LD.first = LI.getLoopFor(BB);
316 if (!LD.first) {
317 LD.second = SccI.getSCCNum(BB);
318 }
319}
320
321bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const {
322 const auto &SrcBlock = Edge.first;
323 const auto &DstBlock = Edge.second;
324 return (DstBlock.getLoop() &&
325 !DstBlock.getLoop()->contains(SrcBlock.getLoop())) ||
326 // Assume that SCCs can't be nested.
327 (DstBlock.getSccNum() != -1 &&
328 SrcBlock.getSccNum() != DstBlock.getSccNum());
329}
330
331bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const {
332 return isLoopEnteringEdge({Edge.second, Edge.first});
333}
334
335bool BranchProbabilityInfo::isLoopEnteringExitingEdge(
336 const LoopEdge &Edge) const {
337 return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge);
338}
339
340bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const {
341 const auto &SrcBlock = Edge.first;
342 const auto &DstBlock = Edge.second;
343 return SrcBlock.belongsToSameLoop(DstBlock) &&
344 ((DstBlock.getLoop() &&
345 DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) ||
346 (DstBlock.getSccNum() != -1 &&
347 SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum())));
348}
349
350void BranchProbabilityInfo::getLoopEnterBlocks(
351 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const {
352 if (LB.getLoop()) {
353 auto *Header = LB.getLoop()->getHeader();
354 Enters.append(pred_begin(Header), pred_end(Header));
355 } else {
356 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
357 SccI->getSccEnterBlocks(LB.getSccNum(), Enters);
358 }
359}
360
361void BranchProbabilityInfo::getLoopExitBlocks(
362 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const {
363 if (LB.getLoop()) {
364 LB.getLoop()->getExitBlocks(Exits);
365 } else {
366 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?");
367 SccI->getSccExitBlocks(LB.getSccNum(), Exits);
368 }
369}
370
371// Propagate existing explicit probabilities from either profile data or
372// 'expect' intrinsic processing. Examine metadata against unreachable
373// heuristic. The probability of the edge coming to unreachable block is
374// set to min of metadata and unreachable heuristic.
375bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
376 const Instruction *TI = BB->getTerminator();
377 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
378 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) ||
379 isa<InvokeInst>(TI) || isa<CallBrInst>(TI)))
380 return false;
381
382 MDNode *WeightsNode = getValidBranchWeightMDNode(*TI);
383 if (!WeightsNode)
384 return false;
385
386 // Check that the number of successors is manageable.
387 assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
388
389 // Build up the final weights that will be used in a temporary buffer.
390 // Compute the sum of all weights to later decide whether they need to
391 // be scaled to fit in 32 bits.
392 uint64_t WeightSum = 0;
394 SmallVector<unsigned, 2> UnreachableIdxs;
395 SmallVector<unsigned, 2> ReachableIdxs;
396
397 extractBranchWeights(WeightsNode, Weights);
398 for (unsigned I = 0, E = Weights.size(); I != E; ++I) {
399 WeightSum += Weights[I];
400 const LoopBlock SrcLoopBB = getLoopBlock(BB);
401 const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I));
402 auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
403 if (EstimatedWeight &&
404 *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
405 UnreachableIdxs.push_back(I);
406 else
407 ReachableIdxs.push_back(I);
408 }
409 assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
410
411 // If the sum of weights does not fit in 32 bits, scale every weight down
412 // accordingly.
413 uint64_t ScalingFactor =
414 (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
415
416 if (ScalingFactor > 1) {
417 WeightSum = 0;
418 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
419 Weights[I] /= ScalingFactor;
420 WeightSum += Weights[I];
421 }
422 }
423 assert(WeightSum <= UINT32_MAX &&
424 "Expected weights to scale down to 32 bits");
425
426 if (WeightSum == 0 || ReachableIdxs.size() == 0) {
427 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
428 Weights[I] = 1;
429 WeightSum = TI->getNumSuccessors();
430 }
431
432 // Set the probability.
434 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
435 BP.push_back({ Weights[I], static_cast<uint32_t>(WeightSum) });
436
437 // Examine the metadata against unreachable heuristic.
438 // If the unreachable heuristic is more strong then we use it for this edge.
439 if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
440 setEdgeProbability(BB, BP);
441 return true;
442 }
443
444 auto UnreachableProb = UR_TAKEN_PROB;
445 for (auto I : UnreachableIdxs)
446 if (UnreachableProb < BP[I]) {
447 BP[I] = UnreachableProb;
448 }
449
450 // Sum of all edge probabilities must be 1.0. If we modified the probability
451 // of some edges then we must distribute the introduced difference over the
452 // reachable blocks.
453 //
454 // Proportional distribution: the relation between probabilities of the
455 // reachable edges is kept unchanged. That is for any reachable edges i and j:
456 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
457 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
458 // Where K is independent of i,j.
459 // newBP[i] == oldBP[i] * K
460 // We need to find K.
461 // Make sum of all reachables of the left and right parts:
462 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
463 // Sum of newBP must be equal to 1.0:
464 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
465 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
466 // Where sum_of_unreachable(newBP) is what has been just changed.
467 // Finally:
468 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
469 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
470 BranchProbability NewUnreachableSum = BranchProbability::getZero();
471 for (auto I : UnreachableIdxs)
472 NewUnreachableSum += BP[I];
473
474 BranchProbability NewReachableSum =
475 BranchProbability::getOne() - NewUnreachableSum;
476
478 for (auto I : ReachableIdxs)
479 OldReachableSum += BP[I];
480
481 if (OldReachableSum != NewReachableSum) { // Anything to dsitribute?
482 if (OldReachableSum.isZero()) {
483 // If all oldBP[i] are zeroes then the proportional distribution results
484 // in all zero probabilities and the error stays big. In this case we
485 // evenly spread NewReachableSum over the reachable edges.
486 BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
487 for (auto I : ReachableIdxs)
488 BP[I] = PerEdge;
489 } else {
490 for (auto I : ReachableIdxs) {
491 // We use uint64_t to avoid double rounding error of the following
492 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
493 // The formula is taken from the private constructor
494 // BranchProbability(uint32_t Numerator, uint32_t Denominator)
495 uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
496 BP[I].getNumerator();
497 uint32_t Div = static_cast<uint32_t>(
498 divideNearest(Mul, OldReachableSum.getNumerator()));
499 BP[I] = BranchProbability::getRaw(Div);
500 }
501 }
502 }
503
504 setEdgeProbability(BB, BP);
505
506 return true;
507}
508
509// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
510// between two pointer or pointer and NULL will fail.
511bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
512 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
513 if (!BI || !BI->isConditional())
514 return false;
515
516 Value *Cond = BI->getCondition();
517 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
518 if (!CI || !CI->isEquality())
519 return false;
520
521 Value *LHS = CI->getOperand(0);
522
523 if (!LHS->getType()->isPointerTy())
524 return false;
525
526 assert(CI->getOperand(1)->getType()->isPointerTy());
527
528 auto Search = PointerTable.find(CI->getPredicate());
529 if (Search == PointerTable.end())
530 return false;
531 setEdgeProbability(BB, Search->second);
532 return true;
533}
534
535// Compute the unlikely successors to the block BB in the loop L, specifically
536// those that are unlikely because this is a loop, and add them to the
537// UnlikelyBlocks set.
538static void
540 SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
541 // Sometimes in a loop we have a branch whose condition is made false by
542 // taking it. This is typically something like
543 // int n = 0;
544 // while (...) {
545 // if (++n >= MAX) {
546 // n = 0;
547 // }
548 // }
549 // In this sort of situation taking the branch means that at the very least it
550 // won't be taken again in the next iteration of the loop, so we should
551 // consider it less likely than a typical branch.
552 //
553 // We detect this by looking back through the graph of PHI nodes that sets the
554 // value that the condition depends on, and seeing if we can reach a successor
555 // block which can be determined to make the condition false.
556 //
557 // FIXME: We currently consider unlikely blocks to be half as likely as other
558 // blocks, but if we consider the example above the likelyhood is actually
559 // 1/MAX. We could therefore be more precise in how unlikely we consider
560 // blocks to be, but it would require more careful examination of the form
561 // of the comparison expression.
562 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
563 if (!BI || !BI->isConditional())
564 return;
565
566 // Check if the branch is based on an instruction compared with a constant
567 CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
568 if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
569 !isa<Constant>(CI->getOperand(1)))
570 return;
571
572 // Either the instruction must be a PHI, or a chain of operations involving
573 // constants that ends in a PHI which we can then collapse into a single value
574 // if the PHI value is known.
575 Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0));
576 PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
577 Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
578 // Collect the instructions until we hit a PHI
580 while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
581 isa<Constant>(CmpLHS->getOperand(1))) {
582 // Stop if the chain extends outside of the loop
583 if (!L->contains(CmpLHS))
584 return;
585 InstChain.push_back(cast<BinaryOperator>(CmpLHS));
586 CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
587 if (CmpLHS)
588 CmpPHI = dyn_cast<PHINode>(CmpLHS);
589 }
590 if (!CmpPHI || !L->contains(CmpPHI))
591 return;
592
593 // Trace the phi node to find all values that come from successors of BB
594 SmallPtrSet<PHINode*, 8> VisitedInsts;
596 WorkList.push_back(CmpPHI);
597 VisitedInsts.insert(CmpPHI);
598 while (!WorkList.empty()) {
599 PHINode *P = WorkList.pop_back_val();
600 for (BasicBlock *B : P->blocks()) {
601 // Skip blocks that aren't part of the loop
602 if (!L->contains(B))
603 continue;
604 Value *V = P->getIncomingValueForBlock(B);
605 // If the source is a PHI add it to the work list if we haven't
606 // already visited it.
607 if (PHINode *PN = dyn_cast<PHINode>(V)) {
608 if (VisitedInsts.insert(PN).second)
609 WorkList.push_back(PN);
610 continue;
611 }
612 // If this incoming value is a constant and B is a successor of BB, then
613 // we can constant-evaluate the compare to see if it makes the branch be
614 // taken or not.
615 Constant *CmpLHSConst = dyn_cast<Constant>(V);
616 if (!CmpLHSConst || !llvm::is_contained(successors(BB), B))
617 continue;
618 // First collapse InstChain
619 const DataLayout &DL = BB->getDataLayout();
620 for (Instruction *I : llvm::reverse(InstChain)) {
621 CmpLHSConst = ConstantFoldBinaryOpOperands(
622 I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL);
623 if (!CmpLHSConst)
624 break;
625 }
626 if (!CmpLHSConst)
627 continue;
628 // Now constant-evaluate the compare
630 CI->getPredicate(), CmpLHSConst, CmpConst, DL);
631 // If the result means we don't branch to the block then that block is
632 // unlikely.
633 if (Result &&
634 ((Result->isZeroValue() && B == BI->getSuccessor(0)) ||
635 (Result->isOneValue() && B == BI->getSuccessor(1))))
636 UnlikelyBlocks.insert(B);
637 }
638 }
639}
640
641std::optional<uint32_t>
642BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const {
643 auto WeightIt = EstimatedBlockWeight.find(BB);
644 if (WeightIt == EstimatedBlockWeight.end())
645 return std::nullopt;
646 return WeightIt->second;
647}
648
649std::optional<uint32_t>
650BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const {
651 auto WeightIt = EstimatedLoopWeight.find(L);
652 if (WeightIt == EstimatedLoopWeight.end())
653 return std::nullopt;
654 return WeightIt->second;
655}
656
657std::optional<uint32_t>
658BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const {
659 // For edges entering a loop take weight of a loop rather than an individual
660 // block in the loop.
661 return isLoopEnteringEdge(Edge)
662 ? getEstimatedLoopWeight(Edge.second.getLoopData())
663 : getEstimatedBlockWeight(Edge.second.getBlock());
664}
665
666template <class IterT>
667std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
668 const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const {
669 std::optional<uint32_t> MaxWeight;
670 for (const BasicBlock *DstBB : Successors) {
671 const LoopBlock DstLoopBB = getLoopBlock(DstBB);
672 auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
673
674 if (!Weight)
675 return std::nullopt;
676
677 if (!MaxWeight || *MaxWeight < *Weight)
678 MaxWeight = Weight;
679 }
680
681 return MaxWeight;
682}
683
684// Updates \p LoopBB's weight and returns true. If \p LoopBB has already
685// an associated weight it is unchanged and false is returned.
686//
687// Please note by the algorithm the weight is not expected to change once set
688// thus 'false' status is used to track visited blocks.
689bool BranchProbabilityInfo::updateEstimatedBlockWeight(
690 LoopBlock &LoopBB, uint32_t BBWeight,
691 SmallVectorImpl<BasicBlock *> &BlockWorkList,
692 SmallVectorImpl<LoopBlock> &LoopWorkList) {
693 BasicBlock *BB = LoopBB.getBlock();
694
695 // In general, weight is assigned to a block when it has final value and
696 // can't/shouldn't be changed. However, there are cases when a block
697 // inherently has several (possibly "contradicting") weights. For example,
698 // "unwind" block may also contain "cold" call. In that case the first
699 // set weight is favored and all consequent weights are ignored.
700 if (!EstimatedBlockWeight.insert({BB, BBWeight}).second)
701 return false;
702
703 for (BasicBlock *PredBlock : predecessors(BB)) {
704 LoopBlock PredLoop = getLoopBlock(PredBlock);
705 // Add affected block/loop to a working list.
706 if (isLoopExitingEdge({PredLoop, LoopBB})) {
707 if (!EstimatedLoopWeight.count(PredLoop.getLoopData()))
708 LoopWorkList.push_back(PredLoop);
709 } else if (!EstimatedBlockWeight.count(PredBlock))
710 BlockWorkList.push_back(PredBlock);
711 }
712 return true;
713}
714
715// Starting from \p BB traverse through dominator blocks and assign \p BBWeight
716// to all such blocks that are post dominated by \BB. In other words to all
717// blocks that the one is executed if and only if another one is executed.
718// Importantly, we skip loops here for two reasons. First weights of blocks in
719// a loop should be scaled by trip count (yet possibly unknown). Second there is
720// no any value in doing that because that doesn't give any additional
721// information regarding distribution of probabilities inside the loop.
722// Exception is loop 'enter' and 'exit' edges that are handled in a special way
723// at calcEstimatedHeuristics.
724//
725// In addition, \p WorkList is populated with basic blocks if at leas one
726// successor has updated estimated weight.
727void BranchProbabilityInfo::propagateEstimatedBlockWeight(
728 const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT,
729 uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList,
730 SmallVectorImpl<LoopBlock> &LoopWorkList) {
731 const BasicBlock *BB = LoopBB.getBlock();
732 const auto *DTStartNode = DT->getNode(BB);
733 const auto *PDTStartNode = PDT->getNode(BB);
734
735 // TODO: Consider propagating weight down the domination line as well.
736 for (const auto *DTNode = DTStartNode; DTNode != nullptr;
737 DTNode = DTNode->getIDom()) {
738 auto *DomBB = DTNode->getBlock();
739 // Consider blocks which lie on one 'line'.
740 if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB)))
741 // If BB doesn't post dominate DomBB it will not post dominate dominators
742 // of DomBB as well.
743 break;
744
745 LoopBlock DomLoopBB = getLoopBlock(DomBB);
746 const LoopEdge Edge{DomLoopBB, LoopBB};
747 // Don't propagate weight to blocks belonging to different loops.
748 if (!isLoopEnteringExitingEdge(Edge)) {
749 if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList,
750 LoopWorkList))
751 // If DomBB has weight set then all it's predecessors are already
752 // processed (since we propagate weight up to the top of IR each time).
753 break;
754 } else if (isLoopExitingEdge(Edge)) {
755 LoopWorkList.push_back(DomLoopBB);
756 }
757 }
758}
759
760std::optional<uint32_t>
761BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) {
762 // Returns true if \p BB has call marked with "NoReturn" attribute.
763 auto hasNoReturn = [&](const BasicBlock *BB) {
764 for (const auto &I : reverse(*BB))
765 if (const CallInst *CI = dyn_cast<CallInst>(&I))
766 if (CI->hasFnAttr(Attribute::NoReturn))
767 return true;
768
769 return false;
770 };
771
772 // Important note regarding the order of checks. They are ordered by weight
773 // from lowest to highest. Doing that allows to avoid "unstable" results
774 // when several conditions heuristics can be applied simultaneously.
775 if (isa<UnreachableInst>(BB->getTerminator()) ||
776 // If this block is terminated by a call to
777 // @llvm.experimental.deoptimize then treat it like an unreachable
778 // since it is expected to practically never execute.
779 // TODO: Should we actually treat as never returning call?
781 return hasNoReturn(BB)
782 ? static_cast<uint32_t>(BlockExecWeight::NORETURN)
783 : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE);
784
785 // Check if the block is an exception handling block.
786 if (BB->isEHPad())
787 return static_cast<uint32_t>(BlockExecWeight::UNWIND);
788
789 // Check if the block contains 'cold' call.
790 for (const auto &I : *BB)
791 if (const CallInst *CI = dyn_cast<CallInst>(&I))
792 if (CI->hasFnAttr(Attribute::Cold))
793 return static_cast<uint32_t>(BlockExecWeight::COLD);
794
795 return std::nullopt;
796}
797
798// Does RPO traversal over all blocks in \p F and assigns weights to
799// 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its
800// best to propagate the weight to up/down the IR.
801void BranchProbabilityInfo::estimateBlockWeights(const Function &F,
802 DominatorTree *DT,
803 PostDominatorTree *PDT) {
804 SmallVector<BasicBlock *, 8> BlockWorkList;
805 SmallVector<LoopBlock, 8> LoopWorkList;
807
808 // By doing RPO we make sure that all predecessors already have weights
809 // calculated before visiting theirs successors.
811 for (const auto *BB : RPOT)
812 if (auto BBWeight = getInitialEstimatedBlockWeight(BB))
813 // If we were able to find estimated weight for the block set it to this
814 // block and propagate up the IR.
815 propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, *BBWeight,
816 BlockWorkList, LoopWorkList);
817
818 // BlockWorklist/LoopWorkList contains blocks/loops with at least one
819 // successor/exit having estimated weight. Try to propagate weight to such
820 // blocks/loops from successors/exits.
821 // Process loops and blocks. Order is not important.
822 do {
823 while (!LoopWorkList.empty()) {
824 const LoopBlock LoopBB = LoopWorkList.pop_back_val();
825 const LoopData LD = LoopBB.getLoopData();
826 if (EstimatedLoopWeight.count(LD))
827 continue;
828
829 auto Res = LoopExitBlocks.try_emplace(LD);
830 SmallVectorImpl<BasicBlock *> &Exits = Res.first->second;
831 if (Res.second)
832 getLoopExitBlocks(LoopBB, Exits);
833 auto LoopWeight = getMaxEstimatedEdgeWeight(
834 LoopBB, make_range(Exits.begin(), Exits.end()));
835
836 if (LoopWeight) {
837 // If we never exit the loop then we can enter it once at maximum.
838 if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
839 LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
840
841 EstimatedLoopWeight.insert({LD, *LoopWeight});
842 // Add all blocks entering the loop into working list.
843 getLoopEnterBlocks(LoopBB, BlockWorkList);
844 }
845 }
846
847 while (!BlockWorkList.empty()) {
848 // We can reach here only if BlockWorkList is not empty.
849 const BasicBlock *BB = BlockWorkList.pop_back_val();
850 if (EstimatedBlockWeight.count(BB))
851 continue;
852
853 // We take maximum over all weights of successors. In other words we take
854 // weight of "hot" path. In theory we can probably find a better function
855 // which gives higher accuracy results (comparing to "maximum") but I
856 // can't
857 // think of any right now. And I doubt it will make any difference in
858 // practice.
859 const LoopBlock LoopBB = getLoopBlock(BB);
860 auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
861
862 if (MaxWeight)
863 propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
864 BlockWorkList, LoopWorkList);
865 }
866 } while (!BlockWorkList.empty() || !LoopWorkList.empty());
867}
868
869// Calculate edge probabilities based on block's estimated weight.
870// Note that gathered weights were not scaled for loops. Thus edges entering
871// and exiting loops requires special processing.
872bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
874 "expected more than one successor!");
875
876 const LoopBlock LoopBB = getLoopBlock(BB);
877
880 if (LoopBB.getLoop())
881 computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks);
882
883 // Changed to 'true' if at least one successor has estimated weight.
884 bool FoundEstimatedWeight = false;
885 SmallVector<uint32_t, 4> SuccWeights;
886 uint64_t TotalWeight = 0;
887 // Go over all successors of BB and put their weights into SuccWeights.
888 for (const BasicBlock *SuccBB : successors(BB)) {
889 std::optional<uint32_t> Weight;
890 const LoopBlock SuccLoopBB = getLoopBlock(SuccBB);
891 const LoopEdge Edge{LoopBB, SuccLoopBB};
892
893 Weight = getEstimatedEdgeWeight(Edge);
894
895 if (isLoopExitingEdge(Edge) &&
896 // Avoid adjustment of ZERO weight since it should remain unchanged.
897 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
898 // Scale down loop exiting weight by trip count.
899 Weight = std::max(
900 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
901 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
902 TC);
903 }
904 bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
905 if (IsUnlikelyEdge &&
906 // Avoid adjustment of ZERO weight since it should remain unchanged.
907 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) {
908 // 'Unlikely' blocks have twice lower weight.
909 Weight = std::max(
910 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
911 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
912 }
913
914 if (Weight)
915 FoundEstimatedWeight = true;
916
917 auto WeightVal =
918 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
919 TotalWeight += WeightVal;
920 SuccWeights.push_back(WeightVal);
921 }
922
923 // If non of blocks have estimated weight bail out.
924 // If TotalWeight is 0 that means weight of each successor is 0 as well and
925 // equally likely. Bail out early to not deal with devision by zero.
926 if (!FoundEstimatedWeight || TotalWeight == 0)
927 return false;
928
929 assert(SuccWeights.size() == succ_size(BB) && "Missed successor?");
930 const unsigned SuccCount = SuccWeights.size();
931
932 // If the sum of weights does not fit in 32 bits, scale every weight down
933 // accordingly.
934 if (TotalWeight > UINT32_MAX) {
935 uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1;
936 TotalWeight = 0;
937 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
938 SuccWeights[Idx] /= ScalingFactor;
939 if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO))
940 SuccWeights[Idx] =
941 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
942 TotalWeight += SuccWeights[Idx];
943 }
944 assert(TotalWeight <= UINT32_MAX && "Total weight overflows");
945 }
946
947 // Finally set probabilities to edges according to estimated block weights.
948 SmallVector<BranchProbability, 4> EdgeProbabilities(
949 SuccCount, BranchProbability::getUnknown());
950
951 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) {
952 EdgeProbabilities[Idx] =
953 BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight);
954 }
955 setEdgeProbability(BB, EdgeProbabilities);
956 return true;
957}
958
959bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
960 const TargetLibraryInfo *TLI) {
961 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
962 if (!BI || !BI->isConditional())
963 return false;
964
965 Value *Cond = BI->getCondition();
966 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
967 if (!CI)
968 return false;
969
970 auto GetConstantInt = [](Value *V) {
971 if (auto *I = dyn_cast<BitCastInst>(V))
972 return dyn_cast<ConstantInt>(I->getOperand(0));
973 return dyn_cast<ConstantInt>(V);
974 };
975
976 Value *RHS = CI->getOperand(1);
977 ConstantInt *CV = GetConstantInt(RHS);
978 if (!CV)
979 return false;
980
981 // If the LHS is the result of AND'ing a value with a single bit bitmask,
982 // we don't have information about probabilities.
983 if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
984 if (LHS->getOpcode() == Instruction::And)
985 if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1)))
986 if (AndRHS->getValue().isPowerOf2())
987 return false;
988
989 // Check if the LHS is the return value of a library function
991 if (TLI)
992 if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
993 if (Function *CalledFn = Call->getCalledFunction())
994 TLI->getLibFunc(*CalledFn, Func);
995
996 ProbabilityTable::const_iterator Search;
997 if (Func == LibFunc_strcasecmp ||
998 Func == LibFunc_strcmp ||
999 Func == LibFunc_strncasecmp ||
1000 Func == LibFunc_strncmp ||
1001 Func == LibFunc_memcmp ||
1002 Func == LibFunc_bcmp) {
1003 Search = ICmpWithLibCallTable.find(CI->getPredicate());
1004 if (Search == ICmpWithLibCallTable.end())
1005 return false;
1006 } else if (CV->isZero()) {
1007 Search = ICmpWithZeroTable.find(CI->getPredicate());
1008 if (Search == ICmpWithZeroTable.end())
1009 return false;
1010 } else if (CV->isOne()) {
1011 Search = ICmpWithOneTable.find(CI->getPredicate());
1012 if (Search == ICmpWithOneTable.end())
1013 return false;
1014 } else if (CV->isMinusOne()) {
1015 Search = ICmpWithMinusOneTable.find(CI->getPredicate());
1016 if (Search == ICmpWithMinusOneTable.end())
1017 return false;
1018 } else {
1019 return false;
1020 }
1021
1022 setEdgeProbability(BB, Search->second);
1023 return true;
1024}
1025
1026bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
1027 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
1028 if (!BI || !BI->isConditional())
1029 return false;
1030
1031 Value *Cond = BI->getCondition();
1032 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
1033 if (!FCmp)
1034 return false;
1035
1036 ProbabilityList ProbList;
1037 if (FCmp->isEquality()) {
1038 ProbList = !FCmp->isTrueWhenEqual() ?
1039 // f1 == f2 -> Unlikely
1041 // f1 != f2 -> Likely
1043 } else {
1044 auto Search = FCmpTable.find(FCmp->getPredicate());
1045 if (Search == FCmpTable.end())
1046 return false;
1047 ProbList = Search->second;
1048 }
1049
1050 setEdgeProbability(BB, ProbList);
1051 return true;
1052}
1053
1055 Probs.clear();
1056 Handles.clear();
1057}
1058
1061 // Check whether the analysis, all analyses on functions, or the function's
1062 // CFG have been preserved.
1063 auto PAC = PA.getChecker<BranchProbabilityAnalysis>();
1064 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
1065 PAC.preservedSet<CFGAnalyses>());
1066}
1067
1069 OS << "---- Branch Probabilities ----\n";
1070 // We print the probabilities from the last function the analysis ran over,
1071 // or the function it is currently running over.
1072 assert(LastF && "Cannot print prior to running over a function");
1073 for (const auto &BI : *LastF) {
1074 for (const BasicBlock *Succ : successors(&BI))
1075 printEdgeProbability(OS << " ", &BI, Succ);
1076 }
1077}
1078
1080isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
1081 // Hot probability is at least 4/5 = 80%
1082 // FIXME: Compare against a static "hot" BranchProbability.
1083 return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
1084}
1085
1086/// Get the raw edge probability for the edge. If can't find it, return a
1087/// default probability 1/N where N is the number of successors. Here an edge is
1088/// specified using PredBlock and an
1089/// index to the successors.
1092 unsigned IndexInSuccessors) const {
1093 auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
1094 assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) ==
1095 (Probs.end() == I) &&
1096 "Probability for I-th successor must always be defined along with the "
1097 "probability for the first successor");
1098
1099 if (I != Probs.end())
1100 return I->second;
1101
1102 return {1, static_cast<uint32_t>(succ_size(Src))};
1103}
1104
1107 const_succ_iterator Dst) const {
1108 return getEdgeProbability(Src, Dst.getSuccessorIndex());
1109}
1110
1111/// Get the raw edge probability calculated for the block pair. This returns the
1112/// sum of all raw edge probabilities from Src to Dst.
1115 const BasicBlock *Dst) const {
1116 if (!Probs.count(std::make_pair(Src, 0)))
1117 return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src));
1118
1119 auto Prob = BranchProbability::getZero();
1120 for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
1121 if (*I == Dst)
1122 Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second;
1123
1124 return Prob;
1125}
1126
1127/// Set the edge probability for all edges at once.
1129 const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
1130 assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
1131 eraseBlock(Src); // Erase stale data if any.
1132 if (Probs.size() == 0)
1133 return; // Nothing to set.
1134
1135 Handles.insert(BasicBlockCallbackVH(Src, this));
1136 uint64_t TotalNumerator = 0;
1137 for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
1138 this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx];
1139 LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx
1140 << " successor probability to " << Probs[SuccIdx]
1141 << "\n");
1142 TotalNumerator += Probs[SuccIdx].getNumerator();
1143 }
1144
1145 // Because of rounding errors the total probability cannot be checked to be
1146 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
1147 // Instead, every single probability in Probs must be as accurate as possible.
1148 // This results in error 1/denominator at most, thus the total absolute error
1149 // should be within Probs.size / BranchProbability::getDenominator.
1150 assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size());
1151 assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
1152 (void)TotalNumerator;
1153}
1154
1156 BasicBlock *Dst) {
1157 eraseBlock(Dst); // Erase stale data if any.
1158 unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors();
1159 assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
1160 if (NumSuccessors == 0)
1161 return; // Nothing to set.
1162 if (!this->Probs.contains(std::make_pair(Src, 0)))
1163 return; // No probability is set for edges from Src. Keep the same for Dst.
1164
1165 Handles.insert(BasicBlockCallbackVH(Dst, this));
1166 for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) {
1167 auto Prob = this->Probs[std::make_pair(Src, SuccIdx)];
1168 this->Probs[std::make_pair(Dst, SuccIdx)] = Prob;
1169 LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx
1170 << " successor probability to " << Prob << "\n");
1171 }
1172}
1173
1175 assert(Src->getTerminator()->getNumSuccessors() == 2);
1176 auto It0 = Probs.find(std::make_pair(Src, 0));
1177 if (It0 == Probs.end())
1178 return; // No probability is set for edges from Src
1179 auto It1 = Probs.find(std::make_pair(Src, 1));
1180 assert(It1 != Probs.end());
1181 std::swap(It0->second, It1->second);
1182}
1183
1186 const BasicBlock *Src,
1187 const BasicBlock *Dst) const {
1188 const BranchProbability Prob = getEdgeProbability(Src, Dst);
1189 OS << "edge ";
1190 Src->printAsOperand(OS, false, Src->getModule());
1191 OS << " -> ";
1192 Dst->printAsOperand(OS, false, Dst->getModule());
1193 OS << " probability is " << Prob
1194 << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
1195
1196 return OS;
1197}
1198
1200 LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n");
1201
1202 // Note that we cannot use successors of BB because the terminator of BB may
1203 // have changed when eraseBlock is called as a BasicBlockCallbackVH callback.
1204 // Instead we remove prob data for the block by iterating successors by their
1205 // indices from 0 till the last which exists. There could not be prob data for
1206 // a pair (BB, N) if there is no data for (BB, N-1) because the data is always
1207 // set for all successors from 0 to M at once by the method
1208 // setEdgeProbability().
1209 Handles.erase(BasicBlockCallbackVH(BB, this));
1210 for (unsigned I = 0;; ++I) {
1211 auto MapI = Probs.find(std::make_pair(BB, I));
1212 if (MapI == Probs.end()) {
1213 assert(Probs.count(std::make_pair(BB, I + 1)) == 0 &&
1214 "Must be no more successors");
1215 return;
1216 }
1217 Probs.erase(MapI);
1218 }
1219}
1220
1222 const TargetLibraryInfo *TLI,
1223 DominatorTree *DT,
1224 PostDominatorTree *PDT) {
1225 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
1226 << " ----\n\n");
1227 LastF = &F; // Store the last function we ran on for printing.
1228 LI = &LoopI;
1229
1230 SccI = std::make_unique<SccInfo>(F);
1231
1232 assert(EstimatedBlockWeight.empty());
1233 assert(EstimatedLoopWeight.empty());
1234
1235 std::unique_ptr<DominatorTree> DTPtr;
1236 std::unique_ptr<PostDominatorTree> PDTPtr;
1237
1238 if (!DT) {
1239 DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F));
1240 DT = DTPtr.get();
1241 }
1242
1243 if (!PDT) {
1244 PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
1245 PDT = PDTPtr.get();
1246 }
1247
1248 estimateBlockWeights(F, DT, PDT);
1249
1250 // Walk the basic blocks in post-order so that we can build up state about
1251 // the successors of a block iteratively.
1252 for (const auto *BB : post_order(&F.getEntryBlock())) {
1253 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
1254 << "\n");
1255 // If there is no at least two successors, no sense to set probability.
1256 if (BB->getTerminator()->getNumSuccessors() < 2)
1257 continue;
1258 if (calcMetadataWeights(BB))
1259 continue;
1260 if (calcEstimatedHeuristics(BB))
1261 continue;
1262 if (calcPointerHeuristics(BB))
1263 continue;
1264 if (calcZeroHeuristics(BB, TLI))
1265 continue;
1266 if (calcFloatingPointHeuristics(BB))
1267 continue;
1268 }
1269
1270 EstimatedLoopWeight.clear();
1271 EstimatedBlockWeight.clear();
1272 SccI.reset();
1273
1274 if (PrintBranchProb && (PrintBranchProbFuncName.empty() ||
1275 F.getName() == PrintBranchProbFuncName)) {
1276 print(dbgs());
1277 }
1278}
1279
1281 AnalysisUsage &AU) const {
1282 // We require DT so it's available when LI is available. The LI updating code
1283 // asserts that DT is also present so if we don't make sure that we have DT
1284 // here, that assert will trigger.
1290 AU.setPreservesAll();
1291}
1292
1294 const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1295 const TargetLibraryInfo &TLI =
1296 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1297 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1298 PostDominatorTree &PDT =
1299 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1300 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1301 return false;
1302}
1303
1305
1307 const Module *) const {
1308 BPI.print(OS);
1309}
1310
1311AnalysisKey BranchProbabilityAnalysis::Key;
1314 auto &LI = AM.getResult<LoopAnalysis>(F);
1315 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1316 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1317 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
1319 BPI.calculate(F, LI, &TLI, &DT, &PDT);
1320 return BPI;
1321}
1322
1325 OS << "Printing analysis 'Branch Probability Analysis' for function '"
1326 << F.getName() << "':\n";
1328 return PreservedAnalyses::all();
1329}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
BlockExecWeight
Set of dedicated "absolute" execution weights for a block.
@ NORETURN
Weight to a block containing non returning call.
@ UNWIND
Weight to 'unwind' block of an invoke instruction.
@ COLD
Weight to a 'cold' block.
@ ZERO
Special weight used for cases with exact zero probability.
@ UNREACHABLE
Weight to an 'unreachable' block.
@ DEFAULT
Default weight is used in cases when there is no dedicated execution weight set.
@ LOWEST_NON_ZERO
Minimal possible non zero weight.
static const uint32_t FPH_TAKEN_WEIGHT
static const BranchProbability FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const uint32_t LBH_TAKEN_WEIGHT
static const uint32_t ZH_NONTAKEN_WEIGHT
static const ProbabilityTable ICmpWithLibCallTable
strcmp and similar functions return zero, negative, or positive, if the first string is equal,...
static const ProbabilityTable ICmpWithMinusOneTable
Integer compares with -1:
branch prob
static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const ProbabilityTable ICmpWithZeroTable
Integer compares with 0:
static const uint32_t PH_NONTAKEN_WEIGHT
std::map< CmpInst::Predicate, ProbabilityList > ProbabilityTable
static const BranchProbability PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
static const BranchProbability ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT+ZH_NONTAKEN_WEIGHT)
static const BranchProbability FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT+FPH_UNO_WEIGHT)
static const uint32_t PH_TAKEN_WEIGHT
Heuristics and lookup tables for non-loop branches: Pointer Heuristics (PH)
static const BranchProbability FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT+FPH_NONTAKEN_WEIGHT)
branch Branch Probability Analysis
static const BranchProbability PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT+PH_NONTAKEN_WEIGHT)
SmallVector< BranchProbability > ProbabilityList
static const ProbabilityTable ICmpWithOneTable
Integer compares with 1:
static const uint32_t ZH_TAKEN_WEIGHT
Zero Heuristics (ZH)
static const uint32_t FPH_NONTAKEN_WEIGHT
static const BranchProbability UR_TAKEN_PROB
Unreachable-terminating branch taken probability.
static const ProbabilityTable FCmpTable
Floating-Point compares:
static const uint32_t LBH_NONTAKEN_WEIGHT
static const ProbabilityTable PointerTable
Pointer comparisons:
static const uint32_t FPH_ORD_WEIGHT
This is the probability for an ordered floating point comparison.
static void computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, SmallPtrSetImpl< const BasicBlock * > &UnlikelyBlocks)
static const uint32_t FPH_UNO_WEIGHT
This is the probability for an unordered floating point comparison, it means one or two of the operan...
static cl::opt< std::string > PrintBranchProbFuncName("print-bpi-func-name", cl::Hidden, cl::desc("The option to specify the name of the function " "whose branch probability info is printed."))
static cl::opt< bool > PrintBranchProb("print-bpi", cl::init(false), cl::Hidden, cl::desc("Print the branch probability info."))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Value * RHS
Value * LHS
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:50
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:294
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:287
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:252
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:707
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
LLVM_ABI BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM)
Run the analysis pass over a function and produce BPI.
Legacy analysis pass which computes BranchProbabilityInfo.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
void print(raw_ostream &OS, const Module *M=nullptr) const override
print - Print out the internal state of the pass.
LLVM_ABI void getSccEnterBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Enters) const
Fills in Enters vector with all such blocks that don't belong to SCC with SccNum ID but there is an e...
LLVM_ABI void getSccExitBlocks(int SccNum, SmallVectorImpl< BasicBlock * > &Exits) const
Fills in Exits vector with all such blocks that don't belong to SCC with SccNum ID but there is an ed...
LLVM_ABI int getSCCNum(const BasicBlock *BB) const
If BB belongs to some SCC then ID of that SCC is returned, otherwise -1 is returned.
Analysis providing branch probability information.
LLVM_ABI void eraseBlock(const BasicBlock *BB)
Forget analysis results for the given basic block.
LLVM_ABI void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
LLVM_ABI bool invalidate(Function &, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &)
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
LLVM_ABI void calculate(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI, DominatorTree *DT, PostDominatorTree *PDT)
LLVM_ABI bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const
Test if an edge is hot relative to other out-edges of the Src.
LLVM_ABI void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
LLVM_ABI void print(raw_ostream &OS) const
LLVM_ABI raw_ostream & printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const
Print an edge's probability.
LLVM_ABI void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static uint32_t getDenominator()
static BranchProbability getRaw(uint32_t N)
static BranchProbability getOne()
static BranchProbability getUnknown()
uint32_t getNumerator() const
static BranchProbability getZero()
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:707
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_NE
not equal
Definition: InstrTypes.h:700
bool isTrueWhenEqual() const
This is just a convenience.
Definition: InstrTypes.h:944
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:767
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:226
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:214
This is an important base class in LLVM.
Definition: Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
bool empty() const
Definition: DenseMap.h:119
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:173
iterator end()
Definition: DenseMap.h:87
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate Pred)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:570
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:597
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
Metadata node.
Definition: Metadata.h:1077
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:275
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
Value * getOperand(unsigned i) const
Definition: User.h:232
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:49
BlockType
Used as immediate MachineOperands for block signatures.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto pred_end(const MachineBasicBlock *BB)
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:233
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< po_iterator< T > > post_order(const T &G)
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition: MathExtras.h:463
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI MDNode * getValidBranchWeightMDNode(const Instruction &I)
Get the valid branch weights metadata node.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Mul
Product of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:29