LLVM 22.0.0git
JumpThreading.cpp
Go to the documentation of this file.
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Jump Threading pass.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
20#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/CFG.h"
30#include "llvm/Analysis/Loads.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Dominators.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/InstrTypes.h"
47#include "llvm/IR/Instruction.h"
50#include "llvm/IR/Intrinsics.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/MDBuilder.h"
53#include "llvm/IR/Metadata.h"
54#include "llvm/IR/Module.h"
55#include "llvm/IR/PassManager.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/Use.h"
60#include "llvm/IR/Value.h"
65#include "llvm/Support/Debug.h"
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <memory>
76#include <utility>
77
78using namespace llvm;
79using namespace jumpthreading;
80
81#define DEBUG_TYPE "jump-threading"
82
83STATISTIC(NumThreads, "Number of jumps threaded");
84STATISTIC(NumFolds, "Number of terminators folded");
85STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
86
88BBDuplicateThreshold("jump-threading-threshold",
89 cl::desc("Max block size to duplicate for jump threading"),
91
94 "jump-threading-implication-search-threshold",
95 cl::desc("The number of predecessors to search for a stronger "
96 "condition to use to thread over a weaker condition"),
98
100 "jump-threading-phi-threshold",
101 cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),
102 cl::Hidden);
103
105 "jump-threading-across-loop-headers",
106 cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
107 cl::init(false), cl::Hidden);
108
110 DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
111}
112
113// Update branch probability information according to conditional
114// branch probability. This is usually made possible for cloned branches
115// in inline instances by the context specific profile in the caller.
116// For instance,
117//
118// [Block PredBB]
119// [Branch PredBr]
120// if (t) {
121// Block A;
122// } else {
123// Block B;
124// }
125//
126// [Block BB]
127// cond = PN([true, %A], [..., %B]); // PHI node
128// [Branch CondBr]
129// if (cond) {
130// ... // P(cond == true) = 1%
131// }
132//
133// Here we know that when block A is taken, cond must be true, which means
134// P(cond == true | A) = 1
135//
136// Given that P(cond == true) = P(cond == true | A) * P(A) +
137// P(cond == true | B) * P(B)
138// we get:
139// P(cond == true ) = P(A) + P(cond == true | B) * P(B)
140//
141// which gives us:
142// P(A) is less than or equal to P(cond == true), i.e.
143// P(t == true) <= P(cond == true)
144//
145// In other words, if we know P(cond == true) is unlikely, we know
146// that P(t == true) is also unlikely.
147//
149 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
150 if (!CondBr)
151 return;
152
153 uint64_t TrueWeight, FalseWeight;
154 if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))
155 return;
156
157 if (TrueWeight + FalseWeight == 0)
158 // Zero branch_weights do not give a hint for getting branch probabilities.
159 // Technically it would result in division by zero denominator, which is
160 // TrueWeight + FalseWeight.
161 return;
162
163 // Returns the outgoing edge of the dominating predecessor block
164 // that leads to the PhiNode's incoming block:
165 auto GetPredOutEdge =
166 [](BasicBlock *IncomingBB,
167 BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {
168 auto *PredBB = IncomingBB;
169 auto *SuccBB = PhiBB;
171 while (true) {
172 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
173 if (PredBr && PredBr->isConditional())
174 return {PredBB, SuccBB};
175 Visited.insert(PredBB);
176 auto *SinglePredBB = PredBB->getSinglePredecessor();
177 if (!SinglePredBB)
178 return {nullptr, nullptr};
179
180 // Stop searching when SinglePredBB has been visited. It means we see
181 // an unreachable loop.
182 if (Visited.count(SinglePredBB))
183 return {nullptr, nullptr};
184
185 SuccBB = PredBB;
186 PredBB = SinglePredBB;
187 }
188 };
189
190 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
191 Value *PhiOpnd = PN->getIncomingValue(i);
192 ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
193
194 if (!CI || !CI->getType()->isIntegerTy(1))
195 continue;
196
199 TrueWeight, TrueWeight + FalseWeight)
201 FalseWeight, TrueWeight + FalseWeight));
202
203 auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
204 if (!PredOutEdge.first)
205 return;
206
207 BasicBlock *PredBB = PredOutEdge.first;
208 BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
209 if (!PredBr)
210 return;
211
212 uint64_t PredTrueWeight, PredFalseWeight;
213 // FIXME: We currently only set the profile data when it is missing.
214 // With PGO, this can be used to refine even existing profile data with
215 // context information. This needs to be done after more performance
216 // testing.
217 if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))
218 continue;
219
220 // We can not infer anything useful when BP >= 50%, because BP is the
221 // upper bound probability value.
222 if (BP >= BranchProbability(50, 100))
223 continue;
224
225 uint32_t Weights[2];
226 if (PredBr->getSuccessor(0) == PredOutEdge.second) {
227 Weights[0] = BP.getNumerator();
228 Weights[1] = BP.getCompl().getNumerator();
229 } else {
230 Weights[0] = BP.getCompl().getNumerator();
231 Weights[1] = BP.getNumerator();
232 }
233 setBranchWeights(*PredBr, Weights, hasBranchWeightOrigin(*PredBr));
234 }
235}
236
239 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
240 // Jump threading makes no sense for targets with divergent control flow.
242 return PreservedAnalyses::all();
243 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
244 auto &LVI = AM.getResult<LazyValueAnalysis>(F);
245 auto &AA = AM.getResult<AAManager>(F);
246 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
247
248 bool Changed =
249 runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
250 std::make_unique<DomTreeUpdater>(
251 &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
252 nullptr, nullptr);
253
254 if (!Changed)
255 return PreservedAnalyses::all();
256
257
259
260#if defined(EXPENSIVE_CHECKS)
261 assert(getDomTreeUpdater()->getDomTree().verify(
262 DominatorTree::VerificationLevel::Full) &&
263 "DT broken after JumpThreading");
264 assert((!getDomTreeUpdater()->hasPostDomTree() ||
265 getDomTreeUpdater()->getPostDomTree().verify(
267 "PDT broken after JumpThreading");
268#else
269 assert(getDomTreeUpdater()->getDomTree().verify(
270 DominatorTree::VerificationLevel::Fast) &&
271 "DT broken after JumpThreading");
272 assert((!getDomTreeUpdater()->hasPostDomTree() ||
273 getDomTreeUpdater()->getPostDomTree().verify(
275 "PDT broken after JumpThreading");
276#endif
277
278 return getPreservedAnalysis();
279}
280
282 TargetLibraryInfo *TLI_,
284 AliasAnalysis *AA_,
285 std::unique_ptr<DomTreeUpdater> DTU_,
286 BlockFrequencyInfo *BFI_,
287 BranchProbabilityInfo *BPI_) {
288 LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
289 F = &F_;
290 FAM = FAM_;
291 TLI = TLI_;
292 TTI = TTI_;
293 LVI = LVI_;
294 AA = AA_;
295 DTU = std::move(DTU_);
296 BFI = BFI_;
297 BPI = BPI_;
298 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
299 F->getParent(), Intrinsic::experimental_guard);
300 HasGuards = GuardDecl && !GuardDecl->use_empty();
301
302 // Reduce the number of instructions duplicated when optimizing strictly for
303 // size.
304 if (BBDuplicateThreshold.getNumOccurrences())
305 BBDupThreshold = BBDuplicateThreshold;
306 else if (F->hasMinSize())
307 BBDupThreshold = 3;
308 else
309 BBDupThreshold = DefaultBBDupThreshold;
310
311 assert(DTU && "DTU isn't passed into JumpThreading before using it.");
312 assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
313 DominatorTree &DT = DTU->getDomTree();
314
315 Unreachable.clear();
316 for (auto &BB : *F)
317 if (!DT.isReachableFromEntry(&BB))
318 Unreachable.insert(&BB);
319
322
323 bool EverChanged = false;
324 bool Changed;
325 do {
326 Changed = false;
327 for (auto &BB : *F) {
328 if (Unreachable.count(&BB))
329 continue;
330 while (processBlock(&BB)) // Thread all of the branches we can over BB.
331 Changed = ChangedSinceLastAnalysisUpdate = true;
332
333 // Stop processing BB if it's the entry or is now deleted. The following
334 // routines attempt to eliminate BB and locating a suitable replacement
335 // for the entry is non-trivial.
336 if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
337 continue;
338
339 if (pred_empty(&BB)) {
340 // When processBlock makes BB unreachable it doesn't bother to fix up
341 // the instructions in it. We must remove BB to prevent invalid IR.
342 LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()
343 << "' with terminator: " << *BB.getTerminator()
344 << '\n');
345 LoopHeaders.erase(&BB);
346 LVI->eraseBlock(&BB);
347 DeleteDeadBlock(&BB, DTU.get());
348 Changed = ChangedSinceLastAnalysisUpdate = true;
349 continue;
350 }
351
352 // processBlock doesn't thread BBs with unconditional TIs. However, if BB
353 // is "almost empty", we attempt to merge BB with its sole successor.
354 auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
355 if (BI && BI->isUnconditional()) {
356 BasicBlock *Succ = BI->getSuccessor(0);
357 if (
358 // The terminator must be the only non-phi instruction in BB.
359 BB.getFirstNonPHIOrDbg(true)->isTerminator() &&
360 // Don't alter Loop headers and latches to ensure another pass can
361 // detect and transform nested loops later.
362 !LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
364 // BB is valid for cleanup here because we passed in DTU. F remains
365 // BB's parent until a DTU->getDomTree() event.
366 LVI->eraseBlock(&BB);
367 Changed = ChangedSinceLastAnalysisUpdate = true;
368 }
369 }
370 }
371 EverChanged |= Changed;
372 } while (Changed);
373
374 // Jump threading may have introduced redundant debug values into F which
375 // should be removed.
376 if (EverChanged)
377 for (auto &BB : *F) {
379 }
380
381 LoopHeaders.clear();
382 return EverChanged;
383}
384
385// Replace uses of Cond with ToVal when safe to do so. If all uses are
386// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
387// because we may incorrectly replace uses when guards/assumes are uses of
388// `Cond` and we used the guards/assumes to reason about the `Cond` value
389// at the end of block. RAUW unconditionally replaces all uses
390// including the guards/assumes themselves and the uses before the
391// guard/assume.
393 BasicBlock *KnownAtEndOfBB) {
394 bool Changed = false;
395 assert(Cond->getType() == ToVal->getType());
396 // We can unconditionally replace all uses in non-local blocks (i.e. uses
397 // strictly dominated by BB), since LVI information is true from the
398 // terminator of BB.
399 if (Cond->getParent() == KnownAtEndOfBB)
400 Changed |= replaceNonLocalUsesWith(Cond, ToVal);
401 for (Instruction &I : reverse(*KnownAtEndOfBB)) {
402 // Replace any debug-info record users of Cond with ToVal.
403 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
404 DVR.replaceVariableLocationOp(Cond, ToVal, true);
405
406 // Reached the Cond whose uses we are trying to replace, so there are no
407 // more uses.
408 if (&I == Cond)
409 break;
410 // We only replace uses in instructions that are guaranteed to reach the end
411 // of BB, where we know Cond is ToVal.
413 break;
414 Changed |= I.replaceUsesOfWith(Cond, ToVal);
415 }
416 if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
417 Cond->eraseFromParent();
418 Changed = true;
419 }
420 return Changed;
421}
422
423/// Return the cost of duplicating the instructions of this block from the
424/// first non-PHI up to (excluding) StopAt to thread across it. Stop scanning
425/// once the threshold is exceeded. If duplication is impossible, returns ~0U.
427 BasicBlock *BB,
429 unsigned Threshold) {
430 assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
431
432 // Do not duplicate the BB if it has a lot of PHI nodes.
433 // If a threadable chain is too long then the number of PHI nodes can add up,
434 // leading to a substantial increase in compile time when rewriting the SSA.
435 unsigned PhiCount = 0;
436 Instruction *FirstNonPHI = nullptr;
437 for (Instruction &I : *BB) {
438 if (!isa<PHINode>(&I)) {
439 FirstNonPHI = &I;
440 break;
441 }
442 if (++PhiCount > PhiDuplicateThreshold)
443 return ~0U;
444 }
445
446 /// Ignore PHI nodes, these will be flattened when duplication happens.
447 BasicBlock::const_iterator I(FirstNonPHI);
448
449 // FIXME: THREADING will delete values that are just used to compute the
450 // branch, so they shouldn't count against the duplication cost.
451
452 unsigned Bonus = 0;
453 if (BB->getTerminator() == StopAt) {
454 // Threading through a switch statement is particularly profitable. If this
455 // block ends in a switch, decrease its cost to make it more likely to
456 // happen.
457 if (isa<SwitchInst>(StopAt))
458 Bonus = 6;
459
460 // The same holds for indirect branches, but slightly more so.
461 if (isa<IndirectBrInst>(StopAt))
462 Bonus = 8;
463 }
464
465 // Bump the threshold up so the early exit from the loop doesn't skip the
466 // terminator-based Size adjustment at the end.
467 Threshold += Bonus;
468
469 // Sum up the cost of each instruction until we get to the terminator. Don't
470 // include the terminator because the copy won't include it.
471 unsigned Size = 0;
472 for (; &*I != StopAt; ++I) {
473
474 // Stop scanning the block once the accumulated cost exceeds the threshold.
475 if (Size > Threshold)
476 return Size;
477
478 // Bail out if this instruction gives back a token type, it is not possible
479 // to duplicate it if it is used outside this BB.
480 if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
481 return ~0U;
482
483 // Blocks with NoDuplicate are modelled as having infinite cost, so they
484 // are never duplicated.
485 if (const CallInst *CI = dyn_cast<CallInst>(I))
486 if (CI->cannotDuplicate() || CI->isConvergent())
487 return ~0U;
488
491 continue;
492
493 // All other instructions count for at least one unit.
494 ++Size;
495
496 // Calls are more expensive. If they are non-intrinsic calls, we model them
497 // as having cost of 4. If they are a non-vector intrinsic, we model them
498 // as having cost of 2 total, and if they are a vector intrinsic, we model
499 // them as having cost 1.
500 if (const CallInst *CI = dyn_cast<CallInst>(I)) {
501 if (!isa<IntrinsicInst>(CI))
502 Size += 3;
503 else if (!CI->getType()->isVectorTy())
504 Size += 1;
505 }
506 }
507
508 return Size > Bonus ? Size - Bonus : 0;
509}
510
511/// findLoopHeaders - We do not want jump threading to turn proper loop
512/// structures into irreducible loops. Doing this breaks up the loop nesting
513/// hierarchy and pessimizes later transformations. To prevent this from
514/// happening, we first have to find the loop headers. Here we approximate this
515/// by finding targets of backedges in the CFG.
516///
517/// Note that there definitely are cases when we want to allow threading of
518/// edges across a loop header. For example, threading a jump from outside the
519/// loop (the preheader) to an exit block of the loop is definitely profitable.
520/// It is also almost always profitable to thread backedges from within the loop
521/// to exit blocks, and is often profitable to thread backedges to other blocks
522/// within the loop (forming a nested loop). This simple analysis is not rich
523/// enough to track all of these properties and keep it up-to-date as the CFG
524/// mutates, so we don't allow any of these transformations.
527 FindFunctionBackedges(F, Edges);
528 LoopHeaders.insert_range(llvm::make_second_range(Edges));
529}
530
531/// getKnownConstant - Helper method to determine if we can thread over a
532/// terminator with the given value as its condition, and if so what value to
533/// use for that. What kind of value this is depends on whether we want an
534/// integer or a block address, but an undef is always accepted.
535/// Returns null if Val is null or not an appropriate constant.
///
/// With a Preference of WantBlockAddress only a BlockAddress (after stripping
/// pointer casts from Val) is accepted; with any other Preference only a
/// ConstantInt is accepted.
537 if (!Val)
538 return nullptr;
539
540 // Undef is "known" enough.
// It is returned as-is, regardless of Preference.
541 if (UndefValue *U = dyn_cast<UndefValue>(Val))
542 return U;
543
// A block address may be hidden behind pointer casts, so look through them.
544 if (Preference == WantBlockAddress)
545 return dyn_cast<BlockAddress>(Val->stripPointerCasts());
546
// Otherwise only a plain ConstantInt qualifies; anything else yields null.
547 return dyn_cast<ConstantInt>(Val);
548}
549
550/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see
551/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
552/// in any of our predecessors. If so, return the known list of value and pred
553/// BB in the result vector.
554///
555/// This returns true if there were any known values.
557 Value *V, BasicBlock *BB, PredValueInfo &Result,
558 ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet,
559 Instruction *CxtI) {
560 const DataLayout &DL = BB->getDataLayout();
561
562 // This method walks up use-def chains recursively. Because of this, we could
563 // get into an infinite loop going around loops in the use-def chain. To
564 // prevent this, keep track of which values we've already visited and
565 // terminate the search if we loop back to one of them.
566 if (!RecursionSet.insert(V).second)
567 return false;
568
569 // If V is a constant, then it is known in all predecessors.
570 if (Constant *KC = getKnownConstant(V, Preference)) {
571 for (BasicBlock *Pred : predecessors(BB))
572 Result.emplace_back(KC, Pred);
573
574 return !Result.empty();
575 }
576
577 // If V is a non-instruction value, or an instruction in a different block,
578 // then it can't be derived from a PHI.
579 Instruction *I = dyn_cast<Instruction>(V);
580 if (!I || I->getParent() != BB) {
581
582 // Okay, if this is a live-in value, see if it has a known value on any
583 // edge from our predecessors.
584 for (BasicBlock *P : predecessors(BB)) {
585 using namespace PatternMatch;
586 // If the value is known by LazyValueInfo to be a constant in a
587 // predecessor, use that information to try to thread this block.
588 Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
589 // If I is a non-local compare-with-constant instruction, use more-rich
590 // 'getPredicateOnEdge' method. This would be able to handle value
591 // inequalities better, for example if the compare is "X < 4" and "X < 3"
592 // is known true but "X < 4" itself is not available.
593 CmpPredicate Pred;
594 Value *Val;
595 Constant *Cst;
596 if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst))))
597 PredCst = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
598 if (Constant *KC = getKnownConstant(PredCst, Preference))
599 Result.emplace_back(KC, P);
600 }
601
602 return !Result.empty();
603 }
604
605 /// If I is a PHI node, then we know the incoming values for any constants.
606 if (PHINode *PN = dyn_cast<PHINode>(I)) {
607 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
608 Value *InVal = PN->getIncomingValue(i);
609 if (Constant *KC = getKnownConstant(InVal, Preference)) {
610 Result.emplace_back(KC, PN->getIncomingBlock(i));
611 } else {
612 Constant *CI = LVI->getConstantOnEdge(InVal,
613 PN->getIncomingBlock(i),
614 BB, CxtI);
615 if (Constant *KC = getKnownConstant(CI, Preference))
616 Result.emplace_back(KC, PN->getIncomingBlock(i));
617 }
618 }
619
620 return !Result.empty();
621 }
622
623 // Handle Cast instructions.
624 if (CastInst *CI = dyn_cast<CastInst>(I)) {
625 Value *Source = CI->getOperand(0);
626 PredValueInfoTy Vals;
627 computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,
628 RecursionSet, CxtI);
629 if (Vals.empty())
630 return false;
631
632 // Convert the known values.
633 for (auto &Val : Vals)
634 if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,
635 CI->getType(), DL))
636 Result.emplace_back(Folded, Val.second);
637
638 return !Result.empty();
639 }
640
641 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
642 Value *Source = FI->getOperand(0);
643 computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
644 RecursionSet, CxtI);
645
646 erase_if(Result, [](auto &Pair) {
647 return !isGuaranteedNotToBeUndefOrPoison(Pair.first);
648 });
649
650 return !Result.empty();
651 }
652
653 // Handle some boolean conditions.
654 if (I->getType()->getPrimitiveSizeInBits() == 1) {
655 using namespace PatternMatch;
656 if (Preference != WantInteger)
657 return false;
658 // X | true -> true
659 // X & false -> false
660 Value *Op0, *Op1;
661 if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||
662 match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
663 PredValueInfoTy LHSVals, RHSVals;
664
666 RecursionSet, CxtI);
668 RecursionSet, CxtI);
669
670 if (LHSVals.empty() && RHSVals.empty())
671 return false;
672
673 ConstantInt *InterestingVal;
674 if (match(I, m_LogicalOr()))
675 InterestingVal = ConstantInt::getTrue(I->getContext());
676 else
677 InterestingVal = ConstantInt::getFalse(I->getContext());
678
679 SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
680
681 // Scan for the sentinel. If we find an undef, force it to the
682 // interesting value: x|undef -> true and x&undef -> false.
683 for (const auto &LHSVal : LHSVals)
684 if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
685 Result.emplace_back(InterestingVal, LHSVal.second);
686 LHSKnownBBs.insert(LHSVal.second);
687 }
688 for (const auto &RHSVal : RHSVals)
689 if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
690 // If we already inferred a value for this block on the LHS, don't
691 // re-add it.
692 if (!LHSKnownBBs.count(RHSVal.second))
693 Result.emplace_back(InterestingVal, RHSVal.second);
694 }
695
696 return !Result.empty();
697 }
698
699 // Handle the NOT form of XOR.
700 if (I->getOpcode() == Instruction::Xor &&
701 isa<ConstantInt>(I->getOperand(1)) &&
702 cast<ConstantInt>(I->getOperand(1))->isOne()) {
703 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
704 WantInteger, RecursionSet, CxtI);
705 if (Result.empty())
706 return false;
707
708 // Invert the known values.
709 for (auto &R : Result)
710 R.first = ConstantExpr::getNot(R.first);
711
712 return true;
713 }
714
715 // Try to simplify some other binary operator values.
716 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
717 if (Preference != WantInteger)
718 return false;
719 if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
720 PredValueInfoTy LHSVals;
721 computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
722 WantInteger, RecursionSet, CxtI);
723
724 // Try to use constant folding to simplify the binary operator.
725 for (const auto &LHSVal : LHSVals) {
726 Constant *V = LHSVal.first;
727 Constant *Folded =
728 ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
729
730 if (Constant *KC = getKnownConstant(Folded, WantInteger))
731 Result.emplace_back(KC, LHSVal.second);
732 }
733 }
734
735 return !Result.empty();
736 }
737
738 // Handle compare with phi operand, where the PHI is defined in this block.
739 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
740 if (Preference != WantInteger)
741 return false;
742 Type *CmpType = Cmp->getType();
743 Value *CmpLHS = Cmp->getOperand(0);
744 Value *CmpRHS = Cmp->getOperand(1);
745 CmpInst::Predicate Pred = Cmp->getPredicate();
746
747 PHINode *PN = dyn_cast<PHINode>(CmpLHS);
748 if (!PN)
749 PN = dyn_cast<PHINode>(CmpRHS);
750 // Do not perform phi translation across a loop header phi, because this
751 // may result in comparison of values from two different loop iterations.
752 // FIXME: This check is broken if LoopHeaders is not populated.
753 if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
754 const DataLayout &DL = PN->getDataLayout();
755 // We can do this simplification if any comparisons fold to true or false.
756 // See if any do.
757 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
758 BasicBlock *PredBB = PN->getIncomingBlock(i);
759 Value *LHS, *RHS;
760 if (PN == CmpLHS) {
761 LHS = PN->getIncomingValue(i);
762 RHS = CmpRHS->DoPHITranslation(BB, PredBB);
763 } else {
764 LHS = CmpLHS->DoPHITranslation(BB, PredBB);
765 RHS = PN->getIncomingValue(i);
766 }
767 Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
768 if (!Res) {
769 if (!isa<Constant>(RHS))
770 continue;
771
772 // getPredicateOnEdge call will make no sense if LHS is defined in BB.
773 auto LHSInst = dyn_cast<Instruction>(LHS);
774 if (LHSInst && LHSInst->getParent() == BB)
775 continue;
776
777 Res = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB,
778 BB, CxtI ? CxtI : Cmp);
779 }
780
781 if (Constant *KC = getKnownConstant(Res, WantInteger))
782 Result.emplace_back(KC, PredBB);
783 }
784
785 return !Result.empty();
786 }
787
788 // If comparing a live-in value against a constant, see if we know the
789 // live-in value on any predecessors.
790 if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
791 Constant *CmpConst = cast<Constant>(CmpRHS);
792
793 if (!isa<Instruction>(CmpLHS) ||
794 cast<Instruction>(CmpLHS)->getParent() != BB) {
795 for (BasicBlock *P : predecessors(BB)) {
796 // If the value is known by LazyValueInfo to be a constant in a
797 // predecessor, use that information to try to thread this block.
798 Constant *Res = LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB,
799 CxtI ? CxtI : Cmp);
800 if (Constant *KC = getKnownConstant(Res, WantInteger))
801 Result.emplace_back(KC, P);
802 }
803
804 return !Result.empty();
805 }
806
807 // InstCombine can fold some forms of constant range checks into
808 // (icmp (add (x, C1)), C2). See if we have such a thing with
809 // x as a live-in.
810 {
811 using namespace PatternMatch;
812
813 Value *AddLHS;
814 ConstantInt *AddConst;
815 if (isa<ConstantInt>(CmpConst) &&
816 match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
817 if (!isa<Instruction>(AddLHS) ||
818 cast<Instruction>(AddLHS)->getParent() != BB) {
819 for (BasicBlock *P : predecessors(BB)) {
820 // If the value is known by LazyValueInfo to be a ConstantRange in
821 // a predecessor, use that information to try to thread this
822 // block.
824 AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
825 // Propagate the range through the addition.
826 CR = CR.add(AddConst->getValue());
827
828 // Get the range where the compare returns true.
830 Pred, cast<ConstantInt>(CmpConst)->getValue());
831
832 Constant *ResC;
833 if (CmpRange.contains(CR))
834 ResC = ConstantInt::getTrue(CmpType);
835 else if (CmpRange.inverse().contains(CR))
836 ResC = ConstantInt::getFalse(CmpType);
837 else
838 continue;
839
840 Result.emplace_back(ResC, P);
841 }
842
843 return !Result.empty();
844 }
845 }
846 }
847
848 // Try to find a constant value for the LHS of a comparison,
849 // and evaluate it statically if we can.
850 PredValueInfoTy LHSVals;
851 computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
852 WantInteger, RecursionSet, CxtI);
853
854 for (const auto &LHSVal : LHSVals) {
855 Constant *V = LHSVal.first;
856 Constant *Folded =
857 ConstantFoldCompareInstOperands(Pred, V, CmpConst, DL);
858 if (Constant *KC = getKnownConstant(Folded, WantInteger))
859 Result.emplace_back(KC, LHSVal.second);
860 }
861
862 return !Result.empty();
863 }
864 }
865
866 if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
867 // Handle select instructions where at least one operand is a known constant
868 // and we can figure out the condition value for any predecessor block.
869 Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
870 Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
871 PredValueInfoTy Conds;
872 if ((TrueVal || FalseVal) &&
873 computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
874 WantInteger, RecursionSet, CxtI)) {
875 for (auto &C : Conds) {
876 Constant *Cond = C.first;
877
878 // Figure out what value to use for the condition.
879 bool KnownCond;
880 if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
881 // A known boolean.
882 KnownCond = CI->isOne();
883 } else {
884 assert(isa<UndefValue>(Cond) && "Unexpected condition value");
885 // Either operand will do, so be sure to pick the one that's a known
886 // constant.
887 // FIXME: Do this more cleverly if both values are known constants?
888 KnownCond = (TrueVal != nullptr);
889 }
890
891 // See if the select has a known constant value for this predecessor.
892 if (Constant *Val = KnownCond ? TrueVal : FalseVal)
893 Result.emplace_back(Val, C.second);
894 }
895
896 return !Result.empty();
897 }
898 }
899
900 // If all else fails, see if LVI can figure out a constant value for us.
901 assert(CxtI->getParent() == BB && "CxtI should be in BB");
902 Constant *CI = LVI->getConstant(V, CxtI);
903 if (Constant *KC = getKnownConstant(CI, Preference)) {
904 for (BasicBlock *Pred : predecessors(BB))
905 Result.emplace_back(KC, Pred);
906 }
907
908 return !Result.empty();
909}
910
911/// getBestDestForJumpOnUndef - If we determine that the specified block ends
912/// in an undefined jump, decide which block is best to revector to.
913///
914/// Since we can pick an arbitrary destination, we pick the successor with the
915/// fewest predecessors. This should reduce the in-degree of the others.
///
/// The result is the index of the chosen successor on BB's terminator, not the
/// successor block itself. When several successors tie for the fewest
/// predecessors, the one with the lowest index is returned.
917 Instruction *BBTerm = BB->getTerminator();
// Start with successor 0 as the candidate; this is queried unconditionally, so
// the terminator is expected to have at least one successor.
918 unsigned MinSucc = 0;
919 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
920 // Compute the successor with the minimum number of predecessors.
921 unsigned MinNumPreds = pred_size(TestBB);
922 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
923 TestBB = BBTerm->getSuccessor(i);
924 unsigned NumPreds = pred_size(TestBB);
// Strict comparison: an equally good later successor does not replace the
// current candidate.
925 if (NumPreds < MinNumPreds) {
926 MinSucc = i;
927 MinNumPreds = NumPreds;
928 }
929 }
930
931 return MinSucc;
932}
933
935 if (!BB->hasAddressTaken()) return false;
936
937 // If the block has its address taken, there may be a tree of dead constants
938 // hanging off of it. These shouldn't keep the block alive.
941 return !BA->use_empty();
942}
943
944/// processBlock - If there are any predecessors whose control can be threaded
945/// through to a successor, transform them now.
///
/// The function tries a sequence of simplifications on BB's terminator and
/// returns true as soon as one of them fires. It returns false when BB is dead
/// or pending deletion, when the terminator is not a conditional branch,
/// switch or indirectbr with successors, or when nothing applied.
// NOTE(review): this extract omits listing lines 946, 957, 960, 996, 999,
// 1110, 1116 and 1137 (the signature plus several call/condition lines), so a
// few `return true;` statements and one declaration below appear without the
// line that guards or completes them.
947 // If the block is trivially dead, just return and let the caller nuke it.
948 // This simplifies other transformations.
949 if (DTU->isBBPendingDeletion(BB) ||
950 (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
951 return false;
952
953 // If this block has a single predecessor, and if that pred has a single
954 // successor, merge the blocks. This encourages recursive jump threading
955 // because now the condition in this block can be threaded through
956 // predecessors of our predecessor block.
958 return true;
959
961 return true;
962
963 // Look if we can propagate guards to predecessors.
964 if (HasGuards && processGuards(BB))
965 return true;
966
967 // What kind of constant we're looking for.
968 ConstantPreference Preference = WantInteger;
969
970 // Look to see if the terminator is a conditional branch, switch or indirect
971 // branch, if not we can't thread it.
972 Value *Condition;
973 Instruction *Terminator = BB->getTerminator();
974 if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
975 // Can't thread an unconditional jump.
976 if (BI->isUnconditional()) return false;
977 Condition = BI->getCondition();
978 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
979 Condition = SI->getCondition();
980 } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
981 // Can't thread indirect branch with no successors.
982 if (IB->getNumSuccessors() == 0) return false;
983 Condition = IB->getAddress()->stripPointerCasts();
984 Preference = WantBlockAddress;
985 } else {
986 return false; // Must be an invoke or callbr.
987 }
988
989 // Keep track if we constant folded the condition in this invocation.
990 bool ConstantFolded = false;
991
992 // Run constant folding to see if we can reduce the condition to a simple
993 // constant.
994 if (Instruction *I = dyn_cast<Instruction>(Condition)) {
995 Value *SimpleVal =
997 if (SimpleVal) {
998 I->replaceAllUsesWith(SimpleVal);
1000 I->eraseFromParent();
1001 Condition = SimpleVal;
1002 ConstantFolded = true;
1003 }
1004 }
1005
1006 // If the terminator is branching on an undef or freeze undef, we can pick any
1007 // of the successors to branch to. Let getBestDestForJumpOnUndef decide.
// The freeze case additionally requires the freeze to have a single use,
// because the freeze instruction itself is erased once the branch is folded.
1008 auto *FI = dyn_cast<FreezeInst>(Condition);
1009 if (isa<UndefValue>(Condition) ||
1010 (FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {
1011 unsigned BestSucc = getBestDestForJumpOnUndef(BB);
1012 std::vector<DominatorTree::UpdateType> Updates;
1013
1014 // Fold the branch/switch.
1015 Instruction *BBTerm = BB->getTerminator();
1016 Updates.reserve(BBTerm->getNumSuccessors());
1017 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
1018 if (i == BestSucc) continue;
1019 BasicBlock *Succ = BBTerm->getSuccessor(i);
1020 Succ->removePredecessor(BB, true);
1021 Updates.push_back({DominatorTree::Delete, BB, Succ});
1022 }
1023
1024 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1025 << "' folding undef terminator: " << *BBTerm << '\n');
1026 Instruction *NewBI = BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm->getIterator());
1027 NewBI->setDebugLoc(BBTerm->getDebugLoc());
1028 ++NumFolds;
1029 BBTerm->eraseFromParent();
1030 DTU->applyUpdatesPermissive(Updates);
1031 if (FI)
1032 FI->eraseFromParent();
1033 return true;
1034 }
1035
1036 // If the terminator of this block is branching on a constant, simplify the
1037 // terminator to an unconditional branch. This can occur due to threading in
1038 // other blocks.
1039 if (getKnownConstant(Condition, Preference)) {
1040 LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
1041 << "' folding terminator: " << *BB->getTerminator()
1042 << '\n');
1043 ++NumFolds;
1044 ConstantFoldTerminator(BB, true, nullptr, DTU.get());
1045 if (auto *BPI = getBPI())
1046 BPI->eraseBlock(BB);
1047 return true;
1048 }
1049
1050 Instruction *CondInst = dyn_cast<Instruction>(Condition);
1051
1052 // All the rest of our checks depend on the condition being an instruction.
1053 if (!CondInst) {
1054 // FIXME: Unify this with code below.
1055 if (processThreadableEdges(Condition, BB, Preference, Terminator))
1056 return true;
// Nothing was threaded, but still report a change if the condition was
// constant folded above.
1057 return ConstantFolded;
1058 }
1059
1060 // Some of the following optimization can safely work on the unfrozen cond.
1061 Value *CondWithoutFreeze = CondInst;
1062 if (auto *FI = dyn_cast<FreezeInst>(CondInst))
1063 CondWithoutFreeze = FI->getOperand(0);
1064
1065 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
1066 // If we're branching on a conditional, LVI might be able to determine
1067 // its value at the branch instruction. We only handle comparisons
1068 // against a constant at this time.
1069 if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
1070 Constant *Res =
1071 LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
1072 CondConst, BB->getTerminator(),
1073 /*UseBlockValue=*/false);
1074 if (Res) {
1075 // We can safely replace *some* uses of the CondInst if it has
1076 // exactly one value as returned by LVI. RAUW is incorrect in the
1077 // presence of guards and assumes, that have the `Cond` as the use. This
1078 // is because we use the guards/assume to reason about the `Cond` value
1079 // at the end of block, but RAUW unconditionally replaces all uses
1080 // including the guards/assumes themselves and the uses before the
1081 // guard/assume.
1082 if (replaceFoldableUses(CondCmp, Res, BB))
1083 return true;
1084 }
1085
1086 // We did not manage to simplify this branch, try to see whether
1087 // CondCmp depends on a known phi-select pattern.
1088 if (tryToUnfoldSelect(CondCmp, BB))
1089 return true;
1090 }
1091 }
1092
1093 if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
1094 if (tryToUnfoldSelect(SI, BB))
1095 return true;
1096
1097 // Check for some cases that are worth simplifying. Right now we want to look
1098 // for loads that are used by a switch or by the condition for the branch. If
1099 // we see one, check to see if it's partially redundant. If so, insert a PHI
1100 // which can then be used to thread the values.
1101 Value *SimplifyValue = CondWithoutFreeze;
1102
1103 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
1104 if (isa<Constant>(CondCmp->getOperand(1)))
1105 SimplifyValue = CondCmp->getOperand(0);
1106
1107 // TODO: There are other places where load PRE would be profitable, such as
1108 // more complex comparisons.
1109 if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))
1111 return true;
1112
1113 // Before threading, try to propagate profile data backwards:
1114 if (PHINode *PN = dyn_cast<PHINode>(CondInst))
1115 if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1117
1118 // Handle a variety of cases where we are branching on something derived from
1119 // a PHI node in the current block. If we can prove that any predecessors
1120 // compute a predictable value based on a PHI node, thread those predecessors.
1121 if (processThreadableEdges(CondInst, BB, Preference, Terminator))
1122 return true;
1123
1124 // If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
1125 // the current block, see if we can simplify.
1126 PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
1127 if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1128 return processBranchOnPHI(PN);
1129
1130 // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
1131 if (CondInst->getOpcode() == Instruction::Xor &&
1132 CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
1133 return processBranchOnXOR(cast<BinaryOperator>(CondInst));
1134
1135 // Search for a stronger dominating condition that can be used to simplify a
1136 // conditional branch leaving BB.
1138 return true;
1139
1140 return false;
1141}
1142
// Tries to fold BB's conditional branch using a dominating branch condition.
// Starting at BB, the loop walks up the chain of single predecessors (at most
// ImplicationSearchThreshold steps). Each predecessor must end in a
// conditional branch that targets the block below it; otherwise the search
// gives up. If that predecessor's condition implies BB's condition to be true
// or false, BB's branch is replaced by an unconditional branch to the
// surviving successor. Returns true if the branch was folded.
// NOTE(review): listing line 1143 (the function signature) is absent from this
// extract.
1144 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
1145 if (!BI || !BI->isConditional())
1146 return false;
1147
1148 Value *Cond = BI->getCondition();
1149 // Assuming that predecessor's branch was taken, if pred's branch condition
1150 // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
1151 // freeze(Cond) is either true or a nondeterministic value.
1152 // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
1153 // without affecting other instructions.
1154 auto *FICond = dyn_cast<FreezeInst>(Cond);
1155 if (FICond && FICond->hasOneUse())
1156 Cond = FICond->getOperand(0);
1157 else
1158 FICond = nullptr;
1159
1160 BasicBlock *CurrentBB = BB;
1161 BasicBlock *CurrentPred = BB->getSinglePredecessor();
1162 unsigned Iter = 0;
1163
1164 auto &DL = BB->getDataLayout();
1165
1166 while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
1167 auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
1168 if (!PBI || !PBI->isConditional())
1169 return false;
1170 if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)
1171 return false;
1172
// CondIsTrue records whether CurrentBB is reached through the true edge
// (successor 0) of the predecessor's branch.
1173 bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
1174 std::optional<bool> Implication =
1175 isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
1176
1177 // If the branch condition of BB (which is Cond) and CurrentPred are
1178 // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
1179 if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
1180 if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
1181 FICond->getOperand(0))
1182 Implication = CondIsTrue;
1183 }
1184
1185 if (Implication) {
// Keep the successor selected by the implied value and detach the other one.
1186 BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
1187 BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
1188 RemoveSucc->removePredecessor(BB);
1189 BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI->getIterator());
1190 UncondBI->setDebugLoc(BI->getDebugLoc());
1191 ++NumFolds;
1192 BI->eraseFromParent();
// FICond is only non-null when the freeze had the branch as its single use,
// so it is dead once the branch is gone.
1193 if (FICond)
1194 FICond->eraseFromParent();
1195
1196 DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
1197 if (auto *BPI = getBPI())
1198 BPI->eraseBlock(BB);
1199 return true;
1200 }
1201 CurrentBB = CurrentPred;
1202 CurrentPred = CurrentBB->getSinglePredecessor();
1203 }
1204
1205 return false;
1206}
1207
1208/// Return true if Op is an instruction defined in the given block.
/// Non-instruction values always yield false.
// NOTE(review): listing line 1209 (the function signature) is absent from this
// extract; the caller invokes it as isOpDefinedInBlock(LoadedPtr, LoadBB).
1210 if (Instruction *OpInst = dyn_cast<Instruction>(Op))
1211 if (OpInst->getParent() == BB)
1212 return true;
1213 return false;
1214}
1215
1216/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially
1217/// redundant load instruction, eliminate it by replacing it with a PHI node.
1218/// This is an important optimization that encourages jump threading, and needs
1219/// to be run interlaced with other jump threading tasks.
///
/// Returns true if LoadI was replaced (either by a value already available in
/// its own block or by a newly created PHI) and erased; false if nothing was
/// changed.
// NOTE(review): this extract omits listing lines 1220, 1291, 1366, 1368 and
// 1441 (the signature, the declaration of CSELoads, parts of the safety check
// before PRE insertion, and the start of the cast assignment), so those
// statements read as incomplete below.
1221 // Don't hack volatile and ordered loads.
1222 if (!LoadI->isUnordered()) return false;
1223
1224 // If the load is defined in a block with exactly one predecessor, it can't be
1225 // partially redundant.
1226 BasicBlock *LoadBB = LoadI->getParent();
1227 if (LoadBB->getSinglePredecessor())
1228 return false;
1229
1230 // If the load is defined in an EH pad, it can't be partially redundant,
1231 // because the edges between the invoke and the EH pad cannot have other
1232 // instructions between them.
1233 if (LoadBB->isEHPad())
1234 return false;
1235
1236 Value *LoadedPtr = LoadI->getOperand(0);
1237
1238 // If the loaded operand is defined in the LoadBB and it's not a phi,
1239 // it can't be available in predecessors.
1240 if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
1241 return false;
1242
1243 // Scan a few instructions up from the load, to see if it is obviously live at
1244 // the entry to its block.
1245 BasicBlock::iterator BBIt(LoadI);
1246 bool IsLoadCSE;
1247 BatchAAResults BatchAA(*AA);
1248 // The dominator tree is updated lazily and may not be valid at this point.
1249 BatchAA.disableDominatorTree();
1250 if (Value *AvailableVal = FindAvailableLoadedValue(
1251 LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {
1252 // If the value of the load is locally available within the block, just use
1253 // it. This frequently occurs for reg2mem'd allocas.
1254
1255 if (IsLoadCSE) {
1256 LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
1257 combineMetadataForCSE(NLoadI, LoadI, false);
1258 LVI->forgetValue(NLoadI);
1259 };
1260
1261 // If the returned value is the load itself, replace with poison. This can
1262 // only happen in dead loops.
1263 if (AvailableVal == LoadI)
1264 AvailableVal = PoisonValue::get(LoadI->getType());
1265 if (AvailableVal->getType() != LoadI->getType()) {
1266 AvailableVal = CastInst::CreateBitOrPointerCast(
1267 AvailableVal, LoadI->getType(), "", LoadI->getIterator());
1268 cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->getDebugLoc());
1269 }
1270 LoadI->replaceAllUsesWith(AvailableVal);
1271 LoadI->eraseFromParent();
1272 return true;
1273 }
1274
1275 // Otherwise, if we scanned the whole block and got to the top of the block,
1276 // we know the block is locally transparent to the load. If not, something
1277 // might clobber its value.
1278 if (BBIt != LoadBB->begin())
1279 return false;
1280
1281 // If all of the loads and stores that feed the value have the same AA tags,
1282 // then we can propagate them onto any newly inserted loads.
1283 AAMDNodes AATags = LoadI->getAAMetadata();
1284
1285 SmallPtrSet<BasicBlock*, 8> PredsScanned;
1286
1287 using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;
1288
1289 AvailablePredsTy AvailablePreds;
1290 BasicBlock *OneUnavailablePred = nullptr;
1292
1293 // If we got here, the loaded value is transparent through to the start of the
1294 // block. Check to see if it is available in any of the predecessor blocks.
1295 for (BasicBlock *PredBB : predecessors(LoadBB)) {
1296 // If we already scanned this predecessor, skip it.
1297 if (!PredsScanned.insert(PredBB).second)
1298 continue;
1299
1300 BBIt = PredBB->end();
1301 unsigned NumScanedInst = 0;
1302 Value *PredAvailable = nullptr;
1303 // NOTE: We don't CSE load that is volatile or anything stronger than
1304 // unordered, that should have been checked when we entered the function.
1305 assert(LoadI->isUnordered() &&
1306 "Attempting to CSE volatile or atomic loads");
1307 // If this is a load on a phi pointer, phi-translate it and search
1308 // for available load/store to the pointer in predecessors.
1309 Type *AccessTy = LoadI->getType();
1310 const auto &DL = LoadI->getDataLayout();
1311 MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
1312 LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
1313 AATags);
1314 PredAvailable = findAvailablePtrLoadStore(
1315 Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
1316 &BatchAA, &IsLoadCSE, &NumScanedInst);
1317
1318 // If PredBB has a single predecessor, continue scanning through the
1319 // single predecessor.
1320 BasicBlock *SinglePredBB = PredBB;
1321 while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
1322 NumScanedInst < DefMaxInstsToScan) {
1323 SinglePredBB = SinglePredBB->getSinglePredecessor();
1324 if (SinglePredBB) {
1325 BBIt = SinglePredBB->end();
1326 PredAvailable = findAvailablePtrLoadStore(
1327 Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,
1328 (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,
1329 &NumScanedInst);
1330 }
1331 }
1332
1333 if (!PredAvailable) {
1334 OneUnavailablePred = PredBB;
1335 continue;
1336 }
1337
1338 if (IsLoadCSE)
1339 CSELoads.push_back(cast<LoadInst>(PredAvailable));
1340
1341 // If so, this load is partially redundant. Remember this info so that we
1342 // can create a PHI node.
1343 AvailablePreds.emplace_back(PredBB, PredAvailable);
1344 }
1345
1346 // If the loaded value isn't available in any predecessor, it isn't partially
1347 // redundant.
1348 if (AvailablePreds.empty()) return false;
1349
1350 // Okay, the loaded value is available in at least one (and maybe all!)
1351 // predecessors. If the value is unavailable in more than one unique
1352 // predecessor, we want to insert a merge block for those common predecessors.
1353 // This ensures that we only have to insert one reload, thus not increasing
1354 // code size.
1355 BasicBlock *UnavailablePred = nullptr;
1356
1357 // If the value is unavailable in one of predecessors, we will end up
1358 // inserting a new instruction into them. It is only valid if all the
1359 // instructions before LoadI are guaranteed to pass execution to its
1360 // successor, or if LoadI is safe to speculate.
1361 // TODO: If this logic becomes more complex, and we will perform PRE insertion
1362 // farther than to a predecessor, we need to reuse the code from GVN's PRE.
1363 // It requires domination tree analysis, so for this simple case it is an
1364 // overkill.
1365 if (PredsScanned.size() != AvailablePreds.size() &&
1367 for (auto I = LoadBB->begin(); &*I != LoadI; ++I)
1369 return false;
1370
1371 // If there is exactly one predecessor where the value is unavailable, the
1372 // already computed 'OneUnavailablePred' block is it. If it ends in an
1373 // unconditional branch, we know that it isn't a critical edge.
1374 if (PredsScanned.size() == AvailablePreds.size()+1 &&
1375 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
1376 UnavailablePred = OneUnavailablePred;
1377 } else if (PredsScanned.size() != AvailablePreds.size()) {
1378 // Otherwise, we had multiple unavailable predecessors or we had a critical
1379 // edge from the one.
1380 SmallVector<BasicBlock*, 8> PredsToSplit;
1381 SmallPtrSet<BasicBlock *, 8> AvailablePredSet(
1382 llvm::from_range, llvm::make_first_range(AvailablePreds));
1383
1384 // Add all the unavailable predecessors to the PredsToSplit list.
1385 for (BasicBlock *P : predecessors(LoadBB)) {
1386 // If the predecessor is an indirect goto, we can't split the edge.
1387 if (isa<IndirectBrInst>(P->getTerminator()))
1388 return false;
1389
1390 if (!AvailablePredSet.count(P))
1391 PredsToSplit.push_back(P);
1392 }
1393
1394 // Split them out to their own block.
1395 UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
1396 }
1397
1398 // If the value isn't available in all predecessors, then there will be
1399 // exactly one where it isn't available. Insert a load on that edge and add
1400 // it to the AvailablePreds list.
1401 if (UnavailablePred) {
1402 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
1403 "Can't handle critical edge here!");
1404 LoadInst *NewVal = new LoadInst(
1405 LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
1406 LoadI->getName() + ".pr", false, LoadI->getAlign(),
1407 LoadI->getOrdering(), LoadI->getSyncScopeID(),
1408 UnavailablePred->getTerminator()->getIterator());
1409 NewVal->setDebugLoc(LoadI->getDebugLoc());
1410 if (AATags)
1411 NewVal->setAAMetadata(AATags);
1412
1413 AvailablePreds.emplace_back(UnavailablePred, NewVal);
1414 }
1415
1416 // Now we know that each predecessor of this block has a value in
1417 // AvailablePreds, sort them for efficient access as we're walking the preds.
1418 array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
1419
1420 // Create a PHI node at the start of the block for the PRE'd load value.
1421 PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");
1422 PN->insertBefore(LoadBB->begin());
1423 PN->takeName(LoadI);
1424 PN->setDebugLoc(LoadI->getDebugLoc());
1425
1426 // Insert new entries into the PHI for each predecessor. A single block may
1427 // have multiple entries here.
1428 for (BasicBlock *P : predecessors(LoadBB)) {
1429 AvailablePredsTy::iterator I =
1430 llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
1431
1432 assert(I != AvailablePreds.end() && I->first == P &&
1433 "Didn't find entry for predecessor!");
1434
1435 // If we have an available predecessor but it requires casting, insert the
1436 // cast in the predecessor and use the cast. Note that we have to update the
1437 // AvailablePreds vector as we go so that all of the PHI entries for this
1438 // predecessor use the same bitcast.
1439 Value *&PredV = I->second;
1440 if (PredV->getType() != LoadI->getType())
1442 PredV, LoadI->getType(), "", P->getTerminator()->getIterator());
1443
1444 PN->addIncoming(PredV, I->first);
1445 }
1446
// Merge LoadI's metadata into every predecessor load that is being reused, and
// drop any LVI information cached for those loads.
1447 for (LoadInst *PredLoadI : CSELoads) {
1448 combineMetadataForCSE(PredLoadI, LoadI, true);
1449 LVI->forgetValue(PredLoadI);
1450 }
1451
1452 LoadI->replaceAllUsesWith(PN);
1453 LoadI->eraseFromParent();
1454
1455 return true;
1456}
1457
1458/// findMostPopularDest - The specified list contains multiple possible
1459/// threadable destinations. Pick the one that occurs the most frequently in
1460/// the list.
///
/// PredToDestList must be non-empty (asserted). Null destinations, which stand
/// for 'undef', are not counted; nullptr is returned only when no real
/// destination occurs in the list.
// NOTE(review): listing line 1462 (the function name and first parameter) is
// absent from this extract; the body uses that parameter as BB.
1461static BasicBlock *
1463 const SmallVectorImpl<std::pair<BasicBlock *,
1464 BasicBlock *>> &PredToDestList) {
1465 assert(!PredToDestList.empty());
1466
1467 // Determine popularity. If there are multiple possible destinations, we
1468 // explicitly choose to ignore 'undef' destinations. We prefer to thread
1469 // blocks with known and real destinations to threading undef. We'll handle
1470 // them later if interesting.
1471 MapVector<BasicBlock *, unsigned> DestPopularity;
1472
1473 // Populate DestPopularity with the successors in the order they appear in the
1474 // successor list. This way, we ensure determinism by iterating it in the
1475 // same order in llvm::max_element below. We map nullptr to 0 so that we can
1476 // return nullptr when PredToDestList contains nullptr only.
1477 DestPopularity[nullptr] = 0;
1478 for (auto *SuccBB : successors(BB))
1479 DestPopularity[SuccBB] = 0;
1480
1481 for (const auto &PredToDest : PredToDestList)
1482 if (PredToDest.second)
1483 DestPopularity[PredToDest.second]++;
1484
1485 // Find the most popular dest.
1486 auto MostPopular = llvm::max_element(DestPopularity, llvm::less_second());
1487
1488 // Okay, we have finally picked the most popular destination.
1489 return MostPopular->first;
1490}
1491
1492// Try to evaluate the value of V when the control flows from PredPredBB to
1493// BB->getSinglePredecessor() and then on to BB.
// This overload only forwards to the five-argument overload, passing Visited
// as the extra argument.
// NOTE(review): listing lines 1494 and 1498 (the start of the signature and
// the declaration of Visited) are absent from this extract.
1495 BasicBlock *PredPredBB,
1496 Value *V,
1497 const DataLayout &DL) {
1499 return evaluateOnPredecessorEdge(BB, PredPredBB, V, DL, Visited);
1500}
1501
// Recursive worker: tries to evaluate V to a constant for control flowing from
// PredPredBB into BB's single predecessor and on to BB. Returns the constant,
// or nullptr when no constant can be determined.
//
// Visited holds the values currently being evaluated on this recursion path.
// Re-entering a value that is already in the set yields nullptr, and each
// value is removed again when its invocation returns.
// NOTE(review): listing lines 1502, 1536 and 1538 (the start of the signature
// and the declarations of Op0/Op1) are absent from this extract.
1503 BasicBlock *BB, BasicBlock *PredPredBB, Value *V, const DataLayout &DL,
1504 SmallPtrSet<Value *, 8> &Visited) {
1505 if (!Visited.insert(V).second)
1506 return nullptr;
1507 auto _ = make_scope_exit([&Visited, V]() { Visited.erase(V); });
1508
1509 BasicBlock *PredBB = BB->getSinglePredecessor();
1510 assert(PredBB && "Expected a single predecessor");
1511
1512 if (Constant *Cst = dyn_cast<Constant>(V)) {
1513 return Cst;
1514 }
1515
1516 // Consult LVI if V is not an instruction in BB or PredBB.
1517 Instruction *I = dyn_cast<Instruction>(V);
1518 if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1519 return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1520 }
1521
1522 // Look into a PHI argument.
// Only PHIs in PredBB are handled; a PHI located in BB yields nullptr.
1523 if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1524 if (PHI->getParent() == PredBB)
1525 return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1526 return nullptr;
1527 }
1528
1529 // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1530 // Note that during the execution of the pass, phi nodes may become constant
1531 // and may be removed, which can lead to self-referencing instructions in
1532 // code that becomes unreachable. Consequently, we need to handle those
1533 // instructions in unreachable code and check before going into recursion.
1534 if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1535 if (CondCmp->getParent() == BB) {
1537 BB, PredPredBB, CondCmp->getOperand(0), DL, Visited);
1539 BB, PredPredBB, CondCmp->getOperand(1), DL, Visited);
1540 if (Op0 && Op1) {
1541 return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
1542 Op1, DL);
1543 }
1544 }
1545 return nullptr;
1546 }
1547
1548 return nullptr;
1549}
1550
// Uses the values that Cond is known to take in BB's predecessors to either
// fold BB's terminator (when every predecessor leads to the same successor) or
// thread the predecessors that share the most popular destination around BB.
// Returns true if the IR was changed.
// NOTE(review): this extract omits listing lines 1551, 1564, 1581 and 1582
// (the start of the signature, the body of the first `if`, and the
// declarations of SeenPreds and PredToDestList). Call sites invoke this
// function as processThreadableEdges(Cond, BB, Preference, CxtI).
1552 ConstantPreference Preference,
1553 Instruction *CxtI) {
1554 // If threading this would thread across a loop header, don't even try to
1555 // thread the edge.
1556 if (LoopHeaders.count(BB))
1557 return false;
1558
1559 PredValueInfoTy PredValues;
1560 if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1561 CxtI)) {
1562 // We don't have known values in predecessors. See if we can thread through
1563 // BB and its sole predecessor.
1565 }
1566
1567 assert(!PredValues.empty() &&
1568 "computeValueKnownInPredecessors returned true with no values");
1569
1570 LLVM_DEBUG(dbgs() << "IN BB: " << *BB;
1571 for (const auto &PredValue : PredValues) {
1572 dbgs() << " BB '" << BB->getName()
1573 << "': FOUND condition = " << *PredValue.first
1574 << " for pred '" << PredValue.second->getName() << "'.\n";
1575 });
1576
1577 // Decide what we want to thread through. Convert our list of known values to
1578 // a list of known destinations for each pred. This also discards duplicate
1579 // predecessors and keeps track of the undefined inputs (which are represented
1580 // as a null dest in the PredToDestList).
1583
// OnlyDest/OnlyVal track the single destination/value seen so far. The
// all-ones sentinels below mark "more than one distinct destination/value".
1584 BasicBlock *OnlyDest = nullptr;
1585 BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
1586 Constant *OnlyVal = nullptr;
1587 Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
1588
1589 for (const auto &PredValue : PredValues) {
1590 BasicBlock *Pred = PredValue.second;
1591 if (!SeenPreds.insert(Pred).second)
1592 continue; // Duplicate predecessor entry.
1593
1594 Constant *Val = PredValue.first;
1595
1596 BasicBlock *DestBB;
1597 if (isa<UndefValue>(Val))
1598 DestBB = nullptr;
1599 else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
1600 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
// isZero() yields successor index 1 (false edge) for 0 and 0 (true edge)
// otherwise.
1601 DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
1602 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
1603 assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
1604 DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
1605 } else {
1606 assert(isa<IndirectBrInst>(BB->getTerminator())
1607 && "Unexpected terminator");
1608 assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
1609 DestBB = cast<BlockAddress>(Val)->getBasicBlock();
1610 }
1611
1612 // If we have exactly one destination, remember it for efficiency below.
1613 if (PredToDestList.empty()) {
1614 OnlyDest = DestBB;
1615 OnlyVal = Val;
1616 } else {
1617 if (OnlyDest != DestBB)
1618 OnlyDest = MultipleDestSentinel;
1619 // It is possible we have same destination, but different value, e.g. default
1620 // case in switchinst.
1621 if (Val != OnlyVal)
1622 OnlyVal = MultipleVal;
1623 }
1624
1625 // If the predecessor ends with an indirect goto, we can't change its
1626 // destination.
1627 if (isa<IndirectBrInst>(Pred->getTerminator()))
1628 continue;
1629
1630 PredToDestList.emplace_back(Pred, DestBB);
1631 }
1632
1633 // If all edges were unthreadable, we fail.
1634 if (PredToDestList.empty())
1635 return false;
1636
1637 // If all the predecessors go to a single known successor, we want to fold,
1638 // not thread. By doing so, we do not need to duplicate the current block and
1639 // also miss potential opportunities in case we don't/can't duplicate.
1640 if (OnlyDest && OnlyDest != MultipleDestSentinel) {
1641 if (BB->hasNPredecessors(PredToDestList.size())) {
1642 bool SeenFirstBranchToOnlyDest = false;
1643 std::vector <DominatorTree::UpdateType> Updates;
1644 Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
1645 for (BasicBlock *SuccBB : successors(BB)) {
1646 if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {
1647 SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
1648 } else {
1649 SuccBB->removePredecessor(BB, true); // This is unreachable successor.
1650 Updates.push_back({DominatorTree::Delete, BB, SuccBB});
1651 }
1652 }
1653
1654 // Finally update the terminator.
1655 Instruction *Term = BB->getTerminator();
1656 Instruction *NewBI = BranchInst::Create(OnlyDest, Term->getIterator());
1657 NewBI->setDebugLoc(Term->getDebugLoc());
1658 ++NumFolds;
1659 Term->eraseFromParent();
1660 DTU->applyUpdatesPermissive(Updates);
1661 if (auto *BPI = getBPI())
1662 BPI->eraseBlock(BB);
1663
1664 // If the condition is now dead due to the removal of the old terminator,
1665 // erase it.
1666 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
1667 if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
1668 CondInst->eraseFromParent();
1669 // We can safely replace *some* uses of the CondInst if it has
1670 // exactly one value as returned by LVI. RAUW is incorrect in the
1671 // presence of guards and assumes, that have the `Cond` as the use. This
1672 // is because we use the guards/assume to reason about the `Cond` value
1673 // at the end of block, but RAUW unconditionally replaces all uses
1674 // including the guards/assumes themselves and the uses before the
1675 // guard/assume.
1676 else if (OnlyVal && OnlyVal != MultipleVal)
1677 replaceFoldableUses(CondInst, OnlyVal, BB);
1678 }
1679 return true;
1680 }
1681 }
1682
1683 // Determine which is the most common successor. If we have many inputs and
1684 // this block is a switch, we want to start by threading the batch that goes
1685 // to the most popular destination first. If we only know about one
1686 // threadable destination (the common case) we can avoid this.
1687 BasicBlock *MostPopularDest = OnlyDest;
1688
1689 if (MostPopularDest == MultipleDestSentinel) {
1690 // Remove any loop headers from the Dest list, threadEdge conservatively
1691 // won't process them, but we might have other destination that are eligible
1692 // and we still want to process.
1693 erase_if(PredToDestList,
1694 [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
1695 return LoopHeaders.contains(PredToDest.second);
1696 });
1697
1698 if (PredToDestList.empty())
1699 return false;
1700
1701 MostPopularDest = findMostPopularDest(BB, PredToDestList);
1702 }
1703
1704 // Now that we know what the most popular destination is, factor all
1705 // predecessors that will jump to it into a single predecessor.
1706 SmallVector<BasicBlock*, 16> PredsToFactor;
1707 for (const auto &PredToDest : PredToDestList)
1708 if (PredToDest.second == MostPopularDest) {
1709 BasicBlock *Pred = PredToDest.first;
1710
1711 // This predecessor may be a switch or something else that has multiple
1712 // edges to the block. Factor each of these edges by listing them
1713 // according to # occurrences in PredsToFactor.
1714 for (BasicBlock *Succ : successors(Pred))
1715 if (Succ == BB)
1716 PredsToFactor.push_back(Pred);
1717 }
1718
1719 // If the threadable edges are branching on an undefined value, we get to pick
1720 // the destination that these predecessors should get to.
1721 if (!MostPopularDest)
1722 MostPopularDest = BB->getTerminator()->
1723 getSuccessor(getBestDestForJumpOnUndef(BB));
1724
1725 // Ok, try to thread it!
1726 return tryThreadEdge(BB, PredsToFactor, MostPopularDest);
1727}
1728
1729/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on
1730/// a PHI node (or freeze PHI) in the current block. See if there are any
1731/// simplifications we can do based on inputs to the phi node.
///
/// Returns true as soon as the block has been duplicated into one predecessor;
/// false if no predecessor qualified or every duplication attempt failed.
// NOTE(review): listing lines 1732 and 1737 (the function signature and the
// declaration of PredBBs) are absent from this extract.
1733 BasicBlock *BB = PN->getParent();
1734
1735 // TODO: We could make use of this to do it once for blocks with common PHI
1736 // values.
1738 PredBBs.resize(1);
1739
1740 // If any of the predecessor blocks end in an unconditional branch, we can
1741 // *duplicate* the conditional branch into that block in order to further
1742 // encourage jump threading and to eliminate cases where we have branch on a
1743 // phi of an icmp (branch on icmp is much better).
1744 // This is still beneficial when a frozen phi is used as the branch condition
1745 // because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))
1746 // to br(icmp(freeze ...)).
1747 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1748 BasicBlock *PredBB = PN->getIncomingBlock(i);
1749 if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
1750 if (PredBr->isUnconditional()) {
// PredBBs is a one-element list that is overwritten for each candidate.
1751 PredBBs[0] = PredBB;
1752 // Try to duplicate BB into PredBB.
1753 if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))
1754 return true;
1755 }
1756 }
1757
1758 return false;
1759}
1760
1761/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on
1762/// a xor instruction in the current block. See if there are any
1763/// simplifications we can do based on inputs to the xor.
/// Returns true if the xor was simplified in place or if BB was duplicated
/// into the qualifying predecessors; returns false if nothing was changed.
// NOTE(review): listing line 1764 (presumably the function signature taking
// BO) is missing from this copy of the file.
1765 BasicBlock *BB = BO->getParent();
1766
1767 // If either the LHS or RHS of the xor is a constant, don't do this
1768 // optimization.
1769 if (isa<ConstantInt>(BO->getOperand(0)) ||
1770 isa<ConstantInt>(BO->getOperand(1)))
1771 return false;
1772
1773 // If the first instruction in BB isn't a phi, we won't be able to infer
1774 // anything special about any particular predecessor.
1775 if (!isa<PHINode>(BB->front()))
1776 return false;
1777
1778 // If this BB is a landing pad, we won't be able to split the edge into it.
1779 if (BB->isEHPad())
1780 return false;
1781
1782 // If we have a xor as the branch input to this block, and we know that the
1783 // LHS or RHS of the xor in any predecessor is true/false, then we can clone
1784 // the condition into the predecessor and fix that value to true, saving some
1785 // logical ops on that path and encouraging other paths to simplify.
1786 //
1787 // This copies something like this:
1788 //
1789 // BB:
1790 // %X = phi i1 [1], [%X']
1791 // %Y = icmp eq i32 %A, %B
1792 // %Z = xor i1 %X, %Y
1793 // br i1 %Z, ...
1794 //
1795 // Into:
1796 // BB':
1797 // %Y = icmp ne i32 %A, %B
1798 // br i1 %Y, ...
1799
1800 PredValueInfoTy XorOpValues;
 // isLHS records which operand the per-predecessor values were computed for:
 // true for operand 0, false for operand 1.
1801 bool isLHS = true;
1802 if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
1803 WantInteger, BO)) {
1804 assert(XorOpValues.empty());
1805 if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
1806 WantInteger, BO))
1807 return false;
1808 isLHS = false;
1809 }
1810
1811 assert(!XorOpValues.empty() &&
1812 "computeValueKnownInPredecessors returned true with no values");
1813
1814 // Scan the information to see which is most popular: true or false. The
1815 // predecessors can be of the set true, false, or undef.
1816 unsigned NumTrue = 0, NumFalse = 0;
1817 for (const auto &XorOpValue : XorOpValues) {
1818 if (isa<UndefValue>(XorOpValue.first))
1819 // Ignore undefs for the count.
1820 continue;
1821 if (cast<ConstantInt>(XorOpValue.first)->isZero())
1822 ++NumFalse;
1823 else
1824 ++NumTrue;
1825 }
1826
1827 // Determine which value to split on, true, false, or undef if neither.
 // Ties between true and false go to false; SplitVal stays null only when
 // every recorded value was undef.
1828 ConstantInt *SplitVal = nullptr;
1829 if (NumTrue > NumFalse)
1830 SplitVal = ConstantInt::getTrue(BB->getContext());
1831 else if (NumTrue != 0 || NumFalse != 0)
1832 SplitVal = ConstantInt::getFalse(BB->getContext());
1833
1834 // Collect all of the blocks that this can be folded into so that we can
1835 // factor this once and clone it once.
 // A predecessor qualifies when its value equals SplitVal or is undef.
1836 SmallVector<BasicBlock*, 8> BlocksToFoldInto;
1837 for (const auto &XorOpValue : XorOpValues) {
1838 if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
1839 continue;
1840
1841 BlocksToFoldInto.push_back(XorOpValue.second);
1842 }
1843
1844 // If we inferred a value for all of the predecessors, then duplication won't
1845 // help us. However, we can just replace the LHS or RHS with the constant.
1846 if (BlocksToFoldInto.size() ==
1847 cast<PHINode>(BB->front()).getNumIncomingValues()) {
1848 if (!SplitVal) {
1849 // If all preds provide undef, just nuke the xor, because it is undef too.
 // NOTE(review): listing line 1850 is missing from this copy of the file;
 // presumably it replaces the uses of BO before the erase below - confirm.
1851 BO->eraseFromParent();
1852 } else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {
1853 // If all preds provide 0, replace the xor with the other input.
 // getOperand(isLHS) is the operand that was NOT analysed: operand 1 when
 // isLHS is true, operand 0 otherwise.
1854 BO->replaceAllUsesWith(BO->getOperand(isLHS));
1855 BO->eraseFromParent();
1856 } else {
1857 // Otherwise, overwrite the analysed operand (index !isLHS) with SplitVal.
1858 BO->setOperand(!isLHS, SplitVal);
1859 }
1860
1861 return true;
1862 }
1863
1864 // If any of predecessors end with an indirect goto, we can't change its
1865 // destination.
1866 if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
1867 return isa<IndirectBrInst>(Pred->getTerminator());
1868 }))
1869 return false;
1870
1871 // Try to duplicate BB into PredBB.
1872 return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
1873}
1874
1875/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
1876/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
1877/// NewPred using the entries from OldPred (suitably mapped).
// NOTE(review): listing lines 1878 and 1881 (presumably the start of the
// signature with the PHIBB parameter, and the ValueMap parameter) are missing
// from this copy of the file.
1879 BasicBlock *OldPred,
1880 BasicBlock *NewPred,
1882 for (PHINode &PN : PHIBB->phis()) {
1883 // Ok, we have a PHI node. Figure out what the incoming value was for
1884 // OldPred.
1885 Value *IV = PN.getIncomingValueForBlock(OldPred);
1886
1887 // Remap the value if necessary. Only instructions are looked up; any other
 // kind of value is passed through unchanged.
1888 if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
 // NOTE(review): listing line 1889 (presumably the ValueMap lookup that
 // defines I) is missing from this copy of the file.
1890 if (I != ValueMap.end())
1891 IV = I->second;
1892 }
1893
1894 PN.addIncoming(IV, NewPred);
1895 }
1896}
1897
1898/// Merge basic block BB into its sole predecessor if possible.
/// Returns true if the merge was performed, false otherwise.
// NOTE(review): listing line 1899 (presumably the function signature taking
// BB) is missing from this copy of the file.
1900 BasicBlock *SinglePred = BB->getSinglePredecessor();
1901 if (!SinglePred)
1902 return false;
1903
 // The predecessor must end in an ordinary terminator whose only successor is
 // BB, must not be BB itself, and BB must not have its address taken and used.
1904 const Instruction *TI = SinglePred->getTerminator();
1905 if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||
1906 SinglePred == BB || hasAddressTakenAndUsed(BB))
1907 return false;
1908
1909 // MergeBasicBlockIntoOnlyPred may delete SinglePred, we need to avoid
1910 // deleting a BB pointer from Unreachable.
1911 if (Unreachable.count(SinglePred))
1912 return false;
1913
1914 // If SinglePred was a loop header, BB becomes one.
1915 if (LoopHeaders.erase(SinglePred))
1916 LoopHeaders.insert(BB);
1917
1918 LVI->eraseBlock(SinglePred);
1919 MergeBasicBlockIntoOnlyPred(BB, DTU.get());
1920
1921 // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
1922 // BB code within one basic block `BB`), we need to invalidate the LVI
1923 // information associated with BB, because the LVI information need not be
1924 // true for all of BB after the merge. For example,
1925 // Before the merge, LVI info and code is as follows:
1926 // SinglePred: <LVI info1 for %p val>
1927 // %y = use of %p
1928 // call @exit() // need not transfer execution to successor.
1929 // assume(%p) // from this point on %p is true
1930 // br label %BB
1931 // BB: <LVI info2 for %p val, i.e. %p is true>
1932 // %x = use of %p
1933 // br label exit
1934 //
1935 // Note that this LVI info for blocks BB and SinglePred is correct for %p
1936 // (info2 and info1 respectively). After the merge and the deletion of the
1937 // LVI info1 for SinglePred, we have the following code:
1938 // BB: <LVI info2 for %p val>
1939 // %y = use of %p
1940 // call @exit()
1941 // assume(%p)
1942 // %x = use of %p <-- LVI info2 is correct from here onwards.
1943 // br label exit
1944 // LVI info2 for BB is incorrect at the beginning of BB.
1945
1946 // Invalidate LVI information for BB if the LVI is not provably true for
1947 // all of BB.
 // NOTE(review): listing line 1948 is missing from this copy of the file; the
 // comment above implies the eraseBlock call below is guarded by a condition -
 // confirm against the upstream source.
1949 LVI->eraseBlock(BB);
1950 return true;
1951}
1952
1953/// Update the SSA form. NewBB contains instructions that are copied from BB.
1954/// ValueMapping maps old values in BB to new ones in NewBB.
// NOTE(review): listing line 1955 (presumably the start of the signature with
// the BB and NewBB parameters) is missing from this copy of the file.
1956 ValueToValueMapTy &ValueMapping) {
1957 // If there were values defined in BB that are used outside the block, then we
1958 // now have to update all uses of the value to use either the original value,
1959 // the cloned value, or some PHI derived value. This can require arbitrary
1960 // PHI insertion, which we are prepared to do; clean these up now.
1961 SSAUpdater SSAUpdate;
1962 SmallVector<Use *, 16> UsesToRename;
1963 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1964
1965 for (Instruction &I : *BB) {
1966 // Scan all uses of this instruction to see if it is used outside of its
1967 // block, and if so, record them in UsesToRename.
1968 for (Use &U : I.uses()) {
1969 Instruction *User = cast<Instruction>(U.getUser());
 // A PHI use counts as local when the incoming edge for this use comes
 // from BB, regardless of which block the PHI itself lives in.
1970 if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1971 if (UserPN->getIncomingBlock(U) == BB)
1972 continue;
1973 } else if (User->getParent() == BB)
1974 continue;
1975
1976 UsesToRename.push_back(&U);
1977 }
1978
1979 // Find debug values outside of the block
1980 findDbgValues(&I, DbgVariableRecords);
1981 llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {
1982 return DbgVarRec->getParent() == BB;
1983 });
1984
1985 // If there are no uses outside the block, we're done with this instruction.
1986 if (UsesToRename.empty() && DbgVariableRecords.empty())
1987 continue;
1988 LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
1989
1990 // We found a use of I outside of BB. Rename all uses of I that are outside
1991 // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
1992 // with the two values we know.
1993 SSAUpdate.Initialize(I.getType(), I.getName());
1994 SSAUpdate.AddAvailableValue(BB, &I);
1995 SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
1996
 // Both worklists are drained here so they are empty again for the next
 // instruction of BB.
1997 while (!UsesToRename.empty())
1998 SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
1999 if (!DbgVariableRecords.empty()) {
2000 SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);
2001 DbgVariableRecords.clear();
2002 }
2003
2004 LLVM_DEBUG(dbgs() << "\n");
2005 }
2006}
2007
// Apply RemapSourceAtom to every instruction in the range [Begin, End) using
// the mapping VM; does nothing when VM.AtomMap is empty.
// NOTE(review): listing lines 2008-2009 (presumably the signature of this
// helper, which callers in this file invoke as remapSourceAtoms) are missing
// from this copy of the file.
2010 if (VM.AtomMap.empty())
2011 return;
2012 for (auto It = Begin; It != End; ++It)
2013 RemapSourceAtom(&*It, VM);
2014}
2015
2016/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
2017/// arguments that come from PredBB. The map from the variables in the source
2018/// basic block to the variables in the newly created basic block is recorded
/// in ValueMapping.
2019
// NOTE(review): listing lines 2020-2022 (presumably the start of the signature
// with the ValueMapping, BI and BE parameters) are missing from this copy of
// the file.
2023 BasicBlock *NewBB,
2024 BasicBlock *PredBB) {
2025 // We are going to have to map operands from the source basic block to the new
2026 // copy of the block 'NewBB'. If there are PHI nodes in the source basic
2027 // block, evaluate them to account for entry from PredBB.
2028
2029 // Retargets dbg.value to any renamed variables.
2030 auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
 // Collect the replacements first and apply them afterwards, so the
 // location operands are not modified while they are being iterated.
2031 SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
2032 for (auto *Op : DVR->location_ops()) {
2033 Instruction *OpInst = dyn_cast<Instruction>(Op);
2034 if (!OpInst)
2035 continue;
2036
2037 auto I = ValueMapping.find(OpInst);
2038 if (I != ValueMapping.end())
2039 OperandsToRemap.insert({OpInst, I->second});
2040 }
2041
2042 for (auto &[OldOp, MappedOp] : OperandsToRemap)
2043 DVR->replaceVariableLocationOp(OldOp, MappedOp);
2044 };
2045
 // Remember the source block now: BI is advanced by the loops below.
2046 BasicBlock *RangeBB = BI->getParent();
2047
2048 // Clone the phi nodes of the source basic block into NewBB. The resulting
2049 // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
2050 // might need to rewrite the operand of the cloned phi.
2051 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2052 PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
2053 NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
2054 ValueMapping[PN] = NewPN;
2055 if (const DebugLoc &DL = PN->getDebugLoc())
2056 mapAtomInstance(DL, ValueMapping);
2057 }
2058
2059 // Clone noalias scope declarations in the threaded block. When threading a
2060 // loop exit, we would otherwise end up with two identical scope declarations
2061 // visible at the same time.
2062 SmallVector<MDNode *> NoAliasScopes;
2063 DenseMap<MDNode *, MDNode *> ClonedScopes;
2064 LLVMContext &Context = PredBB->getContext();
2065 identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
2066 cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
2067
 // Copies the debug records attached to From onto NewInst and retargets the
 // variable records among them through ValueMapping.
2068 auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
2069 auto DVRRange = NewInst->cloneDebugInfoFrom(From);
2070 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2071 RetargetDbgVariableRecordIfPossible(&DVR);
2072 };
2073
2074 // Clone the non-phi instructions of the source basic block into NewBB,
2075 // keeping track of the mapping and using it to remap operands in the cloned
2076 // instructions.
2077 for (; BI != BE; ++BI) {
2078 Instruction *New = BI->clone();
2079 New->setName(BI->getName());
2080 New->insertInto(NewBB, NewBB->end());
2081 ValueMapping[&*BI] = New;
2082 adaptNoAliasScopes(New, ClonedScopes, Context);
2083
2084 CloneAndRemapDbgInfo(New, &*BI);
2085 if (const DebugLoc &DL = New->getDebugLoc())
2086 mapAtomInstance(DL, ValueMapping);
2087
2088 // Remap operands to patch up intra-block references.
2089 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2090 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2091 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2092 if (I != ValueMapping.end())
2093 New->setOperand(i, I->second);
2094 }
2095 }
2096
2097 // There may be DbgVariableRecords on the terminator, clone directly from
2098 // marker to marker as there isn't an instruction there.
2099 if (BE != RangeBB->end() && BE->hasDbgRecords()) {
2100 // Dump them at the end.
2101 DbgMarker *Marker = RangeBB->getMarker(BE);
2102 DbgMarker *EndMarker = NewBB->createMarker(NewBB->end());
2103 auto DVRRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
2104 for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
2105 RetargetDbgVariableRecordIfPossible(&DVR);
2106 }
2107}
2108
2109/// Attempt to thread through two successive basic blocks.
/// Returns true if threadThroughTwoBasicBlocks was invoked, false if any of
/// the legality or cost checks below rejected the transform.
// NOTE(review): listing line 2110 (presumably the start of the signature with
// the BB parameter) is missing from this copy of the file.
2111 Value *Cond) {
2112 // Consider:
2113 //
2114 // PredBB:
2115 // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2116 // %tobool = icmp eq i32 %cond, 0
2117 // br i1 %tobool, label %BB, label ...
2118 //
2119 // BB:
2120 // %cmp = icmp eq i32* %var, null
2121 // br i1 %cmp, label ..., label ...
2122 //
2123 // We don't know the value of %var at BB even if we know which incoming edge
2124 // we take to BB. However, once we duplicate PredBB for each of its incoming
2125 // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2126 // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2127
2128 // Require that BB end with a Branch for simplicity.
2129 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2130 if (!CondBr)
2131 return false;
2132
2133 // BB must have exactly one predecessor.
2134 BasicBlock *PredBB = BB->getSinglePredecessor();
2135 if (!PredBB)
2136 return false;
2137
2138 // Require that PredBB end with a conditional Branch. If PredBB ends with an
2139 // unconditional branch, we should be merging PredBB and BB instead. For
2140 // simplicity, we don't deal with a switch.
2141 BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2142 if (!PredBBBranch || PredBBBranch->isUnconditional())
2143 return false;
2144
2145 // If PredBB has exactly one incoming edge, we don't gain anything by copying
2146 // PredBB.
2147 if (PredBB->getSinglePredecessor())
2148 return false;
2149
2150 // Don't thread through PredBB if it contains a successor edge to itself, in
2151 // which case we would infinite loop. Suppose we are threading an edge from
2152 // PredPredBB through PredBB and BB to SuccBB with PredBB containing a
2153 // successor edge to itself. If we allowed jump threading in this case, we
2154 // could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since
2155 // PredBB.thread has a successor edge to PredBB, we would immediately come up
2156 // with another jump threading opportunity from PredBB.thread through PredBB
2157 // and BB to SuccBB. This jump threading would repeatedly occur. That is, we
2158 // would keep peeling one iteration from PredBB.
2159 if (llvm::is_contained(successors(PredBB), PredBB))
2160 return false;
2161
2162 // Don't thread across a loop header.
2163 if (LoopHeaders.count(PredBB))
2164 return false;
2165
2166 // Avoid complication with duplicating EH pads.
2167 if (PredBB->isEHPad())
2168 return false;
2169
2170 // Find a predecessor that we can thread. For simplicity, we only consider a
2171 // successor edge out of BB to which we thread exactly one incoming edge into
2172 // PredBB.
2173 unsigned ZeroCount = 0;
2174 unsigned OneCount = 0;
2175 BasicBlock *ZeroPred = nullptr;
2176 BasicBlock *OnePred = nullptr;
2177 const DataLayout &DL = BB->getDataLayout();
2178 for (BasicBlock *P : predecessors(PredBB)) {
2179 // If PredPred ends with IndirectBrInst, we can't handle it.
2180 if (isa<IndirectBrInst>(P->getTerminator()))
2181 continue;
2182 if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
 // NOTE(review): listing line 2183 (the expression whose result is cast
 // to ConstantInt) is missing from this copy of the file.
2184 if (CI->isZero()) {
2185 ZeroCount++;
2186 ZeroPred = P;
2187 } else if (CI->isOne()) {
2188 OneCount++;
2189 OnePred = P;
2190 }
2191 }
2192 }
2193
2194 // Disregard complicated cases where we have to thread multiple edges.
 // A unique zero-valued predecessor is preferred over a unique one-valued
 // predecessor when both exist.
2195 BasicBlock *PredPredBB;
2196 if (ZeroCount == 1) {
2197 PredPredBB = ZeroPred;
2198 } else if (OneCount == 1) {
2199 PredPredBB = OnePred;
2200 } else {
2201 return false;
2202 }
2203
 // A condition known to be zero selects successor 1 of CondBr; a condition
 // known to be one selects successor 0.
2204 BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2205
2206 // If threading to the same block as we come from, we would infinite loop.
2207 if (SuccBB == BB) {
2208 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2209 << "' - would thread to self!\n");
2210 return false;
2211 }
2212
2213 // If threading this would thread across a loop header, don't thread the edge.
2214 // See the comments above findLoopHeaders for justifications and caveats.
2215 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2216 LLVM_DEBUG({
2217 bool BBIsHeader = LoopHeaders.count(BB);
2218 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2219 dbgs() << " Not threading across "
2220 << (BBIsHeader ? "loop header BB '" : "block BB '")
2221 << BB->getName() << "' to dest "
2222 << (SuccIsHeader ? "loop header BB '" : "block BB '")
2223 << SuccBB->getName()
2224 << "' - it might create an irreducible loop!\n";
2225 });
2226 return false;
2227 }
2228
2229 // Compute the cost of duplicating BB and PredBB.
2230 unsigned BBCost = getJumpThreadDuplicationCost(
2231 TTI, BB, BB->getTerminator(), BBDupThreshold);
2232 unsigned PredBBCost = getJumpThreadDuplicationCost(
2233 TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
2234
2235 // Give up if costs are too high. We need to check BBCost and PredBBCost
2236 // individually before checking their sum because getJumpThreadDuplicationCost
2237 // returns (unsigned)~0 for those basic blocks that cannot be duplicated.
2238 if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||
2239 BBCost + PredBBCost > BBDupThreshold) {
 // NOTE(review): the literal "for BB\n" below has no leading space, so the
 // printed BBCost value runs straight into the word "for".
2240 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2241 << "' - Cost is too high: " << PredBBCost
2242 << " for PredBB, " << BBCost << "for BB\n");
2243 return false;
2244 }
2245
2246 // Now we are ready to duplicate PredBB.
2247 threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2248 return true;
2249}
2250
// Duplicate PredBB into a new block NewBB reached only from PredPredBB, then
// thread the edge NewBB->BB across BB to SuccBB via threadEdge.
// NOTE(review): listing line 2251 (presumably the start of the signature with
// the PredPredBB parameter) is missing from this copy of the file.
2252 BasicBlock *PredBB,
2253 BasicBlock *BB,
2254 BasicBlock *SuccBB) {
2255 LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2256 << BB->getName() << "'\n");
2257
2258 // Build BPI/BFI before any changes are made to IR.
2259 bool HasProfile = doesBlockHaveProfileData(BB);
2260 auto *BFI = getOrCreateBFI(HasProfile);
2261 auto *BPI = getOrCreateBPI(BFI != nullptr);
2262
2263 BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2264 BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2265
2266 BasicBlock *NewBB =
2267 BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2268 PredBB->getParent(), PredBB);
2269 NewBB->moveAfter(PredBB);
2270
2271 // Set the block frequency of NewBB.
2272 if (BFI) {
2273 assert(BPI && "It's expected BPI to exist along with BFI");
2274 auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2275 BPI->getEdgeProbability(PredPredBB, PredBB);
2276 BFI->setBlockFreq(NewBB, NewBBFreq);
2277 }
2278
2279 // We are going to have to map operands from the original PredBB block to the
2280 // new copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate
2281 // them to account for entry from PredPredBB.
 // The range runs to PredBB->end(), so PredBB's terminator is cloned too.
2282 ValueToValueMapTy ValueMapping;
2283 cloneInstructions(ValueMapping, PredBB->begin(), PredBB->end(), NewBB,
2284 PredPredBB);
2285
2286 // Copy the edge probabilities from PredBB to NewBB.
2287 if (BPI)
2288 BPI->copyEdgeProbabilities(PredBB, NewBB);
2289
2290 // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2291 // This removes PredPredBB as a predecessor of PredBB, which requires us to
2292 // simplify any PHI nodes in PredBB.
2293 Instruction *PredPredTerm = PredPredBB->getTerminator();
2294 for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2295 if (PredPredTerm->getSuccessor(i) == PredBB) {
2296 PredBB->removePredecessor(PredPredBB, true);
2297 PredPredTerm->setSuccessor(i, NewBB);
2298 }
2299
 // NewBB now branches to the same two blocks as PredBB, so their PHI nodes
 // need an incoming entry for NewBB.
2300 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2301 ValueMapping);
2302 addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2303 ValueMapping);
2304
 // NOTE(review): NewBB is a clone of PredBB, whose successors are those of
 // PredBBBranch, yet the first two inserts below name the successors of
 // CondBr (BB's terminator) - confirm this is intended.
2305 DTU->applyUpdatesPermissive(
2306 {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2307 {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2308 {DominatorTree::Insert, PredPredBB, NewBB},
2309 {DominatorTree::Delete, PredPredBB, PredBB}});
2310
2311 // Remap source location atoms because we're duplicating control flow.
2312 remapSourceAtoms(ValueMapping, NewBB->begin(), NewBB->end());
2313
2314 updateSSA(PredBB, NewBB, ValueMapping);
2315
2316 // Clean up things like PHI nodes with single operands, dead instructions,
2317 // etc.
2318 SimplifyInstructionsInBlock(NewBB, TLI);
2319 SimplifyInstructionsInBlock(PredBB, TLI);
2320
 // With PredBB duplicated, thread the single edge NewBB->BB across BB.
2321 SmallVector<BasicBlock *, 1> PredsToFactor;
2322 PredsToFactor.push_back(NewBB);
2323 threadEdge(BB, PredsToFactor, SuccBB);
2324}
2325
2326/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.
/// Returns true if threadEdge was invoked; returns false when SuccBB is BB
/// itself, when BB or SuccBB is a recorded loop header, or when the
/// duplication cost of BB exceeds BBDupThreshold.
// NOTE(review): listing line 2327 (presumably the start of the function
// signature) is missing from this copy of the file.
2328 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
2329 BasicBlock *SuccBB) {
2330 // If threading to the same block as we come from, we would infinite loop.
2331 if (SuccBB == BB) {
2332 LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2333 << "' - would thread to self!\n");
2334 return false;
2335 }
2336
2337 // If threading this would thread across a loop header, don't thread the edge.
2338 // See the comments above findLoopHeaders for justifications and caveats.
2339 if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2340 LLVM_DEBUG({
2341 bool BBIsHeader = LoopHeaders.count(BB);
2342 bool SuccIsHeader = LoopHeaders.count(SuccBB);
2343 dbgs() << " Not threading across "
2344 << (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()
2345 << "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")
2346 << SuccBB->getName() << "' - it might create an irreducible loop!\n";
2347 });
2348 return false;
2349 }
2350
 // Profitability check: give up when duplicating BB costs more than the
 // configured threshold.
2351 unsigned JumpThreadCost = getJumpThreadDuplicationCost(
2352 TTI, BB, BB->getTerminator(), BBDupThreshold);
2353 if (JumpThreadCost > BBDupThreshold) {
2354 LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2355 << "' - Cost is too high: " << JumpThreadCost << "\n");
2356 return false;
2357 }
2358
2359 threadEdge(BB, PredBBs, SuccBB);
2360 return true;
2361}
2362
2363/// threadEdge - We have decided that it is safe and profitable to factor the
2364/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
2365/// across BB. Transform the IR to reflect this change.
/// Callers must already have ruled out SuccBB == BB and loop headers; both are
/// only asserted here.
// NOTE(review): listing line 2366 (presumably the start of the signature with
// the BB parameter) is missing from this copy of the file.
2367 const SmallVectorImpl<BasicBlock *> &PredBBs,
2368 BasicBlock *SuccBB) {
2369 assert(SuccBB != BB && "Don't create an infinite loop");
2370
2371 assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
2372 "Don't thread across loop headers");
2373
2374 // Build BPI/BFI before any changes are made to IR.
2375 bool HasProfile = doesBlockHaveProfileData(BB);
2376 auto *BFI = getOrCreateBFI(HasProfile);
2377 auto *BPI = getOrCreateBPI(BFI != nullptr);
2378
2379 // And finally, do it! Start by factoring the predecessors if needed.
2380 BasicBlock *PredBB;
2381 if (PredBBs.size() == 1)
2382 PredBB = PredBBs[0];
2383 else {
2384 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2385 << " common predecessors.\n");
2386 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2387 }
2388
2389 // And finally, do it!
2390 LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
2391 << "' to '" << SuccBB->getName()
2392 << ", across block:\n " << *BB << "\n");
2393
2394 LVI->threadEdge(PredBB, BB, SuccBB);
2395
 // NOTE(review): listing line 2396 (presumably the start of the statement that
 // creates NewBB) is missing from this copy of the file.
2397 BB->getName()+".thread",
2398 BB->getParent(), BB);
2399 NewBB->moveAfter(PredBB);
2400
2401 // Set the block frequency of NewBB.
2402 if (BFI) {
2403 assert(BPI && "It's expected BPI to exist along with BFI");
2404 auto NewBBFreq =
2405 BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
2406 BFI->setBlockFreq(NewBB, NewBBFreq);
2407 }
2408
2409 // Copy all the instructions from BB to NewBB except the terminator.
2410 ValueToValueMapTy ValueMapping;
2411 cloneInstructions(ValueMapping, BB->begin(), std::prev(BB->end()), NewBB,
2412 PredBB);
2413
2414 // We didn't copy the terminator from BB over to NewBB, because there is now
2415 // an unconditional jump to SuccBB. Insert the unconditional jump.
2416 BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
2417 NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
2418
2419 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
2420 // PHI nodes for NewBB now.
2421 addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
2422
2423 // Update the terminator of PredBB to jump to NewBB instead of BB. This
2424 // eliminates predecessors from BB, which requires us to simplify any PHI
2425 // nodes in BB.
2426 Instruction *PredTerm = PredBB->getTerminator();
2427 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
2428 if (PredTerm->getSuccessor(i) == BB) {
2429 BB->removePredecessor(PredBB, true);
2430 PredTerm->setSuccessor(i, NewBB);
2431 }
2432
2433 // Enqueue required DT updates.
2434 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
2435 {DominatorTree::Insert, PredBB, NewBB},
2436 {DominatorTree::Delete, PredBB, BB}});
2437
2438 remapSourceAtoms(ValueMapping, NewBB->begin(), NewBB->end());
2439 updateSSA(BB, NewBB, ValueMapping);
2440
2441 // At this point, the IR is fully up to date and consistent. Do a quick scan
2442 // over the new instructions and zap any that are constants or dead. This
2443 // frequently happens because of phi translation.
2444 SimplifyInstructionsInBlock(NewBB, TLI);
2445
2446 // Update the edge weight from BB to SuccBB, which should be less than before.
2447 updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
2448
2449 // Threaded an edge!
2450 ++NumThreads;
2451}
2452
2453/// Create a new basic block that will be the predecessor of BB and successor of
2454/// all blocks in Preds. When profile data is available, update the frequency of
2455/// this new block.
/// Returns the first block created by the split (NewBBs[0]).
2456BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
// NOTE(review): listing lines 2457 and 2459 (presumably the Preds parameter and
// the declaration of NewBBs) are missing from this copy of the file.
2458 const char *Suffix) {
2460
2461 // Collect the frequencies of all predecessors of BB, which will be used to
2462 // update the edge weight of the result of splitting predecessors.
 // NOTE(review): listing line 2463 (presumably the declaration of FreqMap) is
 // missing from this copy of the file.
2464 auto *BFI = getBFI();
2465 if (BFI) {
2466 auto *BPI = getOrCreateBPI(true);
 // Record the frequency of each Pred->BB edge before the CFG is changed.
2467 for (auto *Pred : Preds)
2468 FreqMap.insert(std::make_pair(
2469 Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
2470 }
2471
2472 // In the case when BB is a LandingPad block we create 2 new predecessors
2473 // instead of just one.
2474 if (BB->isLandingPad()) {
2475 std::string NewName = std::string(Suffix) + ".split-lp";
2476 SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);
2477 } else {
2478 NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));
2479 }
2480
 // Describe the CFG change to the dominator tree updater: every predecessor
 // of a new block now reaches BB through that new block.
2481 std::vector<DominatorTree::UpdateType> Updates;
2482 Updates.reserve((2 * Preds.size()) + NewBBs.size());
2483 for (auto *NewBB : NewBBs) {
2484 BlockFrequency NewBBFreq(0);
2485 Updates.push_back({DominatorTree::Insert, NewBB, BB});
2486 for (auto *Pred : predecessors(NewBB)) {
2487 Updates.push_back({DominatorTree::Delete, Pred, BB});
2488 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
2489 if (BFI) // Update frequencies between Pred -> NewBB.
2490 NewBBFreq += FreqMap.lookup(Pred);
2491 }
2492 if (BFI) // Apply the summed frequency to NewBB.
2493 BFI->setBlockFreq(NewBB, NewBBFreq);
2494 }
2495
2496 DTU->applyUpdatesPermissive(Updates);
2497 return NewBBs[0];
2498}
2499
2500bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
2501 const Instruction *TI = BB->getTerminator();
2502 if (!TI || TI->getNumSuccessors() < 2)
2503 return false;
2504
2505 return hasValidBranchWeightMD(*TI);
2506}
2507
2508/// Update the block frequency of BB and branch weight and the metadata on the
2509/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
2510/// Freq(PredBB->BB) / Freq(BB->SuccBB).
2511void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
2512 BasicBlock *BB,
2513 BasicBlock *NewBB,
2514 BasicBlock *SuccBB,
2515 BlockFrequencyInfo *BFI,
2517 bool HasProfile) {
2518 assert(((BFI && BPI) || (!BFI && !BFI)) &&
2519 "Both BFI & BPI should either be set or unset");
2520
2521 if (!BFI) {
2522 assert(!HasProfile &&
2523 "It's expected to have BFI/BPI when profile info exists");
2524 return;
2525 }
2526
2527 // As the edge from PredBB to BB is deleted, we have to update the block
2528 // frequency of BB.
2529 auto BBOrigFreq = BFI->getBlockFreq(BB);
2530 auto NewBBFreq = BFI->getBlockFreq(NewBB);
2531 auto BBNewFreq = BBOrigFreq - NewBBFreq;
2532 BFI->setBlockFreq(BB, BBNewFreq);
2533
2534 // Collect updated outgoing edges' frequencies from BB and use them to update
2535 // edge probabilities.
2536 SmallVector<uint64_t, 4> BBSuccFreq;
2537 for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
2538 auto BB2SuccBBFreq =
2539 BBOrigFreq * BPI->getEdgeProbability(BB, I.getSuccessorIndex());
2540 auto SuccFreq = (*I == SuccBB) ? BB2SuccBBFreq - NewBBFreq : BB2SuccBBFreq;
2541 BBSuccFreq.push_back(SuccFreq.getFrequency());
2542 }
2543
2544 uint64_t MaxBBSuccFreq = *llvm::max_element(BBSuccFreq);
2545
2547 if (MaxBBSuccFreq == 0)
2548 BBSuccProbs.assign(BBSuccFreq.size(),
2549 {1, static_cast<unsigned>(BBSuccFreq.size())});
2550 else {
2551 for (uint64_t Freq : BBSuccFreq)
2552 BBSuccProbs.push_back(
2553 BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
2554 // Normalize edge probabilities so that they sum up to one.
2556 BBSuccProbs.end());
2557 }
2558
2559 // Update edge probabilities in BPI.
2560 BPI->setEdgeProbability(BB, BBSuccProbs);
2561
2562 // Update the profile metadata as well.
2563 //
2564 // Don't do this if the profile of the transformed blocks was statically
2565 // estimated. (This could occur despite the function having an entry
2566 // frequency in completely cold parts of the CFG.)
2567 //
2568 // In this case we don't want to suggest to subsequent passes that the
2569 // calculated weights are fully consistent. Consider this graph:
2570 //
2571 // check_1
2572 // 50% / |
2573 // eq_1 | 50%
2574 // \ |
2575 // check_2
2576 // 50% / |
2577 // eq_2 | 50%
2578 // \ |
2579 // check_3
2580 // 50% / |
2581 // eq_3 | 50%
2582 // \ |
2583 //
2584 // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
2585 // the overall probabilities are inconsistent; the total probability that the
2586 // value is either 1, 2 or 3 is 150%.
2587 //
2588 // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
2589 // becomes 0%. This is even worse if the edge whose probability becomes 0% is
2590 // the loop exit edge. Then based solely on static estimation we would assume
2591 // the loop was extremely hot.
2592 //
2593 // FIXME this locally as well so that BPI and BFI are consistent as well. We
2594 // shouldn't make edges extremely likely or unlikely based solely on static
2595 // estimation.
2596 if (BBSuccProbs.size() >= 2 && HasProfile) {
2598 for (auto Prob : BBSuccProbs)
2599 Weights.push_back(Prob.getNumerator());
2600
2601 auto TI = BB->getTerminator();
2602 setBranchWeights(*TI, Weights, hasBranchWeightOrigin(*TI));
2603 }
2604}
2605
2606 /// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
2607 /// to BB which contains an i1 PHI node and a conditional branch on that PHI.
2608 /// If we can duplicate the contents of BB up into PredBB do so now, this
2609 /// improves the odds that the branch will be on an analyzable instruction like
2610 /// a compare.
///
/// Returns false (before any of the IR edits below) when BB is in LoopHeaders
/// or when the computed duplication cost exceeds BBDupThreshold. Otherwise the
/// block is duplicated, NumDupes is incremented and true is returned.
// NOTE(review): listing line 2611 (the line carrying the return type and the
// function name) is missing from this extract - restore it from upstream.
2612 BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {
2613 assert(!PredBBs.empty() && "Can't handle an empty set");
2614
2615 // If BB is a loop header, then duplicating this block outside the loop would
2616 // cause us to transform this into an irreducible loop, don't do this.
2617 // See the comments above findLoopHeaders for justifications and caveats.
2618 if (LoopHeaders.count(BB)) {
2619 LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
2620 << "' into predecessor block '" << PredBBs[0]->getName()
2621 << "' - it might create an irreducible loop!\n");
2622 return false;
2623 }
2624
// Cost gate: give up when duplicating BB is estimated to be more expensive
// than the configured threshold.
2625 unsigned DuplicationCost = getJumpThreadDuplicationCost(
2626 TTI, BB, BB->getTerminator(), BBDupThreshold);
2627 if (DuplicationCost > BBDupThreshold) {
2628 LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
2629 << "' - Cost is too high: " << DuplicationCost << "\n");
2630 return false;
2631 }
2632
2633 // And finally, do it! Start by factoring the predecessors if needed.
// Dominator-tree edge updates are collected in Updates and applied in one
// batch at the end of the function.
2634 std::vector<DominatorTree::UpdateType> Updates;
2635 BasicBlock *PredBB;
2636 if (PredBBs.size() == 1)
2637 PredBB = PredBBs[0];
2638 else {
2639 LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()
2640 << " common predecessors.\n");
2641 PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");
2642 }
// The PredBB -> BB edge disappears once PredBB's branch is erased below.
2643 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2644
2645 // Okay, we decided to do this! Clone all the instructions in BB onto the end
2646 // of PredBB.
2647 LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()
2648 << "' into end of '" << PredBB->getName()
2649 << "' to eliminate branch on phi. Cost: "
2650 << DuplicationCost << " block is:" << *BB << "\n");
2651
2652 // Unless PredBB ends with an unconditional branch, split the edge so that we
2653 // can just clone the bits from BB into the end of the new PredBB.
2654 BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2655
2656 if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
2657 BasicBlock *OldPredBB = PredBB;
2658 PredBB = SplitEdge(OldPredBB, BB);
// Record the new OldPredBB -> PredBB -> BB chain that replaces the direct
// OldPredBB -> BB edge.
2659 Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});
2660 Updates.push_back({DominatorTree::Insert, PredBB, BB});
2661 Updates.push_back({DominatorTree::Delete, OldPredBB, BB});
2662 OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
2663 }
2664
2665 // We are going to have to map operands from the original BB block into the
2666 // PredBB block. Evaluate PHI nodes in BB.
2667 ValueToValueMapTy ValueMapping;
2668
2669 // Remember the position before the inserted instructions.
2670 auto RItBeforeInsertPt = std::next(OldPredBranch->getReverseIterator());
2671
// Each PHI in BB is mapped to the value it receives along the PredBB edge;
// BI is left pointing at the first non-PHI instruction.
2672 BasicBlock::iterator BI = BB->begin();
2673 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
2674 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
2675 // Clone the non-phi instructions of BB into PredBB, keeping track of the
2676 // mapping and using it to remap operands in the cloned instructions.
2677 for (; BI != BB->end(); ++BI) {
2678 Instruction *New = BI->clone();
2679 New->insertInto(PredBB, OldPredBranch->getIterator());
2680
2681 // Remap operands to patch up intra-block references.
2682 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2683 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
2684 ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
2685 if (I != ValueMapping.end())
2686 New->setOperand(i, I->second);
2687 }
2688
2689 // Remap debug variable operands.
2690 remapDebugVariable(ValueMapping, New);
2691 if (const DebugLoc &DL = New->getDebugLoc())
2692 mapAtomInstance(DL, ValueMapping);
2693
2694 // If this instruction can be simplified after the operands are updated,
2695 // just use the simplified value instead. This frequently happens due to
2696 // phi translation.
// NOTE(review): listing line 2697 is missing here; it presumably opens the
// `if` that declares IV and starts the call completed by the next two lines.
2698 New,
2699 {BB->getDataLayout(), TLI, nullptr, nullptr, New})) {
2700 ValueMapping[&*BI] = IV;
// The clone is only deleted when it has no side effects; otherwise it stays
// in PredBB even though later uses are mapped to the simplified value.
2701 if (!New->mayHaveSideEffects()) {
2702 New->eraseFromParent();
2703 New = nullptr;
2704 // Clone debug-info on the elided instruction to the destination
2705 // position.
2706 OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);
2707 }
2708 } else {
2709 ValueMapping[&*BI] = New;
2710 }
2711 if (New) {
2712 // Otherwise, insert the new instruction into the block.
2713 New->setName(BI->getName());
2714 // Clone across any debug-info attached to the old instruction.
2715 New->cloneDebugInfoFrom(&*BI);
2716 // Update Dominance from simplified New instruction operands.
// Any basic-block operand of the clone (e.g. of the cloned terminator)
// becomes a new CFG successor of PredBB.
2717 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
2718 if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
2719 Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});
2720 }
2721 }
2722
2723 // Check to see if the targets of the branch had PHI nodes. If so, we need to
2724 // add entries to the PHI nodes for branch from PredBB now.
// cast<> (not dyn_cast<>): BB's terminator is required to be a BranchInst
// here, and both successor 0 and successor 1 are accessed.
2725 BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
2726 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
2727 ValueMapping);
2728 addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
2729 ValueMapping);
2730
2731 // KeyInstructions: Remap the cloned instructions' atoms only.
// The range starts just after the instruction remembered in
// RItBeforeInsertPt and ends at the old branch, i.e. it spans the clones.
2732 remapSourceAtoms(ValueMapping, std::prev(RItBeforeInsertPt)->getIterator(),
2733 OldPredBranch->getIterator());
2734
2735 updateSSA(BB, PredBB, ValueMapping);
2736
2737 // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
2738 // that we nuked.
2739 BB->removePredecessor(PredBB, true);
2740
2741 // Remove the unconditional branch at the end of the PredBB block.
2742 OldPredBranch->eraseFromParent();
// PredBB now ends in a copy of BB's terminator, so it takes over BB's
// outgoing edge probabilities when a BPI instance is available.
2743 if (auto *BPI = getBPI())
2744 BPI->copyEdgeProbabilities(BB, PredBB);
2745 DTU->applyUpdatesPermissive(Updates);
2746
2747 ++NumDupes;
2748 return true;
2749 }
2750
2751 // Pred is a predecessor of BB with an unconditional branch to BB. SI is
2752 // a Select instruction in Pred. BB has other predecessors and SI is used in
2753 // a PHI node in BB. SI has no other use.
2754 // A new basic block, NewBB, is created and SI is converted to compare and
2755 // conditional branch. SI is erased from parent.
//
// SIUse is the PHI in BB that consumes SI and Idx is the index of the
// incoming entry of SIUse that gets rewritten to SI's false value.
// NOTE(review): listing line 2756 (return type, function name and the leading
// parameters; the body uses `Pred` and `BB`) is missing from this extract.
2757 SelectInst *SI, PHINode *SIUse,
2758 unsigned Idx) {
2759 // Expand the select.
2760 //
2761 // Pred --
2762 // | v
2763 // | NewBB
2764 // | |
2765 // |-----
2766 // v
2767 // BB
2768 BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());
2769 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
2770 BB->getParent(), BB);
2771 // Move the unconditional branch to NewBB.
2772 PredTerm->removeFromParent();
2773 PredTerm->insertInto(NewBB, NewBB->end());
2774 // Create a conditional branch and update PHI nodes.
// The existing PHI entry Idx now carries the select's false value, while the
// select's true value arrives through the new NewBB entry.
2775 auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
2776 BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());
2777 BI->copyMetadata(*SI, {LLVMContext::MD_prof});
2778 SIUse->setIncomingValue(Idx, SI->getFalseValue());
2779 SIUse->addIncoming(SI->getTrueValue(), NewBB);
2780
// Both weights start at 1 and keep that value if no branch weights can be
// extracted from SI.
2781 uint64_t TrueWeight = 1;
2782 uint64_t FalseWeight = 1;
2783 // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
2784 if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
2785 (TrueWeight + FalseWeight) != 0) {
// NOTE(review): listing lines 2786-2787 and 2789 are missing; they presumably
// declare BP and start the two calls whose argument lists follow - confirm.
2788 TrueWeight, TrueWeight + FalseWeight));
2790 FalseWeight, TrueWeight + FalseWeight));
2791 // Update BPI if exists.
2792 if (auto *BPI = getBPI())
2793 BPI->setEdgeProbability(Pred, BP);
2794 }
2795 // Set the block frequency of NewBB.
2796 if (auto *BFI = getBFI()) {
// Extracted weights that sum to zero are replaced by 1/1 before the sum is
// used as a denominator below.
2797 if ((TrueWeight + FalseWeight) == 0) {
2798 TrueWeight = 1;
2799 FalseWeight = 1;
2800 }
// NOTE(review): listing line 2801 is missing; it presumably declares
// PredToNewBBProb and starts the call completed by the next line.
2802 TrueWeight, TrueWeight + FalseWeight);
2803 auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
2804 BFI->setBlockFreq(NewBB, NewBBFreq);
2805 }
2806
2807 // The select is now dead.
2808 SI->eraseFromParent();
2809 DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
2810 {DominatorTree::Insert, Pred, NewBB}});
2811
2812 // Update any other PHI nodes in BB.
// NewBB is a new predecessor of BB; every other PHI gets, for NewBB, the same
// value it already receives from Pred.
2813 for (BasicBlock::iterator BI = BB->begin();
2814 PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
2815 if (Phi != SIUse)
2816 Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
2817 }
2818
// Unfold a select feeding the PHI that is used as the condition of SI, when
// that PHI lives in BB. The first incoming select that qualifies is handed to
// unfoldSelectInstr and true is returned; false means nothing was changed.
// NOTE(review): listing line 2819 (the function signature) is missing from
// this extract; the body uses `SI` and `BB` - restore it from upstream.
2820 PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
2821
2822 if (!CondPHI || CondPHI->getParent() != BB)
2823 return false;
2824
2825 for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
2826 BasicBlock *Pred = CondPHI->getIncomingBlock(I);
2827 SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
2828
2829 // The second and third condition can be potentially relaxed. Currently
2830 // the conditions help to simplify the code and allow us to reuse existing
2831 // code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)
2832 if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
2833 continue;
2834
// Only predecessors that end in an unconditional branch are considered.
2835 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2836 if (!PredTerm || !PredTerm->isUnconditional())
2837 continue;
2838
2839 unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
2840 return true;
2841 }
2842 return false;
2843 }
2844
2845 /// tryToUnfoldSelect - Look for blocks of the form
2846 /// bb1:
2847 /// %a = select
2848 /// br bb2
2849 ///
2850 /// bb2:
2851 /// %p = phi [%a, %bb1] ...
2852 /// %c = icmp %p
2853 /// br i1 %c
2854 ///
2855 /// And expand the select into a branch structure if one of its arms allows %c
2856 /// to be folded. This later enables threading from bb1 over bb2.
///
/// Returns true after unfolding the first qualifying select, false if none of
/// the incoming values of the compared PHI qualifies.
// NOTE(review): listing line 2857 (the function signature) is missing from
// this extract; the body uses `CondCmp` and `BB` - restore it from upstream.
2858 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2859 PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
// cast<> (not dyn_cast<>): operand 1 of the compare is required to be a
// Constant at this point.
2860 Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
2861
2862 if (!CondBr || !CondBr->isConditional() || !CondLHS ||
2863 CondLHS->getParent() != BB)
2864 return false;
2865
2866 for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
2867 BasicBlock *Pred = CondLHS->getIncomingBlock(I);
2868 SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
2869
2870 // Look if one of the incoming values is a select in the corresponding
2871 // predecessor.
2872 if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
2873 continue;
2874
2875 BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
2876 if (!PredTerm || !PredTerm->isUnconditional())
2877 continue;
2878
2879 // Now check if one of the select values would allow us to constant fold the
2880 // terminator in BB. We don't do the transform if both sides fold, those
2881 // cases will be threaded in any case.
// Operands 1 and 2 of the select are evaluated against CondRHS on the
// Pred -> BB edge.
2882 Constant *LHSRes =
2883 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
2884 CondRHS, Pred, BB, CondCmp);
2885 Constant *RHSRes =
2886 LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
2887 CondRHS, Pred, BB, CondCmp);
// Proceed only when at least one result is non-null and the two results are
// not the same pointer.
2888 if ((LHSRes || RHSRes) && LHSRes != RHSRes) {
2889 unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
2890 return true;
2891 }
2892 }
2893 return false;
2894 }
2895
2896 /// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
2897 /// same BB in the form
2898 /// bb:
2899 /// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
2900 /// %s = select %p, trueval, falseval
2901 ///
2902 /// or
2903 ///
2904 /// bb:
2905 /// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
2906 /// %c = cmp %p, 0
2907 /// %s = select %c, trueval, falseval
2908 ///
2909 /// And expand the select into a branch structure. This later enables
2910 /// jump-threading over bb in this pass.
2911 ///
2912 /// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
2913 /// select if the associated PHI has at least one constant. If the unfolded
2914 /// select is not jump-threaded, it will be folded again in the later
2915 /// optimizations.
///
/// Returns true after unfolding one select (at most one per call), false if
/// the block was left untouched.
// NOTE(review): listing line 2916 (the function signature) is missing from
// this extract; the body uses `BB` - restore it from upstream.
2917 // This transform would reduce the quality of msan diagnostics.
2918 // Disable this transform under MemorySanitizer.
2919 if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
2920 return false;
2921
2922 // If threading this would thread across a loop header, don't thread the edge.
2923 // See the comments above findLoopHeaders for justifications and caveats.
2924 if (LoopHeaders.count(BB))
2925 return false;
2926
2927 for (BasicBlock::iterator BI = BB->begin();
2928 PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
2929 // Look for a Phi having at least one constant incoming value.
2930 if (llvm::all_of(PN->incoming_values(),
2931 [](Value *V) { return !isa<ConstantInt>(V); }))
2932 continue;
2933
// A select qualifies when it lives in BB, its condition is exactly V, the
// condition has type i1, and the select does not match the logical and/or
// patterns.
2934 auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
2935 using namespace PatternMatch;
2936
2937 // Check if SI is in BB and use V as condition.
2938 if (SI->getParent() != BB)
2939 return false;
2940 Value *Cond = SI->getCondition();
2941 bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
2942 return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;
2943 };
2944
// Scan the users of PN and stop at the first select that qualifies, either
// directly or through a single-use compare against a ConstantInt.
2945 SelectInst *SI = nullptr;
2946 for (Use &U : PN->uses()) {
2947 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2948 // Look for a ICmp in BB that compares PN with a constant and is the
2949 // condition of a Select.
2950 if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
2951 isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
2952 if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
2953 if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
2954 SI = SelectI;
2955 break;
2956 }
2957 } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
2958 // Look for a Select in BB that uses PN as condition.
2959 if (isUnfoldCandidate(SelectI, U.get())) {
2960 SI = SelectI;
2961 break;
2962 }
2963 }
2964 }
2965
2966 if (!SI)
2967 continue;
2968 // Expand the select.
// If the condition is not known to be free of undef/poison, branch on a
// frozen copy of it that is inserted in front of the select.
2969 Value *Cond = SI->getCondition();
2970 if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI)) {
2971 Cond = new FreezeInst(Cond, "cond.fr", SI->getIterator());
2972 cast<FreezeInst>(Cond)->setDebugLoc(DebugLoc::getTemporary());
2973 }
2974 MDNode *BranchWeights = getBranchWeightMDNode(*SI);
2975 Instruction *Term =
2976 SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
// After the split, SI sits in SplitBB and Term is the terminator of the new
// "then" block NewBB.
2977 BasicBlock *SplitBB = SI->getParent();
2978 BasicBlock *NewBB = Term->getParent();
// The select is replaced by a two-entry PHI: the true value arrives from
// NewBB and the false value directly from BB.
2979 PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI->getIterator());
2980 NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
2981 NewPN->addIncoming(SI->getFalseValue(), BB);
2982 NewPN->setDebugLoc(SI->getDebugLoc());
2983 SI->replaceAllUsesWith(NewPN);
2984 SI->eraseFromParent();
2985 // NewBB and SplitBB are newly created blocks which require insertion.
2986 std::vector<DominatorTree::UpdateType> Updates;
// Capacity: three new edges plus one Delete and one Insert per successor of
// SplitBB.
2987 Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);
2988 Updates.push_back({DominatorTree::Insert, BB, SplitBB});
2989 Updates.push_back({DominatorTree::Insert, BB, NewBB});
2990 Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
2991 // BB's successors were moved to SplitBB, update DTU accordingly.
2992 for (auto *Succ : successors(SplitBB)) {
2993 Updates.push_back({DominatorTree::Delete, BB, Succ});
2994 Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
2995 }
2996 DTU->applyUpdatesPermissive(Updates);
2997 return true;
2998 }
2999 return false;
3000 }
3001
3002 /// Try to propagate a guard from the current BB into one of its predecessors
3003 /// in case if another branch of execution implies that the condition of this
3004 /// guard is always true. Currently we only process the simplest case that
3005 /// looks like:
3006 ///
3007 /// Start:
3008 /// %cond = ...
3009 /// br i1 %cond, label %T1, label %F1
3010 /// T1:
3011 /// br label %Merge
3012 /// F1:
3013 /// br label %Merge
3014 /// Merge:
3015 /// %condGuard = ...
3016 /// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
3017 ///
3018 /// And cond either implies condGuard or !condGuard. In this case all the
3019 /// instructions before the guard can be duplicated in both branches, and the
3020 /// guard is then threaded to one of them.
///
/// Returns true as soon as threadGuard succeeds for one guard in BB, false
/// otherwise.
// NOTE(review): listing line 3021 (the function signature) is missing from
// this extract; the body uses `BB` - restore it from upstream.
3022 using namespace PatternMatch;
3023
3024 // We only want to deal with two predecessors.
// The iterator walk below accepts BB only if it has exactly two predecessor
// entries and they are different blocks.
3025 BasicBlock *Pred1, *Pred2;
3026 auto PI = pred_begin(BB), PE = pred_end(BB);
3027 if (PI == PE)
3028 return false;
3029 Pred1 = *PI++;
3030 if (PI == PE)
3031 return false;
3032 Pred2 = *PI++;
3033 if (PI != PE)
3034 return false;
3035 if (Pred1 == Pred2)
3036 return false;
3037
3038 // Try to thread one of the guards of the block.
3039 // TODO: Look up deeper than to immediate predecessor?
// Both predecessors must share the same single predecessor (the top of the
// diamond).
3040 auto *Parent = Pred1->getSinglePredecessor();
3041 if (!Parent || Parent != Pred2->getSinglePredecessor())
3042 return false;
3043
// Parent reaches two distinct blocks (Pred1 and Pred2), so a BranchInst
// terminator here has two successors, which threadGuard asserts.
3044 if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
3045 for (auto &I : *BB)
3046 if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))
3047 return true;
3048
3049 return false;
3050 }
3051
3052 /// Try to propagate the guard from BB which is the lower block of a diamond
3053 /// to one of its branches, in case if diamond's condition implies guard's
3054 /// condition.
///
/// BI is the conditional branch at the top of the diamond. Returns false
/// when neither branch direction implies the guard condition or when the
/// duplication cost exceeds BBDupThreshold; returns true after the guard has
/// been threaded.
// NOTE(review): listing line 3055 (return type, function name and leading
// parameters; the body uses `BB` and `Guard`) is missing from this extract.
3056 BranchInst *BI) {
3057 assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
3058 assert(BI->isConditional() && "Unconditional branch has 2 successors?");
3059 Value *GuardCond = Guard->getArgOperand(0);
3060 Value *BranchCond = BI->getCondition();
3061 BasicBlock *TrueDest = BI->getSuccessor(0);
3062 BasicBlock *FalseDest = BI->getSuccessor(1);
3063
3064 auto &DL = BB->getDataLayout();
3065 bool TrueDestIsSafe = false;
3066 bool FalseDestIsSafe = false;
3067
3068 // True dest is safe if BranchCond => GuardCond.
3069 auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
3070 if (Impl && *Impl)
3071 TrueDestIsSafe = true;
3072 else {
3073 // False dest is safe if !BranchCond => GuardCond.
3074 Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);
3075 if (Impl && *Impl)
3076 FalseDestIsSafe = true;
3077 }
3078
3079 if (!TrueDestIsSafe && !FalseDestIsSafe)
3080 return false;
3081
// At most one of the two flags is set (the false side is only tested in the
// else branch above), so these two selections pick opposite destinations.
3082 BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
3083 BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
3084
3085 ValueToValueMapTy UnguardedMapping, GuardedMapping;
3086 Instruction *AfterGuard = Guard->getNextNode();
3087 unsigned Cost =
3088 getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
3089 if (Cost > BBDupThreshold)
3090 return false;
3091 // Duplicate all instructions before the guard and the guard itself to the
3092 // branch where implication is not proved.
// NOTE(review): listing line 3093 is missing; it presumably declares
// GuardedBlock and starts the call completed by the next line - confirm.
3094 BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
3095 assert(GuardedBlock && "Could not create the guarded block?");
3096 // Duplicate all instructions before the guard in the unguarded branch.
3097 // Since we have successfully duplicated the guarded block and this block
3098 // has fewer instructions, we expect it to succeed.
// NOTE(review): listing line 3099 is missing; it presumably declares
// UnguardedBlock and starts the call completed by the next line - confirm.
3100 BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
3101 assert(UnguardedBlock && "Could not create the unguarded block?");
3102 LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
3103 << GuardedBlock->getName() << "\n");
3104 // Some instructions before the guard may still have uses. For them, we need
3105 // to create Phi nodes merging their copies in both guarded and unguarded
3106 // branches. Those instructions that have no uses can be just removed.
// NOTE(review): listing line 3107 is missing; it presumably declares the
// ToRemove container filled below.
// Collect every non-PHI instruction of BB up to and including the guard.
3108 for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
3109 if (!isa<PHINode>(&*BI))
3110 ToRemove.push_back(&*BI);
3111
// NOTE(review): listing line 3112 is missing; it presumably declares
// InsertionPoint, which the assert below checks.
3113 assert(InsertionPoint != BB->end() && "Empty block?");
3114 // Substitute with Phis & remove.
// Instructions are visited in reverse order of collection.
3115 for (auto *Inst : reverse(ToRemove)) {
3116 if (!Inst->use_empty()) {
3117 PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
3118 NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
3119 NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
3120 NewPN->setDebugLoc(Inst->getDebugLoc());
// NOTE(review): listing line 3121 is missing; NewPN is created above without
// an insertion position, so that line presumably inserts it into BB.
3122 Inst->replaceAllUsesWith(NewPN);
3123 }
3124 Inst->dropDbgRecords();
3125 Inst->eraseFromParent();
3126 }
3127 return true;
3128 }
3129
// Build the set of analyses this pass reports as preserved. BPI/BFI are not
// part of the returned set yet (see the TODOs below).
3130 PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
// NOTE(review): listing lines 3131-3133 are missing; they presumably declare
// PA and mark the preserved analyses - restore them from upstream.
3134
3135 // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
3136 // TODO: Would be nice to verify BPI/BFI consistency as well.
3137 return PA;
3138 }
3139
// Run analysis AnalysisT on the current function through the
// FunctionAnalysisManager and return a pointer to its result. If the function
// changed since the previous call, stale analyses are invalidated, pending
// dominator-tree updates are flushed, and the cached TTI/TLI/AA pointers are
// re-fetched after the analysis has run.
3140 template <typename AnalysisT>
3141 typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
3142 assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
3143
3144 // If there were no changes since last call to 'runExternalAnalysis' then all
3145 // analysis is either up to date or explicitly invalidated. Just go ahead and
3146 // run the "external" analysis.
3147 if (!ChangedSinceLastAnalysisUpdate) {
3148 assert(!DTU->hasPendingUpdates() &&
3149 "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
3150 // Run the "external" analysis.
3151 return &FAM->getResult<AnalysisT>(*F);
3152 }
// Reset the flag first: everything below brings the analyses back in sync.
3153 ChangedSinceLastAnalysisUpdate = false;
3154
3155 auto PA = getPreservedAnalysis();
3156 // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
3157 // as preserved.
3158 PA.preserve<BranchProbabilityAnalysis>();
3159 PA.preserve<BlockFrequencyAnalysis>();
3160 // Report everything except explicitly preserved as invalid.
3161 FAM->invalidate(*F, PA);
3162 // Update DT/PDT.
3163 DTU->flush();
3164 // Make sure DT/PDT are valid before running "external" analysis.
3165 assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
3166 assert((!DTU->hasPostDomTree() ||
3167 DTU->getPostDomTree().verify(
// NOTE(review): listing line 3168 is missing; it presumably supplies the
// verification level argument and closes this assert - confirm.
3169 // Run the "external" analysis.
3170 auto *Result = &FAM->getResult<AnalysisT>(*F);
3171 // Update analysis JumpThreading depends on and not explicitly preserved.
3172 TTI = &FAM->getResult<TargetIRAnalysis>(*F);
3173 TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
3174 AA = &FAM->getResult<AAManager>(*F);
3175
3176 return Result;
3177 }
3178
// Return the BranchProbabilityInfo pointer held in the BPI member. When it is
// unset, the function asserts that a FunctionAnalysisManager is available.
3179 BranchProbabilityInfo *JumpThreadingPass::getBPI() {
3180 if (!BPI) {
3181 assert(FAM && "Can't create BPI without FunctionAnalysisManager");
// NOTE(review): listing line 3182 is missing; it presumably assigns BPI from
// FAM - restore it from upstream.
3183 }
3184 return BPI;
3185 }
3186
// Return the BlockFrequencyInfo pointer held in the BFI member. When it is
// unset, the function asserts that a FunctionAnalysisManager is available.
3187 BlockFrequencyInfo *JumpThreadingPass::getBFI() {
3188 if (!BFI) {
3189 assert(FAM && "Can't create BFI without FunctionAnalysisManager");
// NOTE(review): listing line 3190 is missing; it presumably assigns BFI from
// FAM - restore it from upstream.
3191 }
3192 return BFI;
3193 }
3194
3195 // Important note on validity of BPI/BFI. JumpThreading tries to preserve
3196 // BPI/BFI as it goes. Thus if cached instance exists it will be updated.
3197 // Otherwise, new instance of BPI/BFI is created (up to date by definition).
//
// Returns whatever getBPI() yields when that is non-null. Otherwise, if Force
// is set, BranchProbabilityAnalysis is run via runExternalAnalysis and its
// result is stored in BPI. The returned pointer can be null when Force is
// false.
3198 BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
3199 auto *Res = getBPI();
3200 if (Res)
3201 return Res;
3202
3203 if (Force)
3204 BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
3205
3206 return BPI;
3207 }
3208
// Returns whatever getBFI() yields when that is non-null. Otherwise, if Force
// is set, BlockFrequencyAnalysis is run via runExternalAnalysis and its result
// is stored in BFI. The returned pointer can be null when Force is false.
3209 BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
3210 auto *Res = getBFI();
3211 if (Res)
3212 return Res;
3213
3214 if (Force)
3215 BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
3216
3217 return BFI;
3218 }
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
BlockVerifier::State From
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
This is the interface for a simple mod/ref and alias analysis over globals.
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
static unsigned getBestDestForJumpOnUndef(BasicBlock *BB)
GetBestDestForBranchOnUndef - If we determine that the specified block ends in an undefined jump,...
static cl::opt< unsigned > PhiDuplicateThreshold("jump-threading-phi-threshold", cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76), cl::Hidden)
static bool replaceFoldableUses(Instruction *Cond, Value *ToVal, BasicBlock *KnownAtEndOfBB)
static cl::opt< unsigned > BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden)
static cl::opt< bool > ThreadAcrossLoopHeaders("jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), cl::init(false), cl::Hidden)
static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, Instruction *StopAt, unsigned Threshold)
Return the cost of duplicating a piece of this block from first non-phi and before StopAt instruction...
static void remapSourceAtoms(ValueToValueMapTy &VM, BasicBlock::iterator Begin, BasicBlock::iterator End)
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, ValueToValueMapTy &ValueMap)
addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new predecessor to the PHIBB block.
static BasicBlock * findMostPopularDest(BasicBlock *BB, const SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &PredToDestList)
findMostPopularDest - The specified list contains multiple possible threadable destinations.
static Constant * getKnownConstant(Value *Val, ConstantPreference Preference)
getKnownConstant - Helper method to determine if we can thread over a terminator with the given value...
static cl::opt< unsigned > ImplicationSearchThreshold("jump-threading-implication-search-threshold", cl::desc("The number of predecessors to search for a stronger " "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden)
static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB)
Return true if Op is an instruction defined in the given block.
static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB)
static bool hasAddressTakenAndUsed(BasicBlock *BB)
See the comments on JumpThreadingPass.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define P(N)
ppc ctr loops verify
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:83
A manager for alias analyses.
A private abstract base class describing the concept of an individual alias analysis implementation.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:431
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
LLVM_ABI DbgMarker * createMarker(Instruction *I)
Attach a DbgMarker to the given instruction.
Definition: BasicBlock.cpp:33
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:690
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:171
const Instruction & front() const
Definition: BasicBlock.h:482
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
Definition: BasicBlock.cpp:243
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:459
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:437
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:252
LLVM_ABI DbgMarker * getMarker(InstListType::iterator It)
Return the DbgMarker for the position given by It, so that DbgRecords can be inserted there.
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:131
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:661
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:707
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:494
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void disableDominatorTree()
Disable the use of the dominator tree during alias analysis queries.
The address of a basic block.
Definition: Constants.h:899
static LLVM_ABI BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1911
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Conditional or Unconditional Branch instruction.
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
LLVM_ABI void eraseBlock(const BasicBlock *BB)
Forget analysis results for the given basic block.
LLVM_ABI void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl< BranchProbability > &Probs)
Set the raw probabilities for all edges from the given block.
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
LLVM_ABI void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst)
Copy outgoing edge probabilities from Src to Dst.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
uint32_t getNumerator() const
BranchProbability getCompl() const
static void normalizeProbabilities(ProbabilityIter Begin, ProbabilityIter End)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1292
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:448
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:767
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
static LLVM_ABI Constant * getNot(Constant *C)
Definition: Constants.cpp:2641
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:220
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:875
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154
This class represents a range of values.
Definition: ConstantRange.h:47
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
This is an important base class in LLVM.
Definition: Constant.h:43
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:739
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
Per-instruction record of debug-info.
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(DbgMarker *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere, bool InsertAtHead=false)
Clone all DbgMarkers from From into this marker.
LLVM_ABI const BasicBlock * getParent() const
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:124
static DebugLoc getTemporary()
Definition: DebugLoc.h:161
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:203
bool empty() const
Definition: DenseMap.h:119
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:334
This class represents a freeze function that returns a random concrete value if an operand is either a ...
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
This instruction compares its operands according to the predicate given to the constructor.
Indirect Branch Instruction.
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:90
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1804
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1789
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
bool isSpecialTerminator() const
Definition: Instruction.h:323
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
LLVM_ABI bool simplifyPartiallyRedundantLoad(LoadInst *LI)
simplifyPartiallyRedundantLoad - If LoadI is an obviously partially redundant load instruction,...
LLVM_ABI bool processBranchOnXOR(BinaryOperator *BO)
processBranchOnXOR - We have an otherwise unthreadable conditional branch on a xor instruction in the...
LLVM_ABI bool processGuards(BasicBlock *BB)
Try to propagate a guard from the current BB into one of its predecessors in case another branch o...
LLVM_ABI void updateSSA(BasicBlock *BB, BasicBlock *NewBB, ValueToValueMapTy &ValueMapping)
Update the SSA form.
bool computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
LLVM_ABI void findLoopHeaders(Function &F)
findLoopHeaders - We do not want jump threading to turn proper loop structures into irreducible loops...
LLVM_ABI bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB)
Merge basic block BB into its sole predecessor if possible.
LLVM_ABI JumpThreadingPass(int T=-1)
LLVM_ABI void cloneInstructions(ValueToValueMapTy &ValueMapping, BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB)
Clone instructions in range [BI, BE) to NewBB.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI Constant * evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond, const DataLayout &DL)
LLVM_ABI bool processBranchOnPHI(PHINode *PN)
processBranchOnPHI - We have an otherwise unthreadable conditional branch on a PHI node (or freeze PH...
LLVM_ABI bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond)
Attempt to thread through two successive basic blocks.
LLVM_ABI bool computeValueKnownInPredecessorsImpl(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, SmallPtrSet< Value *, 4 > &RecursionSet, Instruction *CxtI=nullptr)
computeValueKnownInPredecessors - Given a basic block BB and a value V, see if we can infer that the ...
LLVM_ABI void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx)
DomTreeUpdater * getDomTreeUpdater() const
LLVM_ABI bool runImpl(Function &F, FunctionAnalysisManager *FAM, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr< DomTreeUpdater > DTU, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI)
LLVM_ABI bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI=nullptr)
LLVM_ABI bool processBlock(BasicBlock *BB)
processBlock - If there are any predecessors whose control can be threaded through to a successor,...
LLVM_ABI bool processImpliedCondition(BasicBlock *BB)
LLVM_ABI bool duplicateCondBranchOnPHIIntoPred(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs)
duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch to BB which contains an i1...
LLVM_ABI void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB)
LLVM_ABI bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
tryThreadEdge - Thread an edge if it's safe and profitable to do so.
LLVM_ABI bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB)
tryToUnfoldSelect - Look for blocks of the form bb1: a = select br bb2
LLVM_ABI bool tryToUnfoldSelectInCurrBB(BasicBlock *BB)
tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the same BB in the form bb: p = ...
LLVM_ABI void threadEdge(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &PredBBs, BasicBlock *SuccBB)
threadEdge - We have decided that it is safe and profitable to factor the blocks in PredBBs to one pr...
LLVM_ABI bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI)
Try to propagate the guard from BB which is the lower block of a diamond to one of its branches,...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Analysis to compute lazy value information.
This pass computes, caches, and vends lazy value constraint information.
Definition: LazyValueInfo.h:32
void eraseBlock(BasicBlock *BB)
Inform the analysis cache that we have erased a block.
void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc)
Inform the analysis cache that we have threaded an edge from PredBB to OldSucc to be from PredBB to N...
Constant * getPredicateOnEdge(CmpInst::Predicate Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value comparison with a constant is known to be true or false on the ...
Constant * getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Determine whether the specified value is known to be a constant on the specified edge.
ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI=nullptr)
Return the ConstantRange constraint that is known to hold for the specified value on the specified edg...
Constant * getConstant(Value *V, Instruction *CxtI)
Determine whether the specified value is known to be a constant at the specified instruction.
void forgetValue(Value *V)
Remove information related to this value from the cache.
Constant * getPredicateAt(CmpInst::Predicate Pred, Value *V, Constant *C, Instruction *CxtI, bool UseBlockValue)
Determine whether the specified value comparison with a constant is known to be true or false at the ...
An instruction for reading from memory.
Definition: Instructions.h:180
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:224
bool isUnordered() const
Definition: Instructions.h:253
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:234
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:215
static LocationSize precise(uint64_t Value)
Metadata node.
Definition: Metadata.h:1077
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
Representation for a specific memory location.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1885
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:39
void RewriteUse(Use &U)
Rewrite a use of the symbolic value.
Definition: SSAUpdater.cpp:187
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Definition: SSAUpdater.cpp:52
void UpdateDebugValues(Instruction *I)
Rewrite debug value intrinsics to conform to a new SSA form.
Definition: SSAUpdater.cpp:199
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Definition: SSAUpdater.cpp:69
This class represents the LLVM 'select' instruction.
size_type size() const
Definition: SmallPtrSet.h:99
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:541
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:705
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
'undef' values are things that do not have specified contents.
Definition: Constants.h:1420
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1866
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
Value * getOperand(unsigned i) const
Definition: User.h:232
iterator find(const KeyT &Val)
Definition: ValueMap.h:160
iterator end()
Definition: ValueMap.h:139
DMAtomT AtomMap
Map {(InlinedAt, old atom number) -> new atom number}.
Definition: ValueMap.h:123
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:1090
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:701
bool use_empty() const
Definition: Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
const ParentTy * getParent() const
Definition: ilist_node.h:34
reverse_self_iterator getReverseIterator()
Definition: ilist_node.h:137
self_iterator getIterator()
Definition: ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:359
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
Definition: Intrinsics.cpp:762
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:134
static cl::opt< unsigned long > StopAt("sbvec-stop-at", cl::init(StopAtDisabled), cl::Hidden, cl::desc("Vectorize if the invocation count is < than this. 0 " "disables vectorization."))
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
Definition: DebugInfo.cpp:124
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
auto pred_end(const MachineBasicBlock *BB)
LLVM_ABI unsigned replaceNonLocalUsesWith(Instruction *From, Value *To)
Definition: Local.cpp:3207
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
constexpr from_range_t from_range
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
LLVM_ABI Value * findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst)
Scan backwards to see if we have the value of the given pointer available locally within a small numb...
Definition: Loads.cpp:671
LLVM_ABI void remapDebugVariable(ValueToValueMapTy &Mapping, Instruction *Inst)
Remap the operands of the debug records attached to Inst, and the operands of Inst itself if it's a d...
Definition: Local.cpp:3420
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
auto pred_size(const MachineBasicBlock *BB)
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:721
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:538
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
LLVM_ABI BasicBlock * DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU)
Split edge between BB and PredBB and duplicate all non-Phi instructions from BB between its beginning...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:402
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1140
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI bool hasValidBranchWeightMD(const Instruction &I)
Checks if an instruction has valid Branch Weight Metadata.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1444
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI void cloneNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, DenseMap< MDNode *, MDNode * > &ClonedScopes, StringRef Ext, LLVMContext &Context)
Duplicate the specified list of noalias decl scopes.
LLVM_ABI cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3081
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
LLVM_ABI void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is a block with one predecessor and its predecessor is known to have one successor (BB!...
Definition: Local.cpp:761
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2013
LLVM_ABI void adaptNoAliasScopes(llvm::Instruction *I, const DenseMap< MDNode *, MDNode * > &ClonedScopes, LLVMContext &Context)
Adapt the metadata for the specified instruction according to the provided mapping.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2049
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
Definition: STLExtras.h:1454
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto pred_begin(const MachineBasicBlock *BB)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1629
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void FindFunctionBackedges(const Function &F, SmallVectorImpl< std::pair< const BasicBlock *, const BasicBlock * > > &Result)
Analyze the specified function to find all of the loop backedges in the function and return them.
Definition: CFG.cpp:35
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
LLVM_ABI std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
Function object to check whether the second component of a container supported by std::get (like std:...
Definition: STLExtras.h:1481