LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <optional>
88#include <set>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "simplifycfg"
97
99 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
100
101 cl::desc(
102 "Temporary development switch used to gradually uplift SimplifyCFG "
103 "into preserving DomTree,"));
104
105// Chosen as 2 so as to be cheap, but still to have enough power to fold
106// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
107// To catch this, we need to fold a compare and a select, hence '2' being the
108// minimum reasonable default.
110 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
111 cl::desc(
112 "Control the amount of phi node folding to perform (default = 2)"));
113
115 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
116 cl::desc("Control the maximal total instruction cost that we are willing "
117 "to speculatively execute to fold a 2-entry PHI node into a "
118 "select (default = 4)"));
119
120static cl::opt<bool>
121 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
122 cl::desc("Hoist common instructions up to the parent block"));
123
125 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
126 cl::desc("Hoist loads if the target supports conditional faulting"));
127
129 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
130 cl::desc("Hoist stores if the target supports conditional faulting"));
131
133 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
134 cl::desc("Control the maximal conditional load/store that we are willing "
135 "to speculatively execute to eliminate conditional branch "
136 "(default = 6)"));
137
139 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
140 cl::init(20),
141 cl::desc("Allow reordering across at most this many "
142 "instructions when hoisting"));
143
144static cl::opt<bool>
145 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
146 cl::desc("Sink common instructions down to the end block"));
147
149 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
150 cl::desc("Hoist conditional stores if an unconditional store precedes"));
151
153 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
154 cl::desc("Hoist conditional stores even if an unconditional store does not "
155 "precede - hoist multiple conditional stores into a single "
156 "predicated store"));
157
159 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
160 cl::desc("When merging conditional stores, do so even if the resultant "
161 "basic blocks are unlikely to be if-converted as a result"));
162
164 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
165 cl::desc("Allow exactly one expensive instruction to be speculatively "
166 "executed"));
167
169 "max-speculation-depth", cl::Hidden, cl::init(10),
170 cl::desc("Limit maximum recursion depth when calculating costs of "
171 "speculatively executed instructions"));
172
173static cl::opt<int>
174 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
175 cl::init(10),
176 cl::desc("Max size of a block which is still considered "
177 "small enough to thread through"));
178
179// Two is chosen to allow one negation and a logical combine.
181 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
182 cl::init(2),
183 cl::desc("Maximum cost of combining conditions when "
184 "folding branches"));
185
187 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
188 cl::init(2),
189 cl::desc("Multiplier to apply to threshold when determining whether or not "
190 "to fold branch to common destination when vector operations are "
191 "present"));
192
194 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
195 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
196
198 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
199 cl::desc("Limit cases to analyze when converting a switch to select"));
200
202 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
203 cl::desc("Limit number of blocks a define in a threaded block is allowed "
204 "to be live in"));
205
206STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
207STATISTIC(NumLinearMaps,
208 "Number of switch instructions turned into linear mapping");
209STATISTIC(NumLookupTables,
210 "Number of switch instructions turned into lookup tables");
212 NumLookupTablesHoles,
213 "Number of switch instructions turned into lookup tables (holes checked)");
214STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
215STATISTIC(NumFoldValueComparisonIntoPredecessors,
216 "Number of value comparisons folded into predecessor basic blocks");
217STATISTIC(NumFoldBranchToCommonDest,
218 "Number of branches folded into predecessor basic block");
220 NumHoistCommonCode,
221 "Number of common instruction 'blocks' hoisted up to the begin block");
222STATISTIC(NumHoistCommonInstrs,
223 "Number of common instructions hoisted up to the begin block");
224STATISTIC(NumSinkCommonCode,
225 "Number of common instruction 'blocks' sunk down to the end block");
226STATISTIC(NumSinkCommonInstrs,
227 "Number of common instructions sunk down to the end block");
228STATISTIC(NumSpeculations, "Number of speculative executed instructions");
229STATISTIC(NumInvokes,
230 "Number of invokes with empty resume blocks simplified into calls");
231STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
232STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
233
234namespace {
235
236// The first field contains the value that the switch produces when a certain
237// case group is selected, and the second field is a vector containing the
238// cases composing the case group.
239using SwitchCaseResultVectorTy =
241
242// The first field contains the phi node that generates a result of the switch
243// and the second field contains the value generated for a certain case in the
244// switch for that PHI.
245using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
246
247/// ValueEqualityComparisonCase - Represents a case of a switch.
248struct ValueEqualityComparisonCase {
250 BasicBlock *Dest;
251
252 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
253 : Value(Value), Dest(Dest) {}
254
255 bool operator<(ValueEqualityComparisonCase RHS) const {
256 // Comparing pointers is ok as we only rely on the order for uniquing.
257 return Value < RHS.Value;
258 }
259
260 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
261};
262
/// Driver for the per-block CFG peephole simplifications.
///
/// Each simplify*/fold*/hoist* helper returns true if it changed the IR.
/// run() invokes simplifyOnce() repeatedly; a transform that wants another
/// pass over the block calls requestResimplify() to set the Resimplify flag.
class SimplifyCFGOpt {
  const TargetTransformInfo &TTI;    // Cost model for speculation/hoisting decisions.
  DomTreeUpdater *DTU;               // May be null; ctor asserts no post-domtree.
  const DataLayout &DL;
  // Weak handles to loop-header blocks; presumably used to avoid destroying
  // loop structure when folding — TODO(review): confirm against the callers.
  ArrayRef<WeakVH> LoopHeaders;
  const SimplifyCFGOptions &Options;
  bool Resimplify;                   // Set via requestResimplify().

  // --- Value-equality comparison folding (switch/branch-on-icmp) ---
  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // --- Per-terminator simplifications ---
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);

  // --- Hoisting / sinking / speculation ---
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs);
  bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  /// Run one round of simplification on \p BB; returns true on change.
  bool simplifyOnce(BasicBlock *BB);
  /// Iterate simplifyOnce() to a fixed point; returns true on any change.
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
332
333} // end anonymous namespace
334
335/// Return true if all the PHI nodes in the basic block \p BB
336/// receive compatible (identical) incoming values when coming from
337/// all of the predecessor blocks that are specified in \p IncomingBlocks.
338///
339/// Note that if the values aren't exactly identical, but \p EquivalenceSet
340/// is provided, and *both* of the values are present in the set,
341/// then they are considered equal.
343 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
344 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
345 assert(IncomingBlocks.size() == 2 &&
346 "Only for a pair of incoming blocks at the time!");
347
348 // FIXME: it is okay if one of the incoming values is an `undef` value,
349 // iff the other incoming value is guaranteed to be a non-poison value.
350 // FIXME: it is okay if one of the incoming values is a `poison` value.
351 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
352 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
353 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
354 if (IV0 == IV1)
355 return true;
356 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
357 EquivalenceSet->contains(IV1))
358 return true;
359 return false;
360 });
361}
362
363/// Return true if it is safe to merge these two
364/// terminator instructions together.
365static bool
367 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
368 if (SI1 == SI2)
369 return false; // Can't merge with self!
370
371 // It is not safe to merge these two switch instructions if they have a common
372 // successor, and if that successor has a PHI node, and if *that* PHI node has
373 // conflicting incoming values from the two switch blocks.
374 BasicBlock *SI1BB = SI1->getParent();
375 BasicBlock *SI2BB = SI2->getParent();
376
378 bool Fail = false;
379 for (BasicBlock *Succ : successors(SI2BB)) {
380 if (!SI1Succs.count(Succ))
381 continue;
382 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
383 continue;
384 Fail = true;
385 if (FailBlocks)
386 FailBlocks->insert(Succ);
387 else
388 break;
389 }
390
391 return !Fail;
392}
393
394/// Update PHI nodes in Succ to indicate that there will now be entries in it
395/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
396/// will be the same as those coming in from ExistPred, an existing predecessor
397/// of Succ.
398static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
399 BasicBlock *ExistPred,
400 MemorySSAUpdater *MSSAU = nullptr) {
401 for (PHINode &PN : Succ->phis())
402 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
403 if (MSSAU)
404 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
405 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
406}
407
408/// Compute an abstract "cost" of speculating the given instruction,
409/// which is assumed to be safe to speculate. TCC_Free means cheap,
410/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
411/// expensive.
413 const TargetTransformInfo &TTI) {
414 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
415}
416
417/// If we have a merge point of an "if condition" as accepted above,
418/// return true if the specified value dominates the block. We don't handle
419/// the true generality of domination here, just a special case which works
420/// well enough for us.
421///
422/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
423/// see if V (which must be an instruction) and its recursive operands
424/// that do not dominate BB have a combined cost lower than Budget and
425/// are non-trapping. If both are true, the instruction is inserted into the
426/// set and true is returned.
427///
428/// The cost for most non-trapping instructions is defined as 1 except for
429/// Select whose cost is 2.
430///
431/// After this function returns, Cost is increased by the cost of
432/// V plus its non-dominating operands. If that cost is greater than
433/// Budget, false is returned and Cost is undefined.
435 Value *V, BasicBlock *BB, Instruction *InsertPt,
436 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
438 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
439 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
440 // so limit the recursion depth.
441 // TODO: While this recursion limit does prevent pathological behavior, it
442 // would be better to track visited instructions to avoid cycles.
444 return false;
445
447 if (!I) {
448 // Non-instructions dominate all instructions and can be executed
449 // unconditionally.
450 return true;
451 }
452 BasicBlock *PBB = I->getParent();
453
454 // We don't want to allow weird loops that might have the "if condition" in
455 // the bottom of this block.
456 if (PBB == BB)
457 return false;
458
459 // If this instruction is defined in a block that contains an unconditional
460 // branch to BB, then it must be in the 'conditional' part of the "if
461 // statement". If not, it definitely dominates the region.
463 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
464 return true;
465
466 // If we have seen this instruction before, don't count it again.
467 if (AggressiveInsts.count(I))
468 return true;
469
470 // Okay, it looks like the instruction IS in the "condition". Check to
471 // see if it's a cheap instruction to unconditionally compute, and if it
472 // only uses stuff defined outside of the condition. If so, hoist it out.
473 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
474 return false;
475
476 // Overflow arithmetic instruction plus extract value are usually generated
477 // when a division is being replaced. But, in this case, the zero check may
478 // still be kept in the code. In that case it would be worth to hoist these
479 // two instruction out of the basic block. Let's treat this pattern as one
480 // single cheap instruction here!
481 WithOverflowInst *OverflowInst;
482 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
483 ZeroCostInstructions.insert(OverflowInst);
484 Cost += 1;
485 } else if (!ZeroCostInstructions.contains(I))
486 Cost += computeSpeculationCost(I, TTI);
487
488 // Allow exactly one instruction to be speculated regardless of its cost
489 // (as long as it is safe to do so).
490 // This is intended to flatten the CFG even if the instruction is a division
491 // or other expensive operation. The speculation of an expensive instruction
492 // is expected to be undone in CodeGenPrepare if the speculation has not
493 // enabled further IR optimizations.
494 if (Cost > Budget &&
495 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
496 !Cost.isValid()))
497 return false;
498
499 // Okay, we can only really hoist these out if their operands do
500 // not take us over the cost threshold.
501 for (Use &Op : I->operands())
502 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
503 TTI, AC, ZeroCostInstructions, Depth + 1))
504 return false;
505 // Okay, it's safe to do this! Remember this instruction.
506 AggressiveInsts.insert(I);
507 return true;
508}
509
510/// Extract ConstantInt from value, looking through IntToPtr
511/// and PointerNullValue. Return NULL if value is not a constant int.
513 // Normal constant int.
515 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
516 DL.isNonIntegralPointerType(V->getType()))
517 return CI;
518
519 // This is some kind of pointer constant. Turn it into a pointer-sized
520 // ConstantInt if possible.
521 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
522
523 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
525 return ConstantInt::get(PtrTy, 0);
526
527 // IntToPtr const int.
529 if (CE->getOpcode() == Instruction::IntToPtr)
530 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
531 // The constant is very likely to have the right type already.
532 if (CI->getType() == PtrTy)
533 return CI;
534 else
535 return cast<ConstantInt>(
536 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
537 }
538 return nullptr;
539}
540
541namespace {
542
543/// Given a chain of or (||) or and (&&) comparison of a value against a
544/// constant, this will try to recover the information required for a switch
545/// structure.
546/// It will depth-first traverse the chain of comparison, seeking for patterns
547/// like %a == 12 or %a < 4 and combine them to produce a set of integer
548/// representing the different cases for the switch.
549/// Note that if the chain is composed of '||' it will build the set of elements
550/// that matches the comparisons (i.e. any of this value validate the chain)
551/// while for a chain of '&&' it will build the set elements that make the test
552/// fail.
553struct ConstantComparesGatherer {
554 const DataLayout &DL;
555
556 /// Value found for the switch comparison
557 Value *CompValue = nullptr;
558
559 /// Extra clause to be checked before the switch
560 Value *Extra = nullptr;
561
562 /// Set of integers to match in switch
564
565 /// Number of comparisons matched in the and/or chain
566 unsigned UsedICmps = 0;
567
568 /// If the elements in Vals matches the comparisons
569 bool IsEq = false;
570
571 // Used to check if the first matched CompValue shall be the Extra check.
572 bool IgnoreFirstMatch = false;
573 bool MultipleMatches = false;
574
575 /// Construct and compute the result for the comparison instruction Cond
576 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
577 gather(Cond);
578 if (CompValue || !MultipleMatches)
579 return;
580 Extra = nullptr;
581 Vals.clear();
582 UsedICmps = 0;
583 IgnoreFirstMatch = true;
584 gather(Cond);
585 }
586
587 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
588 ConstantComparesGatherer &
589 operator=(const ConstantComparesGatherer &) = delete;
590
591private:
592 /// Try to set the current value used for the comparison, it succeeds only if
593 /// it wasn't set before or if the new value is the same as the old one
594 bool setValueOnce(Value *NewVal) {
595 if (IgnoreFirstMatch) {
596 IgnoreFirstMatch = false;
597 return false;
598 }
599 if (CompValue && CompValue != NewVal) {
600 MultipleMatches = true;
601 return false;
602 }
603 CompValue = NewVal;
604 return true;
605 }
606
607 /// Try to match Instruction "I" as a comparison against a constant and
608 /// populates the array Vals with the set of values that match (or do not
609 /// match depending on isEQ).
610 /// Return false on failure. On success, the Value the comparison matched
611 /// against is placed in CompValue.
612 /// If CompValue is already set, the function is expected to fail if a match
613 /// is found but the value compared to is different.
614 bool matchInstruction(Instruction *I, bool isEQ) {
615 if (match(I, m_Not(m_Instruction(I))))
616 isEQ = !isEQ;
617
618 Value *Val;
619 if (match(I, m_NUWTrunc(m_Value(Val)))) {
620 // If we already have a value for the switch, it has to match!
621 if (!setValueOnce(Val))
622 return false;
623 UsedICmps++;
624 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
625 return true;
626 }
627 // If this is an icmp against a constant, handle this as one of the cases.
628 ICmpInst *ICI;
629 ConstantInt *C;
630 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
631 (C = getConstantInt(I->getOperand(1), DL)))) {
632 return false;
633 }
634
635 Value *RHSVal;
636 const APInt *RHSC;
637
638 // Pattern match a special case
639 // (x & ~2^z) == y --> x == y || x == y|2^z
640 // This undoes a transformation done by instcombine to fuse 2 compares.
641 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
642 // It's a little bit hard to see why the following transformations are
643 // correct. Here is a CVC3 program to verify them for 64-bit values:
644
645 /*
646 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
647 x : BITVECTOR(64);
648 y : BITVECTOR(64);
649 z : BITVECTOR(64);
650 mask : BITVECTOR(64) = BVSHL(ONE, z);
651 QUERY( (y & ~mask = y) =>
652 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
653 );
654 QUERY( (y | mask = y) =>
655 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
656 );
657 */
658
659 // Please note that each pattern must be a dual implication (<--> or
660 // iff). One directional implication can create spurious matches. If the
661 // implication is only one-way, an unsatisfiable condition on the left
662 // side can imply a satisfiable condition on the right side. Dual
663 // implication ensures that satisfiable conditions are transformed to
664 // other satisfiable conditions and unsatisfiable conditions are
665 // transformed to other unsatisfiable conditions.
666
667 // Here is a concrete example of a unsatisfiable condition on the left
668 // implying a satisfiable condition on the right:
669 //
670 // mask = (1 << z)
671 // (x & ~mask) == y --> (x == y || x == (y | mask))
672 //
673 // Substituting y = 3, z = 0 yields:
674 // (x & -2) == 3 --> (x == 3 || x == 2)
675
676 // Pattern match a special case:
677 /*
678 QUERY( (y & ~mask = y) =>
679 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
680 );
681 */
682 if (match(ICI->getOperand(0),
683 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
684 APInt Mask = ~*RHSC;
685 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
686 // If we already have a value for the switch, it has to match!
687 if (!setValueOnce(RHSVal))
688 return false;
689
690 Vals.push_back(C);
691 Vals.push_back(
692 ConstantInt::get(C->getContext(),
693 C->getValue() | Mask));
694 UsedICmps++;
695 return true;
696 }
697 }
698
699 // Pattern match a special case:
700 /*
701 QUERY( (y | mask = y) =>
702 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
703 );
704 */
705 if (match(ICI->getOperand(0),
706 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
707 APInt Mask = *RHSC;
708 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
709 // If we already have a value for the switch, it has to match!
710 if (!setValueOnce(RHSVal))
711 return false;
712
713 Vals.push_back(C);
714 Vals.push_back(ConstantInt::get(C->getContext(),
715 C->getValue() & ~Mask));
716 UsedICmps++;
717 return true;
718 }
719 }
720
721 // If we already have a value for the switch, it has to match!
722 if (!setValueOnce(ICI->getOperand(0)))
723 return false;
724
725 UsedICmps++;
726 Vals.push_back(C);
727 return true;
728 }
729
730 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
731 ConstantRange Span =
733
734 // Shift the range if the compare is fed by an add. This is the range
735 // compare idiom as emitted by instcombine.
736 Value *CandidateVal = I->getOperand(0);
737 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
738 Span = Span.subtract(*RHSC);
739 CandidateVal = RHSVal;
740 }
741
742 // If this is an and/!= check, then we are looking to build the set of
743 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
744 // x != 0 && x != 1.
745 if (!isEQ)
746 Span = Span.inverse();
747
748 // If there are a ton of values, we don't want to make a ginormous switch.
749 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
750 return false;
751 }
752
753 // If we already have a value for the switch, it has to match!
754 if (!setValueOnce(CandidateVal))
755 return false;
756
757 // Add all values from the range to the set
758 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
759 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
760
761 UsedICmps++;
762 return true;
763 }
764
765 /// Given a potentially 'or'd or 'and'd together collection of icmp
766 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
767 /// the value being compared, and stick the list constants into the Vals
768 /// vector.
769 /// One "Extra" case is allowed to differ from the other.
770 void gather(Value *V) {
771 Value *Op0, *Op1;
772 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
773 IsEq = true;
774 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
775 IsEq = false;
776 else
777 return;
778 // Keep a stack (SmallVector for efficiency) for depth-first traversal
779 SmallVector<Value *, 8> DFT{Op0, Op1};
780 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
781
782 while (!DFT.empty()) {
783 V = DFT.pop_back_val();
784
785 if (Instruction *I = dyn_cast<Instruction>(V)) {
786 // If it is a || (or && depending on isEQ), process the operands.
787 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
788 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
789 if (Visited.insert(Op1).second)
790 DFT.push_back(Op1);
791 if (Visited.insert(Op0).second)
792 DFT.push_back(Op0);
793
794 continue;
795 }
796
797 // Try to match the current instruction
798 if (matchInstruction(I, IsEq))
799 // Match succeed, continue the loop
800 continue;
801 }
802
803 // One element of the sequence of || (or &&) could not be match as a
804 // comparison against the same value as the others.
805 // We allow only one "Extra" case to be checked before the switch
806 if (!Extra) {
807 Extra = V;
808 continue;
809 }
810 // Failed to parse a proper sequence, abort now
811 CompValue = nullptr;
812 break;
813 }
814 }
815};
816
817} // end anonymous namespace
818
820 MemorySSAUpdater *MSSAU = nullptr) {
821 Instruction *Cond = nullptr;
823 Cond = dyn_cast<Instruction>(SI->getCondition());
824 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
825 if (BI->isConditional())
826 Cond = dyn_cast<Instruction>(BI->getCondition());
827 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
828 Cond = dyn_cast<Instruction>(IBI->getAddress());
829 }
830
831 TI->eraseFromParent();
832 if (Cond)
834}
835
836/// Return true if the specified terminator checks
837/// to see if a value is equal to constant integer value.
838Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
839 Value *CV = nullptr;
840 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
841 // Do not permit merging of large switch instructions into their
842 // predecessors unless there is only one predecessor.
843 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
844 CV = SI->getCondition();
845 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
846 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
847 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
848 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
849 CV = ICI->getOperand(0);
850 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
851 if (Trunc->hasNoUnsignedWrap())
852 CV = Trunc->getOperand(0);
853 }
854 }
855
856 // Unwrap any lossless ptrtoint cast.
857 if (CV) {
858 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
859 Value *Ptr = PTII->getPointerOperand();
860 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
861 CV = Ptr;
862 }
863 }
864 return CV;
865}
866
867/// Given a value comparison instruction,
868/// decode all of the 'cases' that it represents and return the 'default' block.
869BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
870 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
871 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
872 Cases.reserve(SI->getNumCases());
873 for (auto Case : SI->cases())
874 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
875 Case.getCaseSuccessor()));
876 return SI->getDefaultDest();
877 }
878
879 BranchInst *BI = cast<BranchInst>(TI);
880 Value *Cond = BI->getCondition();
881 ICmpInst::Predicate Pred;
882 ConstantInt *C;
883 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
884 Pred = ICI->getPredicate();
885 C = getConstantInt(ICI->getOperand(1), DL);
886 } else {
887 Pred = ICmpInst::ICMP_NE;
888 auto *Trunc = cast<TruncInst>(Cond);
889 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
890 }
891 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
892 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
893 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
894}
895
896/// Given a vector of bb/value pairs, remove any entries
897/// in the list that match the specified block.
898static void
900 std::vector<ValueEqualityComparisonCase> &Cases) {
901 llvm::erase(Cases, BB);
902}
903
904/// Return true if there are any keys in C1 that exist in C2 as well.
905static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
906 std::vector<ValueEqualityComparisonCase> &C2) {
907 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
908
909 // Make V1 be smaller than V2.
910 if (V1->size() > V2->size())
911 std::swap(V1, V2);
912
913 if (V1->empty())
914 return false;
915 if (V1->size() == 1) {
916 // Just scan V2.
917 ConstantInt *TheVal = (*V1)[0].Value;
918 for (const ValueEqualityComparisonCase &VECC : *V2)
919 if (TheVal == VECC.Value)
920 return true;
921 }
922
923 // Otherwise, just sort both lists and compare element by element.
924 array_pod_sort(V1->begin(), V1->end());
925 array_pod_sort(V2->begin(), V2->end());
926 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
927 while (i1 != e1 && i2 != e2) {
928 if ((*V1)[i1].Value == (*V2)[i2].Value)
929 return true;
930 if ((*V1)[i1].Value < (*V2)[i2].Value)
931 ++i1;
932 else
933 ++i2;
934 }
935 return false;
936}
937
938// Set branch weights on SwitchInst. This sets the metadata if there is at
939// least one non-zero weight.
941 bool IsExpected) {
942 // Check that there is at least one non-zero weight. Otherwise, pass
943 // nullptr to setMetadata which will erase the existing metadata.
944 MDNode *N = nullptr;
945 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
946 N = MDBuilder(SI->getParent()->getContext())
947 .createBranchWeights(Weights, IsExpected);
948 SI->setMetadata(LLVMContext::MD_prof, N);
949}
950
951// Similar to the above, but for branch and select instructions that take
952// exactly 2 weights.
953static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
954 uint32_t FalseWeight, bool IsExpected) {
956 // Check that there is at least one non-zero weight. Otherwise, pass
957 // nullptr to setMetadata which will erase the existing metadata.
958 MDNode *N = nullptr;
959 if (TrueWeight || FalseWeight)
960 N = MDBuilder(I->getParent()->getContext())
961 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
962 I->setMetadata(LLVMContext::MD_prof, N);
963}
964
965/// If TI is known to be a terminator instruction and its block is known to
966/// only have a single predecessor block, check to see if that predecessor is
967/// also a value comparison with the same value, and if that comparison
968/// determines the outcome of this comparison. If so, simplify TI. This does a
969/// very limited form of jump threading.
970bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
971 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
972 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
973 if (!PredVal)
974 return false; // Not a value comparison in predecessor.
975
976 Value *ThisVal = isValueEqualityComparison(TI);
977 assert(ThisVal && "This isn't a value comparison!!");
978 if (ThisVal != PredVal)
979 return false; // Different predicates.
980
981 // TODO: Preserve branch weight metadata, similarly to how
982 // foldValueComparisonIntoPredecessors preserves it.
983
984 // Find out information about when control will move from Pred to TI's block.
985 std::vector<ValueEqualityComparisonCase> PredCases;
986 BasicBlock *PredDef =
987 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
988 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
989
990 // Find information about how control leaves this block.
991 std::vector<ValueEqualityComparisonCase> ThisCases;
992 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
993 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
994
995 // If TI's block is the default block from Pred's comparison, potentially
996 // simplify TI based on this knowledge.
997 if (PredDef == TI->getParent()) {
998 // If we are here, we know that the value is none of those cases listed in
999 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1000 // can simplify TI.
1001 if (!valuesOverlap(PredCases, ThisCases))
1002 return false;
1003
1004 if (isa<BranchInst>(TI)) {
1005 // Okay, one of the successors of this condbr is dead. Convert it to a
1006 // uncond br.
1007 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1008 // Insert the new branch.
1009 Instruction *NI = Builder.CreateBr(ThisDef);
1010 (void)NI;
1011
1012 // Remove PHI node entries for the dead edge.
1013 ThisCases[0].Dest->removePredecessor(PredDef);
1014
1015 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1016 << "Through successor TI: " << *TI << "Leaving: " << *NI
1017 << "\n");
1018
1020
1021 if (DTU)
1022 DTU->applyUpdates(
1023 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1024
1025 return true;
1026 }
1027
1028 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1029 // Okay, TI has cases that are statically dead, prune them away.
1030 SmallPtrSet<Constant *, 16> DeadCases;
1031 for (const ValueEqualityComparisonCase &Case : PredCases)
1032 DeadCases.insert(Case.Value);
1033
1034 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1035 << "Through successor TI: " << *TI);
1036
1037 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1038 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1039 --i;
1040 auto *Successor = i->getCaseSuccessor();
1041 if (DTU)
1042 ++NumPerSuccessorCases[Successor];
1043 if (DeadCases.count(i->getCaseValue())) {
1044 Successor->removePredecessor(PredDef);
1045 SI.removeCase(i);
1046 if (DTU)
1047 --NumPerSuccessorCases[Successor];
1048 }
1049 }
1050
1051 if (DTU) {
1052 std::vector<DominatorTree::UpdateType> Updates;
1053 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1054 if (I.second == 0)
1055 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1056 DTU->applyUpdates(Updates);
1057 }
1058
1059 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1060 return true;
1061 }
1062
1063 // Otherwise, TI's block must correspond to some matched value. Find out
1064 // which value (or set of values) this is.
1065 ConstantInt *TIV = nullptr;
1066 BasicBlock *TIBB = TI->getParent();
1067 for (const auto &[Value, Dest] : PredCases)
1068 if (Dest == TIBB) {
1069 if (TIV)
1070 return false; // Cannot handle multiple values coming to this block.
1071 TIV = Value;
1072 }
1073 assert(TIV && "No edge from pred to succ?");
1074
1075 // Okay, we found the one constant that our value can be if we get into TI's
1076 // BB. Find out which successor will unconditionally be branched to.
1077 BasicBlock *TheRealDest = nullptr;
1078 for (const auto &[Value, Dest] : ThisCases)
1079 if (Value == TIV) {
1080 TheRealDest = Dest;
1081 break;
1082 }
1083
1084 // If not handled by any explicit cases, it is handled by the default case.
1085 if (!TheRealDest)
1086 TheRealDest = ThisDef;
1087
1088 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1089
1090 // Remove PHI node entries for dead edges.
1091 BasicBlock *CheckEdge = TheRealDest;
1092 for (BasicBlock *Succ : successors(TIBB))
1093 if (Succ != CheckEdge) {
1094 if (Succ != TheRealDest)
1095 RemovedSuccs.insert(Succ);
1096 Succ->removePredecessor(TIBB);
1097 } else
1098 CheckEdge = nullptr;
1099
1100 // Insert the new branch.
1101 Instruction *NI = Builder.CreateBr(TheRealDest);
1102 (void)NI;
1103
1104 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1105 << "Through successor TI: " << *TI << "Leaving: " << *NI
1106 << "\n");
1107
1109 if (DTU) {
1110 SmallVector<DominatorTree::UpdateType, 2> Updates;
1111 Updates.reserve(RemovedSuccs.size());
1112 for (auto *RemovedSucc : RemovedSuccs)
1113 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1114 DTU->applyUpdates(Updates);
1115 }
1116 return true;
1117}
1118
1119namespace {
1120
1121/// This class implements a stable ordering of constant
1122/// integers that does not depend on their address. This is important for
1123/// applications that sort ConstantInt's to ensure uniqueness.
1124struct ConstantIntOrdering {
1125 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1126 return LHS->getValue().ult(RHS->getValue());
1127 }
1128};
1129
1130} // end anonymous namespace
1131
1133 ConstantInt *const *P2) {
1134 const ConstantInt *LHS = *P1;
1135 const ConstantInt *RHS = *P2;
1136 if (LHS == RHS)
1137 return 0;
1138 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1139}
1140
1141/// Get Weights of a given terminator, the default weight is at the front
1142/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1143/// metadata.
1145 SmallVectorImpl<uint64_t> &Weights) {
1146 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1147 assert(MD && "Invalid branch-weight metadata");
1148 extractFromBranchWeightMD64(MD, Weights);
1149
1150 // If TI is a conditional eq, the default case is the false case,
1151 // and the corresponding branch-weight data is at index 2. We swap the
1152 // default weight to be the first entry.
1153 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1154 assert(Weights.size() == 2);
1155 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1156 if (!ICI)
1157 return;
1158
1159 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1160 std::swap(Weights.front(), Weights.back());
1161 }
1162}
1163
1164/// Keep halving the weights until all can fit in uint32_t.
1166 uint64_t Max = *llvm::max_element(Weights);
1167 if (Max > UINT_MAX) {
1168 unsigned Offset = 32 - llvm::countl_zero(Max);
1169 for (uint64_t &I : Weights)
1170 I >>= Offset;
1171 }
1172}
1173
1175 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1176 Instruction *PTI = PredBlock->getTerminator();
1177
1178 // If we have bonus instructions, clone them into the predecessor block.
1179 // Note that there may be multiple predecessor blocks, so we cannot move
1180 // bonus instructions to a predecessor block.
1181 for (Instruction &BonusInst : *BB) {
1182 if (BonusInst.isTerminator())
1183 continue;
1184
1185 Instruction *NewBonusInst = BonusInst.clone();
1186
1187 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1188 // Unless the instruction has the same !dbg location as the original
1189 // branch, drop it. When we fold the bonus instructions we want to make
1190 // sure we reset their debug locations in order to avoid stepping on
1191 // dead code caused by folding dead branches.
1192 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1193 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1194 mapAtomInstance(DL, VMap);
1195 }
1196
1197 RemapInstruction(NewBonusInst, VMap,
1199
1200 // If we speculated an instruction, we need to drop any metadata that may
1201 // result in undefined behavior, as the metadata might have been valid
1202 // only given the branch precondition.
1203 // Similarly strip attributes on call parameters that may cause UB in
1204 // location the call is moved to.
1205 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1206
1207 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1208 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1209 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1211
1212 NewBonusInst->takeName(&BonusInst);
1213 BonusInst.setName(NewBonusInst->getName() + ".old");
1214 VMap[&BonusInst] = NewBonusInst;
1215
1216 // Update (liveout) uses of bonus instructions,
1217 // now that the bonus instruction has been cloned into predecessor.
1218 // Note that we expect to be in a block-closed SSA form for this to work!
1219 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1220 auto *UI = cast<Instruction>(U.getUser());
1221 auto *PN = dyn_cast<PHINode>(UI);
1222 if (!PN) {
1223 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1224 "If the user is not a PHI node, then it should be in the same "
1225 "block as, and come after, the original bonus instruction.");
1226 continue; // Keep using the original bonus instruction.
1227 }
1228 // Is this the block-closed SSA form PHI node?
1229 if (PN->getIncomingBlock(U) == BB)
1230 continue; // Great, keep using the original bonus instruction.
1231 // The only other alternative is an "use" when coming from
1232 // the predecessor block - here we should refer to the cloned bonus instr.
1233 assert(PN->getIncomingBlock(U) == PredBlock &&
1234 "Not in block-closed SSA form?");
1235 U.set(NewBonusInst);
1236 }
1237 }
1238
1239 // Key Instructions: We may have propagated atom info into the pred. If the
1240 // pred's terminator already has atom info do nothing as merging would drop
1241 // one atom group anyway. If it doesn't, propagte the remapped atom group
1242 // from BB's terminator.
1243 if (auto &PredDL = PTI->getDebugLoc()) {
1244 auto &DL = BB->getTerminator()->getDebugLoc();
1245 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1246 PredDL.isSameSourceLocation(DL)) {
1247 PTI->setDebugLoc(DL);
1248 RemapSourceAtom(PTI, VMap);
1249 }
1250 }
1251}
1252
1253bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1254 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1255 BasicBlock *BB = TI->getParent();
1256 BasicBlock *Pred = PTI->getParent();
1257
1259
1260 // Figure out which 'cases' to copy from SI to PSI.
1261 std::vector<ValueEqualityComparisonCase> BBCases;
1262 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1263
1264 std::vector<ValueEqualityComparisonCase> PredCases;
1265 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1266
1267 // Based on whether the default edge from PTI goes to BB or not, fill in
1268 // PredCases and PredDefault with the new switch cases we would like to
1269 // build.
1270 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1271
1272 // Update the branch weight metadata along the way
1273 SmallVector<uint64_t, 8> Weights;
1274 bool PredHasWeights = hasBranchWeightMD(*PTI);
1275 bool SuccHasWeights = hasBranchWeightMD(*TI);
1276
1277 if (PredHasWeights) {
1278 getBranchWeights(PTI, Weights);
1279 // branch-weight metadata is inconsistent here.
1280 if (Weights.size() != 1 + PredCases.size())
1281 PredHasWeights = SuccHasWeights = false;
1282 } else if (SuccHasWeights)
1283 // If there are no predecessor weights but there are successor weights,
1284 // populate Weights with 1, which will later be scaled to the sum of
1285 // successor's weights
1286 Weights.assign(1 + PredCases.size(), 1);
1287
1288 SmallVector<uint64_t, 8> SuccWeights;
1289 if (SuccHasWeights) {
1290 getBranchWeights(TI, SuccWeights);
1291 // branch-weight metadata is inconsistent here.
1292 if (SuccWeights.size() != 1 + BBCases.size())
1293 PredHasWeights = SuccHasWeights = false;
1294 } else if (PredHasWeights)
1295 SuccWeights.assign(1 + BBCases.size(), 1);
1296
1297 if (PredDefault == BB) {
1298 // If this is the default destination from PTI, only the edges in TI
1299 // that don't occur in PTI, or that branch to BB will be activated.
1300 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1301 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1302 if (PredCases[i].Dest != BB)
1303 PTIHandled.insert(PredCases[i].Value);
1304 else {
1305 // The default destination is BB, we don't need explicit targets.
1306 std::swap(PredCases[i], PredCases.back());
1307
1308 if (PredHasWeights || SuccHasWeights) {
1309 // Increase weight for the default case.
1310 Weights[0] += Weights[i + 1];
1311 std::swap(Weights[i + 1], Weights.back());
1312 Weights.pop_back();
1313 }
1314
1315 PredCases.pop_back();
1316 --i;
1317 --e;
1318 }
1319
1320 // Reconstruct the new switch statement we will be building.
1321 if (PredDefault != BBDefault) {
1322 PredDefault->removePredecessor(Pred);
1323 if (DTU && PredDefault != BB)
1324 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1325 PredDefault = BBDefault;
1326 ++NewSuccessors[BBDefault];
1327 }
1328
1329 unsigned CasesFromPred = Weights.size();
1330 uint64_t ValidTotalSuccWeight = 0;
1331 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1332 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1333 PredCases.push_back(BBCases[i]);
1334 ++NewSuccessors[BBCases[i].Dest];
1335 if (SuccHasWeights || PredHasWeights) {
1336 // The default weight is at index 0, so weight for the ith case
1337 // should be at index i+1. Scale the cases from successor by
1338 // PredDefaultWeight (Weights[0]).
1339 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1340 ValidTotalSuccWeight += SuccWeights[i + 1];
1341 }
1342 }
1343
1344 if (SuccHasWeights || PredHasWeights) {
1345 ValidTotalSuccWeight += SuccWeights[0];
1346 // Scale the cases from predecessor by ValidTotalSuccWeight.
1347 for (unsigned i = 1; i < CasesFromPred; ++i)
1348 Weights[i] *= ValidTotalSuccWeight;
1349 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1350 Weights[0] *= SuccWeights[0];
1351 }
1352 } else {
1353 // If this is not the default destination from PSI, only the edges
1354 // in SI that occur in PSI with a destination of BB will be
1355 // activated.
1356 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1357 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1358 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1359 if (PredCases[i].Dest == BB) {
1360 PTIHandled.insert(PredCases[i].Value);
1361
1362 if (PredHasWeights || SuccHasWeights) {
1363 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1364 std::swap(Weights[i + 1], Weights.back());
1365 Weights.pop_back();
1366 }
1367
1368 std::swap(PredCases[i], PredCases.back());
1369 PredCases.pop_back();
1370 --i;
1371 --e;
1372 }
1373
1374 // Okay, now we know which constants were sent to BB from the
1375 // predecessor. Figure out where they will all go now.
1376 for (const ValueEqualityComparisonCase &Case : BBCases)
1377 if (PTIHandled.count(Case.Value)) {
1378 // If this is one we are capable of getting...
1379 if (PredHasWeights || SuccHasWeights)
1380 Weights.push_back(WeightsForHandled[Case.Value]);
1381 PredCases.push_back(Case);
1382 ++NewSuccessors[Case.Dest];
1383 PTIHandled.erase(Case.Value); // This constant is taken care of
1384 }
1385
1386 // If there are any constants vectored to BB that TI doesn't handle,
1387 // they must go to the default destination of TI.
1388 for (ConstantInt *I : PTIHandled) {
1389 if (PredHasWeights || SuccHasWeights)
1390 Weights.push_back(WeightsForHandled[I]);
1391 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1392 ++NewSuccessors[BBDefault];
1393 }
1394 }
1395
1396 // Okay, at this point, we know which new successor Pred will get. Make
1397 // sure we update the number of entries in the PHI nodes for these
1398 // successors.
1399 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1400 if (DTU) {
1401 SuccsOfPred = {llvm::from_range, successors(Pred)};
1402 Updates.reserve(Updates.size() + NewSuccessors.size());
1403 }
1404 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1405 NewSuccessors) {
1406 for (auto I : seq(NewSuccessor.second)) {
1407 (void)I;
1408 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1409 }
1410 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1411 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1412 }
1413
1414 Builder.SetInsertPoint(PTI);
1415 // Convert pointer to int before we switch.
1416 if (CV->getType()->isPointerTy()) {
1417 CV =
1418 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1419 }
1420
1421 // Now that the successors are updated, create the new Switch instruction.
1422 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1423 NewSI->setDebugLoc(PTI->getDebugLoc());
1424 for (ValueEqualityComparisonCase &V : PredCases)
1425 NewSI->addCase(V.Value, V.Dest);
1426
1427 if (PredHasWeights || SuccHasWeights) {
1428 // Halve the weights if any of them cannot fit in an uint32_t
1429 fitWeights(Weights);
1430
1431 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1432
1433 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1434 }
1435
1437
1438 // Okay, last check. If BB is still a successor of PSI, then we must
1439 // have an infinite loop case. If so, add an infinitely looping block
1440 // to handle the case to preserve the behavior of the code.
1441 BasicBlock *InfLoopBlock = nullptr;
1442 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1443 if (NewSI->getSuccessor(i) == BB) {
1444 if (!InfLoopBlock) {
1445 // Insert it at the end of the function, because it's either code,
1446 // or it won't matter if it's hot. :)
1447 InfLoopBlock =
1448 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1449 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1450 if (DTU)
1451 Updates.push_back(
1452 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1453 }
1454 NewSI->setSuccessor(i, InfLoopBlock);
1455 }
1456
1457 if (DTU) {
1458 if (InfLoopBlock)
1459 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1460
1461 Updates.push_back({DominatorTree::Delete, Pred, BB});
1462
1463 DTU->applyUpdates(Updates);
1464 }
1465
1466 ++NumFoldValueComparisonIntoPredecessors;
1467 return true;
1468}
1469
1470/// The specified terminator is a value equality comparison instruction
1471/// (either a switch or a branch on "X == c").
1472/// See if any of the predecessors of the terminator block are value comparisons
1473/// on the same value. If so, and if safe to do so, fold them together.
1474bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1475 IRBuilder<> &Builder) {
1476 BasicBlock *BB = TI->getParent();
1477 Value *CV = isValueEqualityComparison(TI); // CondVal
1478 assert(CV && "Not a comparison?");
1479
1480 bool Changed = false;
1481
1482 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1483 while (!Preds.empty()) {
1484 BasicBlock *Pred = Preds.pop_back_val();
1485 Instruction *PTI = Pred->getTerminator();
1486
1487 // Don't try to fold into itself.
1488 if (Pred == BB)
1489 continue;
1490
1491 // See if the predecessor is a comparison with the same value.
1492 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1493 if (PCV != CV)
1494 continue;
1495
1496 SmallSetVector<BasicBlock *, 4> FailBlocks;
1497 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1498 for (auto *Succ : FailBlocks) {
1499 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1500 return false;
1501 }
1502 }
1503
1504 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1505 Changed = true;
1506 }
1507 return Changed;
1508}
1509
1510// If we would need to insert a select that uses the value of this invoke
1511// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1512// need to do this), we can't hoist the invoke, as there is nowhere to put the
1513// select in this case.
1515 Instruction *I1, Instruction *I2) {
1516 for (BasicBlock *Succ : successors(BB1)) {
1517 for (const PHINode &PN : Succ->phis()) {
1518 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1519 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1520 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1521 return false;
1522 }
1523 }
1524 }
1525 return true;
1526}
1527
1528// Get interesting characteristics of instructions that
1529// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1530// instructions can be reordered across.
1536
1538 unsigned Flags = 0;
1539 if (I->mayReadFromMemory())
1540 Flags |= SkipReadMem;
1541 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1542 // inalloca) across stacksave/stackrestore boundaries.
1543 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1544 Flags |= SkipSideEffect;
1546 Flags |= SkipImplicitControlFlow;
1547 return Flags;
1548}
1549
1550// Returns true if it is safe to reorder an instruction across preceding
1551// instructions in a basic block.
1552static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1553 // Don't reorder a store over a load.
1554 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1555 return false;
1556
1557 // If we have seen an instruction with side effects, it's unsafe to reorder an
1558 // instruction which reads memory or itself has side effects.
1559 if ((Flags & SkipSideEffect) &&
1560 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1561 return false;
1562
1563 // Reordering across an instruction which does not necessarily transfer
1564 // control to the next instruction is speculation.
1566 return false;
1567
1568 // Hoisting of llvm.deoptimize is only legal together with the next return
1569 // instruction, which this pass is not always able to do.
1570 if (auto *CB = dyn_cast<CallBase>(I))
1571 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1572 return false;
1573
1574 // It's also unsafe/illegal to hoist an instruction above its instruction
1575 // operands
1576 BasicBlock *BB = I->getParent();
1577 for (Value *Op : I->operands()) {
1578 if (auto *J = dyn_cast<Instruction>(Op))
1579 if (J->getParent() == BB)
1580 return false;
1581 }
1582
1583 return true;
1584}
1585
1586static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1587
1588/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1589/// instructions \p I1 and \p I2 can and should be hoisted.
1591 const TargetTransformInfo &TTI) {
1592 // If we're going to hoist a call, make sure that the two instructions
1593 // we're commoning/hoisting are both marked with musttail, or neither of
1594 // them is marked as such. Otherwise, we might end up in a situation where
1595 // we hoist from a block where the terminator is a `ret` to a block where
1596 // the terminator is a `br`, and `musttail` calls expect to be followed by
1597 // a return.
1598 auto *C1 = dyn_cast<CallInst>(I1);
1599 auto *C2 = dyn_cast<CallInst>(I2);
1600 if (C1 && C2)
1601 if (C1->isMustTailCall() != C2->isMustTailCall())
1602 return false;
1603
1604 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1605 return false;
1606
1607 // If any of the two call sites has nomerge or convergent attribute, stop
1608 // hoisting.
1609 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1610 if (CB1->cannotMerge() || CB1->isConvergent())
1611 return false;
1612 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1613 if (CB2->cannotMerge() || CB2->isConvergent())
1614 return false;
1615
1616 return true;
1617}
1618
1619/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1620/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1621/// hoistCommonCodeFromSuccessors. e.g. The input:
1622/// I1 DVRs: { x, z },
1623/// OtherInsts: { I2 DVRs: { x, y, z } }
1624/// would result in hoisting only DbgVariableRecord x.
1626 Instruction *TI, Instruction *I1,
1627 SmallVectorImpl<Instruction *> &OtherInsts) {
1628 if (!I1->hasDbgRecords())
1629 return;
1630 using CurrentAndEndIt =
1631 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1632 // Vector of {Current, End} iterators.
1634 Itrs.reserve(OtherInsts.size() + 1);
1635 // Helper lambdas for lock-step checks:
1636 // Return true if this Current == End.
1637 auto atEnd = [](const CurrentAndEndIt &Pair) {
1638 return Pair.first == Pair.second;
1639 };
1640 // Return true if all Current are identical.
1641 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1642 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1644 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1645 });
1646 };
1647
1648 // Collect the iterators.
1649 Itrs.push_back(
1650 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1651 for (Instruction *Other : OtherInsts) {
1652 if (!Other->hasDbgRecords())
1653 return;
1654 Itrs.push_back(
1655 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1656 }
1657
1658 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1659 // the lock-step DbgRecord are identical, hoist all of them to TI.
1660 // This replicates the dbg.* intrinsic behaviour in
1661 // hoistCommonCodeFromSuccessors.
1662 while (none_of(Itrs, atEnd)) {
1663 bool HoistDVRs = allIdentical(Itrs);
1664 for (CurrentAndEndIt &Pair : Itrs) {
1665 // Increment Current iterator now as we may be about to move the
1666 // DbgRecord.
1667 DbgRecord &DR = *Pair.first++;
1668 if (HoistDVRs) {
1669 DR.removeFromParent();
1670 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1671 }
1672 }
1673 }
1674}
1675
1677 const Instruction *I2) {
1678 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1679 return true;
1680
1681 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1682 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1683 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1684 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1685 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1686
1687 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1688 return I1->getOperand(0) == I2->getOperand(1) &&
1689 I1->getOperand(1) == I2->getOperand(0) &&
1690 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1691 }
1692
1693 return false;
1694}
1695
1696/// If the target supports conditional faulting,
1697/// we look for the following pattern:
1698/// \code
1699/// BB:
1700/// ...
1701/// %cond = icmp ult %x, %y
1702/// br i1 %cond, label %TrueBB, label %FalseBB
1703/// FalseBB:
1704/// store i32 1, ptr %q, align 4
1705/// ...
1706/// TrueBB:
1707/// %maskedloadstore = load i32, ptr %b, align 4
1708/// store i32 %maskedloadstore, ptr %p, align 4
1709/// ...
1710/// \endcode
1711///
1712/// and transform it into:
1713///
1714/// \code
1715/// BB:
1716/// ...
1717/// %cond = icmp ult %x, %y
1718/// %maskedloadstore = cload i32, ptr %b, %cond
1719/// cstore i32 %maskedloadstore, ptr %p, %cond
1720/// cstore i32 1, ptr %q, ~%cond
1721/// br i1 %cond, label %TrueBB, label %FalseBB
1722/// FalseBB:
1723/// ...
1724/// TrueBB:
1725/// ...
1726/// \endcode
1727///
1728/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1729/// e.g.
1730///
1731/// \code
1732/// %vcond = bitcast i1 %cond to <1 x i1>
1733/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1734/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1735/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1736/// call void @llvm.masked.store.v1i32.p0
1737/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1738/// %cond.not = xor i1 %cond, true
1739/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1740/// call void @llvm.masked.store.v1i32.p0
1741/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1742/// \endcode
1743///
1744/// So we need to turn hoisted load/store into cload/cstore.
1745///
1746/// \param BI The branch instruction.
1747/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1748/// will be speculated.
1749/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1751 BranchInst *BI,
1752 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1753 std::optional<bool> Invert, Instruction *Sel) {
1754 auto &Context = BI->getParent()->getContext();
 // All masks are <1 x i1>: the scalar branch condition bitcast to a
 // one-element vector so it can drive the masked load/store intrinsics.
1755 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1756 auto *Cond = BI->getOperand(0);
1757 // Construct the condition if needed.
1758 BasicBlock *BB = BI->getParent();
1759 Value *Mask = nullptr;
1760 Value *MaskFalse = nullptr;
1761 Value *MaskTrue = nullptr;
 // Triangle CFG (Invert has a value): one mask, possibly the negated
 // condition, guards every speculated access. Otherwise (no Invert) both
 // successors contribute accesses, so build one mask per arm and select the
 // right one per instruction below.
1762 if (Invert.has_value()) {
1763 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1764 Mask = Builder.CreateBitCast(
1765 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1766 VCondTy);
1767 } else {
1768 IRBuilder<> Builder(BI);
1769 MaskFalse = Builder.CreateBitCast(
1770 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1771 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1772 }
 // Strip any chain of bitcasts to reach the underlying value.
1773 auto PeekThroughBitcasts = [](Value *V) {
1774 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1775 V = BitCast->getOperand(0);
1776 return V;
1777 };
1778 for (auto *I : SpeculatedConditionalLoadsStores) {
1779 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1780 if (!Invert.has_value())
1781 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1782 // We currently assume conditional faulting load/store is supported for
1783 // scalar types only when creating new instructions. This can be easily
1784 // extended for vector types in the future.
1785 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1786 auto *Op0 = I->getOperand(0);
1787 CallInst *MaskedLoadStore = nullptr;
1788 if (auto *LI = dyn_cast<LoadInst>(I)) {
1789 // Handle Load.
1790 auto *Ty = I->getType();
1791 PHINode *PN = nullptr;
1792 Value *PassThru = nullptr;
 // In a triangle, a PHI in a successor merges the loaded value with the
 // value arriving from the other arm; that incoming value becomes the
 // masked-load passthrough (the result when the mask is false).
1793 if (Invert.has_value())
1794 for (User *U : I->users()) {
1795 if ((PN = dyn_cast<PHINode>(U))) {
1796 PassThru = Builder.CreateBitCast(
1797 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1798 FixedVectorType::get(Ty, 1));
1799 } else if (auto *Ins = cast<Instruction>(U);
1800 Sel && Ins->getParent() == BB) {
1801 // This happens when store or/and a speculative instruction between
1802 // load and store were hoisted to the BB. Make sure the masked load
1803 // inserted before its use.
1804 // We assume there's one of such use.
1805 Builder.SetInsertPoint(Ins);
1806 }
1807 }
1808 MaskedLoadStore = Builder.CreateMaskedLoad(
1809 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1810 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1811 if (PN)
1812 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1813 I->replaceAllUsesWith(NewLoadStore);
1814 } else {
1815 // Handle Store.
1816 auto *StoredVal = Builder.CreateBitCast(
1817 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1818 MaskedLoadStore = Builder.CreateMaskedStore(
1819 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1820 }
1821 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1822 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1823 //
1824 // !nonnull, !align : Not support pointer type, no need to keep.
1825 // !range: Load type is changed from scalar to vector, but the metadata on
1826 // vector specifies a per-element range, so the semantics stay the
1827 // same. Keep it.
1828 // !annotation: Not impact semantics. Keep it.
1829 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1830 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges))
1831 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1832 // FIXME: DIAssignID is not supported for masked store yet.
1833 // (Verifier::visitDIAssignIDMetadata)
1835 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1836 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1837 });
1838 MaskedLoadStore->copyMetadata(*I);
1839 I->eraseFromParent();
1840 }
1841}
1842
// Decides whether a simple (non-volatile, non-atomic) load or store may be
// speculated as a conditional-faulting (masked) access: the corresponding
// HoistLoadsWithCondFaulting / HoistStoresWithCondFaulting option must be
// enabled and the target must report support for the access type via TTI.
1844 const TargetTransformInfo &TTI) {
1845 // Not handle volatile or atomic.
1846 bool IsStore = false;
1847 if (auto *L = dyn_cast<LoadInst>(I)) {
1848 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1849 return false;
1850 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1851 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1852 return false;
1853 IsStore = true;
1854 } else
 // Only plain loads and stores qualify.
1855 return false;
1856
1857 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1858 // That's why we have the alignment limitation.
1859 // FIXME: Update the prototype of the intrinsics?
1860 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1862}
1863
1864/// Hoist any common code in the successor blocks up into the block. This
1865/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1866/// given, only perform hoisting in case all successors blocks contain matching
1867/// instructions only. In that case, all instructions can be hoisted and the
1868/// original branch will be replaced and selects for PHIs are added.
1869bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1870 bool AllInstsEqOnly) {
1871 // This does very trivial matching, with limited scanning, to find identical
1872 // instructions in the two blocks. In particular, we don't want to get into
1873 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1874 // such, we currently just scan for obviously identical instructions in an
1875 // identical order, possibly separated by the same number of non-identical
1876 // instructions.
1877 BasicBlock *BB = TI->getParent();
1878 unsigned int SuccSize = succ_size(BB);
1879 if (SuccSize < 2)
1880 return false;
1881
1882 // If either of the blocks has it's address taken, then we can't do this fold,
1883 // because the code we'd hoist would no longer run when we jump into the block
1884 // by it's address.
1885 for (auto *Succ : successors(BB))
1886 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1887 return false;
1888
1889 // The second of pair is a SkipFlags bitmask.
1890 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1891 SmallVector<SuccIterPair, 8> SuccIterPairs;
1892 for (auto *Succ : successors(BB)) {
1893 BasicBlock::iterator SuccItr = Succ->begin();
 // Successor blocks starting with a PHI are not handled here.
1894 if (isa<PHINode>(*SuccItr))
1895 return false;
1896 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1897 }
1898
1899 if (AllInstsEqOnly) {
1900 // Check if all instructions in the successor blocks match. This allows
1901 // hoisting all instructions and removing the blocks we are hoisting from,
1902 // so does not add any new instructions.
1904 // Check if sizes and terminators of all successors match.
1905 bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1906 Instruction *Term0 = Succs[0]->getTerminator();
1907 Instruction *Term = Succ->getTerminator();
1908 return !Term->isSameOperationAs(Term0) ||
1909 !equal(Term->operands(), Term0->operands()) ||
1910 Succs[0]->size() != Succ->size();
1911 });
1912 if (!AllSame)
1913 return false;
1914 if (AllSame) {
 // Walk all successors bottom-up in lockstep and require every row of
 // instructions to be pairwise identical (up to commutativity).
1915 LockstepReverseIterator<true> LRI(Succs);
1916 while (LRI.isValid()) {
1917 Instruction *I0 = (*LRI)[0];
1918 if (any_of(*LRI, [I0](Instruction *I) {
1919 return !areIdenticalUpToCommutativity(I0, I);
1920 })) {
1921 return false;
1922 }
1923 --LRI;
1924 }
1925 }
1926 // Now we know that all instructions in all successors can be hoisted. Let
1927 // the loop below handle the hoisting.
1928 }
1929
1930 // Count how many instructions were not hoisted so far. There's a limit on how
1931 // many instructions we skip, serving as a compilation time control as well as
1932 // preventing excessive increase of life ranges.
1933 unsigned NumSkipped = 0;
1934 // If we find an unreachable instruction at the beginning of a basic block, we
1935 // can still hoist instructions from the rest of the basic blocks.
1936 if (SuccIterPairs.size() > 2) {
1937 erase_if(SuccIterPairs,
1938 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1939 if (SuccIterPairs.size() < 2)
1940 return false;
1941 }
1942
1943 bool Changed = false;
1944
 // Lockstep scan: repeatedly look at the next instruction of every successor.
 // Identical, safe rows are hoisted before TI; non-identical rows are skipped
 // (recording reordering hazards in the SkipFlags) until the skip limit or a
 // terminator row ends the scan.
1945 for (;;) {
1946 auto *SuccIterPairBegin = SuccIterPairs.begin();
1947 auto &BB1ItrPair = *SuccIterPairBegin++;
1948 auto OtherSuccIterPairRange =
1949 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1950 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1951
1952 Instruction *I1 = &*BB1ItrPair.first;
1953
1954 bool AllInstsAreIdentical = true;
1955 bool HasTerminator = I1->isTerminator();
1956 for (auto &SuccIter : OtherSuccIterRange) {
1957 Instruction *I2 = &*SuccIter;
1958 HasTerminator |= I2->isTerminator();
1959 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1960 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1961 AllInstsAreIdentical = false;
1962 }
1963
1964 SmallVector<Instruction *, 8> OtherInsts;
1965 for (auto &SuccIter : OtherSuccIterRange)
1966 OtherInsts.push_back(&*SuccIter);
1967
1968 // If we are hoisting the terminator instruction, don't move one (making a
1969 // broken BB), instead clone it, and remove BI.
1970 if (HasTerminator) {
1971 // Even if BB, which contains only one unreachable instruction, is ignored
1972 // at the beginning of the loop, we can hoist the terminator instruction.
1973 // If any instructions remain in the block, we cannot hoist terminators.
1974 if (NumSkipped || !AllInstsAreIdentical) {
1975 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1976 return Changed;
1977 }
1978
1979 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1980 Changed;
1981 }
1982
1983 if (AllInstsAreIdentical) {
1984 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1985 AllInstsAreIdentical =
1986 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1987 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1988 Instruction *I2 = &*Pair.first;
1989 unsigned SkipFlagsBB2 = Pair.second;
1990 // Even if the instructions are identical, it may not
1991 // be safe to hoist them if we have skipped over
1992 // instructions with side effects or their operands
1993 // weren't hoisted.
1994 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1996 });
1997 }
1998
1999 if (AllInstsAreIdentical) {
2000 BB1ItrPair.first++;
2001 // For a normal instruction, we just move one to right before the
2002 // branch, then replace all uses of the other with the first. Finally,
2003 // we remove the now redundant second instruction.
2004 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2005 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2006 // and leave any that were not hoisted behind (by calling moveBefore
2007 // rather than moveBeforePreserving).
2008 I1->moveBefore(TI->getIterator());
2009 for (auto &SuccIter : OtherSuccIterRange) {
2010 Instruction *I2 = &*SuccIter++;
2011 assert(I2 != I1);
2012 if (!I2->use_empty())
2013 I2->replaceAllUsesWith(I1);
 // Merge IR flags, callsite attributes, metadata and debug locations so
 // the surviving instruction conservatively covers all originals.
2014 I1->andIRFlags(I2);
2015 if (auto *CB = dyn_cast<CallBase>(I1)) {
2016 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2017 assert(Success && "We should not be trying to hoist callbases "
2018 "with non-intersectable attributes");
2019 // For NDEBUG Compile.
2020 (void)Success;
2021 }
2022
2023 combineMetadataForCSE(I1, I2, true);
2024 // I1 and I2 are being combined into a single instruction. Its debug
2025 // location is the merged locations of the original instructions.
2026 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2027 I2->eraseFromParent();
2028 }
2029 if (!Changed)
2030 NumHoistCommonCode += SuccIterPairs.size();
2031 Changed = true;
2032 NumHoistCommonInstrs += SuccIterPairs.size();
2033 } else {
2034 if (NumSkipped >= HoistCommonSkipLimit) {
2035 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2036 return Changed;
2037 }
2038 // We are about to skip over a pair of non-identical instructions. Record
2039 // if any have characteristics that would prevent reordering instructions
2040 // across them.
2041 for (auto &SuccIterPair : SuccIterPairs) {
2042 Instruction *I = &*SuccIterPair.first++;
2043 SuccIterPair.second |= skippedInstrFlags(I);
2044 }
2045 ++NumSkipped;
2046 }
2047 }
2048}
2049
// Hoist the identical terminator I1 (together with the terminators in
// OtherSuccTIs) from TI's successors up into TI's block: the terminator is
// cloned before TI, and — in the conditional-branch case — PHI inputs in the
// successors' successors that differ between the two arms are merged with
// newly inserted selects on the branch condition. DomTree updates are queued
// through DTU when available.
2050bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2051 Instruction *TI, Instruction *I1,
2052 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2053
2054 auto *BI = dyn_cast<BranchInst>(TI);
2055
2056 bool Changed = false;
2057 BasicBlock *TIParent = TI->getParent();
2058 BasicBlock *BB1 = I1->getParent();
2059
2060 // Use only for an if statement.
2061 auto *I2 = *OtherSuccTIs.begin();
2062 auto *BB2 = I2->getParent();
2063 if (BI) {
2064 assert(OtherSuccTIs.size() == 1);
2065 assert(BI->getSuccessor(0) == I1->getParent());
2066 assert(BI->getSuccessor(1) == I2->getParent());
2067 }
2068
2069 // In the case of an if statement, we try to hoist an invoke.
2070 // FIXME: Can we define a safety predicate for CallBr?
2071 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2072 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2073 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2074 return false;
2075
2076 // TODO: callbr hoisting currently disabled pending further study.
2077 if (isa<CallBrInst>(I1))
2078 return false;
2079
 // Pre-check every PHI in the terminators' successors: if the incoming values
 // from BB1/BB2 differ, we must be able to merge them with a select below.
2080 for (BasicBlock *Succ : successors(BB1)) {
2081 for (PHINode &PN : Succ->phis()) {
2082 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2083 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2084 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2085 if (BB1V == BB2V)
2086 continue;
2087
2088 // In the case of an if statement, check for
2089 // passingValueIsAlwaysUndefined here because we would rather eliminate
2090 // undefined control flow then converting it to a select.
2091 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2093 return false;
2094 }
2095 }
2096 }
2097
2098 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2099 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2100 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2101 // Clone the terminator and hoist it into the pred, without any debug info.
2102 Instruction *NT = I1->clone();
2103 NT->insertInto(TIParent, TI->getIterator());
2104 if (!NT->getType()->isVoidTy()) {
 // Redirect value uses of all the original terminators to the clone.
2105 I1->replaceAllUsesWith(NT);
2106 for (Instruction *OtherSuccTI : OtherSuccTIs)
2107 OtherSuccTI->replaceAllUsesWith(NT);
2108 NT->takeName(I1);
2109 }
2110 Changed = true;
2111 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2112
2113 // Ensure terminator gets a debug location, even an unknown one, in case
2114 // it involves inlinable calls.
2116 Locs.push_back(I1->getDebugLoc());
2117 for (auto *OtherSuccTI : OtherSuccTIs)
2118 Locs.push_back(OtherSuccTI->getDebugLoc());
2119 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2120
2121 // PHIs created below will adopt NT's merged DebugLoc.
2122 IRBuilder<NoFolder> Builder(NT);
2123
2124 // In the case of an if statement, hoisting one of the terminators from our
2125 // successor is a great thing. Unfortunately, the successors of the if/else
2126 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2127 // must agree for all PHI nodes, so we insert select instruction to compute
2128 // the final result.
2129 if (BI) {
 // Cache one select per distinct (BB1V, BB2V) pair so repeated PHI pairs
 // reuse the same select instruction.
2130 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2131 for (BasicBlock *Succ : successors(BB1)) {
2132 for (PHINode &PN : Succ->phis()) {
2133 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2134 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2135 if (BB1V == BB2V)
2136 continue;
2137
2138 // These values do not agree. Insert a select instruction before NT
2139 // that determines the right value.
2140 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2141 if (!SI) {
2142 // Propagate fast-math-flags from phi node to its replacement select.
2144 BI->getCondition(), BB1V, BB2V,
2145 isa<FPMathOperator>(PN) ? &PN : nullptr,
2146 BB1V->getName() + "." + BB2V->getName(), BI));
2147 }
2148
2149 // Make the PHI node use the select for all incoming values for BB1/BB2
2150 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2151 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2152 PN.setIncomingValue(i, SI);
2153 }
2154 }
2155 }
2156
2158
2159 // Update any PHI nodes in our new successors.
2160 for (BasicBlock *Succ : successors(BB1)) {
2161 addPredecessorToBlock(Succ, TIParent, BB1);
2162 if (DTU)
2163 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2164 }
2165
2166 if (DTU)
2167 for (BasicBlock *Succ : successors(TI))
2168 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2169
2171 if (DTU)
2172 DTU->applyUpdates(Updates);
2173 return Changed;
2174}
2175
// Returns whether it is cheap to replace operand OpIdx of I with a variable
// (e.g. a PHI of differing per-block values) when commoning instructions:
// false for the divisor operand of integer div/rem, and false for any operand
// of an intrinsic call.
2176// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2177// into variables.
2179 int OpIdx) {
2180 // Divide/Remainder by constant is typically much cheaper than by variable.
2181 if (I->isIntDivRem())
2182 return OpIdx != 1;
2183 return !isa<IntrinsicInst>(I);
2184}
2185
2186// All instructions in Insts belong to different blocks that all unconditionally
2187// branch to a common successor. Analyze each instruction and return true if it
2188// would be possible to sink them into their successor, creating one common
2189// instruction instead. For every value that would be required to be provided by
2190// PHI node (because an operand varies in each input block), add to PHIOperands.
2193 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2194 // Prune out obviously bad instructions to move. Each instruction must have
2195 // the same number of uses, and we check later that the uses are consistent.
2196 std::optional<unsigned> NumUses;
2197 for (auto *I : Insts) {
2198 // These instructions may change or break semantics if moved.
2199 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2200 I->getType()->isTokenTy())
2201 return false;
2202
2203 // Do not try to sink an instruction in an infinite loop - it can cause
2204 // this algorithm to infinite loop.
2205 if (I->getParent()->getSingleSuccessor() == I->getParent())
2206 return false;
2207
2208 // Conservatively return false if I is an inline-asm instruction. Sinking
2209 // and merging inline-asm instructions can potentially create arguments
2210 // that cannot satisfy the inline-asm constraints.
2211 // If the instruction has nomerge or convergent attribute, return false.
2212 if (const auto *C = dyn_cast<CallBase>(I))
2213 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2214 return false;
2215
 // All candidates must have the same number of uses; the use-consistency
 // check against PHIOperands below relies on this.
2216 if (!NumUses)
2217 NumUses = I->getNumUses();
2218 else if (NumUses != I->getNumUses())
2219 return false;
2220 }
2221
2222 const Instruction *I0 = Insts.front();
2223 const auto I0MMRA = MMRAMetadata(*I0);
2224 for (auto *I : Insts) {
2225 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2226 return false;
2227
2228 // Treat MMRAs conservatively. This pass can be quite aggressive and
2229 // could drop a lot of MMRAs otherwise.
2230 if (MMRAMetadata(*I) != I0MMRA)
2231 return false;
2232 }
2233
2234 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2235 // then the other phi operands must match the instructions from Insts. This
2236 // also has to hold true for any phi nodes that would be created as a result
2237 // of sinking. Both of these cases are represented by PhiOperands.
2238 for (const Use &U : I0->uses()) {
2239 auto It = PHIOperands.find(&U);
2240 if (It == PHIOperands.end())
2241 // There may be uses in other blocks when sinking into a loop header.
2242 return false;
2243 if (!equal(Insts, It->second))
2244 return false;
2245 }
2246
2247 // For calls to be sinkable, they must all be indirect, or have same callee.
2248 // I.e. if we have two direct calls to different callees, we don't want to
2249 // turn that into an indirect call. Likewise, if we have an indirect call,
2250 // and a direct call, we don't actually want to have a single indirect call.
2251 if (isa<CallBase>(I0)) {
2252 auto IsIndirectCall = [](const Instruction *I) {
2253 return cast<CallBase>(I)->isIndirectCall();
2254 };
2255 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2256 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2257 if (HaveIndirectCalls) {
2258 if (!AllCallsAreIndirect)
2259 return false;
2260 } else {
2261 // All callees must be identical.
2262 Value *Callee = nullptr;
2263 for (const Instruction *I : Insts) {
2264 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2265 if (!Callee)
2266 Callee = CurrCallee;
2267 else if (Callee != CurrCallee)
2268 return false;
2269 }
2270 }
2271 }
2272
 // Any operand that differs between the instructions will have to be fed by
 // a new PHI in the successor; record the per-block values in PHIOperands.
2273 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2274 Value *Op = I0->getOperand(OI);
2275 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2276 assert(I->getNumOperands() == I0->getNumOperands());
2277 return I->getOperand(OI) == I0->getOperand(OI);
2278 };
2279 if (!all_of(Insts, SameAsI0)) {
2282 // We can't create a PHI from this GEP.
2283 return false;
2284 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2285 for (auto *I : Insts)
2286 Ops.push_back(I->getOperand(OI));
2287 }
2288 }
2289 return true;
2290}
2291
2292// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2293// instruction of every block in Blocks to their common successor, commoning
2294// into one instruction.
 // The shared successor all blocks unconditionally branch to; the commoned
 // instruction ends up at its first insertion point.
2296 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2297
2298 // canSinkInstructions returning true guarantees that every block has at
2299 // least one non-terminator instruction.
2301 for (auto *BB : Blocks) {
2302 Instruction *I = BB->getTerminator();
2303 I = I->getPrevNode();
2304 Insts.push_back(I);
2305 }
2306
2307 // We don't need to do any more checking here; canSinkInstructions should
2308 // have done it all for us.
2309 SmallVector<Value*, 4> NewOperands;
2310 Instruction *I0 = Insts.front();
2311 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2312 // This check is different to that in canSinkInstructions. There, we
2313 // cared about the global view once simplifycfg (and instcombine) have
2314 // completed - it takes into account PHIs that become trivially
2315 // simplifiable. However here we need a more local view; if an operand
2316 // differs we create a PHI and rely on instcombine to clean up the very
2317 // small mess we may make.
2318 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2319 return I->getOperand(O) != I0->getOperand(O);
2320 });
2321 if (!NeedPHI) {
2322 NewOperands.push_back(I0->getOperand(O));
2323 continue;
2324 }
2325
2326 // Create a new PHI in the successor block and populate it.
2327 auto *Op = I0->getOperand(O);
2328 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2329 auto *PN =
2330 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2331 PN->insertBefore(BBEnd->begin());
2332 for (auto *I : Insts)
2333 PN->addIncoming(I->getOperand(O), I->getParent());
2334 NewOperands.push_back(PN);
2335 }
2336
2337 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2338 // and move it to the start of the successor block.
2339 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2340 I0->getOperandUse(O).set(NewOperands[O]);
2341
2342 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2343
2344 // Update metadata and IR flags, and merge debug locations.
2345 for (auto *I : Insts)
2346 if (I != I0) {
2347 // The debug location for the "common" instruction is the merged locations
2348 // of all the commoned instructions. We start with the original location
2349 // of the "common" instruction and iteratively merge each location in the
2350 // loop below.
2351 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2352 // However, as N-way merge for CallInst is rare, so we use simplified API
2353 // instead of using complex API for N-way merge.
2354 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2355 combineMetadataForCSE(I0, I, true);
2356 I0->andIRFlags(I);
2357 if (auto *CB = dyn_cast<CallBase>(I0)) {
2358 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2359 assert(Success && "We should not be trying to sink callbases "
2360 "with non-intersectable attributes");
2361 // For NDEBUG Compile.
2362 (void)Success;
2363 }
2364 }
2365
2366 for (User *U : make_early_inc_range(I0->users())) {
2367 // canSinkLastInstruction checked that all instructions are only used by
2368 // phi nodes in a way that allows replacing the phi node with the common
2369 // instruction.
2370 auto *PN = cast<PHINode>(U);
2371 PN->replaceAllUsesWith(I0);
2372 PN->eraseFromParent();
2373 }
2374
2375 // Finally nuke all instructions apart from the common instruction.
2376 for (auto *I : Insts) {
2377 if (I == I0)
2378 continue;
2379 // The remaining uses are debug users, replace those with the common inst.
2380 // In most (all?) cases this just introduces a use-before-def.
2381 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2382 I->replaceAllUsesWith(I0);
2383 I->eraseFromParent();
2384 }
2385}
2386
2387/// Check whether BB's predecessors end with unconditional branches. If it is
2388/// true, sink any common code from the predecessors to BB.
2390 DomTreeUpdater *DTU) {
2391 // We support two situations:
2392 // (1) all incoming arcs are unconditional
2393 // (2) there are non-unconditional incoming arcs
2394 //
2395 // (2) is very common in switch defaults and
2396 // else-if patterns;
2397 //
2398 // if (a) f(1);
2399 // else if (b) f(2);
2400 //
2401 // produces:
2402 //
2403 // [if]
2404 // / \
2405 // [f(1)] [if]
2406 // | | \
2407 // | | |
2408 // | [f(2)]|
2409 // \ | /
2410 // [ end ]
2411 //
2412 // [end] has two unconditional predecessor arcs and one conditional. The
2413 // conditional refers to the implicit empty 'else' arc. This conditional
2414 // arc can also be caused by an empty default block in a switch.
2415 //
2416 // In this case, we attempt to sink code from all *unconditional* arcs.
2417 // If we can sink instructions from these arcs (determined during the scan
2418 // phase below) we insert a common successor for all unconditional arcs and
2419 // connect that to [end], to enable sinking:
2420 //
2421 // [if]
2422 // / \
2423 // [x(1)] [if]
2424 // | | \
2425 // | | \
2426 // | [x(2)] |
2427 // \ / |
2428 // [sink.split] |
2429 // \ /
2430 // [ end ]
2431 //
2432 SmallVector<BasicBlock*,4> UnconditionalPreds;
2433 bool HaveNonUnconditionalPredecessors = false;
2434 for (auto *PredBB : predecessors(BB)) {
2435 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2436 if (PredBr && PredBr->isUnconditional())
2437 UnconditionalPreds.push_back(PredBB);
2438 else
2439 HaveNonUnconditionalPredecessors = true;
2440 }
 // Sinking only pays off with at least two blocks to common.
2441 if (UnconditionalPreds.size() < 2)
2442 return false;
2443
2444 // We take a two-step approach to tail sinking. First we scan from the end of
2445 // each block upwards in lockstep. If the n'th instruction from the end of each
2446 // block can be sunk, those instructions are added to ValuesToSink and we
2447 // carry on. If we can sink an instruction but need to PHI-merge some operands
2448 // (because they're not identical in each instruction) we add these to
2449 // PHIOperands.
2450 // We prepopulate PHIOperands with the phis that already exist in BB.
2452 for (PHINode &PN : BB->phis()) {
2454 for (const Use &U : PN.incoming_values())
2455 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2456 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2457 for (BasicBlock *Pred : UnconditionalPreds)
2458 Ops.push_back(*IncomingVals[Pred]);
2459 }
2460
 // Lockstep backwards scan over all unconditional predecessors; ScanIdx
 // counts how many trailing instruction groups are legal to sink.
2461 int ScanIdx = 0;
2462 SmallPtrSet<Value*,4> InstructionsToSink;
2463 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2464 while (LRI.isValid() &&
2465 canSinkInstructions(*LRI, PHIOperands)) {
2466 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2467 << "\n");
2468 InstructionsToSink.insert_range(*LRI);
2469 ++ScanIdx;
2470 --LRI;
2471 }
2472
2473 // If no instructions can be sunk, early-return.
2474 if (ScanIdx == 0)
2475 return false;
2476
2477 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2478
2479 if (!followedByDeoptOrUnreachable) {
2480 // Check whether this is the pointer operand of a load/store.
2481 auto IsMemOperand = [](Use &U) {
2482 auto *I = cast<Instruction>(U.getUser());
2483 if (isa<LoadInst>(I))
2484 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2485 if (isa<StoreInst>(I))
2486 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2487 return false;
2488 };
2489
2490 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2491 // actually sink before encountering instruction that is unprofitable to
2492 // sink?
2493 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2494 unsigned NumPHIInsts = 0;
2495 for (Use &U : (*LRI)[0]->operands()) {
2496 auto It = PHIOperands.find(&U);
2497 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2498 return InstructionsToSink.contains(V);
2499 })) {
2500 ++NumPHIInsts;
2501 // Do not separate a load/store from the gep producing the address.
2502 // The gep can likely be folded into the load/store as an addressing
2503 // mode. Additionally, a load of a gep is easier to analyze than a
2504 // load of a phi.
2505 if (IsMemOperand(U) &&
2506 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2507 return false;
2508 // FIXME: this check is overly optimistic. We may end up not sinking
2509 // said instruction, due to the very same profitability check.
2510 // See @creating_too_many_phis in sink-common-code.ll.
2511 }
2512 }
2513 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2514 return NumPHIInsts <= 1;
2515 };
2516
2517 // We've determined that we are going to sink last ScanIdx instructions,
2518 // and recorded them in InstructionsToSink. Now, some instructions may be
2519 // unprofitable to sink. But that determination depends on the instructions
2520 // that we are going to sink.
2521
2522 // First, forward scan: find the first instruction unprofitable to sink,
2523 // recording all the ones that are profitable to sink.
2524 // FIXME: would it be better, after we detect that not all are profitable.
2525 // to either record the profitable ones, or erase the unprofitable ones?
2526 // Maybe we need to choose (at runtime) the one that will touch least
2527 // instrs?
2528 LRI.reset();
2529 int Idx = 0;
2530 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2531 while (Idx < ScanIdx) {
2532 if (!ProfitableToSinkInstruction(LRI)) {
2533 // Too many PHIs would be created.
2534 LLVM_DEBUG(
2535 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2536 break;
2537 }
2538 InstructionsProfitableToSink.insert_range(*LRI);
2539 --LRI;
2540 ++Idx;
2541 }
2542
2543 // If no instructions can be sunk, early-return.
2544 if (Idx == 0)
2545 return false;
2546
2547 // Did we determine that (only) some instructions are unprofitable to sink?
2548 if (Idx < ScanIdx) {
2549 // Okay, some instructions are unprofitable.
2550 ScanIdx = Idx;
2551 InstructionsToSink = InstructionsProfitableToSink;
2552
2553 // But, that may make other instructions unprofitable, too.
2554 // So, do a backward scan, do any earlier instructions become
2555 // unprofitable?
2556 assert(
2557 !ProfitableToSinkInstruction(LRI) &&
2558 "We already know that the last instruction is unprofitable to sink");
2559 ++LRI;
2560 --Idx;
2561 while (Idx >= 0) {
2562 // If we detect that an instruction becomes unprofitable to sink,
2563 // all earlier instructions won't be sunk either,
2564 // so preemptively keep InstructionsProfitableToSink in sync.
2565 // FIXME: is this the most performant approach?
2566 for (auto *I : *LRI)
2567 InstructionsProfitableToSink.erase(I);
2568 if (!ProfitableToSinkInstruction(LRI)) {
2569 // Everything starting with this instruction won't be sunk.
2570 ScanIdx = Idx;
2571 InstructionsToSink = InstructionsProfitableToSink;
2572 }
2573 ++LRI;
2574 --Idx;
2575 }
2576 }
2577
2578 // If no instructions can be sunk, early-return.
2579 if (ScanIdx == 0)
2580 return false;
2581 }
2582
2583 bool Changed = false;
2584
2585 if (HaveNonUnconditionalPredecessors) {
2586 if (!followedByDeoptOrUnreachable) {
2587 // It is always legal to sink common instructions from unconditional
2588 // predecessors. However, if not all predecessors are unconditional,
2589 // this transformation might be pessimizing. So as a rule of thumb,
2590 // don't do it unless we'd sink at least one non-speculatable instruction.
2591 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2592 LRI.reset();
2593 int Idx = 0;
2594 bool Profitable = false;
2595 while (Idx < ScanIdx) {
2596 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2597 Profitable = true;
2598 break;
2599 }
2600 --LRI;
2601 ++Idx;
2602 }
2603 if (!Profitable)
2604 return false;
2605 }
2606
2607 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2608 // We have a conditional edge and we're going to sink some instructions.
2609 // Insert a new block postdominating all blocks we're going to sink from.
2610 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2611 // Edges couldn't be split.
2612 return false;
2613 Changed = true;
2614 }
2615
2616 // Now that we've analyzed all potential sinking candidates, perform the
2617 // actual sink. We iteratively sink the last non-terminator of the source
2618 // blocks into their common successor unless doing so would require too
2619 // many PHI instructions to be generated (currently only one PHI is allowed
2620 // per sunk instruction).
2621 //
2622 // We can use InstructionsToSink to discount values needing PHI-merging that will
2623 // actually be sunk in a later iteration. This allows us to be more
2624 // aggressive in what we sink. This does allow a false positive where we
2625 // sink presuming a later value will also be sunk, but stop half way through
2626 // and never actually sink it which means we produce more PHIs than intended.
2627 // This is unlikely in practice though.
2628 int SinkIdx = 0;
2629 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2630 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2631 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2632 << "\n");
2633
2634 // Because we've sunk every instruction in turn, the current instruction to
2635 // sink is always at index 0.
2636 LRI.reset();
2637
2638 sinkLastInstruction(UnconditionalPreds);
2639 NumSinkCommonInstrs++;
2640 Changed = true;
2641 }
2642 if (SinkIdx != 0)
2643 ++NumSinkCommonCode;
2644 return Changed;
2645}
2646
2647namespace {
2648
// Groups `invoke` instructions reaching a common landingpad into buckets of
// mutually-mergeable invokes, as decided by shouldBelongToSameSet().
2649 struct CompatibleSets {
2650 using SetTy = SmallVector<InvokeInst *, 2>;
2651
// NOTE(review): the declaration of the set storage (iterated as `Sets` by
// getCompatibleSet() and mergeCompatibleInvokes()) appears on the elided
// line below — confirm against the full source.
2653
// Decide whether the given invokes (always exactly two) may be merged.
2654 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2655
// Return the existing set this invoke is compatible with, or a new one.
2656 SetTy &getCompatibleSet(InvokeInst *II);
2657
// Append the invoke to its compatible set.
2658 void insert(InvokeInst *II);
2659};
2660
2661CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2662 // Perform a linear scan over all the existing sets, see if the new `invoke`
2663 // is compatible with any particular set. Since we know that all the `invokes`
2664 // within a set are compatible, only check the first `invoke` in each set.
2665 // WARNING: at worst, this has quadratic complexity.
2666 for (CompatibleSets::SetTy &Set : Sets) {
2667 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2668 return Set;
2669 }
2670
2671 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2672 return Sets.emplace_back();
2673}
2674
2675void CompatibleSets::insert(InvokeInst *II) {
2676 getCompatibleSet(II).emplace_back(II);
2677}
2678
// Decide whether two `invoke`s (always exactly two candidates) are similar
// enough to be merged into a single `invoke`: same (or PHI-able) callee,
// matching normal/unwind destinations, compatible incoming PHI values in
// those destinations, identical operation modulo arguments.
2679bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2680 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2681
2682 // Can we theoretically merge these `invoke`s?
2683 auto IsIllegalToMerge = [](InvokeInst *II) {
2684 return II->cannotMerge() || II->isInlineAsm();
2685 };
2686 if (any_of(Invokes, IsIllegalToMerge))
2687 return false;
2688
2689 // Either both `invoke`s must be direct,
2690 // or both `invoke`s must be indirect.
2691 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2692 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2693 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2694 if (HaveIndirectCalls) {
2695 if (!AllCallsAreIndirect)
2696 return false;
2697 } else {
2698 // All callees must be identical.
2699 Value *Callee = nullptr;
2700 for (InvokeInst *II : Invokes) {
2701 Value *CurrCallee = II->getCalledOperand();
2702 assert(CurrCallee && "There is always a called operand.");
2703 if (!Callee)
2704 Callee = CurrCallee;
2705 else if (Callee != CurrCallee)
2706 return false;
2707 }
2708 }
2709
2710 // Either both `invoke`s must not have a normal destination,
2711 // or both `invoke`s must have a normal destination,
2712 auto HasNormalDest = [](InvokeInst *II) {
2713 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2714 };
2715 if (any_of(Invokes, HasNormalDest)) {
2716 // Do not merge `invoke` that does not have a normal destination with one
2717 // that does have a normal destination, even though doing so would be legal.
2718 if (!all_of(Invokes, HasNormalDest))
2719 return false;
2720
2721 // All normal destinations must be identical.
2722 BasicBlock *NormalBB = nullptr;
2723 for (InvokeInst *II : Invokes) {
2724 BasicBlock *CurrNormalBB = II->getNormalDest();
2725 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2726 if (!NormalBB)
2727 NormalBB = CurrNormalBB;
2728 else if (NormalBB != CurrNormalBB)
2729 return false;
2730 }
2731
2732 // In the normal destination, the incoming values for these two `invoke`s
2733 // must be compatible.
2734 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
// NOTE(review): the call expression opening this `if` (presumably a PHI
// incoming-value compatibility check on NormalBB) is on an elided line here.
2736 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2737 &EquivalenceSet))
2738 return false;
2739 }
2740
2741#ifndef NDEBUG
2742 // All unwind destinations must be identical.
2743 // We know that because we have started from said unwind destination.
2744 BasicBlock *UnwindBB = nullptr;
2745 for (InvokeInst *II : Invokes) {
2746 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2747 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2748 if (!UnwindBB)
2749 UnwindBB = CurrUnwindBB;
2750 else
2751 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2752 }
2753#endif
2754
2755 // In the unwind destination, the incoming values for these two `invoke`s
2756 // must be compatible.
// NOTE(review): the call expression opening this `if` (the same
// incoming-value compatibility check, on the unwind destination) is elided.
2758 Invokes.front()->getUnwindDest(),
2759 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2760 return false;
2761
2762 // Ignoring arguments, these `invoke`s must be identical,
2763 // including operand bundles.
2764 const InvokeInst *II0 = Invokes.front();
2765 for (auto *II : Invokes.drop_front())
2766 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2767 return false;
2768
2769 // Can we theoretically form the data operands for the merged `invoke`?
2770 auto IsIllegalToMergeArguments = [](auto Ops) {
2771 Use &U0 = std::get<0>(Ops);
2772 Use &U1 = std::get<1>(Ops);
2773 if (U0 == U1)
2774 return false;
// NOTE(review): the `return` expression for differing operands (a check that
// the operand can be replaced with a variable/PHI) is on an elided line.
2776 U0.getOperandNo());
2777 };
2778 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2779 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2780 IsIllegalToMergeArguments))
2781 return false;
2782
2783 return true;
2784}
2785
2786} // namespace
2787
2788// Merge all invokes in the provided set, all of which are compatible
2789// as per the `CompatibleSets::shouldBelongToSameSet()`.
// NOTE(review): the first line of this function's signature (taking the set
// of invokes) is elided; only the trailing DTU parameter is visible.
2791 DomTreeUpdater *DTU) {
2792 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2793
// NOTE(review): the declaration of `Updates` (a DomTree update list) is on
// the elided line here; it is reserved and applied below.
2795 if (DTU)
2796 Updates.reserve(2 + 3 * Invokes.size());
2797
2798 bool HasNormalDest =
2799 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2800
2801 // Clone one of the invokes into a new basic block.
2802 // Since they are all compatible, it doesn't matter which invoke is cloned.
2803 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2804 InvokeInst *II0 = Invokes.front();
2805 BasicBlock *II0BB = II0->getParent();
2806 BasicBlock *InsertBeforeBlock =
2807 II0->getParent()->getIterator()->getNextNode();
2808 Function *Func = II0BB->getParent();
2809 LLVMContext &Ctx = II0->getContext();
2810
2811 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2812 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2813
2814 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2815 // NOTE: all invokes have the same attributes, so no handling needed.
2816 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2817
2818 if (!HasNormalDest) {
2819 // This set does not have a normal destination,
2820 // so just form a new block with unreachable terminator.
2821 BasicBlock *MergedNormalDest = BasicBlock::Create(
2822 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2823 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2824 UI->setDebugLoc(DebugLoc::getTemporary());
2825 MergedInvoke->setNormalDest(MergedNormalDest);
2826 }
2827
2828 // The unwind destination, however, remains identical for all invokes here.
2829
2830 return MergedInvoke;
2831 }();
2832
2833 if (DTU) {
2834 // Predecessor blocks that contained these invokes will now branch to
2835 // the new block that contains the merged invoke, ...
2836 for (InvokeInst *II : Invokes)
2837 Updates.push_back(
2838 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2839
2840 // ... which has the new `unreachable` block as normal destination,
2841 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2842 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2843 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2844 SuccBBOfMergedInvoke});
2845
2846 // Since predecessor blocks now unconditionally branch to a new block,
2847 // they no longer branch to their original successors.
2848 for (InvokeInst *II : Invokes)
2849 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2850 Updates.push_back(
2851 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2852 }
2853
2854 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2855
2856 // Form the merged operands for the merged invoke.
2857 for (Use &U : MergedInvoke->operands()) {
2858 // Only PHI together the indirect callees and data operands.
2859 if (MergedInvoke->isCallee(&U)) {
2860 if (!IsIndirectCall)
2861 continue;
2862 } else if (!MergedInvoke->isDataOperand(&U))
2863 continue;
2864
2865 // Don't create trivial PHI's with all-identical incoming values.
2866 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2867 return II->getOperand(U.getOperandNo()) != U.get();
2868 });
2869 if (!NeedPHI)
2870 continue;
2871
2872 // Form a PHI out of all the data ops under this index.
// NOTE(review): the PHI-node creation (`PN`) starts on the elided line here;
// the visible arguments are its type, reserved size, and insert position.
2874 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2875 for (InvokeInst *II : Invokes)
2876 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2877
2878 U.set(PN);
2879 }
2880
2881 // We've ensured that each PHI node has compatible (identical) incoming values
2882 // when coming from each of the `invoke`s in the current merge set,
2883 // so update the PHI nodes accordingly.
2884 for (BasicBlock *Succ : successors(MergedInvoke))
2885 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2886 /*ExistPred=*/Invokes.front()->getParent());
2887
2888 // And finally, replace the original `invoke`s with an unconditional branch
2889 // to the block with the merged `invoke`. Also, give that merged `invoke`
2890 // the merged debugloc of all the original `invoke`s.
2891 DILocation *MergedDebugLoc = nullptr;
2892 for (InvokeInst *II : Invokes) {
2893 // Compute the debug location common to all the original `invoke`s.
2894 if (!MergedDebugLoc)
2895 MergedDebugLoc = II->getDebugLoc();
2896 else
2897 MergedDebugLoc =
2898 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2899
2900 // And replace the old `invoke` with an unconditional branch
2901 // to the block with the merged `invoke`.
2902 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2903 OrigSuccBB->removePredecessor(II->getParent());
2904 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2905 // The unconditional branch is part of the replacement for the original
2906 // invoke, so should use its DebugLoc.
2907 BI->setDebugLoc(II->getDebugLoc());
2908 bool Success = MergedInvoke->tryIntersectAttributes(II);
2909 assert(Success && "Merged invokes with incompatible attributes");
2910 // For NDEBUG Compile
2911 (void)Success;
2912 II->replaceAllUsesWith(MergedInvoke);
2913 II->eraseFromParent();
2914 ++NumInvokesMerged;
2915 }
2916 MergedInvoke->setDebugLoc(MergedDebugLoc);
2917 ++NumInvokeSetsFormed;
2918
2919 if (DTU)
2920 DTU->applyUpdates(Updates);
2921}
2922
2923/// If this block is a `landingpad` exception handling block, categorize all
2924/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2925/// being "mergeable" together, and then merge invokes in each set together.
2926///
2927/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2928/// [...] [...]
2929/// | |
2930/// [invoke0] [invoke1]
2931/// / \ / \
2932/// [cont0] [landingpad] [cont1]
2933/// to:
2934/// [...] [...]
2935/// \ /
2936/// [invoke]
2937/// / \
2938/// [cont] [landingpad]
2939///
2940/// But of course we can only do that if the invokes share the `landingpad`,
2941/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2942/// and the invoked functions are "compatible".
// NOTE(review): this function's signature and an initial early-exit guard
// (ending in the `return false` below) are on elided lines; the visible body
// groups a landingpad's invoke predecessors and merges each group.
2945 return false;
2946
2947 bool Changed = false;
2948
2949 // FIXME: generalize to all exception handling blocks?
2950 if (!BB->isLandingPad())
2951 return Changed;
2952
2953 CompatibleSets Grouper;
2954
2955 // Record all the predecessors of this `landingpad`. As per verifier,
2956 // the only allowed predecessor is the unwind edge of an `invoke`.
2957 // We want to group "compatible" `invokes` into the same set to be merged.
2958 for (BasicBlock *PredBB : predecessors(BB))
2959 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2960
2961 // And now, merge `invoke`s that were grouped together.
2962 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
// Singleton sets have nothing to merge with.
2963 if (Invokes.size() < 2)
2964 continue;
2965 Changed = true;
2966 mergeCompatibleInvokesImpl(Invokes, DTU);
2967 }
2968
2969 return Changed;
2970}
2971
2972namespace {
2973/// Track ephemeral values, which should be ignored for cost-modelling
2974/// purposes. Requires walking instructions in reverse order.
2975class EphemeralValueTracker {
2976 SmallPtrSet<const Instruction *, 32> EphValues;
2977
2978 bool isEphemeral(const Instruction *I) {
2979 if (isa<AssumeInst>(I))
2980 return true;
2981 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2982 all_of(I->users(), [&](const User *U) {
2983 return EphValues.count(cast<Instruction>(U));
2984 });
2985 }
2986
2987public:
2988 bool track(const Instruction *I) {
2989 if (isEphemeral(I)) {
2990 EphValues.insert(I);
2991 return true;
2992 }
2993 return false;
2994 }
2995
2996 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2997};
2998} // namespace
2999
3000/// Determine if we can hoist sink a sole store instruction out of a
3001/// conditional block.
3002///
3003/// We are looking for code like the following:
3004/// BrBB:
3005/// store i32 %add, i32* %arrayidx2
3006/// ... // No other stores or function calls (we could be calling a memory
3007/// ... // function).
3008/// %cmp = icmp ult %x, %y
3009/// br i1 %cmp, label %EndBB, label %ThenBB
3010/// ThenBB:
3011/// store i32 %add5, i32* %arrayidx2
3012/// br label EndBB
3013/// EndBB:
3014/// ...
3015/// We are going to transform this into:
3016/// BrBB:
3017/// store i32 %add, i32* %arrayidx2
3018/// ... //
3019/// %cmp = icmp ult %x, %y
3020/// %add.add5 = select i1 %cmp, i32 %add, %add5
3021/// store i32 %add.add5, i32* %arrayidx2
3022/// ...
3023///
3024/// \return The pointer to the value of the previous store if the store can be
3025/// hoisted into the predecessor block. 0 otherwise.
// NOTE(review): the first line of this function's signature (taking the
// candidate instruction `I` and the branch block `BrBB`) is elided. Returns
// the value of a dominating store/load to the same location if the store in
// `I` may be speculated; nullptr otherwise.
3027 BasicBlock *StoreBB, BasicBlock *EndBB) {
3028 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3029 if (!StoreToHoist)
3030 return nullptr;
3031
3032 // Volatile or atomic.
3033 if (!StoreToHoist->isSimple())
3034 return nullptr;
3035
3036 Value *StorePtr = StoreToHoist->getPointerOperand();
3037 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3038
3039 // Look for a store to the same pointer in BrBB.
3040 unsigned MaxNumInstToLookAt = 9;
3041 // Skip pseudo probe intrinsic calls which are not really killing any memory
3042 // accesses.
3043 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3044 if (!MaxNumInstToLookAt)
3045 break;
3046 --MaxNumInstToLookAt;
3047
3048 // Could be calling an instruction that affects memory like free().
3049 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3050 return nullptr;
3051
3052 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3053 // Found the previous store to same location and type. Make sure it is
3054 // simple, to avoid introducing a spurious non-atomic write after an
3055 // atomic write.
3056 if (SI->getPointerOperand() == StorePtr &&
3057 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3058 SI->getAlign() >= StoreToHoist->getAlign())
3059 // Found the previous store, return its value operand.
3060 return SI->getValueOperand();
3061 return nullptr; // Unknown store.
3062 }
3063
3064 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
// A prior simple load of the same location can stand in for the "else"
// value, but only if the object is writable and uncaptured (so the
// speculated store cannot be observed) and the pointer is dereferenceable.
3065 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3066 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3067 Value *Obj = getUnderlyingObject(StorePtr);
3068 bool ExplicitlyDereferenceableOnly;
3069 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
// NOTE(review): part of this capture check (its negation and trailing
// arguments) is on elided lines here.
3071 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3073 (!ExplicitlyDereferenceableOnly ||
3074 isDereferenceablePointer(StorePtr, StoreTy,
3075 LI->getDataLayout()))) {
3076 // Found a previous load, return it.
3077 return LI;
3078 }
3079 }
3080 // The load didn't work out, but we may still find a store.
3081 }
3082 }
3083
3084 return nullptr;
3085}
3086
3087/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3088/// converted to selects.
// NOTE(review): the first line of this function's signature (taking BB and
// ThenBB) is elided. Accumulates into `Cost`/`SpeculatedInstructions` and
// returns true iff at least one PHI in EndBB can be rewritten as a select.
3090 BasicBlock *EndBB,
3091 unsigned &SpeculatedInstructions,
3092 InstructionCost &Cost,
3093 const TargetTransformInfo &TTI) {
// NOTE(review): the cost-kind selection (size vs. latency depending on
// hasMinSize()) spans elided lines around this expression.
3095 BB->getParent()->hasMinSize()
3098
3099 bool HaveRewritablePHIs = false;
3100 for (PHINode &PN : EndBB->phis()) {
3101 Value *OrigV = PN.getIncomingValueForBlock(BB);
3102 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3103
3104 // FIXME: Try to remove some of the duplication with
3105 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3106 if (ThenV == OrigV)
3107 continue;
3108
// Each non-trivial PHI costs one select in the merged block.
3109 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3110 CmpInst::makeCmpResultType(PN.getType()),
3112
3113 // Don't convert to selects if we could remove undefined behavior instead.
3114 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
// NOTE(review): the symmetric check for ThenV is on the elided line here.
3116 return false;
3117
3118 HaveRewritablePHIs = true;
3119 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3120 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3121 if (!OrigCE && !ThenCE)
3122 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3123
3124 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3125 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3126 InstructionCost MaxCost =
// NOTE(review): the budget expression for MaxCost is on the elided line.
3128 if (OrigCost + ThenCost > MaxCost)
3129 return false;
3130
3131 // Account for the cost of an unfolded ConstantExpr which could end up
3132 // getting expanded into Instructions.
3133 // FIXME: This doesn't account for how many operations are combined in the
3134 // constant expression.
3135 ++SpeculatedInstructions;
3136 if (SpeculatedInstructions > 1)
3137 return false;
3138 }
3139
3140 return HaveRewritablePHIs;
3141}
3142
// NOTE(review): the first line of this function's signature (taking the
// conditional BranchInst) is elided. Returns true when speculating the
// `then` side of the branch is worthwhile per branch-weight metadata.
3144 std::optional<bool> Invert,
3145 const TargetTransformInfo &TTI) {
3146 // If the branch is non-unpredictable, and is predicted to *not* branch to
3147 // the `then` block, then avoid speculating it.
3148 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3149 return true;
3150
// Without usable branch weights, assume speculation is fine.
3151 uint64_t TWeight, FWeight;
3152 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3153 return true;
3154
3155 if (!Invert.has_value())
3156 return false;
3157
// Probability of taking the edge that bypasses the `then` block; speculate
// only if that bypass is not the overwhelmingly likely outcome.
3158 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3159 BranchProbability BIEndProb =
3160 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3161 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3162 return BIEndProb < Likely;
3163}
3164
3165/// Speculate a conditional basic block flattening the CFG.
3166///
3167/// Note that this is a very risky transform currently. Speculating
3168/// instructions like this is most often not desirable. Instead, there is an MI
3169/// pass which can do it with full awareness of the resource constraints.
3170/// However, some cases are "obvious" and we should do directly. An example of
3171/// this is speculating a single, reasonably cheap instruction.
3172///
3173/// There is only one distinct advantage to flattening the CFG at the IR level:
3174/// it makes very common but simplistic optimizations such as are common in
3175/// instcombine and the DAG combiner more powerful by removing CFG edges and
3176/// modeling their effects with easier to reason about SSA value graphs.
3177///
3178///
3179/// An illustration of this transform is turning this IR:
3180/// \code
3181/// BB:
3182/// %cmp = icmp ult %x, %y
3183/// br i1 %cmp, label %EndBB, label %ThenBB
3184/// ThenBB:
3185/// %sub = sub %x, %y
3186/// br label BB2
3187/// EndBB:
3188/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3189/// ...
3190/// \endcode
3191///
3192/// Into this IR:
3193/// \code
3194/// BB:
3195/// %cmp = icmp ult %x, %y
3196/// %sub = sub %x, %y
3197/// %cond = select i1 %cmp, 0, %sub
3198/// ...
3199/// \endcode
3200///
3201/// \returns true if the conditional block is removed.
3202bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3203 BasicBlock *ThenBB) {
3204 if (!Options.SpeculateBlocks)
3205 return false;
3206
3207 // Be conservative for now. FP select instruction can often be expensive.
3208 Value *BrCond = BI->getCondition();
3209 if (isa<FCmpInst>(BrCond))
3210 return false;
3211
3212 BasicBlock *BB = BI->getParent();
3213 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3214 InstructionCost Budget =
// NOTE(review): the budget expression (a TTI-derived threshold) is on the
// elided line here.
3216
3217 // If ThenBB is actually on the false edge of the conditional branch, remember
3218 // to swap the select operands later.
3219 bool Invert = false;
3220 if (ThenBB != BI->getSuccessor(0)) {
3221 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3222 Invert = true;
3223 }
3224 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3225
3226 if (!isProfitableToSpeculate(BI, Invert, TTI))
3227 return false;
3228
3229 // Keep a count of how many times instructions are used within ThenBB when
3230 // they are candidates for sinking into ThenBB. Specifically:
3231 // - They are defined in BB, and
3232 // - They have no side effects, and
3233 // - All of their uses are in ThenBB.
3234 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3235
3236 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3237
3238 unsigned SpeculatedInstructions = 0;
3239 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3240 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3241 Value *SpeculatedStoreValue = nullptr;
3242 StoreInst *SpeculatedStore = nullptr;
3243 EphemeralValueTracker EphTracker;
// Walk ThenBB bottom-up (sans terminator) to classify each instruction.
3244 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3245 // Skip pseudo probes. The consequence is we lose track of the branch
3246 // probability for ThenBB, which is fine since the optimization here takes
3247 // place regardless of the branch probability.
3248 if (isa<PseudoProbeInst>(I)) {
3249 // The probe should be deleted so that it will not be over-counted when
3250 // the samples collected on the non-conditional path are counted towards
3251 // the conditional path. We leave it for the counts inference algorithm to
3252 // figure out a proper count for an unknown probe.
3253 SpeculatedPseudoProbes.push_back(&I);
3254 continue;
3255 }
3256
3257 // Ignore ephemeral values, they will be dropped by the transform.
3258 if (EphTracker.track(&I))
3259 continue;
3260
3261 // Only speculatively execute a single instruction (not counting the
3262 // terminator) for now.
3263 bool IsSafeCheapLoadStore = HoistLoadsStores &&
// NOTE(review): the load/store safety predicate and the count limit it is
// compared against are on elided lines here.
3265 SpeculatedConditionalLoadsStores.size() <
3267 // Not count load/store into cost if target supports conditional faulting
3268 // b/c it's cheap to speculate it.
3269 if (IsSafeCheapLoadStore)
3270 SpeculatedConditionalLoadsStores.push_back(&I);
3271 else
3272 ++SpeculatedInstructions;
3273
3274 if (SpeculatedInstructions > 1)
3275 return false;
3276
3277 // Don't hoist the instruction if it's unsafe or expensive.
3278 if (!IsSafeCheapLoadStore &&
// NOTE(review): a safe-to-speculate check on `I` is on the elided line here.
3280 !(HoistCondStores && !SpeculatedStoreValue &&
3281 (SpeculatedStoreValue =
3282 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3283 return false;
3284 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
// NOTE(review): the cost comparison completing this condition is elided.
3287 return false;
3288
3289 // Store the store speculation candidate.
3290 if (!SpeculatedStore && SpeculatedStoreValue)
3291 SpeculatedStore = cast<StoreInst>(&I);
3292
3293 // Do not hoist the instruction if any of its operands are defined but not
3294 // used in BB. The transformation will prevent the operand from
3295 // being sunk into the use block.
3296 for (Use &Op : I.operands()) {
// NOTE(review): the dyn_cast of `Op` to Instruction (`OpI`) is elided here.
3298 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3299 continue; // Not a candidate for sinking.
3300
3301 ++SinkCandidateUseCounts[OpI];
3302 }
3303 }
3304
3305 // Consider any sink candidates which are only used in ThenBB as costs for
3306 // speculation. Note, while we iterate over a DenseMap here, we are summing
3307 // and so iteration order isn't significant.
3308 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3309 if (Inst->hasNUses(Count)) {
3310 ++SpeculatedInstructions;
3311 if (SpeculatedInstructions > 1)
3312 return false;
3313 }
3314
3315 // Check that we can insert the selects and that it's not too expensive to do
3316 // so.
3317 bool Convert =
3318 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
// NOTE(review): the declaration of `Cost` is on the elided line here.
3320 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3321 SpeculatedInstructions, Cost, TTI);
3322 if (!Convert || Cost > Budget)
3323 return false;
3324
3325 // If we get here, we can hoist the instruction and if-convert.
3326 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3327
3328 Instruction *Sel = nullptr;
3329 // Insert a select of the value of the speculated store.
3330 if (SpeculatedStoreValue) {
3331 IRBuilder<NoFolder> Builder(BI);
3332 Value *OrigV = SpeculatedStore->getValueOperand();
3333 Value *TrueV = SpeculatedStore->getValueOperand();
3334 Value *FalseV = SpeculatedStoreValue;
3335 if (Invert)
3336 std::swap(TrueV, FalseV);
3337 Value *S = Builder.CreateSelect(
3338 BrCond, TrueV, FalseV, "spec.store.select", BI);
3339 Sel = cast<Instruction>(S);
3340 SpeculatedStore->setOperand(0, S);
3341 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3342 SpeculatedStore->getDebugLoc());
3343 // The value stored is still conditional, but the store itself is now
3344 // unconditonally executed, so we must be sure that any linked dbg.assign
3345 // intrinsics are tracking the new stored value (the result of the
3346 // select). If we don't, and the store were to be removed by another pass
3347 // (e.g. DSE), then we'd eventually end up emitting a location describing
3348 // the conditional value, unconditionally.
3349 //
3350 // === Before this transformation ===
3351 // pred:
3352 // store %one, %x.dest, !DIAssignID !1
3353 // dbg.assign %one, "x", ..., !1, ...
3354 // br %cond if.then
3355 //
3356 // if.then:
3357 // store %two, %x.dest, !DIAssignID !2
3358 // dbg.assign %two, "x", ..., !2, ...
3359 //
3360 // === After this transformation ===
3361 // pred:
3362 // store %one, %x.dest, !DIAssignID !1
3363 // dbg.assign %one, "x", ..., !1
3364 /// ...
3365 // %merge = select %cond, %two, %one
3366 // store %merge, %x.dest, !DIAssignID !2
3367 // dbg.assign %merge, "x", ..., !2
3368 for (DbgVariableRecord *DbgAssign :
3369 at::getDVRAssignmentMarkers(SpeculatedStore))
3370 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3371 DbgAssign->replaceVariableLocationOp(OrigV, S);
3372 }
3373
3374 // Metadata can be dependent on the condition we are hoisting above.
3375 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3376 // to avoid making it appear as if the condition is a constant, which would
3377 // be misleading while debugging.
3378 // Similarly strip attributes that maybe dependent on condition we are
3379 // hoisting above.
3380 for (auto &I : make_early_inc_range(*ThenBB)) {
3381 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3382 I.setDebugLoc(DebugLoc::getDropped());
3383 }
3384 I.dropUBImplyingAttrsAndMetadata();
3385
3386 // Drop ephemeral values.
3387 if (EphTracker.contains(&I)) {
3388 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3389 I.eraseFromParent();
3390 }
3391 }
3392
3393 // Hoist the instructions.
3394 // Drop DbgVariableRecords attached to these instructions.
3395 for (auto &It : *ThenBB)
3396 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3397 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3398 // equivalent).
3399 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3400 !DVR || !DVR->isDbgAssign())
3401 It.dropOneDbgRecord(&DR);
3402 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3403 std::prev(ThenBB->end()));
3404
3405 if (!SpeculatedConditionalLoadsStores.empty())
3406 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3407 Sel);
3408
3409 // Insert selects and rewrite the PHI operands.
3410 IRBuilder<NoFolder> Builder(BI);
3411 for (PHINode &PN : EndBB->phis()) {
3412 unsigned OrigI = PN.getBasicBlockIndex(BB);
3413 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3414 Value *OrigV = PN.getIncomingValue(OrigI);
3415 Value *ThenV = PN.getIncomingValue(ThenI);
3416
3417 // Skip PHIs which are trivial.
3418 if (OrigV == ThenV)
3419 continue;
3420
3421 // Create a select whose true value is the speculatively executed value and
3422 // false value is the pre-existing value. Swap them if the branch
3423 // destinations were inverted.
3424 Value *TrueV = ThenV, *FalseV = OrigV;
3425 if (Invert)
3426 std::swap(TrueV, FalseV);
3427 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3428 PN.setIncomingValue(OrigI, V);
3429 PN.setIncomingValue(ThenI, V);
3430 }
3431
3432 // Remove speculated pseudo probes.
3433 for (Instruction *I : SpeculatedPseudoProbes)
3434 I->eraseFromParent();
3435
3436 ++NumSpeculations;
3437 return true;
3438}
3439
3441
3442// Return false if number of blocks searched is too much.
3443static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3444 BlocksSet &ReachesNonLocalUses) {
3445 if (BB == DefBB)
3446 return true;
3447 if (!ReachesNonLocalUses.insert(BB).second)
3448 return true;
3449
3450 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3451 return false;
3452 for (BasicBlock *Pred : predecessors(BB))
3453 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3454 return false;
3455 return true;
3456}
3457
3458/// Return true if we can thread a branch across this block.
// NOTE(review): the first line of this function's signature (taking the
// BasicBlock being threaded) is on an elided line.
3460 BlocksSet &NonLocalUseBlocks) {
3461 int Size = 0;
3462 EphemeralValueTracker EphTracker;
3463
3464 // Walk the loop in reverse so that we can identify ephemeral values properly
3465 // (values only feeding assumes).
3466 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3467 // Can't fold blocks that contain noduplicate or convergent calls.
3468 if (CallInst *CI = dyn_cast<CallInst>(&I))
3469 if (CI->cannotDuplicate() || CI->isConvergent())
3470 return false;
3471
3472 // Ignore ephemeral values which are deleted during codegen.
3473 // We will delete Phis while threading, so Phis should not be accounted in
3474 // block's size.
3475 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3476 if (Size++ > MaxSmallBlockSize)
3477 return false; // Don't clone large BB's.
3478 }
3479
3480 // Record blocks with non-local uses of values defined in the current basic
3481 // block.
3482 for (User *U : I.users()) {
// NOTE(review): the cast of the user `U` to Instruction (`UI`) is elided.
3484 BasicBlock *UsedInBB = UI->getParent();
3485 if (UsedInBB == BB) {
// A same-block PHI user would be broken by threading.
3486 if (isa<PHINode>(UI))
3487 return false;
3488 } else
3489 NonLocalUseBlocks.insert(UsedInBB);
3490 }
3491
3492 // Looks ok, continue checking.
3493 }
3494
3495 return true;
3496}
3497
3499 BasicBlock *To) {
3500 // Don't look past the block defining the value, we might get the value from
3501 // a previous loop iteration.
3502 auto *I = dyn_cast<Instruction>(V);
3503 if (I && I->getParent() == To)
3504 return nullptr;
3505
3506 // We know the value if the From block branches on it.
3507 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3508 if (BI && BI->isConditional() && BI->getCondition() == V &&
3509 BI->getSuccessor(0) != BI->getSuccessor(1))
3510 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3512
3513 return nullptr;
3514}
3515
3516/// If we have a conditional branch on something for which we know the constant
3517/// value in predecessors (e.g. a phi node in the current block), thread edges
3518/// from the predecessor to their ultimate destination.
3519static std::optional<bool>
3521 const DataLayout &DL,
3522 AssumptionCache *AC) {
3524 BasicBlock *BB = BI->getParent();
3525 Value *Cond = BI->getCondition();
3527 if (PN && PN->getParent() == BB) {
3528 // Degenerate case of a single entry PHI.
3529 if (PN->getNumIncomingValues() == 1) {
3531 return true;
3532 }
3533
3534 for (Use &U : PN->incoming_values())
3535 if (auto *CB = dyn_cast<ConstantInt>(U))
3536 KnownValues[CB].insert(PN->getIncomingBlock(U));
3537 } else {
3538 for (BasicBlock *Pred : predecessors(BB)) {
3539 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3540 KnownValues[CB].insert(Pred);
3541 }
3542 }
3543
3544 if (KnownValues.empty())
3545 return false;
3546
3547 // Now we know that this block has multiple preds and two succs.
3548 // Check that the block is small enough and record which non-local blocks use
3549 // values defined in the block.
3550
3551 BlocksSet NonLocalUseBlocks;
3552 BlocksSet ReachesNonLocalUseBlocks;
3553 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3554 return false;
3555
3556 // Jump-threading can only be done to destinations where no values defined
3557 // in BB are live.
3558
3559 // Quickly check if both destinations have uses. If so, jump-threading cannot
3560 // be done.
3561 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3562 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3563 return false;
3564
3565 // Search backward from NonLocalUseBlocks to find which blocks
3566 // reach non-local uses.
3567 for (BasicBlock *UseBB : NonLocalUseBlocks)
3568 // Give up if too many blocks are searched.
3569 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3570 return false;
3571
3572 for (const auto &Pair : KnownValues) {
3573 ConstantInt *CB = Pair.first;
3574 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3575 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3576
3577 // Okay, we now know that all edges from PredBB should be revectored to
3578 // branch to RealDest.
3579 if (RealDest == BB)
3580 continue; // Skip self loops.
3581
3582 // Skip if the predecessor's terminator is an indirect branch.
3583 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3584 return isa<IndirectBrInst>(PredBB->getTerminator());
3585 }))
3586 continue;
3587
3588 // Only revector to RealDest if no values defined in BB are live.
3589 if (ReachesNonLocalUseBlocks.contains(RealDest))
3590 continue;
3591
3592 LLVM_DEBUG({
3593 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3594 << " has value " << *Pair.first << " in predecessors:\n";
3595 for (const BasicBlock *PredBB : Pair.second)
3596 dbgs() << " " << PredBB->getName() << "\n";
3597 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3598 });
3599
3600 // Split the predecessors we are threading into a new edge block. We'll
3601 // clone the instructions into this block, and then redirect it to RealDest.
3602 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3603
3604 // TODO: These just exist to reduce test diff, we can drop them if we like.
3605 EdgeBB->setName(RealDest->getName() + ".critedge");
3606 EdgeBB->moveBefore(RealDest);
3607
3608 // Update PHI nodes.
3609 addPredecessorToBlock(RealDest, EdgeBB, BB);
3610
3611 // BB may have instructions that are being threaded over. Clone these
3612 // instructions into EdgeBB. We know that there will be no uses of the
3613 // cloned instructions outside of EdgeBB.
3614 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3615 ValueToValueMapTy TranslateMap; // Track translated values.
3616 TranslateMap[Cond] = CB;
3617
3618 // RemoveDIs: track instructions that we optimise away while folding, so
3619 // that we can copy DbgVariableRecords from them later.
3620 BasicBlock::iterator SrcDbgCursor = BB->begin();
3621 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3622 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3623 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3624 continue;
3625 }
3626 // Clone the instruction.
3627 Instruction *N = BBI->clone();
3628 // Insert the new instruction into its new home.
3629 N->insertInto(EdgeBB, InsertPt);
3630
3631 if (BBI->hasName())
3632 N->setName(BBI->getName() + ".c");
3633
3634 // Update operands due to translation.
3635 // Key Instructions: Remap all the atom groups.
3636 if (const DebugLoc &DL = BBI->getDebugLoc())
3637 mapAtomInstance(DL, TranslateMap);
3638 RemapInstruction(N, TranslateMap,
3640
3641 // Check for trivial simplification.
3642 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3643 if (!BBI->use_empty())
3644 TranslateMap[&*BBI] = V;
3645 if (!N->mayHaveSideEffects()) {
3646 N->eraseFromParent(); // Instruction folded away, don't need actual
3647 // inst
3648 N = nullptr;
3649 }
3650 } else {
3651 if (!BBI->use_empty())
3652 TranslateMap[&*BBI] = N;
3653 }
3654 if (N) {
3655 // Copy all debug-info attached to instructions from the last we
3656 // successfully clone, up to this instruction (they might have been
3657 // folded away).
3658 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3659 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3660 SrcDbgCursor = std::next(BBI);
3661 // Clone debug-info on this instruction too.
3662 N->cloneDebugInfoFrom(&*BBI);
3663
3664 // Register the new instruction with the assumption cache if necessary.
3665 if (auto *Assume = dyn_cast<AssumeInst>(N))
3666 if (AC)
3667 AC->registerAssumption(Assume);
3668 }
3669 }
3670
3671 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3672 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3673 InsertPt->cloneDebugInfoFrom(BI);
3674
3675 BB->removePredecessor(EdgeBB);
3676 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3677 EdgeBI->setSuccessor(0, RealDest);
3678 EdgeBI->setDebugLoc(BI->getDebugLoc());
3679
3680 if (DTU) {
3682 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3683 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3684 DTU->applyUpdates(Updates);
3685 }
3686
3687 // For simplicity, we created a separate basic block for the edge. Merge
3688 // it back into the predecessor if possible. This not only avoids
3689 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3690 // bypass the check for trivial cycles above.
3691 MergeBlockIntoPredecessor(EdgeBB, DTU);
3692
3693 // Signal repeat, simplifying any other constants.
3694 return std::nullopt;
3695 }
3696
3697 return false;
3698}
3699
3700bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3701 // Note: If BB is a loop header then there is a risk that threading introduces
3702 // a non-canonical loop by moving a back edge. So we avoid this optimization
3703 // for loop headers if NeedCanonicalLoop is set.
3704 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3705 return false;
3706
3707 std::optional<bool> Result;
3708 bool EverChanged = false;
3709 do {
3710 // Note that None means "we changed things, but recurse further."
3711 Result =
3713 EverChanged |= Result == std::nullopt || *Result;
3714 } while (Result == std::nullopt);
3715 return EverChanged;
3716}
3717
3718/// Given a BB that starts with the specified two-entry PHI node,
3719/// see if we can eliminate it.
3722 const DataLayout &DL,
3723 bool SpeculateUnpredictables) {
3724 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3725 // statement", which has a very simple dominance structure. Basically, we
3726 // are trying to find the condition that is being branched on, which
3727 // subsequently causes this merge to happen. We really want control
3728 // dependence information for this check, but simplifycfg can't keep it up
3729 // to date, and this catches most of the cases we care about anyway.
3730 BasicBlock *BB = PN->getParent();
3731
3732 BasicBlock *IfTrue, *IfFalse;
3733 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3734 if (!DomBI)
3735 return false;
3736 Value *IfCond = DomBI->getCondition();
3737 // Don't bother if the branch will be constant folded trivially.
3738 if (isa<ConstantInt>(IfCond))
3739 return false;
3740
3741 BasicBlock *DomBlock = DomBI->getParent();
3744 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3745 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3746 });
3747 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3748 "Will have either one or two blocks to speculate.");
3749
3750 // If the branch is non-unpredictable, see if we either predictably jump to
3751 // the merge bb (if we have only a single 'then' block), or if we predictably
3752 // jump to one specific 'then' block (if we have two of them).
3753 // It isn't beneficial to speculatively execute the code
3754 // from the block that we know is predictably not entered.
3755 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3756 if (!IsUnpredictable) {
3757 uint64_t TWeight, FWeight;
3758 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3759 (TWeight + FWeight) != 0) {
3760 BranchProbability BITrueProb =
3761 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3762 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3763 BranchProbability BIFalseProb = BITrueProb.getCompl();
3764 if (IfBlocks.size() == 1) {
3765 BranchProbability BIBBProb =
3766 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3767 if (BIBBProb >= Likely)
3768 return false;
3769 } else {
3770 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3771 return false;
3772 }
3773 }
3774 }
3775
3776 // Don't try to fold an unreachable block. For example, the phi node itself
3777 // can't be the candidate if-condition for a select that we want to form.
3778 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3779 if (IfCondPhiInst->getParent() == BB)
3780 return false;
3781
3782 // Okay, we found that we can merge this two-entry phi node into a select.
3783 // Doing so would require us to fold *all* two entry phi nodes in this block.
3784 // At some point this becomes non-profitable (particularly if the target
3785 // doesn't support cmov's). Only do this transformation if there are two or
3786 // fewer PHI nodes in this block.
3787 unsigned NumPhis = 0;
3788 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3789 if (NumPhis > 2)
3790 return false;
3791
3792 // Loop over the PHI's seeing if we can promote them all to select
3793 // instructions. While we are at it, keep track of the instructions
3794 // that need to be moved to the dominating block.
3795 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3796 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3797 InstructionCost Cost = 0;
3798 InstructionCost Budget =
3800 if (SpeculateUnpredictables && IsUnpredictable)
3801 Budget += TTI.getBranchMispredictPenalty();
3802
3803 bool Changed = false;
3804 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3805 PHINode *PN = cast<PHINode>(II++);
3806 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3807 PN->replaceAllUsesWith(V);
3808 PN->eraseFromParent();
3809 Changed = true;
3810 continue;
3811 }
3812
3813 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3814 AggressiveInsts, Cost, Budget, TTI, AC,
3815 ZeroCostInstructions) ||
3816 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3817 AggressiveInsts, Cost, Budget, TTI, AC,
3818 ZeroCostInstructions))
3819 return Changed;
3820 }
3821
3822 // If we folded the first phi, PN dangles at this point. Refresh it. If
3823 // we ran out of PHIs then we simplified them all.
3824 PN = dyn_cast<PHINode>(BB->begin());
3825 if (!PN)
3826 return true;
3827
3828 // Return true if at least one of these is a 'not', and another is either
3829 // a 'not' too, or a constant.
3830 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3831 if (!match(V0, m_Not(m_Value())))
3832 std::swap(V0, V1);
3833 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3834 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3835 };
3836
3837 // Don't fold i1 branches on PHIs which contain binary operators or
3838 // (possibly inverted) select form of or/ands, unless one of
3839 // the incoming values is an 'not' and another one is freely invertible.
3840 // These can often be turned into switches and other things.
3841 auto IsBinOpOrAnd = [](Value *V) {
3842 return match(
3844 };
3845 if (PN->getType()->isIntegerTy(1) &&
3846 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3847 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3848 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3849 PN->getIncomingValue(1)))
3850 return Changed;
3851
3852 // If all PHI nodes are promotable, check to make sure that all instructions
3853 // in the predecessor blocks can be promoted as well. If not, we won't be able
3854 // to get rid of the control flow, so it's not worth promoting to select
3855 // instructions.
3856 for (BasicBlock *IfBlock : IfBlocks)
3857 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3858 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3859 // This is not an aggressive instruction that we can promote.
3860 // Because of this, we won't be able to get rid of the control flow, so
3861 // the xform is not worth it.
3862 return Changed;
3863 }
3864
3865 // If either of the blocks has it's address taken, we can't do this fold.
3866 if (any_of(IfBlocks,
3867 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3868 return Changed;
3869
3870 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3871 if (IsUnpredictable) dbgs() << " (unpredictable)";
3872 dbgs() << " T: " << IfTrue->getName()
3873 << " F: " << IfFalse->getName() << "\n");
3874
3875 // If we can still promote the PHI nodes after this gauntlet of tests,
3876 // do all of the PHI's now.
3877
3878 // Move all 'aggressive' instructions, which are defined in the
3879 // conditional parts of the if's up to the dominating block.
3880 for (BasicBlock *IfBlock : IfBlocks)
3881 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3882
3883 IRBuilder<NoFolder> Builder(DomBI);
3884 // Propagate fast-math-flags from phi nodes to replacement selects.
3885 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3886 // Change the PHI node into a select instruction.
3887 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3888 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3889
3890 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3891 isa<FPMathOperator>(PN) ? PN : nullptr,
3892 "", DomBI);
3893 PN->replaceAllUsesWith(Sel);
3894 Sel->takeName(PN);
3895 PN->eraseFromParent();
3896 }
3897
3898 // At this point, all IfBlocks are empty, so our if statement
3899 // has been flattened. Change DomBlock to jump directly to our new block to
3900 // avoid other simplifycfg's kicking in on the diamond.
3901 Builder.CreateBr(BB);
3902
3904 if (DTU) {
3905 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3906 for (auto *Successor : successors(DomBlock))
3907 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3908 }
3909
3910 DomBI->eraseFromParent();
3911 if (DTU)
3912 DTU->applyUpdates(Updates);
3913
3914 return true;
3915}
3916
3919 Value *RHS, const Twine &Name = "") {
3920 // Try to relax logical op to binary op.
3921 if (impliesPoison(RHS, LHS))
3922 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3923 if (Opc == Instruction::And)
3924 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3925 if (Opc == Instruction::Or)
3926 return Builder.CreateLogicalOr(LHS, RHS, Name);
3927 llvm_unreachable("Invalid logical opcode");
3928}
3929
3930/// Return true if either PBI or BI has branch weight available, and store
3931/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3932/// not have branch weight, use 1:1 as its weight.
3934 uint64_t &PredTrueWeight,
3935 uint64_t &PredFalseWeight,
3936 uint64_t &SuccTrueWeight,
3937 uint64_t &SuccFalseWeight) {
3938 bool PredHasWeights =
3939 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3940 bool SuccHasWeights =
3941 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3942 if (PredHasWeights || SuccHasWeights) {
3943 if (!PredHasWeights)
3944 PredTrueWeight = PredFalseWeight = 1;
3945 if (!SuccHasWeights)
3946 SuccTrueWeight = SuccFalseWeight = 1;
3947 return true;
3948 } else {
3949 return false;
3950 }
3951}
3952
3953/// Determine if the two branches share a common destination and deduce a glue
3954/// that joins the branches' conditions to arrive at the common destination if
3955/// that would be profitable.
3956static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3958 const TargetTransformInfo *TTI) {
3959 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3960 "Both blocks must end with a conditional branches.");
3962 "PredBB must be a predecessor of BB.");
3963
3964 // We have the potential to fold the conditions together, but if the
3965 // predecessor branch is predictable, we may not want to merge them.
3966 uint64_t PTWeight, PFWeight;
3967 BranchProbability PBITrueProb, Likely;
3968 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3969 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3970 (PTWeight + PFWeight) != 0) {
3971 PBITrueProb =
3972 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3973 Likely = TTI->getPredictableBranchThreshold();
3974 }
3975
3976 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3977 // Speculate the 2nd condition unless the 1st is probably true.
3978 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3979 return {{BI->getSuccessor(0), Instruction::Or, false}};
3980 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3981 // Speculate the 2nd condition unless the 1st is probably false.
3982 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3983 return {{BI->getSuccessor(1), Instruction::And, false}};
3984 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3985 // Speculate the 2nd condition unless the 1st is probably true.
3986 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3987 return {{BI->getSuccessor(1), Instruction::And, true}};
3988 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3989 // Speculate the 2nd condition unless the 1st is probably false.
3990 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3991 return {{BI->getSuccessor(0), Instruction::Or, true}};
3992 }
3993 return std::nullopt;
3994}
3995
3997 DomTreeUpdater *DTU,
3998 MemorySSAUpdater *MSSAU,
3999 const TargetTransformInfo *TTI) {
4000 BasicBlock *BB = BI->getParent();
4001 BasicBlock *PredBlock = PBI->getParent();
4002
4003 // Determine if the two branches share a common destination.
4004 BasicBlock *CommonSucc;
4006 bool InvertPredCond;
4007 std::tie(CommonSucc, Opc, InvertPredCond) =
4009
4010 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4011
4012 IRBuilder<> Builder(PBI);
4013 // The builder is used to create instructions to eliminate the branch in BB.
4014 // If BB's terminator has !annotation metadata, add it to the new
4015 // instructions.
4016 Builder.CollectMetadataToCopy(BB->getTerminator(),
4017 {LLVMContext::MD_annotation});
4018
4019 // If we need to invert the condition in the pred block to match, do so now.
4020 if (InvertPredCond) {
4021 InvertBranch(PBI, Builder);
4022 }
4023
4024 BasicBlock *UniqueSucc =
4025 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4026
4027 // Before cloning instructions, notify the successor basic block that it
4028 // is about to have a new predecessor. This will update PHI nodes,
4029 // which will allow us to update live-out uses of bonus instructions.
4030 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4031
4032 // Try to update branch weights.
4033 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4034 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4035 SuccTrueWeight, SuccFalseWeight)) {
4036 SmallVector<uint64_t, 8> NewWeights;
4037
4038 if (PBI->getSuccessor(0) == BB) {
4039 // PBI: br i1 %x, BB, FalseDest
4040 // BI: br i1 %y, UniqueSucc, FalseDest
4041 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4042 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4043 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4044 // TrueWeight for PBI * FalseWeight for BI.
4045 // We assume that total weights of a BranchInst can fit into 32 bits.
4046 // Therefore, we will not have overflow using 64-bit arithmetic.
4047 NewWeights.push_back(PredFalseWeight *
4048 (SuccFalseWeight + SuccTrueWeight) +
4049 PredTrueWeight * SuccFalseWeight);
4050 } else {
4051 // PBI: br i1 %x, TrueDest, BB
4052 // BI: br i1 %y, TrueDest, UniqueSucc
4053 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4054 // FalseWeight for PBI * TrueWeight for BI.
4055 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4056 PredFalseWeight * SuccTrueWeight);
4057 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4058 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4059 }
4060
4061 // Halve the weights if any of them cannot fit in an uint32_t
4062 fitWeights(NewWeights);
4063
4064 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
4065 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4066
4067 // TODO: If BB is reachable from all paths through PredBlock, then we
4068 // could replace PBI's branch probabilities with BI's.
4069 } else
4070 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4071
4072 // Now, update the CFG.
4073 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4074
4075 if (DTU)
4076 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4077 {DominatorTree::Delete, PredBlock, BB}});
4078
4079 // If BI was a loop latch, it may have had associated loop metadata.
4080 // We need to copy it to the new latch, that is, PBI.
4081 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4082 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4083
4084 ValueToValueMapTy VMap; // maps original values to cloned values
4086
4087 Module *M = BB->getModule();
4088
4089 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4090 for (DbgVariableRecord &DVR :
4092 RemapDbgRecord(M, &DVR, VMap,
4094 }
4095
4096 // Now that the Cond was cloned into the predecessor basic block,
4097 // or/and the two conditions together.
4098 Value *BICond = VMap[BI->getCondition()];
4099 PBI->setCondition(
4100 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4101
4102 ++NumFoldBranchToCommonDest;
4103 return true;
4104}
4105
4106/// Return if an instruction's type or any of its operands' types are a vector
4107/// type.
4108static bool isVectorOp(Instruction &I) {
4109 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4110 return U->getType()->isVectorTy();
4111 });
4112}
4113
4114/// If this basic block is simple enough, and if a predecessor branches to us
4115/// and one of our successors, fold the block into the predecessor and use
4116/// logical operations to pick the right destination.
4118 MemorySSAUpdater *MSSAU,
4119 const TargetTransformInfo *TTI,
4120 unsigned BonusInstThreshold) {
4121 // If this block ends with an unconditional branch,
4122 // let speculativelyExecuteBB() deal with it.
4123 if (!BI->isConditional())
4124 return false;
4125
4126 BasicBlock *BB = BI->getParent();
4130
4132
4134 Cond->getParent() != BB || !Cond->hasOneUse())
4135 return false;
4136
4137 // Finally, don't infinitely unroll conditional loops.
4138 if (is_contained(successors(BB), BB))
4139 return false;
4140
4141 // With which predecessors will we want to deal with?
4143 for (BasicBlock *PredBlock : predecessors(BB)) {
4144 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4145
4146 // Check that we have two conditional branches. If there is a PHI node in
4147 // the common successor, verify that the same value flows in from both
4148 // blocks.
4149 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4150 continue;
4151
4152 // Determine if the two branches share a common destination.
4153 BasicBlock *CommonSucc;
4155 bool InvertPredCond;
4156 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4157 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4158 else
4159 continue;
4160
4161 // Check the cost of inserting the necessary logic before performing the
4162 // transformation.
4163 if (TTI) {
4164 Type *Ty = BI->getCondition()->getType();
4165 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4166 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4167 !isa<CmpInst>(PBI->getCondition())))
4168 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4169
4171 continue;
4172 }
4173
4174 // Ok, we do want to deal with this predecessor. Record it.
4175 Preds.emplace_back(PredBlock);
4176 }
4177
4178 // If there aren't any predecessors into which we can fold,
4179 // don't bother checking the cost.
4180 if (Preds.empty())
4181 return false;
4182
4183 // Only allow this transformation if computing the condition doesn't involve
4184 // too many instructions and these involved instructions can be executed
4185 // unconditionally. We denote all involved instructions except the condition
4186 // as "bonus instructions", and only allow this transformation when the
4187 // number of the bonus instructions we'll need to create when cloning into
4188 // each predecessor does not exceed a certain threshold.
4189 unsigned NumBonusInsts = 0;
4190 bool SawVectorOp = false;
4191 const unsigned PredCount = Preds.size();
4192 for (Instruction &I : *BB) {
4193 // Don't check the branch condition comparison itself.
4194 if (&I == Cond)
4195 continue;
4196 // Ignore the terminator.
4197 if (isa<BranchInst>(I))
4198 continue;
4199 // I must be safe to execute unconditionally.
4201 return false;
4202 SawVectorOp |= isVectorOp(I);
4203
4204 // Account for the cost of duplicating this instruction into each
4205 // predecessor. Ignore free instructions.
4206 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4208 NumBonusInsts += PredCount;
4209
4210 // Early exits once we reach the limit.
4211 if (NumBonusInsts >
4212 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4213 return false;
4214 }
4215
4216 auto IsBCSSAUse = [BB, &I](Use &U) {
4217 auto *UI = cast<Instruction>(U.getUser());
4218 if (auto *PN = dyn_cast<PHINode>(UI))
4219 return PN->getIncomingBlock(U) == BB;
4220 return UI->getParent() == BB && I.comesBefore(UI);
4221 };
4222
4223 // Does this instruction require rewriting of uses?
4224 if (!all_of(I.uses(), IsBCSSAUse))
4225 return false;
4226 }
4227 if (NumBonusInsts >
4228 BonusInstThreshold *
4229 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4230 return false;
4231
4232 // Ok, we have the budget. Perform the transformation.
4233 for (BasicBlock *PredBlock : Preds) {
4234 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4235 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4236 }
4237 return false;
4238}
4239
4240// If there is only one store in BB1 and BB2, return it, otherwise return
4241// nullptr.
4243 StoreInst *S = nullptr;
4244 for (auto *BB : {BB1, BB2}) {
4245 if (!BB)
4246 continue;
4247 for (auto &I : *BB)
4248 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4249 if (S)
4250 // Multiple stores seen.
4251 return nullptr;
4252 else
4253 S = SI;
4254 }
4255 }
4256 return S;
4257}
4258
4260 Value *AlternativeV = nullptr) {
4261 // PHI is going to be a PHI node that allows the value V that is defined in
4262 // BB to be referenced in BB's only successor.
4263 //
4264 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4265 // doesn't matter to us what the other operand is (it'll never get used). We
4266 // could just create a new PHI with an undef incoming value, but that could
4267 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4268 // other PHI. So here we directly look for some PHI in BB's successor with V
4269 // as an incoming operand. If we find one, we use it, else we create a new
4270 // one.
4271 //
4272 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4273 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4274 // where OtherBB is the single other predecessor of BB's only successor.
4275 PHINode *PHI = nullptr;
4276 BasicBlock *Succ = BB->getSingleSuccessor();
4277
4278 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4279 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4280 PHI = cast<PHINode>(I);
4281 if (!AlternativeV)
4282 break;
4283
4284 assert(Succ->hasNPredecessors(2));
4285 auto PredI = pred_begin(Succ);
4286 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4287 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4288 break;
4289 PHI = nullptr;
4290 }
4291 if (PHI)
4292 return PHI;
4293
4294 // If V is not an instruction defined in BB, just return it.
4295 if (!AlternativeV &&
4296 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4297 return V;
4298
4299 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4300 PHI->insertBefore(Succ->begin());
4301 PHI->addIncoming(V, BB);
4302 for (BasicBlock *PredBB : predecessors(Succ))
4303 if (PredBB != BB)
4304 PHI->addIncoming(
4305 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4306 return PHI;
4307}
4308
// mergeConditionalStoreToAddress: merge the two conditional stores to
// `Address` (one found in PTB/PFB, the other in QTB/QFB) into a single store
// in PostBB, predicated on the logical OR of the (possibly inverted) P and Q
// branch conditions. Returns true if the IR was changed.
// NOTE(review): this listing is a doxygen extraction; the declaration line
// (original 4309) and several statements are elided — verify any edit against
// upstream llvm/lib/Transforms/Utils/SimplifyCFG.cpp.
4310     BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4311     BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4312     DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4313   // For every pointer, there must be exactly two stores, one coming from
4314   // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4315   // store (to any address) in PTB,PFB or QTB,QFB.
4316   // FIXME: We could relax this restriction with a bit more work and performance
4317   // testing.
4318   StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4319   StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4320   if (!PStore || !QStore)
4321     return false;
4322 
4323   // Now check the stores are compatible.
// Only unordered (non-atomic/non-volatile per isUnordered) stores of the same
// value type may be merged into one predicated store.
4324   if (!QStore->isUnordered() || !PStore->isUnordered() ||
4325       PStore->getValueOperand()->getType() !=
4326           QStore->getValueOperand()->getType())
4327     return false;
4328 
4329   // Check that sinking the store won't cause program behavior changes. Sinking
4330   // the store out of the Q blocks won't change any behavior as we're sinking
4331   // from a block to its unconditional successor. But we're moving a store from
4332   // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4333   // So we need to check that there are no aliasing loads or stores in
4334   // QBI, QTB and QFB. We also need to check there are no conflicting memory
4335   // operations between PStore and the end of its parent block.
4336   //
4337   // The ideal way to do this is to query AliasAnalysis, but we don't
4338   // preserve AA currently so that is dangerous. Be super safe and just
4339   // check there are no other memory operations at all.
4340   for (auto &I : *QFB->getSinglePredecessor())
4341     if (I.mayReadOrWriteMemory())
4342       return false;
4343   for (auto &I : *QFB)
4344     if (&I != QStore && I.mayReadOrWriteMemory())
4345       return false;
4346   if (QTB)
4347     for (auto &I : *QTB)
4348       if (&I != QStore && I.mayReadOrWriteMemory())
4349         return false;
4350   for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4351        I != E; ++I)
4352     if (&*I != PStore && I->mayReadOrWriteMemory())
4353       return false;
4354 
4355   // If we're not in aggressive mode, we only optimize if we have some
4356   // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4357   auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
// A null BB models a triangle fallthrough edge; nothing to cost there.
4358     if (!BB)
4359       return true;
4360     // Heuristic: if the block can be if-converted/phi-folded and the
4361     // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4362     // thread this store.
4363     InstructionCost Cost = 0;
4364     InstructionCost Budget =
// NOTE(review): the extraction drops the RHS of `Budget` here (original line
// 4365) — confirm the budget expression against upstream before editing.
4366     for (auto &I : BB->instructionsWithoutDebug(false)) {
4367       // Consider terminator instruction to be free.
4368       if (I.isTerminator())
4369         continue;
4370       // If this is one the stores that we want to speculate out of this BB,
4371       // then don't count it's cost, consider it to be free.
4372       if (auto *S = dyn_cast<StoreInst>(&I))
4373         if (llvm::find(FreeStores, S))
4374           continue;
4375       // Else, we have a white-list of instructions that we are okay speculating.
// NOTE(review): the extraction drops the white-list condition here (original
// line 4376) — the `return false` below is guarded by it upstream.
4377         return false; // Not in white-list - not worthwhile folding.
4378       // And finally, if this is a non-free instruction that we are okay
4379       // speculating, ensure that we consider the speculation budget.
4380       Cost +=
4381           TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4382       if (Cost > Budget)
4383         return false; // Eagerly refuse to fold as soon as we're out of budget.
4384     }
4385     assert(Cost <= Budget &&
4386            "When we run out of budget we will eagerly return from within the "
4387            "per-instruction loop.");
4388     return true;
4389   };
4390 
4391   const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
// NOTE(review): original line 4392 (the guard preceding these calls,
// presumably the non-aggressive-mode check) is elided by the extraction.
4393       (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4394        !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4395     return false;
4396 
4397   // If PostBB has more than two predecessors, we need to split it so we can
4398   // sink the store.
4399   if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4400     // We know that QFB's only successor is PostBB. And QFB has a single
4401     // predecessor. If QTB exists, then its only successor is also PostBB.
4402     // If QTB does not exist, then QFB's only predecessor has a conditional
4403     // branch to QFB and PostBB.
4404     BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4405     BasicBlock *NewBB =
4406         SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4407     if (!NewBB)
4408       return false;
4409     PostBB = NewBB;
4410   }
4411 
4412   // OK, we're going to sink the stores to PostBB. The store has to be
4413   // conditional though, so first create the predicate.
4414   BranchInst *PBranch =
// NOTE(review): original lines 4415 and 4417 (the cast<BranchInst>(...)
// initializers for PBranch/QBranch) are elided by the extraction.
4416   BranchInst *QBranch =
4418   Value *PCond = PBranch->getCondition();
4419   Value *QCond = QBranch->getCondition();
4420 
// NOTE(review): original lines 4421 and 4423 (the PPHI/QPHI initializers,
// built via ensureValueAvailableInSuccessor) are elided by the extraction.
4422                                                 PStore->getParent());
4424                                                 QStore->getParent(), PPHI);
4425 
4426   BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4427   IRBuilder<> QB(PostBB, PostBBFirst);
4428   QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4429 
// The stores may live in either arm; flip the inversion flags when the store
// is in the false block rather than the true block.
4430   InvertPCond ^= (PStore->getParent() != PTB);
4431   InvertQCond ^= (QStore->getParent() != QTB);
4432   Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4433   Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4434 
4435   Value *CombinedPred = QB.CreateOr(PPred, QPred);
4436 
4437   BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4438   auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4439                                       /*Unreachable=*/false,
4440                                       /*BranchWeights=*/nullptr, DTU);
4441 
4442   QB.SetInsertPoint(T);
4443   StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4444   SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4445   // Choose the minimum alignment. If we could prove both stores execute, we
4446   // could use biggest one. In this case, though, we only know that one of the
4447   // stores executes. And we don't know it's safe to take the alignment from a
4448   // store that doesn't execute.
4449   SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4450 
4451   QStore->eraseFromParent();
4452   PStore->eraseFromParent();
4453 
4454   return true;
4455 }
4456
// mergeConditionalStores: driver for the conditional-store-merging transform.
// Recognizes a pair of stacked diamonds/triangles (PBI feeding QBI), collects
// store addresses written in both the P and Q arms, and calls
// mergeConditionalStoreToAddress for each common address.
// NOTE(review): doxygen extraction — the declaration line (original 4457) and
// a couple of statements are elided; verify edits against upstream.
4458                                    DomTreeUpdater *DTU, const DataLayout &DL,
4459                                    const TargetTransformInfo &TTI) {
4460   // The intention here is to find diamonds or triangles (see below) where each
4461   // conditional block contains a store to the same address. Both of these
4462   // stores are conditional, so they can't be unconditionally sunk. But it may
4463   // be profitable to speculatively sink the stores into one merged store at the
4464   // end, and predicate the merged store on the union of the two conditions of
4465   // PBI and QBI.
4466   //
4467   // This can reduce the number of stores executed if both of the conditions are
4468   // true, and can allow the blocks to become small enough to be if-converted.
4469   // This optimization will also chain, so that ladders of test-and-set
4470   // sequences can be if-converted away.
4471   //
4472   // We only deal with simple diamonds or triangles:
4473   //
4474   //     PBI       or      PBI        or a combination of the two
4475   //    /   \               | \
4476   //   PTB  PFB             |  PFB
4477   //    \   /               | /
4478   //     QBI                QBI
4479   //    /  \                | \
4480   //   QTB  QFB             |  QFB
4481   //    \  /                | /
4482   //     PostBB            PostBB
4483   //
4484   // We model triangles as a type of diamond with a nullptr "true" block.
4485   // Triangles are canonicalized so that the fallthrough edge is represented by
4486   // a true condition, as in the diagram above.
4487   BasicBlock *PTB = PBI->getSuccessor(0);
4488   BasicBlock *PFB = PBI->getSuccessor(1);
4489   BasicBlock *QTB = QBI->getSuccessor(0);
4490   BasicBlock *QFB = QBI->getSuccessor(1);
4491   BasicBlock *PostBB = QFB->getSingleSuccessor();
4492 
4493   // Make sure we have a good guess for PostBB. If QTB's only successor is
4494   // QFB, then QFB is a better PostBB.
4495   if (QTB->getSingleSuccessor() == QFB)
4496     PostBB = QFB;
4497 
4498   // If we couldn't find a good PostBB, stop.
4499   if (!PostBB)
4500     return false;
4501 
4502   bool InvertPCond = false, InvertQCond = false;
4503   // Canonicalize fallthroughs to the true branches.
4504   if (PFB == QBI->getParent()) {
4505     std::swap(PFB, PTB);
4506     InvertPCond = true;
4507   }
4508   if (QFB == PostBB) {
4509     std::swap(QFB, QTB);
4510     InvertQCond = true;
4511   }
4512 
4513   // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4514   // and QFB may not. Model fallthroughs as a nullptr block.
4515   if (PTB == QBI->getParent())
4516     PTB = nullptr;
4517   if (QTB == PostBB)
4518     QTB = nullptr;
4519 
4520   // Legality bailouts. We must have at least the non-fallthrough blocks and
4521   // the post-dominating block, and the non-fallthroughs must only have one
4522   // predecessor.
4523   auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4524     return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4525   };
4526   if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4527       !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4528     return false;
4529   if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4530       (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4531     return false;
4532   if (!QBI->getParent()->hasNUses(2))
4533     return false;
4534 
4535   // OK, this is a sequence of two diamonds or triangles.
4536   // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4537   SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4538   for (auto *BB : {PTB, PFB}) {
4539     if (!BB)
4540       continue;
4541     for (auto &I : *BB)
// NOTE(review): original line 4542 (the dyn_cast<StoreInst> guard for this
// insert) is elided by the extraction.
4543         PStoreAddresses.insert(SI->getPointerOperand());
4544   }
4545   for (auto *BB : {QTB, QFB}) {
4546     if (!BB)
4547       continue;
4548     for (auto &I : *BB)
// NOTE(review): original line 4549 (the dyn_cast<StoreInst> guard for this
// insert) is elided by the extraction.
4550         QStoreAddresses.insert(SI->getPointerOperand());
4551   }
4552 
4553   set_intersect(PStoreAddresses, QStoreAddresses);
4554   // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4555   // clear what it contains.
4556   auto &CommonAddresses = PStoreAddresses;
4557 
4558   bool Changed = false;
4559   for (auto *Address : CommonAddresses)
4560     Changed |=
4561         mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4562                                        InvertPCond, InvertQCond, DTU, DL, TTI);
4563   return Changed;
4564 }
4565
4566 /// If the previous block ended with a widenable branch, determine if reusing
4567 /// the target block is profitable and legal. This will have the effect of
4568 /// "widening" PBI, but doesn't require us to reason about hoisting safety.
// NOTE(review): the declaration line (original 4569) is elided by this
// extraction; the function takes the predecessor branch PBI and the current
// branch BI, plus DTU for dominator-tree maintenance, and returns bool.
4570                                           DomTreeUpdater *DTU) {
4571   // TODO: This can be generalized in two important ways:
4572   // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4573   //    values from the PBI edge.
4574   // 2) We can sink side effecting instructions into BI's fallthrough
4575   //    successor provided they doesn't contribute to computation of
4576   //    BI's condition.
4577   BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4578   BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4579   if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4580       !BI->getParent()->getSinglePredecessor())
4581     return false;
4582   if (!IfFalseBB->phis().empty())
4583     return false; // TODO
4584   // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4585   // may undo the transform done here.
4586   // TODO: There might be a more fine-grained solution to this.
4587   if (!llvm::succ_empty(IfFalseBB))
4588     return false;
4589   // Use lambda to lazily compute expensive condition after cheap ones.
4590   auto NoSideEffects = [](BasicBlock &BB) {
4591     return llvm::none_of(BB, [](const Instruction &I) {
4592       return I.mayWriteToMemory() || I.mayHaveSideEffects();
4593     });
4594   };
// Try redirecting BI's false edge first, then its true edge. In either case
// the redirected edge must target a deoptimizing block (profitability) and
// BI's block must be side-effect free (legality).
4595   if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4596       BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4597       NoSideEffects(*BI->getParent())) {
4598     auto *OldSuccessor = BI->getSuccessor(1);
4599     OldSuccessor->removePredecessor(BI->getParent());
4600     BI->setSuccessor(1, IfFalseBB);
4601     if (DTU)
4602       DTU->applyUpdates(
4603           {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4604            {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4605     return true;
4606   }
4607   if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4608       BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4609       NoSideEffects(*BI->getParent())) {
4610     auto *OldSuccessor = BI->getSuccessor(0);
4611     OldSuccessor->removePredecessor(BI->getParent());
4612     BI->setSuccessor(0, IfFalseBB);
4613     if (DTU)
4614       DTU->applyUpdates(
4615           {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4616            {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4617     return true;
4618   }
4619   return false;
4620 }
4621
4622 /// If we have a conditional branch as a predecessor of another block,
4623 /// this function tries to simplify it. We know
4624 /// that PBI and BI are both conditional branches, and BI is in one of the
4625 /// successor blocks of PBI - PBI branches to BI.
// NOTE(review): the declaration line (original 4626) is elided by this
// extraction; the function takes PBI and BI plus the analyses below and
// returns bool (true if it changed the IR).
4627                                            DomTreeUpdater *DTU,
4628                                            const DataLayout &DL,
4629                                            const TargetTransformInfo &TTI) {
4630   assert(PBI->isConditional() && BI->isConditional());
4631   BasicBlock *BB = BI->getParent();
4632 
4633   // If this block ends with a branch instruction, and if there is a
4634   // predecessor that ends on a branch of the same condition, make
4635   // this conditional branch redundant.
4636   if (PBI->getCondition() == BI->getCondition() &&
4637       PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4638     // Okay, the outcome of this conditional branch is statically
4639     // knowable.  If this block had a single pred, handle specially, otherwise
4640     // foldCondBranchOnValueKnownInPredecessor() will handle it.
4641     if (BB->getSinglePredecessor()) {
4642       // Turn this into a branch on constant.
4643       bool CondIsTrue = PBI->getSuccessor(0) == BB;
4644       BI->setCondition(
4645           ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4646       return true; // Nuke the branch on constant.
4647     }
4648   }
4649 
4650   // If the previous block ended with a widenable branch, determine if reusing
4651   // the target block is profitable and legal.  This will have the effect of
4652   // "widening" PBI, but doesn't require us to reason about hoisting safety.
4653   if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4654     return true;
4655 
4656   // If both branches are conditional and both contain stores to the same
4657   // address, remove the stores from the conditionals and create a conditional
4658   // merged store at the end.
4659   if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4660     return true;
4661 
4662   // If this is a conditional branch in an empty block, and if any
4663   // predecessors are a conditional branch to one of our destinations,
4664   // fold the conditions into logical ops and one cond br.
4665 
4666   // Ignore dbg intrinsics.
4667   if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4668     return false;
4669 
// Determine which successor PBI and BI have in common. PBIOp/BIOp record
// which operand (0 = true edge, 1 = false edge) of each branch reaches it.
4670   int PBIOp, BIOp;
4671   if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4672     PBIOp = 0;
4673     BIOp = 0;
4674   } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4675     PBIOp = 0;
4676     BIOp = 1;
4677   } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4678     PBIOp = 1;
4679     BIOp = 0;
4680   } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4681     PBIOp = 1;
4682     BIOp = 1;
4683   } else {
4684     return false;
4685   }
4686 
4687   // Check to make sure that the other destination of this branch
4688   // isn't BB itself.  If so, this is an infinite loop that will
4689   // keep getting unwound.
4690   if (PBI->getSuccessor(PBIOp) == BB)
4691     return false;
4692 
4693   // If predecessor's branch probability to BB is too low don't merge branches.
4694   SmallVector<uint32_t, 2> PredWeights;
4695   if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4696       extractBranchWeights(*PBI, PredWeights) &&
4697       (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4698 
// NOTE(review): original line 4699 (the declaration/initializer of
// CommonDestProb, built from the two arguments below) is elided here.
4700         PredWeights[PBIOp],
4701         static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4702 
4703     BranchProbability Likely = TTI.getPredictableBranchThreshold();
4704     if (CommonDestProb >= Likely)
4705       return false;
4706   }
4707 
4708   // Do not perform this transformation if it would require
4709   // insertion of a large number of select instructions. For targets
4710   // without predication/cmovs, this is a big pessimization.
4711 
4712   BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4713   BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4714   unsigned NumPhis = 0;
4715   for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4716        ++II, ++NumPhis) {
4717     if (NumPhis > 2) // Disable this xform.
4718       return false;
4719   }
4720 
4721   // Finally, if everything is ok, fold the branches to logical ops.
4722   BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4723 
4724   LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4725                     << "AND: " << *BI->getParent());
4726 
// NOTE(review): original line 4727 (the declaration of the `Updates` vector
// used for DTU bookkeeping below) is elided by the extraction.
4728 
4729   // If OtherDest *is* BB, then BB is a basic block with a single conditional
4730   // branch in it, where one edge (OtherDest) goes back to itself but the other
4731   // exits.  We don't *know* that the program avoids the infinite loop
4732   // (even though that seems likely). If we do this xform naively, we'll end up
4733   // recursively unpeeling the loop.  Since we know that (after the xform is
4734   // done) that the block *is* infinite if reached, we just make it an obviously
4735   // infinite loop with no cond branch.
4736   if (OtherDest == BB) {
4737     // Insert it at the end of the function, because it's either code,
4738     // or it won't matter if it's hot. :)
4739     BasicBlock *InfLoopBlock =
4740         BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4741     BranchInst::Create(InfLoopBlock, InfLoopBlock);
4742     if (DTU)
4743       Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4744     OtherDest = InfLoopBlock;
4745   }
4746 
4747   LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4748 
4749   // BI may have other predecessors.  Because of this, we leave
4750   // it alone, but modify PBI.
4751 
4752   // Make sure we get to CommonDest on True&True directions.
4753   Value *PBICond = PBI->getCondition();
4754   IRBuilder<NoFolder> Builder(PBI);
4755   if (PBIOp)
4756     PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4757 
4758   Value *BICond = BI->getCondition();
4759   if (BIOp)
4760     BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4761 
4762   // Merge the conditions.
4763   Value *Cond =
4764       createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4765 
4766   // Modify PBI to branch on the new condition to the new dests.
4767   PBI->setCondition(Cond);
4768   PBI->setSuccessor(0, CommonDest);
4769   PBI->setSuccessor(1, OtherDest);
4770 
4771   if (DTU) {
4772     Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4773     Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4774 
4775     DTU->applyUpdates(Updates);
4776   }
4777 
4778   // Update branch weight for PBI.
4779   uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4780   uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4781   bool HasWeights =
4782       extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4783                              SuccTrueWeight, SuccFalseWeight);
4784   if (HasWeights) {
4785     PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4786     PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4787     SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4788     SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4789     // The weight to CommonDest should be PredCommon * SuccTotal +
4790     //                                    PredOther * SuccCommon.
4791     // The weight to OtherDest should be PredOther * SuccOther.
4792     uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4793                                   PredOther * SuccCommon,
4794                               PredOther * SuccOther};
4795     // Halve the weights if any of them cannot fit in an uint32_t
4796     fitWeights(NewWeights);
4797 
4798     setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4799   }
4800 
4801   // OtherDest may have phi nodes.  If so, add an entry from PBI's
4802   // block that are identical to the entries for BI's block.
4803   addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4804 
4805   // We know that the CommonDest already had an edge from PBI to
4806   // it.  If it has PHIs though, the PHIs may have different
4807   // entries for BB and PBI's BB.  If so, insert a select to make
4808   // them agree.
4809   for (PHINode &PN : CommonDest->phis()) {
4810     Value *BIV = PN.getIncomingValueForBlock(BB);
4811     unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4812     Value *PBIV = PN.getIncomingValue(PBBIdx);
4813     if (BIV != PBIV) {
4814       // Insert a select in PBI to pick the right value.
// NOTE(review): original line 4815 (the declaration of `NV` wrapping this
// CreateSelect call) is elided by the extraction.
4816           Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4817       PN.setIncomingValue(PBBIdx, NV);
4818       // The select has the same condition as PBI, in the same BB. The
4819       // probabilities don't change.
4820       if (HasWeights) {
4821         uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4822         uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4823         setBranchWeights(NV, TrueWeight, FalseWeight,
4824                          /*IsExpected=*/false);
4825       }
4826     }
4827   }
4828 
4829   LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4830   LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4831 
4832   // This basic block is probably dead.  We know it has at least
4833   // one fewer predecessor.
4834   return true;
4835 }
4836
4837 // Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4838 // true or to FalseBB if Cond is false.
4839 // Takes care of updating the successors and removing the old terminator.
4840 // Also makes sure not to introduce new successors by assuming that edges to
4841 // non-successor TrueBBs and FalseBBs aren't reachable.
4842 bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4843                                                 Value *Cond, BasicBlock *TrueBB,
4844                                                 BasicBlock *FalseBB,
4845                                                 uint32_t TrueWeight,
4846                                                 uint32_t FalseWeight) {
4847   auto *BB = OldTerm->getParent();
4848   // Remove any superfluous successor edges from the CFG.
4849   // First, figure out which successors to preserve.
4850   // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4851   // successor.
4852   BasicBlock *KeepEdge1 = TrueBB;
4853   BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4854 
4855   SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4856 
4857   // Then remove the rest.
4858   for (BasicBlock *Succ : successors(OldTerm)) {
4859     // Make sure only to keep exactly one copy of each edge.
// KeepEdge1/KeepEdge2 are nulled out as their edges are encountered, so
// after the loop a non-null value means that successor was never found.
4860     if (Succ == KeepEdge1)
4861       KeepEdge1 = nullptr;
4862     else if (Succ == KeepEdge2)
4863       KeepEdge2 = nullptr;
4864     else {
4865       Succ->removePredecessor(BB,
4866                               /*KeepOneInputPHIs=*/true);
4867 
4868       if (Succ != TrueBB && Succ != FalseBB)
4869         RemovedSuccessors.insert(Succ);
4870     }
4871   }
4872 
4873   IRBuilder<> Builder(OldTerm);
4874   Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4875 
4876   // Insert an appropriate new terminator.
4877   if (!KeepEdge1 && !KeepEdge2) {
4878     if (TrueBB == FalseBB) {
4879       // We were only looking for one successor, and it was present.
4880       // Create an unconditional branch to it.
4881       Builder.CreateBr(TrueBB);
4882     } else {
4883       // We found both of the successors we were looking for.
4884       // Create a conditional branch sharing the condition of the select.
4885       BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4886       if (TrueWeight != FalseWeight)
4887         setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4888     }
4889   } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4890     // Neither of the selected blocks were successors, so this
4891     // terminator must be unreachable.
4892     new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4893   } else {
4894     // One of the selected values was a successor, but the other wasn't.
4895     // Insert an unconditional branch to the one that was found;
4896     // the edge to the one that wasn't must be unreachable.
4897     if (!KeepEdge1) {
4898       // Only TrueBB was found.
4899       Builder.CreateBr(TrueBB);
4900     } else {
4901       // Only FalseBB was found.
4902       Builder.CreateBr(FalseBB);
4903     }
4904   }
4905 
// NOTE(review): original line 4906 is elided by this extraction; per the
// header comment above, the old terminator is removed here — verify against
// upstream (expected: OldTerm->eraseFromParent();).
4907 
4908   if (DTU) {
4909     SmallVector<DominatorTree::UpdateType, 2> Updates;
4910     Updates.reserve(RemovedSuccessors.size());
4911     for (auto *RemovedSuccessor : RemovedSuccessors)
4912       Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4913     DTU->applyUpdates(Updates);
4914   }
4915 
4916   return true;
4917 }
4918
4919// Replaces
4920// (switch (select cond, X, Y)) on constant X, Y
4921// with a branch - conditional if X and Y lead to distinct BBs,
4922// unconditional otherwise.
4923bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4924 SelectInst *Select) {
4925 // Check for constant integer values in the select.
4926 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4927 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4928 if (!TrueVal || !FalseVal)
4929 return false;
4930
4931 // Find the relevant condition and destinations.
4932 Value *Condition = Select->getCondition();
4933 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4934 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4935
4936 // Get weight for TrueBB and FalseBB.
4937 uint32_t TrueWeight = 0, FalseWeight = 0;
4938 SmallVector<uint64_t, 8> Weights;
4939 bool HasWeights = hasBranchWeightMD(*SI);
4940 if (HasWeights) {
4941 getBranchWeights(SI, Weights);
4942 if (Weights.size() == 1 + SI->getNumCases()) {
4943 TrueWeight =
4944 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4945 FalseWeight =
4946 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4947 }
4948 }
4949
4950 // Perform the actual simplification.
4951 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4952 FalseWeight);
4953}
4954
4955// Replaces
4956// (indirectbr (select cond, blockaddress(@fn, BlockA),
4957// blockaddress(@fn, BlockB)))
4958// with
4959// (br cond, BlockA, BlockB).
4960bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4961 SelectInst *SI) {
4962 // Check that both operands of the select are block addresses.
4963 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4964 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4965 if (!TBA || !FBA)
4966 return false;
4967
4968 // Extract the actual blocks.
4969 BasicBlock *TrueBB = TBA->getBasicBlock();
4970 BasicBlock *FalseBB = FBA->getBasicBlock();
4971
4972 // Perform the actual simplification.
4973 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4974 0);
4975}
4976
4977 /// This is called when we find an icmp instruction
4978 /// (a seteq/setne with a constant) as the only instruction in a
4979 /// block that ends with an uncond branch.  We are looking for a very specific
4980 /// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified.  In
4981 /// this case, we merge the first two "or's of icmp" into a switch, but then the
4982 /// default value goes to an uncond block with a seteq in it, we get something
4983 /// like:
4984 ///
4985 ///   switch i8 %A, label %DEFAULT [ i8 1, label %end    i8 2, label %end ]
4986 /// DEFAULT:
4987 ///   %tmp = icmp eq i8 %A, 92
4988 ///   br label %end
4989 /// end:
4990 ///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4991 ///
4992 /// We prefer to split the edge to 'end' so that there is a true/false entry to
4993 /// the PHI, merging the third icmp into the switch.
4994 bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4995     ICmpInst *ICI, IRBuilder<> &Builder) {
4996   BasicBlock *BB = ICI->getParent();
4997 
4998   // If the block has any PHIs in it or the icmp has multiple uses, it is too
4999   // complex.
5000   if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5001     return false;
5002 
5003   Value *V = ICI->getOperand(0);
5004   ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5005 
5006   // The pattern we're looking for is where our only predecessor is a switch on
5007   // 'V' and this block is the default case for the switch.  In this case we can
5008   // fold the compared value into the switch to simplify things.
5009   BasicBlock *Pred = BB->getSinglePredecessor();
5010   if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5011     return false;
5012 
5013   SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5014   if (SI->getCondition() != V)
5015     return false;
5016 
5017   // If BB is reachable on a non-default case, then we simply know the value of
5018   // V in this block.  Substitute it and constant fold the icmp instruction
5019   // away.
5020   if (SI->getDefaultDest() != BB) {
5021     ConstantInt *VVal = SI->findCaseDest(BB);
5022     assert(VVal && "Should have a unique destination value");
5023     ICI->setOperand(0, VVal);
5024 
5025     if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5026       ICI->replaceAllUsesWith(V);
5027       ICI->eraseFromParent();
5028     }
5029     // BB is now empty, so it is likely to simplify away.
5030     return requestResimplify();
5031   }
5032 
5033   // Ok, the block is reachable from the default dest.  If the constant we're
5034   // comparing exists in one of the other edges, then we can constant fold ICI
5035   // and zap it.
5036   if (SI->findCaseValue(Cst) != SI->case_default()) {
5037     Value *V;
5038     if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
// NOTE(review): original lines 5039 and 5041 (the two assignments to V —
// false for ICMP_EQ on the default path, true otherwise) are elided by this
// extraction; verify against upstream.
5040     else
5042 
5043     ICI->replaceAllUsesWith(V);
5044     ICI->eraseFromParent();
5045     // BB is now empty, so it is likely to simplify away.
5046     return requestResimplify();
5047   }
5048 
5049   // The use of the icmp has to be in the 'end' block, by the only PHI node in
5050   // the block.
5051   BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5052   PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5053   if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
// NOTE(review): original line 5054 (the remaining clause of this condition)
// is elided by the extraction.
5055     return false;
5056 
5057   // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5058   // true in the PHI.
5059   Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5060   Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5061 
5062   if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5063     std::swap(DefaultCst, NewCst);
5064 
5065   // Replace ICI (which is used by the PHI for the default value) with true or
5066   // false depending on if it is EQ or NE.
5067   ICI->replaceAllUsesWith(DefaultCst);
5068   ICI->eraseFromParent();
5069 
5070   SmallVector<DominatorTree::UpdateType, 2> Updates;
5071 
5072   // Okay, the switch goes to this block on a default value.  Add an edge from
5073   // the switch to the merge point on the compared value.
5074   BasicBlock *NewBB =
5075       BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5076   {
// Scope for the profile-update wrapper so its destructor writes the updated
// weights back to the switch metadata before we continue.
5077     SwitchInstProfUpdateWrapper SIW(*SI);
5078     auto W0 = SIW.getSuccessorWeight(0);
// NOTE(review): original line 5079 (the declaration of NewW) is elided by
// the extraction.
5080     if (W0) {
5081       NewW = ((uint64_t(*W0) + 1) >> 1);
5082       SIW.setSuccessorWeight(0, *NewW);
5083     }
5084     SIW.addCase(Cst, NewBB, NewW);
5085     if (DTU)
5086       Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5087   }
5088 
5089   // NewBB branches to the phi block, add the uncond branch and the phi entry.
5090   Builder.SetInsertPoint(NewBB);
5091   Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5092   Builder.CreateBr(SuccBlock);
5093   PHIUse->addIncoming(NewCst, NewBB);
5094   if (DTU) {
5095     Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5096     DTU->applyUpdates(Updates);
5097   }
5098   return true;
5099 }
5100
5101/// The specified branch is a conditional branch.
5102/// Check to see if it is branching on an or/and chain of icmp instructions, and
5103/// fold it into a switch instruction if so.
5104bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5105 IRBuilder<> &Builder,
5106 const DataLayout &DL) {
5108 if (!Cond)
5109 return false;
5110
5111 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5112 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5113 // 'setne's and'ed together, collect them.
5114
5115 // Try to gather values from a chain of and/or to be turned into a switch
5116 ConstantComparesGatherer ConstantCompare(Cond, DL);
5117 // Unpack the result
5118 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5119 Value *CompVal = ConstantCompare.CompValue;
5120 unsigned UsedICmps = ConstantCompare.UsedICmps;
5121 Value *ExtraCase = ConstantCompare.Extra;
5122 bool TrueWhenEqual = ConstantCompare.IsEq;
5123
5124 // If we didn't have a multiply compared value, fail.
5125 if (!CompVal)
5126 return false;
5127
5128 // Avoid turning single icmps into a switch.
5129 if (UsedICmps <= 1)
5130 return false;
5131
5132 // There might be duplicate constants in the list, which the switch
5133 // instruction can't handle, remove them now.
5134 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5135 Values.erase(llvm::unique(Values), Values.end());
5136
5137 // If Extra was used, we require at least two switch values to do the
5138 // transformation. A switch with one value is just a conditional branch.
5139 if (ExtraCase && Values.size() < 2)
5140 return false;
5141
5142 // TODO: Preserve branch weight metadata, similarly to how
5143 // foldValueComparisonIntoPredecessors preserves it.
5144
5145 // Figure out which block is which destination.
5146 BasicBlock *DefaultBB = BI->getSuccessor(1);
5147 BasicBlock *EdgeBB = BI->getSuccessor(0);
5148 if (!TrueWhenEqual)
5149 std::swap(DefaultBB, EdgeBB);
5150
5151 BasicBlock *BB = BI->getParent();
5152
5153 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5154 << " cases into SWITCH. BB is:\n"
5155 << *BB);
5156
5157 SmallVector<DominatorTree::UpdateType, 2> Updates;
5158
5159 // If there are any extra values that couldn't be folded into the switch
5160 // then we evaluate them with an explicit branch first. Split the block
5161 // right before the condbr to handle it.
5162 if (ExtraCase) {
5163 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5164 /*MSSAU=*/nullptr, "switch.early.test");
5165
5166 // Remove the uncond branch added to the old block.
5167 Instruction *OldTI = BB->getTerminator();
5168 Builder.SetInsertPoint(OldTI);
5169
5170 // There can be an unintended UB if extra values are Poison. Before the
5171 // transformation, extra values may not be evaluated according to the
5172 // condition, and it will not raise UB. But after transformation, we are
5173 // evaluating extra values before checking the condition, and it will raise
5174 // UB. It can be solved by adding freeze instruction to extra values.
5175 AssumptionCache *AC = Options.AC;
5176
5177 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5178 ExtraCase = Builder.CreateFreeze(ExtraCase);
5179
5180 if (TrueWhenEqual)
5181 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
5182 else
5183 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5184
5185 OldTI->eraseFromParent();
5186
5187 if (DTU)
5188 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5189
5190 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5191 // for the edge we just added.
5192 addPredecessorToBlock(EdgeBB, BB, NewBB);
5193
5194 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5195 << "\nEXTRABB = " << *BB);
5196 BB = NewBB;
5197 }
5198
5199 Builder.SetInsertPoint(BI);
5200 // Convert pointer to int before we switch.
5201 if (CompVal->getType()->isPointerTy()) {
5202 CompVal = Builder.CreatePtrToInt(
5203 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5204 }
5205
5206 // Create the new switch instruction now.
5207 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5208
5209 // Add all of the 'cases' to the switch instruction.
5210 for (ConstantInt *Val : Values)
5211 New->addCase(Val, EdgeBB);
5212
5213 // We added edges from PI to the EdgeBB. As such, if there were any
5214 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5215 // the number of edges added.
5216 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5217 PHINode *PN = cast<PHINode>(BBI);
5218 Value *InVal = PN->getIncomingValueForBlock(BB);
5219 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5220 PN->addIncoming(InVal, BB);
5221 }
5222
5223 // Erase the old branch instruction.
5225 if (DTU)
5226 DTU->applyUpdates(Updates);
5227
5228 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5229 return true;
5230}
5231
5232bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5233 if (isa<PHINode>(RI->getValue()))
5234 return simplifyCommonResume(RI);
5235 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5236 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5237 // The resume must unwind the exception that caused control to branch here.
5238 return simplifySingleResume(RI);
5239
5240 return false;
5241}
5242
5243// Check if cleanup block is empty
5245 for (Instruction &I : R) {
5246 auto *II = dyn_cast<IntrinsicInst>(&I);
5247 if (!II)
5248 return false;
5249
5250 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5251 switch (IntrinsicID) {
5252 case Intrinsic::dbg_declare:
5253 case Intrinsic::dbg_value:
5254 case Intrinsic::dbg_label:
5255 case Intrinsic::lifetime_end:
5256 break;
5257 default:
5258 return false;
5259 }
5260 }
5261 return true;
5262}
5263
5264// Simplify resume that is shared by several landing pads (phi of landing pad).
5265bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5266 BasicBlock *BB = RI->getParent();
5267
5268 // Check that there are no other instructions except for debug and lifetime
5269 // intrinsics between the phi's and resume instruction.
5270 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5271 BB->getTerminator()->getIterator())))
5272 return false;
5273
5274 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5275 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5276
5277 // Check incoming blocks to see if any of them are trivial.
5278 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5279 Idx++) {
5280 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5281 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5282
5283 // If the block has other successors, we can not delete it because
5284 // it has other dependents.
5285 if (IncomingBB->getUniqueSuccessor() != BB)
5286 continue;
5287
5288 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5289 // Not the landing pad that caused the control to branch here.
5290 if (IncomingValue != LandingPad)
5291 continue;
5292
5294 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5295 TrivialUnwindBlocks.insert(IncomingBB);
5296 }
5297
5298 // If no trivial unwind blocks, don't do any simplifications.
5299 if (TrivialUnwindBlocks.empty())
5300 return false;
5301
5302 // Turn all invokes that unwind here into calls.
5303 for (auto *TrivialBB : TrivialUnwindBlocks) {
5304 // Blocks that will be simplified should be removed from the phi node.
5305 // Note there could be multiple edges to the resume block, and we need
5306 // to remove them all.
5307 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5308 BB->removePredecessor(TrivialBB, true);
5309
5310 for (BasicBlock *Pred :
5312 removeUnwindEdge(Pred, DTU);
5313 ++NumInvokes;
5314 }
5315
5316 // In each SimplifyCFG run, only the current processed block can be erased.
5317 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5318 // of erasing TrivialBB, we only remove the branch to the common resume
5319 // block so that we can later erase the resume block since it has no
5320 // predecessors.
5321 TrivialBB->getTerminator()->eraseFromParent();
5322 new UnreachableInst(RI->getContext(), TrivialBB);
5323 if (DTU)
5324 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5325 }
5326
5327 // Delete the resume block if all its predecessors have been removed.
5328 if (pred_empty(BB))
5329 DeleteDeadBlock(BB, DTU);
5330
5331 return !TrivialUnwindBlocks.empty();
5332}
5333
5334// Simplify resume that is only used by a single (non-phi) landing pad.
5335bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5336 BasicBlock *BB = RI->getParent();
5337 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5338 assert(RI->getValue() == LPInst &&
5339 "Resume must unwind the exception that caused control to here");
5340
5341 // Check that there are no other instructions except for debug intrinsics.
5343 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5344 return false;
5345
5346 // Turn all invokes that unwind here into calls and delete the basic block.
5347 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5348 removeUnwindEdge(Pred, DTU);
5349 ++NumInvokes;
5350 }
5351
5352 // The landingpad is now unreachable. Zap it.
5353 DeleteDeadBlock(BB, DTU);
5354 return true;
5355}
5356
5358 // If this is a trivial cleanup pad that executes no instructions, it can be
5359 // eliminated. If the cleanup pad continues to the caller, any predecessor
5360 // that is an EH pad will be updated to continue to the caller and any
5361 // predecessor that terminates with an invoke instruction will have its invoke
5362 // instruction converted to a call instruction. If the cleanup pad being
5363 // simplified does not continue to the caller, each predecessor will be
5364 // updated to continue to the unwind destination of the cleanup pad being
5365 // simplified.
5366 BasicBlock *BB = RI->getParent();
5367 CleanupPadInst *CPInst = RI->getCleanupPad();
5368 if (CPInst->getParent() != BB)
5369 // This isn't an empty cleanup.
5370 return false;
5371
5372 // We cannot kill the pad if it has multiple uses. This typically arises
5373 // from unreachable basic blocks.
5374 if (!CPInst->hasOneUse())
5375 return false;
5376
5377 // Check that there are no other instructions except for benign intrinsics.
5379 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5380 return false;
5381
5382 // If the cleanup return we are simplifying unwinds to the caller, this will
5383 // set UnwindDest to nullptr.
5384 BasicBlock *UnwindDest = RI->getUnwindDest();
5385
5386 // We're about to remove BB from the control flow. Before we do, sink any
5387 // PHINodes into the unwind destination. Doing this before changing the
5388 // control flow avoids some potentially slow checks, since we can currently
5389 // be certain that UnwindDest and BB have no common predecessors (since they
5390 // are both EH pads).
5391 if (UnwindDest) {
5392 // First, go through the PHI nodes in UnwindDest and update any nodes that
5393 // reference the block we are removing
5394 for (PHINode &DestPN : UnwindDest->phis()) {
5395 int Idx = DestPN.getBasicBlockIndex(BB);
5396 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5397 assert(Idx != -1);
5398 // This PHI node has an incoming value that corresponds to a control
5399 // path through the cleanup pad we are removing. If the incoming
5400 // value is in the cleanup pad, it must be a PHINode (because we
5401 // verified above that the block is otherwise empty). Otherwise, the
5402 // value is either a constant or a value that dominates the cleanup
5403 // pad being removed.
5404 //
5405 // Because BB and UnwindDest are both EH pads, all of their
5406 // predecessors must unwind to these blocks, and since no instruction
5407 // can have multiple unwind destinations, there will be no overlap in
5408 // incoming blocks between SrcPN and DestPN.
5409 Value *SrcVal = DestPN.getIncomingValue(Idx);
5410 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5411
5412 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5413 for (auto *Pred : predecessors(BB)) {
5414 Value *Incoming =
5415 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5416 DestPN.addIncoming(Incoming, Pred);
5417 }
5418 }
5419
5420 // Sink any remaining PHI nodes directly into UnwindDest.
5421 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5422 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5423 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5424 // If the PHI node has no uses or all of its uses are in this basic
5425 // block (meaning they are debug or lifetime intrinsics), just leave
5426 // it. It will be erased when we erase BB below.
5427 continue;
5428
5429 // Otherwise, sink this PHI node into UnwindDest.
5430 // Any predecessors to UnwindDest which are not already represented
5431 // must be back edges which inherit the value from the path through
5432 // BB. In this case, the PHI value must reference itself.
5433 for (auto *pred : predecessors(UnwindDest))
5434 if (pred != BB)
5435 PN.addIncoming(&PN, pred);
5436 PN.moveBefore(InsertPt);
5437 // Also, add a dummy incoming value for the original BB itself,
5438 // so that the PHI is well-formed until we drop said predecessor.
5439 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5440 }
5441 }
5442
5443 std::vector<DominatorTree::UpdateType> Updates;
5444
5445 // We use make_early_inc_range here because we will remove all predecessors.
5447 if (UnwindDest == nullptr) {
5448 if (DTU) {
5449 DTU->applyUpdates(Updates);
5450 Updates.clear();
5451 }
5452 removeUnwindEdge(PredBB, DTU);
5453 ++NumInvokes;
5454 } else {
5455 BB->removePredecessor(PredBB);
5456 Instruction *TI = PredBB->getTerminator();
5457 TI->replaceUsesOfWith(BB, UnwindDest);
5458 if (DTU) {
5459 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5460 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5461 }
5462 }
5463 }
5464
5465 if (DTU)
5466 DTU->applyUpdates(Updates);
5467
5468 DeleteDeadBlock(BB, DTU);
5469
5470 return true;
5471}
5472
5473// Try to merge two cleanuppads together.
5475 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5476 // with.
5477 BasicBlock *UnwindDest = RI->getUnwindDest();
5478 if (!UnwindDest)
5479 return false;
5480
5481 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5482 // be safe to merge without code duplication.
5483 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5484 return false;
5485
5486 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5487 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5488 if (!SuccessorCleanupPad)
5489 return false;
5490
5491 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5492 // Replace any uses of the successor cleanupad with the predecessor pad
5493 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5494 // funclet bundle operands.
5495 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5496 // Remove the old cleanuppad.
5497 SuccessorCleanupPad->eraseFromParent();
5498 // Now, we simply replace the cleanupret with a branch to the unwind
5499 // destination.
5500 BranchInst::Create(UnwindDest, RI->getParent());
5501 RI->eraseFromParent();
5502
5503 return true;
5504}
5505
5506bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5507 // It is possible to transiantly have an undef cleanuppad operand because we
5508 // have deleted some, but not all, dead blocks.
5509 // Eventually, this block will be deleted.
5510 if (isa<UndefValue>(RI->getOperand(0)))
5511 return false;
5512
5513 if (mergeCleanupPad(RI))
5514 return true;
5515
5516 if (removeEmptyCleanup(RI, DTU))
5517 return true;
5518
5519 return false;
5520}
5521
5522// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5523bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5524 BasicBlock *BB = UI->getParent();
5525
5526 bool Changed = false;
5527
5528 // Ensure that any debug-info records that used to occur after the Unreachable
5529 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5530 // the block.
5532
5533 // Debug-info records on the unreachable inst itself should be deleted, as
5534 // below we delete everything past the final executable instruction.
5535 UI->dropDbgRecords();
5536
5537 // If there are any instructions immediately before the unreachable that can
5538 // be removed, do so.
5539 while (UI->getIterator() != BB->begin()) {
5541 --BBI;
5542
5544 break; // Can not drop any more instructions. We're done here.
5545 // Otherwise, this instruction can be freely erased,
5546 // even if it is not side-effect free.
5547
5548 // Note that deleting EH's here is in fact okay, although it involves a bit
5549 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5550 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5551 // and we can therefore guarantee this block will be erased.
5552
5553 // If we're deleting this, we're deleting any subsequent debug info, so
5554 // delete DbgRecords.
5555 BBI->dropDbgRecords();
5556
5557 // Delete this instruction (any uses are guaranteed to be dead)
5558 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5559 BBI->eraseFromParent();
5560 Changed = true;
5561 }
5562
5563 // If the unreachable instruction is the first in the block, take a gander
5564 // at all of the predecessors of this instruction, and simplify them.
5565 if (&BB->front() != UI)
5566 return Changed;
5567
5568 std::vector<DominatorTree::UpdateType> Updates;
5569
5570 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5571 for (BasicBlock *Predecessor : Preds) {
5572 Instruction *TI = Predecessor->getTerminator();
5573 IRBuilder<> Builder(TI);
5574 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5575 // We could either have a proper unconditional branch,
5576 // or a degenerate conditional branch with matching destinations.
5577 if (all_of(BI->successors(),
5578 [BB](auto *Successor) { return Successor == BB; })) {
5579 new UnreachableInst(TI->getContext(), TI->getIterator());
5580 TI->eraseFromParent();
5581 Changed = true;
5582 } else {
5583 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5584 Value* Cond = BI->getCondition();
5585 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5586 "The destinations are guaranteed to be different here.");
5587 CallInst *Assumption;
5588 if (BI->getSuccessor(0) == BB) {
5589 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5590 Builder.CreateBr(BI->getSuccessor(1));
5591 } else {
5592 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5593 Assumption = Builder.CreateAssumption(Cond);
5594 Builder.CreateBr(BI->getSuccessor(0));
5595 }
5596 if (Options.AC)
5597 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5598
5600 Changed = true;
5601 }
5602 if (DTU)
5603 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5604 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5605 SwitchInstProfUpdateWrapper SU(*SI);
5606 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5607 if (i->getCaseSuccessor() != BB) {
5608 ++i;
5609 continue;
5610 }
5611 BB->removePredecessor(SU->getParent());
5612 i = SU.removeCase(i);
5613 e = SU->case_end();
5614 Changed = true;
5615 }
5616 // Note that the default destination can't be removed!
5617 if (DTU && SI->getDefaultDest() != BB)
5618 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5619 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5620 if (II->getUnwindDest() == BB) {
5621 if (DTU) {
5622 DTU->applyUpdates(Updates);
5623 Updates.clear();
5624 }
5625 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5626 if (!CI->doesNotThrow())
5627 CI->setDoesNotThrow();
5628 Changed = true;
5629 }
5630 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5631 if (CSI->getUnwindDest() == BB) {
5632 if (DTU) {
5633 DTU->applyUpdates(Updates);
5634 Updates.clear();
5635 }
5636 removeUnwindEdge(TI->getParent(), DTU);
5637 Changed = true;
5638 continue;
5639 }
5640
5641 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5642 E = CSI->handler_end();
5643 I != E; ++I) {
5644 if (*I == BB) {
5645 CSI->removeHandler(I);
5646 --I;
5647 --E;
5648 Changed = true;
5649 }
5650 }
5651 if (DTU)
5652 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5653 if (CSI->getNumHandlers() == 0) {
5654 if (CSI->hasUnwindDest()) {
5655 // Redirect all predecessors of the block containing CatchSwitchInst
5656 // to instead branch to the CatchSwitchInst's unwind destination.
5657 if (DTU) {
5658 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5659 Updates.push_back({DominatorTree::Insert,
5660 PredecessorOfPredecessor,
5661 CSI->getUnwindDest()});
5662 Updates.push_back({DominatorTree::Delete,
5663 PredecessorOfPredecessor, Predecessor});
5664 }
5665 }
5666 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5667 } else {
5668 // Rewrite all preds to unwind to caller (or from invoke to call).
5669 if (DTU) {
5670 DTU->applyUpdates(Updates);
5671 Updates.clear();
5672 }
5673 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5674 for (BasicBlock *EHPred : EHPreds)
5675 removeUnwindEdge(EHPred, DTU);
5676 }
5677 // The catchswitch is no longer reachable.
5678 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5679 CSI->eraseFromParent();
5680 Changed = true;
5681 }
5682 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5683 (void)CRI;
5684 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5685 "Expected to always have an unwind to BB.");
5686 if (DTU)
5687 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5688 new UnreachableInst(TI->getContext(), TI->getIterator());
5689 TI->eraseFromParent();
5690 Changed = true;
5691 }
5692 }
5693
5694 if (DTU)
5695 DTU->applyUpdates(Updates);
5696
5697 // If this block is now dead, remove it.
5698 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5699 DeleteDeadBlock(BB, DTU);
5700 return true;
5701 }
5702
5703 return Changed;
5704}
5705
5707 assert(Cases.size() >= 1);
5708
5710 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5711 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5712 return false;
5713 }
5714 return true;
5715}
5716
5718 DomTreeUpdater *DTU,
5719 bool RemoveOrigDefaultBlock = true) {
5720 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5721 auto *BB = Switch->getParent();
5722 auto *OrigDefaultBlock = Switch->getDefaultDest();
5723 if (RemoveOrigDefaultBlock)
5724 OrigDefaultBlock->removePredecessor(BB);
5725 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5726 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5727 OrigDefaultBlock);
5728 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5730 Switch->setDefaultDest(&*NewDefaultBlock);
5731 if (DTU) {
5733 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
5734 if (RemoveOrigDefaultBlock &&
5735 !is_contained(successors(BB), OrigDefaultBlock))
5736 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5737 DTU->applyUpdates(Updates);
5738 }
5739}
5740
5741/// Turn a switch into an integer range comparison and branch.
5742/// Switches with more than 2 destinations are ignored.
5743/// Switches with 1 destination are also ignored.
5744bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5745 IRBuilder<> &Builder) {
5746 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5747
5748 bool HasDefault = !SI->defaultDestUnreachable();
5749
5750 auto *BB = SI->getParent();
5751
5752 // Partition the cases into two sets with different destinations.
5753 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5754 BasicBlock *DestB = nullptr;
5757
5758 for (auto Case : SI->cases()) {
5759 BasicBlock *Dest = Case.getCaseSuccessor();
5760 if (!DestA)
5761 DestA = Dest;
5762 if (Dest == DestA) {
5763 CasesA.push_back(Case.getCaseValue());
5764 continue;
5765 }
5766 if (!DestB)
5767 DestB = Dest;
5768 if (Dest == DestB) {
5769 CasesB.push_back(Case.getCaseValue());
5770 continue;
5771 }
5772 return false; // More than two destinations.
5773 }
5774 if (!DestB)
5775 return false; // All destinations are the same and the default is unreachable
5776
5777 assert(DestA && DestB &&
5778 "Single-destination switch should have been folded.");
5779 assert(DestA != DestB);
5780 assert(DestB != SI->getDefaultDest());
5781 assert(!CasesB.empty() && "There must be non-default cases.");
5782 assert(!CasesA.empty() || HasDefault);
5783
5784 // Figure out if one of the sets of cases form a contiguous range.
5785 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5786 BasicBlock *ContiguousDest = nullptr;
5787 BasicBlock *OtherDest = nullptr;
5788 if (!CasesA.empty() && casesAreContiguous(CasesA)) {
5789 ContiguousCases = &CasesA;
5790 ContiguousDest = DestA;
5791 OtherDest = DestB;
5792 } else if (casesAreContiguous(CasesB)) {
5793 ContiguousCases = &CasesB;
5794 ContiguousDest = DestB;
5795 OtherDest = DestA;
5796 } else
5797 return false;
5798
5799 // Start building the compare and branch.
5800
5801 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5802 Constant *NumCases =
5803 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5804
5805 Value *Sub = SI->getCondition();
5806 if (!Offset->isNullValue())
5807 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5808
5809 Value *Cmp;
5810 // If NumCases overflowed, then all possible values jump to the successor.
5811 if (NumCases->isNullValue() && !ContiguousCases->empty())
5812 Cmp = ConstantInt::getTrue(SI->getContext());
5813 else
5814 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5815 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5816
5817 // Update weight for the newly-created conditional branch.
5818 if (hasBranchWeightMD(*SI)) {
5819 SmallVector<uint64_t, 8> Weights;
5820 getBranchWeights(SI, Weights);
5821 if (Weights.size() == 1 + SI->getNumCases()) {
5822 uint64_t TrueWeight = 0;
5823 uint64_t FalseWeight = 0;
5824 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5825 if (SI->getSuccessor(I) == ContiguousDest)
5826 TrueWeight += Weights[I];
5827 else
5828 FalseWeight += Weights[I];
5829 }
5830 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5831 TrueWeight /= 2;
5832 FalseWeight /= 2;
5833 }
5834 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5835 }
5836 }
5837
5838 // Prune obsolete incoming values off the successors' PHI nodes.
5839 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5840 unsigned PreviousEdges = ContiguousCases->size();
5841 if (ContiguousDest == SI->getDefaultDest())
5842 ++PreviousEdges;
5843 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5844 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5845 }
5846 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5847 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5848 if (OtherDest == SI->getDefaultDest())
5849 ++PreviousEdges;
5850 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5851 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5852 }
5853
5854 // Clean up the default block - it may have phis or other instructions before
5855 // the unreachable terminator.
5856 if (!HasDefault)
5858
5859 auto *UnreachableDefault = SI->getDefaultDest();
5860
5861 // Drop the switch.
5862 SI->eraseFromParent();
5863
5864 if (!HasDefault && DTU)
5865 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5866
5867 return true;
5868}
5869
5870/// Compute masked bits for the condition of a switch
5871/// and use it to remove dead cases.
5873 AssumptionCache *AC,
5874 const DataLayout &DL) {
5875 Value *Cond = SI->getCondition();
5876 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
5877
5878 // We can also eliminate cases by determining that their values are outside of
5879 // the limited range of the condition based on how many significant (non-sign)
5880 // bits are in the condition value.
5881 unsigned MaxSignificantBitsInCond =
5883
5884 // Gather dead cases.
5886 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5887 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5888 for (const auto &Case : SI->cases()) {
5889 auto *Successor = Case.getCaseSuccessor();
5890 if (DTU) {
5891 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
5892 if (Inserted)
5893 UniqueSuccessors.push_back(Successor);
5894 ++It->second;
5895 }
5896 const APInt &CaseVal = Case.getCaseValue()->getValue();
5897 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5898 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5899 DeadCases.push_back(Case.getCaseValue());
5900 if (DTU)
5901 --NumPerSuccessorCases[Successor];
5902 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5903 << " is dead.\n");
5904 }
5905 }
5906
5907 // If we can prove that the cases must cover all possible values, the
5908 // default destination becomes dead and we can remove it. If we know some
5909 // of the bits in the value, we can use that to more precisely compute the
5910 // number of possible unique case values.
5911 bool HasDefault = !SI->defaultDestUnreachable();
5912 const unsigned NumUnknownBits =
5913 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5914 assert(NumUnknownBits <= Known.getBitWidth());
5915 if (HasDefault && DeadCases.empty() &&
5916 NumUnknownBits < 64 /* avoid overflow */) {
5917 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5918 if (SI->getNumCases() == AllNumCases) {
5920 return true;
5921 }
5922 // When only one case value is missing, replace default with that case.
5923 // Eliminating the default branch will provide more opportunities for
5924 // optimization, such as lookup tables.
5925 if (SI->getNumCases() == AllNumCases - 1) {
5926 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5927 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
5928 if (CondTy->getIntegerBitWidth() > 64 ||
5929 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5930 return false;
5931
5932 uint64_t MissingCaseVal = 0;
5933 for (const auto &Case : SI->cases())
5934 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5935 auto *MissingCase =
5936 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
5938 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
5939 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5940 SIW.setSuccessorWeight(0, 0);
5941 return true;
5942 }
5943 }
5944
5945 if (DeadCases.empty())
5946 return false;
5947
5949 for (ConstantInt *DeadCase : DeadCases) {
5950 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5951 assert(CaseI != SI->case_default() &&
5952 "Case was not found. Probably mistake in DeadCases forming.");
5953 // Prune unused values from PHI nodes.
5954 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5955 SIW.removeCase(CaseI);
5956 }
5957
5958 if (DTU) {
5959 std::vector<DominatorTree::UpdateType> Updates;
5960 for (auto *Successor : UniqueSuccessors)
5961 if (NumPerSuccessorCases[Successor] == 0)
5962 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5963 DTU->applyUpdates(Updates);
5964 }
5965
5966 return true;
5967}
5968
5969/// If BB would be eligible for simplification by
5970/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5971/// by an unconditional branch), look at the phi node for BB in the successor
5972/// block and see if the incoming value is equal to CaseValue. If so, return
5973/// the phi node, and set PhiIndex to BB's index in the phi node.
5975 BasicBlock *BB, int *PhiIndex) {
5976 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
5977 return nullptr; // BB must be empty to be a candidate for simplification.
5978 if (!BB->getSinglePredecessor())
5979 return nullptr; // BB must be dominated by the switch.
5980
5982 if (!Branch || !Branch->isUnconditional())
5983 return nullptr; // Terminator must be unconditional branch.
5984
5985 BasicBlock *Succ = Branch->getSuccessor(0);
5986
5987 for (PHINode &PHI : Succ->phis()) {
5988 int Idx = PHI.getBasicBlockIndex(BB);
5989 assert(Idx >= 0 && "PHI has no entry for predecessor?");
5990
5991 Value *InValue = PHI.getIncomingValue(Idx);
5992 if (InValue != CaseValue)
5993 continue;
5994
5995 *PhiIndex = Idx;
5996 return &PHI;
5997 }
5998
5999 return nullptr;
6000}
6001
6002/// Try to forward the condition of a switch instruction to a phi node
6003/// dominated by the switch, if that would mean that some of the destination
6004/// blocks of the switch can be folded away. Return true if a change is made.
6006 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6007
6008 ForwardingNodesMap ForwardingNodes;
6009 BasicBlock *SwitchBlock = SI->getParent();
6010 bool Changed = false;
6011 for (const auto &Case : SI->cases()) {
6012 ConstantInt *CaseValue = Case.getCaseValue();
6013 BasicBlock *CaseDest = Case.getCaseSuccessor();
6014
6015 // Replace phi operands in successor blocks that are using the constant case
6016 // value rather than the switch condition variable:
6017 // switchbb:
6018 // switch i32 %x, label %default [
6019 // i32 17, label %succ
6020 // ...
6021 // succ:
6022 // %r = phi i32 ... [ 17, %switchbb ] ...
6023 // -->
6024 // %r = phi i32 ... [ %x, %switchbb ] ...
6025
6026 for (PHINode &Phi : CaseDest->phis()) {
6027 // This only works if there is exactly 1 incoming edge from the switch to
6028 // a phi. If there is >1, that means multiple cases of the switch map to 1
6029 // value in the phi, and that phi value is not the switch condition. Thus,
6030 // this transform would not make sense (the phi would be invalid because
6031 // a phi can't have different incoming values from the same block).
6032 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6033 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6034 count(Phi.blocks(), SwitchBlock) == 1) {
6035 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6036 Changed = true;
6037 }
6038 }
6039
6040 // Collect phi nodes that are indirectly using this switch's case constants.
6041 int PhiIdx;
6042 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6043 ForwardingNodes[Phi].push_back(PhiIdx);
6044 }
6045
6046 for (auto &ForwardingNode : ForwardingNodes) {
6047 PHINode *Phi = ForwardingNode.first;
6048 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6049 // Check if it helps to fold PHI.
6050 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6051 continue;
6052
6053 for (int Index : Indexes)
6054 Phi->setIncomingValue(Index, SI->getCondition());
6055 Changed = true;
6056 }
6057
6058 return Changed;
6059}
6060
6061/// Return true if the backend will be able to handle
6062/// initializing an array of constants like C.
6064 if (C->isThreadDependent())
6065 return false;
6066 if (C->isDLLImportDependent())
6067 return false;
6068
6069 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6072 return false;
6073
6075 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6076 // materializing the array of constants.
6077 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6078 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6079 return false;
6080 }
6081
6082 if (!TTI.shouldBuildLookupTablesForConstant(C))
6083 return false;
6084
6085 return true;
6086}
6087
6088/// If V is a Constant, return it. Otherwise, try to look up
6089/// its constant value in ConstantPool, returning 0 if it's not there.
6090static Constant *
6093 if (Constant *C = dyn_cast<Constant>(V))
6094 return C;
6095 return ConstantPool.lookup(V);
6096}
6097
6098/// Try to fold instruction I into a constant. This works for
6099/// simple instructions such as binary operations where both operands are
6100/// constant or can be replaced by constants from the ConstantPool. Returns the
6101/// resulting constant on success, 0 otherwise.
6102static Constant *
6106 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6107 if (!A)
6108 return nullptr;
6109 if (A->isAllOnesValue())
6110 return lookupConstant(Select->getTrueValue(), ConstantPool);
6111 if (A->isNullValue())
6112 return lookupConstant(Select->getFalseValue(), ConstantPool);
6113 return nullptr;
6114 }
6115
6117 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6118 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6119 COps.push_back(A);
6120 else
6121 return nullptr;
6122 }
6123
6124 return ConstantFoldInstOperands(I, COps, DL);
6125}
6126
6127/// Try to determine the resulting constant values in phi nodes
6128/// at the common destination basic block, *CommonDest, for one of the case
6129/// destionations CaseDest corresponding to value CaseVal (0 for the default
6130/// case), of a switch instruction SI.
6131static bool
6133 BasicBlock **CommonDest,
6134 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6135 const DataLayout &DL, const TargetTransformInfo &TTI) {
6136 // The block from which we enter the common destination.
6137 BasicBlock *Pred = SI->getParent();
6138
6139 // If CaseDest is empty except for some side-effect free instructions through
6140 // which we can constant-propagate the CaseVal, continue to its successor.
6142 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6143 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6144 if (I.isTerminator()) {
6145 // If the terminator is a simple branch, continue to the next block.
6146 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6147 return false;
6148 Pred = CaseDest;
6149 CaseDest = I.getSuccessor(0);
6150 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6151 // Instruction is side-effect free and constant.
6152
6153 // If the instruction has uses outside this block or a phi node slot for
6154 // the block, it is not safe to bypass the instruction since it would then
6155 // no longer dominate all its uses.
6156 for (auto &Use : I.uses()) {
6157 User *User = Use.getUser();
6159 if (I->getParent() == CaseDest)
6160 continue;
6161 if (PHINode *Phi = dyn_cast<PHINode>(User))
6162 if (Phi->getIncomingBlock(Use) == CaseDest)
6163 continue;
6164 return false;
6165 }
6166
6167 ConstantPool.insert(std::make_pair(&I, C));
6168 } else {
6169 break;
6170 }
6171 }
6172
6173 // If we did not have a CommonDest before, use the current one.
6174 if (!*CommonDest)
6175 *CommonDest = CaseDest;
6176 // If the destination isn't the common one, abort.
6177 if (CaseDest != *CommonDest)
6178 return false;
6179
6180 // Get the values for this case from phi nodes in the destination block.
6181 for (PHINode &PHI : (*CommonDest)->phis()) {
6182 int Idx = PHI.getBasicBlockIndex(Pred);
6183 if (Idx == -1)
6184 continue;
6185
6186 Constant *ConstVal =
6187 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6188 if (!ConstVal)
6189 return false;
6190
6191 // Be conservative about which kinds of constants we support.
6192 if (!validLookupTableConstant(ConstVal, TTI))
6193 return false;
6194
6195 Res.push_back(std::make_pair(&PHI, ConstVal));
6196 }
6197
6198 return Res.size() > 0;
6199}
6200
6201// Helper function used to add CaseVal to the list of cases that generate
6202// Result. Returns the updated number of cases that generate this result.
6203static size_t mapCaseToResult(ConstantInt *CaseVal,
6204 SwitchCaseResultVectorTy &UniqueResults,
6205 Constant *Result) {
6206 for (auto &I : UniqueResults) {
6207 if (I.first == Result) {
6208 I.second.push_back(CaseVal);
6209 return I.second.size();
6210 }
6211 }
6212 UniqueResults.push_back(
6213 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6214 return 1;
6215}
6216
6217// Helper function that initializes a map containing
6218// results for the PHI node of the common destination block for a switch
6219// instruction. Returns false if multiple PHI nodes have been found or if
6220// there is not a common destination block for the switch.
6222 BasicBlock *&CommonDest,
6223 SwitchCaseResultVectorTy &UniqueResults,
6224 Constant *&DefaultResult,
6225 const DataLayout &DL,
6226 const TargetTransformInfo &TTI,
6227 uintptr_t MaxUniqueResults) {
6228 for (const auto &I : SI->cases()) {
6229 ConstantInt *CaseVal = I.getCaseValue();
6230
6231 // Resulting value at phi nodes for this case value.
6232 SwitchCaseResultsTy Results;
6233 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6234 DL, TTI))
6235 return false;
6236
6237 // Only one value per case is permitted.
6238 if (Results.size() > 1)
6239 return false;
6240
6241 // Add the case->result mapping to UniqueResults.
6242 const size_t NumCasesForResult =
6243 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6244
6245 // Early out if there are too many cases for this result.
6246 if (NumCasesForResult > MaxSwitchCasesPerResult)
6247 return false;
6248
6249 // Early out if there are too many unique results.
6250 if (UniqueResults.size() > MaxUniqueResults)
6251 return false;
6252
6253 // Check the PHI consistency.
6254 if (!PHI)
6255 PHI = Results[0].first;
6256 else if (PHI != Results[0].first)
6257 return false;
6258 }
6259 // Find the default result value.
6261 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6262 DL, TTI);
6263 // If the default value is not found abort unless the default destination
6264 // is unreachable.
6265 DefaultResult =
6266 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6267
6268 return DefaultResult || SI->defaultDestUnreachable();
6269}
6270
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    // With a reachable default, chain a second select on the second case;
    // without one, the second result is used directly as the fallback.
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          return Builder.CreateSelect(Cmp, ResultVector[0].first,
                                      DefaultResult);
        }
      }

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits. If so, (Cond - min) must have zeros in all
      // untouched bits exactly when Cond is one of the cases.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where two cases have the same value:
    // compare against both and OR the results.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  // No profitable select form was found.
  return nullptr;
}
6370
6371// Helper function to cleanup a switch instruction that has been converted into
6372// a select, fixing up PHI nodes and basic blocks.
6374 Value *SelectValue,
6375 IRBuilder<> &Builder,
6376 DomTreeUpdater *DTU) {
6377 std::vector<DominatorTree::UpdateType> Updates;
6378
6379 BasicBlock *SelectBB = SI->getParent();
6380 BasicBlock *DestBB = PHI->getParent();
6381
6382 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6383 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6384 Builder.CreateBr(DestBB);
6385
6386 // Remove the switch.
6387
6388 PHI->removeIncomingValueIf(
6389 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6390 PHI->addIncoming(SelectValue, SelectBB);
6391
6392 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6393 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6394 BasicBlock *Succ = SI->getSuccessor(i);
6395
6396 if (Succ == DestBB)
6397 continue;
6398 Succ->removePredecessor(SelectBB);
6399 if (DTU && RemovedSuccessors.insert(Succ).second)
6400 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6401 }
6402 SI->eraseFromParent();
6403 if (DTU)
6404 DTU->applyUpdates(Updates);
6405}
6406
6407/// If a switch is only used to initialize one or more phi nodes in a common
6408/// successor block with only two different constant values, try to replace the
6409/// switch with a select. Returns true if the fold was made.
6411 DomTreeUpdater *DTU, const DataLayout &DL,
6412 const TargetTransformInfo &TTI) {
6413 Value *const Cond = SI->getCondition();
6414 PHINode *PHI = nullptr;
6415 BasicBlock *CommonDest = nullptr;
6416 Constant *DefaultResult;
6417 SwitchCaseResultVectorTy UniqueResults;
6418 // Collect all the cases that will deliver the same value from the switch.
6419 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6420 DL, TTI, /*MaxUniqueResults*/ 2))
6421 return false;
6422
6423 assert(PHI != nullptr && "PHI for value select not found");
6424 Builder.SetInsertPoint(SI);
6425 Value *SelectValue =
6426 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder, DL);
6427 if (!SelectValue)
6428 return false;
6429
6430 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6431 return true;
6432}
6433
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

private:
  // Depending on the switch, there are different alternatives.
  // The kinds below are ordered from cheapest to most expensive; the
  // constructor picks the first one that applies.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // Whether the linear map may signed-wrap; controls nsw on the emitted ops.
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6509
6510SwitchReplacement::SwitchReplacement(
6511 Module &M, uint64_t TableSize, ConstantInt *Offset,
6512 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6513 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
6514 : DefaultValue(DefaultValue) {
6515 assert(Values.size() && "Can't build lookup table without values!");
6516 assert(TableSize >= Values.size() && "Can't fit values in table!");
6517
6518 // If all values in the table are equal, this is that value.
6519 SingleValue = Values.begin()->second;
6520
6521 ValueType = Values.begin()->second->getType();
6522
6523 // Build up the table contents.
6524 SmallVector<Constant *, 64> TableContents(TableSize);
6525 for (const auto &[CaseVal, CaseRes] : Values) {
6526 assert(CaseRes->getType() == ValueType);
6527
6528 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6529 TableContents[Idx] = CaseRes;
6530
6531 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6532 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6533 }
6534
6535 // Fill in any holes in the table with the default result.
6536 if (Values.size() < TableSize) {
6537 assert(DefaultValue &&
6538 "Need a default value to fill the lookup table holes.");
6539 assert(DefaultValue->getType() == ValueType);
6540 for (uint64_t I = 0; I < TableSize; ++I) {
6541 if (!TableContents[I])
6542 TableContents[I] = DefaultValue;
6543 }
6544
6545 // If the default value is poison, all the holes are poison.
6546 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6547
6548 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6549 SingleValue = nullptr;
6550 }
6551
6552 // If each element in the table contains the same value, we only need to store
6553 // that single value.
6554 if (SingleValue) {
6555 Kind = SingleValueKind;
6556 return;
6557 }
6558
6559 // Check if we can derive the value with a linear transformation from the
6560 // table index.
6562 bool LinearMappingPossible = true;
6563 APInt PrevVal;
6564 APInt DistToPrev;
6565 // When linear map is monotonic and signed overflow doesn't happen on
6566 // maximum index, we can attach nsw on Add and Mul.
6567 bool NonMonotonic = false;
6568 assert(TableSize >= 2 && "Should be a SingleValue table.");
6569 // Check if there is the same distance between two consecutive values.
6570 for (uint64_t I = 0; I < TableSize; ++I) {
6571 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6572
6573 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6574 // This is an poison, so it's (probably) a lookup table hole.
6575 // To prevent any regressions from before we switched to using poison as
6576 // the default value, holes will fall back to using the first value.
6577 // This can be removed once we add proper handling for poisons in lookup
6578 // tables.
6579 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6580 }
6581
6582 if (!ConstVal) {
6583 // This is an undef. We could deal with it, but undefs in lookup tables
6584 // are very seldom. It's probably not worth the additional complexity.
6585 LinearMappingPossible = false;
6586 break;
6587 }
6588 const APInt &Val = ConstVal->getValue();
6589 if (I != 0) {
6590 APInt Dist = Val - PrevVal;
6591 if (I == 1) {
6592 DistToPrev = Dist;
6593 } else if (Dist != DistToPrev) {
6594 LinearMappingPossible = false;
6595 break;
6596 }
6597 NonMonotonic |=
6598 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6599 }
6600 PrevVal = Val;
6601 }
6602 if (LinearMappingPossible) {
6603 LinearOffset = cast<ConstantInt>(TableContents[0]);
6604 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6605 APInt M = LinearMultiplier->getValue();
6606 bool MayWrap = true;
6607 if (isIntN(M.getBitWidth(), TableSize - 1))
6608 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6609 LinearMapValWrapped = NonMonotonic || MayWrap;
6610 Kind = LinearMapKind;
6611 return;
6612 }
6613 }
6614
6615 // If the type is integer and the table fits in a register, build a bitmap.
6616 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6618 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6619 for (uint64_t I = TableSize; I > 0; --I) {
6620 TableInt <<= IT->getBitWidth();
6621 // Insert values into the bitmap. Undef values are set to zero.
6622 if (!isa<UndefValue>(TableContents[I - 1])) {
6623 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6624 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6625 }
6626 }
6627 BitMap = ConstantInt::get(M.getContext(), TableInt);
6628 BitMapElementTy = IT;
6629 Kind = BitMapKind;
6630 return;
6631 }
6632
6633 // Store the table in an array.
6634 auto *TableTy = ArrayType::get(ValueType, TableSize);
6635 Initializer = ConstantArray::get(TableTy, TableContents);
6636
6637 Kind = LookupTableKind;
6638}
6639
// Emit the IR that computes the switch result for Index according to the
// strategy Kind chosen by the constructor, and return the resulting value.
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // Every case produces the same constant; no instructions needed.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array items. We will be only loading one
    // value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(ValueType));
    Type *IndexTy = DL.getIndexType(Table->getType());
    auto *ArrayTy = cast<ArrayType>(Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // A zext is non-negative when the max valid index fits in one bit less
      // than the original width (i.e. the sign bit was never set).
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
6713
6714bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6715 uint64_t TableSize,
6716 Type *ElementType) {
6717 auto *IT = dyn_cast<IntegerType>(ElementType);
6718 if (!IT)
6719 return false;
6720 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6721 // are <= 15, we could try to narrow the type.
6722
6723 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6724 if (TableSize >= UINT_MAX / IT->getBitWidth())
6725 return false;
6726 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6727}
6728
6730 const DataLayout &DL) {
6731 // Allow any legal type.
6732 if (TTI.isTypeLegal(Ty))
6733 return true;
6734
6735 auto *IT = dyn_cast<IntegerType>(Ty);
6736 if (!IT)
6737 return false;
6738
6739 // Also allow power of 2 integer types that have at least 8 bits and fit in
6740 // a register. These types are common in frontend languages and targets
6741 // usually support loads of these types.
6742 // TODO: We could relax this to any integer that fits in a register and rely
6743 // on ABI alignment and padding in the table to allow the load to be widened.
6744 // Or we could widen the constants and truncate the load.
6745 unsigned BitWidth = IT->getBitWidth();
6746 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6747 DL.fitsInLegalInteger(IT->getBitWidth());
6748}
6749
6750Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
6751
6752bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
6753
// Density heuristic: do NumCases cover at least 40% of CaseRange?
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Guard the multiplications below against overflowing uint64_t.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Equivalent to NumCases / CaseRange >= MinDensity%.
  return NumCases * 100 >= CaseRange * MinDensity;
}
6765
6767 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6768 uint64_t Range = Diff + 1;
6769 if (Range < Diff)
6770 return false; // Overflow.
6771
6772 return isSwitchDense(Values.size(), Range);
6773}
6774
6775/// Determine whether a lookup table should be built for this switch, based on
6776/// the number of cases, size of the table, and the types of the results.
6777// TODO: We could support larger than legal types by limiting based on the
6778// number of loads required and/or table size. If the constants are small we
6779// could use smaller table entries and extend after the load.
6781 const TargetTransformInfo &TTI,
6782 const DataLayout &DL,
6783 const SmallVector<Type *> &ResultTypes) {
6784 if (SI->getNumCases() > TableSize)
6785 return false; // TableSize overflowed.
6786
6787 bool AllTablesFitInRegister = true;
6788 bool HasIllegalType = false;
6789 for (const auto &Ty : ResultTypes) {
6790 // Saturate this flag to true.
6791 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6792
6793 // Saturate this flag to false.
6794 AllTablesFitInRegister =
6795 AllTablesFitInRegister &&
6796 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
6797
6798 // If both flags saturate, we're done. NOTE: This *only* works with
6799 // saturating flags, and all flags have to saturate first due to the
6800 // non-deterministic behavior of iterating over a dense map.
6801 if (HasIllegalType && !AllTablesFitInRegister)
6802 break;
6803 }
6804
6805 // If each table would fit in a register, we should build it anyway.
6806 if (AllTablesFitInRegister)
6807 return true;
6808
6809 // Don't build a table that doesn't fit in-register if it has illegal types.
6810 if (HasIllegalType)
6811 return false;
6812
6813 return isSwitchDense(SI->getNumCases(), TableSize);
6814}
6815
6817 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6818 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
6819 const DataLayout &DL, const TargetTransformInfo &TTI) {
6820 if (MinCaseVal.isNullValue())
6821 return true;
6822 if (MinCaseVal.isNegative() ||
6823 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6824 !HasDefaultResults)
6825 return false;
6826 return all_of(ResultTypes, [&](const auto &ResultType) {
6827 return SwitchReplacement::wouldFitInRegister(
6828 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
6829 });
6830}
6831
6832/// Try to reuse the switch table index compare. Following pattern:
6833/// \code
6834/// if (idx < tablesize)
6835/// r = table[idx]; // table does not contain default_value
6836/// else
6837/// r = default_value;
6838/// if (r != default_value)
6839/// ...
6840/// \endcode
6841/// Is optimized to:
6842/// \code
6843/// cond = idx < tablesize;
6844/// if (cond)
6845/// r = table[idx];
6846/// else
6847/// r = default_value;
6848/// if (cond)
6849/// ...
6850/// \endcode
6851/// Jump threading will then eliminate the second if(cond).
///
/// \param PhiUser          user of the lookup-table phi; only a compare
///                         instruction is a candidate for reuse.
/// \param PhiBlock         block containing the phi fed by the lookup.
/// \param RangeCheckBranch conditional branch guarding the table lookup.
/// \param DefaultValue     the phi's result for the default destination.
/// \param Values           (case value, phi result) pairs for all cases.
6853 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6854 Constant *DefaultValue,
6855 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
// Only a compare instruction using the phi can be rewritten here.
6857 if (!CmpInst)
6858 return;
6859
6860 // We require that the compare is in the same block as the phi so that jump
6861 // threading can do its work afterwards.
6862 if (CmpInst->getParent() != PhiBlock)
6863 return;
6864
// The other compare operand must be a constant so the compare can be folded
// against the default and case results below.
6866 if (!CmpOp1)
6867 return;
6868
6869 Value *RangeCmp = RangeCheckBranch->getCondition();
6870 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6871 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6872
6873 // Check if the compare with the default value is constant true or false.
6874 const DataLayout &DL = PhiBlock->getDataLayout();
6876 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6877 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6878 return;
6879
6880 // Check if the compare with the case values is distinct from the default
6881 // compare result.
6882 for (auto ValuePair : Values) {
6884 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6885 if (!CaseConst || CaseConst == DefaultConst ||
6886 (CaseConst != TrueConst && CaseConst != FalseConst))
6887 return;
6888 }
6889
6890 // Check if the branch instruction dominates the phi node. It's a simple
6891 // dominance check, but sufficient for our needs.
6892 // Although this check is invariant in the calling loops, it's better to do it
6893 // at this late stage. Practically we do it at most once for a switch.
6894 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6895 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6896 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6897 return;
6898 }
6899
6900 if (DefaultConst == FalseConst) {
6901 // The compare yields the same result. We can replace it.
6902 CmpInst->replaceAllUsesWith(RangeCmp);
6903 ++NumTableCmpReuses;
6904 } else {
6905 // The compare yields the same result, just inverted. We can replace it.
// XOR with 1 inverts the i1 range-check condition in place.
6906 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6907 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6908 RangeCheckBranch->getIterator());
6909 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6910 ++NumTableCmpReuses;
6911 }
6912}
6913
6914/// If the switch is only used to initialize one or more phi nodes in a common
6915/// successor block with different constant values, replace the switch with
6916/// lookup tables.
///
/// Returns true if the switch was replaced. On success the switch is erased
/// and CFG updates are queued through \p DTU if non-null.
6918 DomTreeUpdater *DTU, const DataLayout &DL,
6919 const TargetTransformInfo &TTI) {
6920 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6921
6922 BasicBlock *BB = SI->getParent();
6923 Function *Fn = BB->getParent();
6924
6925 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6926 // split off a dense part and build a lookup table for that.
6927
6928 // FIXME: This creates arrays of GEPs to constant strings, which means each
6929 // GEP needs a runtime relocation in PIC code. We should just build one big
6930 // string and lookup indices into that.
6931
6932 // Ignore switches with less than three cases. Lookup tables will not make
6933 // them faster, so we don't analyze them.
6934 if (SI->getNumCases() < 3)
6935 return false;
6936
6937 // Figure out the corresponding result for each case value and phi node in the
6938 // common destination, as well as the min and max case values.
6939 assert(!SI->cases().empty());
6940 SwitchInst::CaseIt CI = SI->case_begin();
6941 ConstantInt *MinCaseVal = CI->getCaseValue();
6942 ConstantInt *MaxCaseVal = CI->getCaseValue();
6943
6944 BasicBlock *CommonDest = nullptr;
6945
6946 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6948
6950 SmallVector<Type *> ResultTypes;
6952
// Walk every case: track min/max case values (signed comparison) and collect
// the constant result each case contributes to every phi in CommonDest.
6953 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6954 ConstantInt *CaseVal = CI->getCaseValue();
6955 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6956 MinCaseVal = CaseVal;
6957 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6958 MaxCaseVal = CaseVal;
6959
6960 // Resulting value at phi nodes for this case value.
6962 ResultsTy Results;
6963 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6964 Results, DL, TTI))
6965 return false;
6966
6967 // Append the result and result types from this case to the list for each
6968 // phi.
6969 for (const auto &I : Results) {
6970 PHINode *PHI = I.first;
6971 Constant *Value = I.second;
6972 auto [It, Inserted] = ResultLists.try_emplace(PHI);
6973 if (Inserted)
6974 PHIs.push_back(PHI);
6975 It->second.push_back(std::make_pair(CaseVal, Value));
6976 ResultTypes.push_back(PHI->getType());
6977 }
6978 }
6979
6980 // If the table has holes, we need a constant result for the default case
6981 // or a bitmask that fits in a register.
6982 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6983 bool HasDefaultResults =
6984 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6985 DefaultResultsList, DL, TTI);
6986 for (const auto &I : DefaultResultsList) {
6987 PHINode *PHI = I.first;
6988 Constant *Result = I.second;
6989 DefaultResults[PHI] = Result;
6990 }
6991
6992 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
6993 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6994 uint64_t TableSize;
6995 ConstantInt *TableIndexOffset;
6996 if (UseSwitchConditionAsTableIndex) {
// Index directly with the switch condition: table spans [0, MaxCaseVal].
6997 TableSize = MaxCaseVal->getLimitedValue() + 1;
6998 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
6999 } else {
// Bias by MinCaseVal so the table spans [0, MaxCaseVal - MinCaseVal].
7000 TableSize =
7001 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7002
7003 TableIndexOffset = MinCaseVal;
7004 }
7005
7006 // If the default destination is unreachable, or if the lookup table covers
7007 // all values of the conditional variable, branch directly to the lookup table
7008 // BB. Otherwise, check that the condition is within the case range.
7009 uint64_t NumResults = ResultLists[PHIs[0]].size();
7010 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7011
7012 bool TableHasHoles = (NumResults < TableSize);
7013
7014 // If the table has holes but the default destination doesn't produce any
7015 // constant results, the lookup table entries corresponding to the holes will
7016 // contain poison.
7017 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7018
7019 // If the default destination doesn't produce a constant result but is still
7020 // reachable, and the lookup table has holes, we need to use a mask to
7021 // determine if the current index should load from the lookup table or jump
7022 // to the default case.
7023 // The mask is unnecessary if the table has holes but the default destination
7024 // is unreachable, as in that case the holes must also be unreachable.
7025 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7026 if (NeedMask) {
7027 // As an extra penalty for the validity test we require more cases.
7028 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7029 return false;
// The hole-check bitmask must fit in a legal integer register.
7030 if (!DL.fitsInLegalInteger(TableSize))
7031 return false;
7032 }
7033
7034 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7035 return false;
7036
7037 // Compute the table index value.
7038 Value *TableIndex;
7039 if (UseSwitchConditionAsTableIndex) {
7040 TableIndex = SI->getCondition();
7041 if (HasDefaultResults) {
7042 // Grow the table to cover all possible index values to avoid the range
7043 // check. It will use the default result to fill in the table hole later,
7044 // so make sure it exists.
7045 ConstantRange CR =
7046 computeConstantRange(TableIndex, /* ForSigned */ false);
7047 // Growing the table shouldn't have any size impact; this is checked via
7048 // wouldFitInRegister.
7049 // TODO: Consider growing the table also when it doesn't fit in a register
7050 // if no optsize is specified.
7051 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7052 if (!CR.isUpperWrapped() &&
7053 all_of(ResultTypes, [&](const auto &ResultType) {
7054 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7055 ResultType);
7056 })) {
7057 // There may be some case index larger than the UpperBound (unreachable
7058 // case), so make sure the table size does not get smaller.
7059 TableSize = std::max(UpperBound, TableSize);
7060 // The default branch is unreachable after we enlarge the lookup table.
7061 // Adjust DefaultIsReachable to reuse code path.
7062 DefaultIsReachable = false;
7063 }
7064 }
7065 }
7066
7067 // Keep track of the switch replacement for each phi
7069 for (PHINode *PHI : PHIs) {
7070 const auto &ResultList = ResultLists[PHI];
7071
7072 Type *ResultType = ResultList.begin()->second->getType();
7073 // Use any value to fill the lookup table holes.
7075 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7076 StringRef FuncName = Fn->getName();
7077 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7078 ResultList, DefaultVal, DL, FuncName);
7079 PhiToReplacementMap.insert({PHI, Replacement});
7080 }
7081
7082 bool AnyLookupTables = any_of(
7083 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7084
7085 // A few conditions prevent the generation of lookup tables:
7086 // 1. The target does not support lookup tables.
7087 // 2. The "no-jump-tables" function attribute is set.
7088 // However, these objections do not apply to other switch replacements, like
7089 // the bitmap, so we only stop here if any of these conditions are met and we
7090 // want to create a LUT. Otherwise, continue with the switch replacement.
7091 if (AnyLookupTables &&
7092 (!TTI.shouldBuildLookupTables() ||
7093 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7094 return false;
7095
7096 Builder.SetInsertPoint(SI);
7097 // TableIndex is the switch condition - TableIndexOffset if we don't
7098 // use the condition directly
7099 if (!UseSwitchConditionAsTableIndex) {
7100 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7101 // we can try to attach nsw.
7102 bool MayWrap = true;
7103 if (!DefaultIsReachable) {
7104 APInt Res =
7105 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7106 (void)Res;
7107 }
7108 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7109 "switch.tableidx", /*HasNUW =*/false,
7110 /*HasNSW =*/!MayWrap);
7111 }
7112
7113 std::vector<DominatorTree::UpdateType> Updates;
7114
7115 // Compute the maximum table size representable by the integer type we are
7116 // switching upon.
7117 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7118 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7119 assert(MaxTableSize >= TableSize &&
7120 "It is impossible for a switch to have more entries than the max "
7121 "representable value of its input integer type's size.");
7122
7123 // Create the BB that does the lookups.
7124 Module &Mod = *CommonDest->getParent()->getParent();
7125 BasicBlock *LookupBB = BasicBlock::Create(
7126 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7127
7128 BranchInst *RangeCheckBranch = nullptr;
7129
7130 Builder.SetInsertPoint(SI);
7131 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7132 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7133 Builder.CreateBr(LookupBB);
7134 if (DTU)
7135 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7136 // Note: We call removePredecessor later since we need to be able to get the
7137 // PHI value for the default case in case we're using a bit mask.
7138 } else {
// Guard the lookup with an unsigned range check against the table size.
7139 Value *Cmp = Builder.CreateICmpULT(
7140 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7141 RangeCheckBranch =
7142 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7143 if (DTU)
7144 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7145 }
7146
7147 // Populate the BB that does the lookups.
7148 Builder.SetInsertPoint(LookupBB);
7149
7150 if (NeedMask) {
7151 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7152 // re-purposed to do the hole check, and we create a new LookupBB.
7153 BasicBlock *MaskBB = LookupBB;
7154 MaskBB->setName("switch.hole_check");
7155 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7156 CommonDest->getParent(), CommonDest);
7157
7158 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7159 // unnecessary illegal types.
7160 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7161 APInt MaskInt(TableSizePowOf2, 0);
7162 APInt One(TableSizePowOf2, 1);
7163 // Build bitmask; fill in a 1 bit for every case.
7164 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7165 for (const auto &Result : ResultList) {
7166 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7167 .getLimitedValue();
7168 MaskInt |= One << Idx;
7169 }
7170 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7171
7172 // Get the TableIndex'th bit of the bitmask.
7173 // If this bit is 0 (meaning hole) jump to the default destination,
7174 // else continue with table lookup.
7175 IntegerType *MapTy = TableMask->getIntegerType();
7176 Value *MaskIndex =
7177 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7178 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7179 Value *LoBit = Builder.CreateTrunc(
7180 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7181 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7182 if (DTU) {
7183 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7184 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7185 }
7186 Builder.SetInsertPoint(LookupBB);
7187 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7188 }
7189
7190 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7191 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7192 // do not delete PHINodes here.
7193 SI->getDefaultDest()->removePredecessor(BB,
7194 /*KeepOneInputPHIs=*/true);
7195 if (DTU)
7196 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7197 }
7198
// Materialize the replacement for each phi and rewire it to LookupBB.
7199 for (PHINode *PHI : PHIs) {
7200 const ResultListTy &ResultList = ResultLists[PHI];
7201 auto Replacement = PhiToReplacementMap.at(PHI);
7202 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7203 // Do a small peephole optimization: re-use the switch table compare if
7204 // possible.
7205 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7206 BasicBlock *PhiBlock = PHI->getParent();
7207 // Search for compare instructions which use the phi.
7208 for (auto *User : PHI->users()) {
7209 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7210 Replacement.getDefaultValue(), ResultList);
7211 }
7212 }
7213
7214 PHI->addIncoming(Result, LookupBB);
7215 }
7216
7217 Builder.CreateBr(CommonDest);
7218 if (DTU)
7219 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7220
7221 // Remove the switch.
7222 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7223 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7224 BasicBlock *Succ = SI->getSuccessor(i);
7225
7226 if (Succ == SI->getDefaultDest())
7227 continue;
7228 Succ->removePredecessor(BB);
7229 if (DTU && RemovedSuccessors.insert(Succ).second)
7230 Updates.push_back({DominatorTree::Delete, BB, Succ});
7231 }
7232 SI->eraseFromParent();
7233
7234 if (DTU)
7235 DTU->applyUpdates(Updates);
7236
7237 if (NeedMask)
7238 ++NumLookupTablesHoles;
7239 return true;
7240}
7241
7242/// Try to transform a switch that has "holes" in it to a contiguous sequence
7243/// of cases.
7244///
7245/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7246/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7247///
7248/// This converts a sparse switch into a dense switch which allows better
7249/// lowering and could also allow transforming into a lookup table.
///
/// Returns true if the switch condition and case values were rewritten.
7251 const DataLayout &DL,
7252 const TargetTransformInfo &TTI) {
7253 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7254 if (CondTy->getIntegerBitWidth() > 64 ||
7255 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7256 return false;
7257 // Only bother with this optimization if there are more than 3 switch cases;
7258 // SDAG will only bother creating jump tables for 4 or more cases.
7259 if (SI->getNumCases() < 4)
7260 return false;
7261
7262 // This transform is agnostic to the signedness of the input or case values. We
7263 // can treat the case values as signed or unsigned. We can optimize more common
7264 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7265 // as signed.
7267 for (const auto &C : SI->cases())
7268 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7269 llvm::sort(Values);
7270
7271 // If the switch is already dense, there's nothing useful to do here.
7272 if (isSwitchDense(Values))
7273 return false;
7274
7275 // First, transform the values such that they start at zero and ascend.
7276 int64_t Base = Values[0];
7277 for (auto &V : Values)
7278 V -= (uint64_t)(Base);
7279
7280 // Now we have signed numbers that have been shifted so that, given enough
7281 // precision, there are no negative values. Since the rest of the transform
7282 // is bitwise only, we switch now to an unsigned representation.
7283
7284 // This transform can be done speculatively because it is so cheap - it
7285 // results in a single rotate operation being inserted.
7286
7287 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7288 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7289 // less than 64.
7290 unsigned Shift = 64;
7291 for (auto &V : Values)
7292 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7293 assert(Shift < 64);
7294 if (Shift > 0)
// Divide every rebased value by the common power-of-two stride.
7295 for (auto &V : Values)
7296 V = (int64_t)((uint64_t)V >> Shift);
7297
7298 if (!isSwitchDense(Values))
7299 // Transform didn't create a dense switch.
7300 return false;
7301
7302 // The obvious transform is to shift the switch condition right and emit a
7303 // check that the condition actually cleanly divided by GCD, i.e.
7304 // C & (1 << Shift - 1) == 0
7305 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7306 //
7307 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7308 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7309 // are nonzero then the switch condition will be very large and will hit the
7310 // default case.
7311
7312 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7313 Builder.SetInsertPoint(SI);
// fshl(x, x, BW - Shift) is a rotate-right by Shift of the rebased condition.
7314 Value *Sub =
7315 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7316 Value *Rot = Builder.CreateIntrinsic(
7317 Ty, Intrinsic::fshl,
7318 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7319 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7320
// Rewrite each case value to match the rebased, shifted condition.
7321 for (auto Case : SI->cases()) {
7322 auto *Orig = Case.getCaseValue();
7323 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7324 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7325 }
7326 return true;
7327}
7328
7329/// Tries to transform switch of powers of two to reduce switch range.
7330/// For example, switch like:
7331/// switch (C) { case 1: case 2: case 64: case 128: }
7332/// will be transformed to:
7333/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7334///
7335/// This transformation allows better lowering and may transform the switch
7336/// instruction into a sequence of bit manipulation and a smaller
7337/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7338/// address of the jump target, and indirectly jump to it).
///
/// Returns true if the condition and all case values were rewritten.
7340 const DataLayout &DL,
7341 const TargetTransformInfo &TTI) {
7342 Value *Condition = SI->getCondition();
7343 LLVMContext &Context = SI->getContext();
7344 auto *CondTy = cast<IntegerType>(Condition->getType());
7345
7346 if (CondTy->getIntegerBitWidth() > 64 ||
7347 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7348 return false;
7349
7350 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7351 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7352 {Condition, ConstantInt::getTrue(Context)});
7353 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7354 TTI::TCC_Basic * 2)
7355 return false;
7356
7357 // Only bother with this optimization if there are more than 3 switch cases.
7358 // SDAG will start emitting jump tables for 4 or more cases.
7359 if (SI->getNumCases() < 4)
7360 return false;
7361
7362 // We perform this optimization only for switches with
7363 // unreachable default case.
7364 // This assumption will save us from checking if `Condition` is a power of two.
7365 if (!SI->defaultDestUnreachable())
7366 return false;
7367
7368 // Check that switch cases are powers of two.
7370 for (const auto &Case : SI->cases()) {
7371 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7372 if (llvm::has_single_bit(CaseValue))
7373 Values.push_back(CaseValue);
7374 else
7375 return false;
7376 }
7377
7378 // isSwitchDense requires case values to be sorted.
7379 llvm::sort(Values);
7380 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7381 llvm::countr_zero(Values.front()) + 1))
7382 // Transform is unable to generate dense switch.
7383 return false;
7384
7385 Builder.SetInsertPoint(SI);
7386
7387 // Replace each case with its trailing zeros number.
7388 for (auto &Case : SI->cases()) {
7389 auto *OrigValue = Case.getCaseValue();
7390 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7391 OrigValue->getValue().countr_zero()));
7392 }
7393
7394 // Replace condition with its trailing zeros number.
// The second argument (true) asserts the input is non-zero, which holds
// because the default destination is unreachable.
7395 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7396 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7397
7398 SI->setCondition(ConditionTrailingZeros);
7399
7400 return true;
7401}
7402
7403/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7404/// the same destination.
///
/// Returns true if the switch (and the cmp intrinsic) were replaced by an
/// icmp + conditional branch. Branch weights are carried over when present.
7406 DomTreeUpdater *DTU) {
7407 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7408 if (!Cmp || !Cmp->hasOneUse())
7409 return false;
7410
7412 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7413 if (!HasWeights)
7414 Weights.resize(4); // Avoid checking HasWeights everywhere.
7415
7416 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7417 int64_t Res;
7418 BasicBlock *Succ, *OtherSucc;
7419 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7420 BasicBlock *Unreachable = nullptr;
7421
7422 if (SI->getNumCases() == 2) {
7423 // Find which of 1, 0 or -1 is missing (handled by default dest).
7424 SmallSet<int64_t, 3> Missing;
7425 Missing.insert(1);
7426 Missing.insert(0);
7427 Missing.insert(-1);
7428
// Weight index 0 corresponds to the default destination.
7429 Succ = SI->getDefaultDest();
7430 SuccWeight = Weights[0];
7431 OtherSucc = nullptr;
7432 for (auto &Case : SI->cases()) {
7433 std::optional<int64_t> Val =
7434 Case.getCaseValue()->getValue().trySExtValue();
7435 if (!Val)
7436 return false;
7437 if (!Missing.erase(*Val))
7438 return false;
7439 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7440 return false;
7441 OtherSucc = Case.getCaseSuccessor();
7442 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7443 }
7444
7445 assert(Missing.size() == 1 && "Should have one case left");
7446 Res = *Missing.begin();
7447 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7448 // Normalize so that Succ is taken once and OtherSucc twice.
7449 Unreachable = SI->getDefaultDest();
7450 Succ = OtherSucc = nullptr;
7451 for (auto &Case : SI->cases()) {
7452 BasicBlock *NewSucc = Case.getCaseSuccessor();
7453 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7454 if (!OtherSucc || OtherSucc == NewSucc) {
7455 OtherSucc = NewSucc;
7456 OtherSuccWeight += Weight;
7457 } else if (!Succ) {
7458 Succ = NewSucc;
7459 SuccWeight = Weight;
7460 } else if (Succ == NewSucc) {
// Succ appeared twice; swap so the twice-taken block becomes OtherSucc.
7461 std::swap(Succ, OtherSucc);
7462 std::swap(SuccWeight, OtherSuccWeight);
7463 } else
7464 return false;
7465 }
// Determine which of {-1, 0, 1} selects the once-taken successor.
7466 for (auto &Case : SI->cases()) {
7467 std::optional<int64_t> Val =
7468 Case.getCaseValue()->getValue().trySExtValue();
7469 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7470 return false;
7471 if (Case.getCaseSuccessor() == Succ) {
7472 Res = *Val;
7473 break;
7474 }
7475 }
7476 } else {
7477 return false;
7478 }
7479
7480 // Determine predicate for the missing case.
7482 switch (Res) {
7483 case 1:
7484 Pred = ICmpInst::ICMP_UGT;
7485 break;
7486 case 0:
7487 Pred = ICmpInst::ICMP_EQ;
7488 break;
7489 case -1:
7490 Pred = ICmpInst::ICMP_ULT;
7491 break;
7492 }
7493 if (Cmp->isSigned())
7494 Pred = ICmpInst::getSignedPredicate(Pred);
7495
7496 MDNode *NewWeights = nullptr;
7497 if (HasWeights)
7498 NewWeights = MDBuilder(SI->getContext())
7499 .createBranchWeights(SuccWeight, OtherSuccWeight);
7500
7501 BasicBlock *BB = SI->getParent();
7502 Builder.SetInsertPoint(SI->getIterator());
7503 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7504 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7505 SI->getMetadata(LLVMContext::MD_unpredictable));
7506 OtherSucc->removePredecessor(BB);
7507 if (Unreachable)
7508 Unreachable->removePredecessor(BB);
7509 SI->eraseFromParent();
7510 Cmp->eraseFromParent();
7511 if (DTU && Unreachable)
7512 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7513 return true;
7514}
7515
7516/// Checking whether two cases of SI are equal depends on the contents of the
7517/// BasicBlock and the incoming values of their successor PHINodes.
7518/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7519/// calling this function on each BasicBlock every time isEqual is called,
7520/// especially since the same BasicBlock may be passed as an argument multiple
7521/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7522/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7523/// of the incoming values.
7528

7529namespace llvm {
// DenseMapInfo specialization so SwitchSuccWrapper pointers can key a
// DenseSet; hashing/equality are by case-arm *contents*, not pointer value.
7530template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
7532 return static_cast<SwitchSuccWrapper *>(
7534 }
7536 return static_cast<SwitchSuccWrapper *>(
7538 }
7539 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7540 BasicBlock *Succ = SSW->Dest;
7542 assert(BI->isUnconditional() &&
7543 "Only supporting unconditional branches for now");
7544 assert(BI->getNumSuccessors() == 1 &&
7545 "Expected unconditional branches to have one successor");
7546 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7547
7548 // Since we assume the BB is just a single BranchInst with a single
7549 // successor, we hash as the BB and the incoming Values of its successor
7550 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7551 // including the incoming PHI values leads to better performance.
7552 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7553 // time and passing it in SwitchSuccWrapper, but this slowed down the
7554 // average compile time without having any impact on the worst case compile
7555 // time.
7556 BasicBlock *BB = BI->getSuccessor(0);
7557 SmallVector<Value *> PhiValsForBB;
7558 for (PHINode &Phi : BB->phis())
7559 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7560
7561 return hash_combine(BB, hash_combine_range(PhiValsForBB));
7562 }
7563 static bool isEqual(const SwitchSuccWrapper *LHS,
7564 const SwitchSuccWrapper *RHS) {
// Sentinel (empty/tombstone) keys only compare equal to themselves.
7567 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7568 return LHS == RHS;
7569
7570 BasicBlock *A = LHS->Dest;
7571 BasicBlock *B = RHS->Dest;
7572
7573 // FIXME: we checked that the size of A and B are both 1 in
7574 // simplifyDuplicateSwitchArms to make the Case list smaller to
7575 // improve performance. If we decide to support BasicBlocks with more
7576 // than just a single instruction, we need to check that A.size() ==
7577 // B.size() here, and we need to check more than just the BranchInsts
7578 // for equality.
7579
7580 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7581 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7582 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7583 "Only supporting unconditional branches for now");
7584 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7585 return false;
7586
7587 // Need to check that PHIs in successor have matching values
7588 BasicBlock *Succ = ABI->getSuccessor(0);
7589 for (PHINode &Phi : Succ->phis()) {
7590 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7591 if (PredIVs[A] != PredIVs[B])
7592 return false;
7593 }
7594
7595 return true;
7596 }
7597};
7598} // namespace llvm
7599
/// Deduplicate switch arms: if several case destinations are single-branch
/// blocks with identical successors and identical incoming PHI values, point
/// all such cases at one representative block. Returns true on any change.
7600bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7601 DomTreeUpdater *DTU) {
7602 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7603 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7604 // an entire PHI at once after the loop, opposed to calling
7605 // getIncomingValueForBlock inside this loop, since each call to
7606 // getIncomingValueForBlock is O(|Preds|).
7607 SmallPtrSet<PHINode *, 8> Phis;
7608 SmallPtrSet<BasicBlock *, 8> Seen;
7609 DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
7610 DenseMap<BasicBlock *, SmallVector<unsigned, 32>> BBToSuccessorIndexes;
7612 Cases.reserve(SI->getNumSuccessors());
7613
7614 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7615 BasicBlock *BB = SI->getSuccessor(I);
7616
7617 // FIXME: Support more than just a single BranchInst. One way we could do
7618 // this is by taking a hashing approach of all insts in BB.
7619 if (BB->size() != 1)
7620 continue;
7621
7622 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7623 // on other kinds of terminators. We decide to only support unconditional
7624 // branches for now for compile time reasons.
7625 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7626 if (!BI || BI->isConditional())
7627 continue;
7628
// Already-seen blocks just record the extra successor index; they were
// wrapped into Cases on first sight.
7629 if (!Seen.insert(BB).second) {
7630 auto It = BBToSuccessorIndexes.find(BB);
7631 if (It != BBToSuccessorIndexes.end())
7632 It->second.emplace_back(I);
7633 continue;
7634 }
7635
7636 // FIXME: This case needs some extra care because the terminators other than
7637 // SI need to be updated. For now, consider only backedges to the SI.
7638 if (BB->getUniquePredecessor() != SI->getParent())
7639 continue;
7640
7641 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7642 for (BasicBlock *Succ : BI->successors())
7644
7645 // Add the successor only if not previously visited.
7646 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7647 BBToSuccessorIndexes[BB].emplace_back(I);
7648 }
7649
7650 // Precompute a data structure to improve performance of isEqual for
7651 // SwitchSuccWrapper.
7652 PhiPredIVs.reserve(Phis.size());
7653 for (PHINode *Phi : Phis) {
7654 auto &IVs =
7655 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
7656 for (auto &IV : Phi->incoming_values())
7657 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
7658 }
7659
7660 // Build a set such that if the SwitchSuccWrapper exists in the set and
7661 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7662 // which is not in the set should be replaced with the one in the set. If the
7663 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7664 // other SwitchSuccWrappers can check against it in the same manner. We use
7665 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7666 // around information to isEquality, getHashValue, and when doing the
7667 // replacement with better performance.
7668 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
7669 ReplaceWith.reserve(Cases.size());
7670
7672 Updates.reserve(ReplaceWith.size());
7673 bool MadeChange = false;
7674 for (auto &SSW : Cases) {
7675 // SSW is a candidate for simplification. If we find a duplicate BB,
7676 // replace it.
7677 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7678 if (!Inserted) {
7679 // We know that SI's parent BB no longer dominates the old case successor
7680 // since we are making it dead.
7681 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7682 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7683 for (unsigned Idx : Successors)
7684 SI->setSuccessor(Idx, (*It)->Dest);
7685 MadeChange = true;
7686 }
7687 }
7688
7689 if (DTU)
7690 DTU->applyUpdates(Updates);
7691
7692 return MadeChange;
7693}
7694
/// Top-level driver for simplifying a SwitchInst terminator.
///
/// Applies a fixed, deliberately ordered sequence of switch transforms.
/// Each successful transform returns immediately through requestResimplify()
/// so the block is re-processed and later transforms see the updated IR.
/// Returns true if any change was made (possibly scheduling resimplification).
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
        return requestResimplify();

    // A switch whose condition is a select can sometimes be rewritten in
    // terms of the select's arms.
    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
      if (simplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(false).begin())
      if (foldValueComparisonIntoPredecessors(SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
    return requestResimplify();

  if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
    return requestResimplify();

  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
    return requestResimplify();

  if (reduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  // Hoist code that is common to all switch successors up into this block.
  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
    return requestResimplify();

  if (simplifyDuplicateSwitchArms(SI, DTU))
    return requestResimplify();

  return false;
}
7760
7761bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7762 BasicBlock *BB = IBI->getParent();
7763 bool Changed = false;
7764
7765 // Eliminate redundant destinations.
7766 SmallPtrSet<Value *, 8> Succs;
7767 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
7768 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7769 BasicBlock *Dest = IBI->getDestination(i);
7770 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7771 if (!Dest->hasAddressTaken())
7772 RemovedSuccs.insert(Dest);
7773 Dest->removePredecessor(BB);
7774 IBI->removeDestination(i);
7775 --i;
7776 --e;
7777 Changed = true;
7778 }
7779 }
7780
7781 if (DTU) {
7782 std::vector<DominatorTree::UpdateType> Updates;
7783 Updates.reserve(RemovedSuccs.size());
7784 for (auto *RemovedSucc : RemovedSuccs)
7785 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7786 DTU->applyUpdates(Updates);
7787 }
7788
7789 if (IBI->getNumDestinations() == 0) {
7790 // If the indirectbr has no successors, change it to unreachable.
7791 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7793 return true;
7794 }
7795
7796 if (IBI->getNumDestinations() == 1) {
7797 // If the indirectbr has one successor, change it to a direct branch.
7800 return true;
7801 }
7802
7803 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7804 if (simplifyIndirectBrOnSelect(IBI, SI))
7805 return requestResimplify();
7806 }
7807 return Changed;
7808}
7809
7810/// Given an block with only a single landing pad and a unconditional branch
7811/// try to find another basic block which this one can be merged with. This
7812/// handles cases where we have multiple invokes with unique landing pads, but
7813/// a shared handler.
7814///
7815/// We specifically choose to not worry about merging non-empty blocks
7816/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7817/// practice, the optimizer produces empty landing pad blocks quite frequently
7818/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7819/// sinking in this file)
7820///
7821/// This is primarily a code size optimization. We need to avoid performing
7822/// any transform which might inhibit optimization (such as our ability to
7823/// specialize a particular handler via tail commoning). We do this by not
7824/// merging any blocks which require us to introduce a phi. Since the same
7825/// values are flowing through both blocks, we don't lose any ability to
7826/// specialize. If anything, we make such specialization more likely.
7827///
7828/// TODO - This transformation could remove entries from a phi in the target
7829/// block when the inputs in the phi are the same for the two blocks being
7830/// merged. In some cases, this could result in removal of the PHI entirely.
7832 BasicBlock *BB, DomTreeUpdater *DTU) {
7833 auto Succ = BB->getUniqueSuccessor();
7834 assert(Succ);
7835 // If there's a phi in the successor block, we'd likely have to introduce
7836 // a phi into the merged landing pad block.
7837 if (isa<PHINode>(*Succ->begin()))
7838 return false;
7839
7840 for (BasicBlock *OtherPred : predecessors(Succ)) {
7841 if (BB == OtherPred)
7842 continue;
7843 BasicBlock::iterator I = OtherPred->begin();
7845 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7846 continue;
7847 ++I;
7849 if (!BI2 || !BI2->isIdenticalTo(BI))
7850 continue;
7851
7852 std::vector<DominatorTree::UpdateType> Updates;
7853
7854 // We've found an identical block. Update our predecessors to take that
7855 // path instead and make ourselves dead.
7857 for (BasicBlock *Pred : UniquePreds) {
7858 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7859 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7860 "unexpected successor");
7861 II->setUnwindDest(OtherPred);
7862 if (DTU) {
7863 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7864 Updates.push_back({DominatorTree::Delete, Pred, BB});
7865 }
7866 }
7867
7869 for (BasicBlock *Succ : UniqueSuccs) {
7870 Succ->removePredecessor(BB);
7871 if (DTU)
7872 Updates.push_back({DominatorTree::Delete, BB, Succ});
7873 }
7874
7875 IRBuilder<> Builder(BI);
7876 Builder.CreateUnreachable();
7877 BI->eraseFromParent();
7878 if (DTU)
7879 DTU->applyUpdates(Updates);
7880 return true;
7881 }
7882 return false;
7883}
7884
7885bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7886 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7887 : simplifyCondBranch(Branch, Builder);
7888}
7889
/// Simplify an unconditional branch: fold away trivially empty blocks,
/// blocks containing only an equality icmp against a constant, empty
/// landing-pad blocks mergeable with an identical sibling, and
/// compare-and-branch blocks foldable into a predecessor.
bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(0);

  // If the Terminator is the only non-phi instruction, simplify the block.
  // If LoopHeader is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization to keep canonical loop forms for nested loops. These blocks
  // can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor then we do not introduce new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
       (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
  // NOTE(review): the declaration of `I` (an iterator at the first non-debug
  // instruction of BB, which the subsequent checks advance) appears to have
  // been lost during extraction just above this use — restore it from
  // upstream before building this file.
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against a
  // constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
      ++I;
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
    }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
    ++I;
    if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and our successor, fold the comparison into the
  // predecessor and use logical operations to update the incoming value
  // for PHI nodes in common successor.
  if (Options.SpeculateBlocks &&
      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
                             Options.BonusInstThreshold))
    return requestResimplify();
  return false;
}
7939
7941 BasicBlock *PredPred = nullptr;
7942 for (auto *P : predecessors(BB)) {
7943 BasicBlock *PPred = P->getSinglePredecessor();
7944 if (!PPred || (PredPred && PredPred != PPred))
7945 return nullptr;
7946 PredPred = PPred;
7947 }
7948 return PredPred;
7949}
7950
7951/// Fold the following pattern:
7952/// bb0:
7953/// br i1 %cond1, label %bb1, label %bb2
7954/// bb1:
7955/// br i1 %cond2, label %bb3, label %bb4
7956/// bb2:
7957/// br i1 %cond2, label %bb4, label %bb3
7958/// bb3:
7959/// ...
7960/// bb4:
7961/// ...
7962/// into
7963/// bb0:
7964/// %cond = xor i1 %cond1, %cond2
7965/// br i1 %cond, label %bb4, label %bb3
7966/// bb3:
7967/// ...
7968/// bb4:
7969/// ...
7970/// NOTE: %cond2 always dominates the terminator of bb0.
7972 BasicBlock *BB = BI->getParent();
7973 BasicBlock *BB1 = BI->getSuccessor(0);
7974 BasicBlock *BB2 = BI->getSuccessor(1);
7975 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7976 if (Succ == BB)
7977 return false;
7978 if (&Succ->front() != Succ->getTerminator())
7979 return false;
7980 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7981 if (!SuccBI || !SuccBI->isConditional())
7982 return false;
7983 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7984 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7985 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7986 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7987 };
7988 BranchInst *BB1BI, *BB2BI;
7989 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7990 return false;
7991
7992 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7993 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7994 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7995 return false;
7996
7997 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7998 BasicBlock *BB4 = BB1BI->getSuccessor(1);
7999 IRBuilder<> Builder(BI);
8000 BI->setCondition(
8001 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8002 BB1->removePredecessor(BB);
8003 BI->setSuccessor(0, BB4);
8004 BB2->removePredecessor(BB);
8005 BI->setSuccessor(1, BB3);
8006 if (DTU) {
8008 Updates.push_back({DominatorTree::Delete, BB, BB1});
8009 Updates.push_back({DominatorTree::Insert, BB, BB4});
8010 Updates.push_back({DominatorTree::Delete, BB, BB2});
8011 Updates.push_back({DominatorTree::Insert, BB, BB3});
8012
8013 DTU->applyUpdates(Updates);
8014 }
8015 bool HasWeight = false;
8016 uint64_t BBTWeight, BBFWeight;
8017 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8018 HasWeight = true;
8019 else
8020 BBTWeight = BBFWeight = 1;
8021 uint64_t BB1TWeight, BB1FWeight;
8022 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8023 HasWeight = true;
8024 else
8025 BB1TWeight = BB1FWeight = 1;
8026 uint64_t BB2TWeight, BB2FWeight;
8027 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8028 HasWeight = true;
8029 else
8030 BB2TWeight = BB2FWeight = 1;
8031 if (HasWeight) {
8032 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8033 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8034 fitWeights(Weights);
8035 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8036 }
8037 return true;
8038}
8039
/// Top-level driver for simplifying a conditional branch. Attempts, in order:
/// value-equality folding with the predecessor, icmp-chain-to-switch
/// conversion, branch-on-implied-condition folding, folding into a common
/// destination, hoisting/speculating successor code, threading on values
/// known in predecessors, condbr-to-condbr simplification, conditional-store
/// merging, and nested-branch merging. Statement order is deliberate.
bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
  assert(
      // NOTE(review): the first conjunct of this assert (in upstream, a
      // check that the condition is not a ConstantInt) appears to have been
      // lost during extraction — confirm against upstream.
      BI->getSuccessor(0) != BI->getSuccessor(1) &&
      "Tautological conditional branch should have been eliminated already.");

  BasicBlock *BB = BI->getParent();
  if (!Options.SimplifyCondBranch ||
      BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
    return false;

  // Conditional branch
  if (isValueEqualityComparison(BI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this
    // switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
        return requestResimplify();

    // This block must be empty, except for the setcond inst, if it exists.
    // Ignore dbg and pseudo intrinsics.
    auto I = BB->instructionsWithoutDebug(true).begin();
    if (&*I == BI) {
      if (foldValueComparisonIntoPredecessors(BI, Builder))
        return requestResimplify();
    } else if (&*I == cast<Instruction>(BI->getCondition())) {
      ++I;
      if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
        return requestResimplify();
    }
  }

  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
  if (simplifyBranchOnICmpChain(BI, Builder, DL))
    return true;

  // If this basic block has dominating predecessor blocks and the dominating
  // blocks' conditions imply BI's condition, we know the direction of BI.
  std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
  if (Imp) {
    // Turn this into a branch on constant.
    auto *OldCond = BI->getCondition();
    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
                             : ConstantInt::getFalse(BB->getContext());
    BI->setCondition(TorF);
    // NOTE(review): a line that cleans up the now-dead OldCond (in upstream,
    // RecursivelyDeleteTriviallyDeadInstructions(OldCond)) appears to have
    // been lost during extraction here — OldCond is otherwise unused.
    return requestResimplify();
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and one of our successors, fold the comparison into the
  // predecessor and use logical operations to pick the right destination.
  if (Options.SpeculateBlocks &&
      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
                             Options.BonusInstThreshold))
    return requestResimplify();

  // We have a conditional branch to two blocks that are only reachable
  // from BI. We know that the condbr dominates the two blocks, so see if
  // there is any identical code in the "then" and "else" blocks. If so, we
  // can hoist it up to the branching block.
  if (BI->getSuccessor(0)->getSinglePredecessor()) {
    if (BI->getSuccessor(1)->getSinglePredecessor()) {
      if (HoistCommon &&
          hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
        return requestResimplify();

      if (BI && Options.HoistLoadsStoresWithCondFaulting &&
          isProfitableToSpeculate(BI, std::nullopt, TTI)) {
        SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
        // Both successors may only contain cheap, speculatable loads/stores
        // (up to a threshold) plus a single-successor terminator.
        auto CanSpeculateConditionalLoadsStores = [&]() {
          for (auto *Succ : successors(BB)) {
            for (Instruction &I : *Succ) {
              if (I.isTerminator()) {
                if (I.getNumSuccessors() > 1)
                  return false;
                continue;
              } else if (!isSafeCheapLoadStore(&I, TTI) ||
                         SpeculatedConditionalLoadsStores.size() ==
                             // NOTE(review): the threshold constant compared
                             // against here (upstream: HoistLoadsStores-
                             // WithCondFaultingThreshold) appears lost in
                             // extraction.
                return false;
              }
              SpeculatedConditionalLoadsStores.push_back(&I);
            }
          }
          return !SpeculatedConditionalLoadsStores.empty();
        };

        if (CanSpeculateConditionalLoadsStores()) {
          hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
                                      std::nullopt, nullptr);
          return requestResimplify();
        }
      }
    } else {
      // If Successor #1 has multiple preds, we may be able to conditionally
      // execute Successor #0 if it branches to Successor #1.
      Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
      if (Succ0TI->getNumSuccessors() == 1 &&
          Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
        if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
          return requestResimplify();
    }
  } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
    // If Successor #0 has multiple preds, we may be able to conditionally
    // execute Successor #1 if it branches to Successor #0.
    Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
    if (Succ1TI->getNumSuccessors() == 1 &&
        Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
      if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
        return requestResimplify();
  }

  // If this is a branch on something for which we know the constant value in
  // predecessors (e.g. a phi node in the current block), thread control
  // through this block.
  if (foldCondBranchOnValueKnownInPredecessor(BI))
    return requestResimplify();

  // Scan predecessor blocks for conditional branches.
  for (BasicBlock *Pred : predecessors(BB))
    if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
      if (PBI != BI && PBI->isConditional())
        if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
          return requestResimplify();

  // Look for diamond patterns.
  if (MergeCondStores)
    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
      if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
        if (PBI != BI && PBI->isConditional())
          if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
            return requestResimplify();

  // Look for nested conditional branches.
  if (mergeNestedCondBranch(BI, DTU))
    return requestResimplify();

  return false;
}
8181
8182/// Check if passing a value to an instruction will cause undefined behavior.
8183static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8184 assert(V->getType() == I->getType() && "Mismatched types");
8186 if (!C)
8187 return false;
8188
8189 if (I->use_empty())
8190 return false;
8191
8192 if (C->isNullValue() || isa<UndefValue>(C)) {
8193 // Only look at the first use we can handle, avoid hurting compile time with
8194 // long uselists
8195 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8196 auto *Use = cast<Instruction>(U.getUser());
8197 // Change this list when we want to add new instructions.
8198 switch (Use->getOpcode()) {
8199 default:
8200 return false;
8201 case Instruction::GetElementPtr:
8202 case Instruction::Ret:
8203 case Instruction::BitCast:
8204 case Instruction::Load:
8205 case Instruction::Store:
8206 case Instruction::Call:
8207 case Instruction::CallBr:
8208 case Instruction::Invoke:
8209 case Instruction::UDiv:
8210 case Instruction::URem:
8211 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8212 // implemented to avoid code complexity as it is unclear how useful such
8213 // logic is.
8214 case Instruction::SDiv:
8215 case Instruction::SRem:
8216 return true;
8217 }
8218 });
8219 if (FindUse == I->use_end())
8220 return false;
8221 auto &Use = *FindUse;
8222 auto *User = cast<Instruction>(Use.getUser());
8223 // Bail out if User is not in the same BB as I or User == I or User comes
8224 // before I in the block. The latter two can be the case if User is a
8225 // PHI node.
8226 if (User->getParent() != I->getParent() || User == I ||
8227 User->comesBefore(I))
8228 return false;
8229
8230 // Now make sure that there are no instructions in between that can alter
8231 // control flow (eg. calls)
8232 auto InstrRange =
8233 make_range(std::next(I->getIterator()), User->getIterator());
8234 if (any_of(InstrRange, [](Instruction &I) {
8236 }))
8237 return false;
8238
8239 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8241 if (GEP->getPointerOperand() == I) {
8242 // The type of GEP may differ from the type of base pointer.
8243 // Bail out on vector GEPs, as they are not handled by other checks.
8244 if (GEP->getType()->isVectorTy())
8245 return false;
8246 // The current base address is null, there are four cases to consider:
8247 // getelementptr (TY, null, 0) -> null
8248 // getelementptr (TY, null, not zero) -> may be modified
8249 // getelementptr inbounds (TY, null, 0) -> null
8250 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8251 // undefined?
8252 if (!GEP->hasAllZeroIndices() &&
8253 (!GEP->isInBounds() ||
8254 NullPointerIsDefined(GEP->getFunction(),
8255 GEP->getPointerAddressSpace())))
8256 PtrValueMayBeModified = true;
8257 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8258 }
8259
8260 // Look through return.
8261 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8262 bool HasNoUndefAttr =
8263 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8264 // Return undefined to a noundef return value is undefined.
8265 if (isa<UndefValue>(C) && HasNoUndefAttr)
8266 return true;
8267 // Return null to a nonnull+noundef return value is undefined.
8268 if (C->isNullValue() && HasNoUndefAttr &&
8269 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8270 return !PtrValueMayBeModified;
8271 }
8272 }
8273
8274 // Load from null is undefined.
8275 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8276 if (!LI->isVolatile())
8277 return !NullPointerIsDefined(LI->getFunction(),
8278 LI->getPointerAddressSpace());
8279
8280 // Store to null is undefined.
8282 if (!SI->isVolatile())
8283 return (!NullPointerIsDefined(SI->getFunction(),
8284 SI->getPointerAddressSpace())) &&
8285 SI->getPointerOperand() == I;
8286
8287 // llvm.assume(false/undef) always triggers immediate UB.
8288 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8289 // Ignore assume operand bundles.
8290 if (I == Assume->getArgOperand(0))
8291 return true;
8292 }
8293
8294 if (auto *CB = dyn_cast<CallBase>(User)) {
8295 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8296 return false;
8297 // A call to null is undefined.
8298 if (CB->getCalledOperand() == I)
8299 return true;
8300
8301 if (CB->isArgOperand(&Use)) {
8302 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8303 // Passing null to a nonnnull+noundef argument is undefined.
8305 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8306 return !PtrValueMayBeModified;
8307 // Passing undef to a noundef argument is undefined.
8308 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8309 return true;
8310 }
8311 }
8312 // Div/Rem by zero is immediate UB
8313 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8314 return true;
8315 }
8316 return false;
8317}
8318
8319/// If BB has an incoming value that will always trigger undefined behavior
8320/// (eg. null pointer dereference), remove the branch leading here.
8322 DomTreeUpdater *DTU,
8323 AssumptionCache *AC) {
8324 for (PHINode &PHI : BB->phis())
8325 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8326 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8327 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8328 Instruction *T = Predecessor->getTerminator();
8329 IRBuilder<> Builder(T);
8330 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8331 BB->removePredecessor(Predecessor);
8332 // Turn unconditional branches into unreachables and remove the dead
8333 // destination from conditional branches.
8334 if (BI->isUnconditional())
8335 Builder.CreateUnreachable();
8336 else {
8337 // Preserve guarding condition in assume, because it might not be
8338 // inferrable from any dominating condition.
8339 Value *Cond = BI->getCondition();
8340 CallInst *Assumption;
8341 if (BI->getSuccessor(0) == BB)
8342 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8343 else
8344 Assumption = Builder.CreateAssumption(Cond);
8345 if (AC)
8346 AC->registerAssumption(cast<AssumeInst>(Assumption));
8347 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8348 : BI->getSuccessor(0));
8349 }
8350 BI->eraseFromParent();
8351 if (DTU)
8352 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8353 return true;
8354 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8355 // Redirect all branches leading to UB into
8356 // a newly created unreachable block.
8357 BasicBlock *Unreachable = BasicBlock::Create(
8358 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8359 Builder.SetInsertPoint(Unreachable);
8360 // The new block contains only one instruction: Unreachable
8361 Builder.CreateUnreachable();
8362 for (const auto &Case : SI->cases())
8363 if (Case.getCaseSuccessor() == BB) {
8364 BB->removePredecessor(Predecessor);
8365 Case.setSuccessor(Unreachable);
8366 }
8367 if (SI->getDefaultDest() == BB) {
8368 BB->removePredecessor(Predecessor);
8369 SI->setDefaultDest(Unreachable);
8370 }
8371
8372 if (DTU)
8373 DTU->applyUpdates(
8374 { { DominatorTree::Insert, Predecessor, Unreachable },
8375 { DominatorTree::Delete, Predecessor, BB } });
8376 return true;
8377 }
8378 }
8379
8380 return false;
8381}
8382
8383bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8384 bool Changed = false;
8385
8386 assert(BB && BB->getParent() && "Block not embedded in function!");
8387 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8388
8389 // Remove basic blocks that have no predecessors (except the entry block)...
8390 // or that just have themself as a predecessor. These are unreachable.
8391 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8392 BB->getSinglePredecessor() == BB) {
8393 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8394 DeleteDeadBlock(BB, DTU);
8395 return true;
8396 }
8397
8398 // Check to see if we can constant propagate this terminator instruction
8399 // away...
8400 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8401 /*TLI=*/nullptr, DTU);
8402
8403 // Check for and eliminate duplicate PHI nodes in this block.
8405
8406 // Check for and remove branches that will always cause undefined behavior.
8408 return requestResimplify();
8409
8410 // Merge basic blocks into their predecessor if there is only one distinct
8411 // pred, and if there is only one distinct successor of the predecessor, and
8412 // if there are no PHI nodes.
8413 if (MergeBlockIntoPredecessor(BB, DTU))
8414 return true;
8415
8416 if (SinkCommon && Options.SinkCommonInsts)
8417 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8418 mergeCompatibleInvokes(BB, DTU)) {
8419 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8420 // so we may now how duplicate PHI's.
8421 // Let's rerun EliminateDuplicatePHINodes() first,
8422 // before foldTwoEntryPHINode() potentially converts them into select's,
8423 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8424 return true;
8425 }
8426
8427 IRBuilder<> Builder(BB);
8428
8429 if (Options.SpeculateBlocks &&
8430 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8431 // If there is a trivial two-entry PHI node in this basic block, and we can
8432 // eliminate it, do so now.
8433 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8434 if (PN->getNumIncomingValues() == 2)
8435 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8436 Options.SpeculateUnpredictables))
8437 return true;
8438 }
8439
8441 Builder.SetInsertPoint(Terminator);
8442 switch (Terminator->getOpcode()) {
8443 case Instruction::Br:
8444 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8445 break;
8446 case Instruction::Resume:
8447 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8448 break;
8449 case Instruction::CleanupRet:
8450 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8451 break;
8452 case Instruction::Switch:
8453 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8454 break;
8455 case Instruction::Unreachable:
8456 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8457 break;
8458 case Instruction::IndirectBr:
8459 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8460 break;
8461 }
8462
8463 return Changed;
8464}
8465
8466bool SimplifyCFGOpt::run(BasicBlock *BB) {
8467 bool Changed = false;
8468
8469 // Repeated simplify BB as long as resimplification is requested.
8470 do {
8471 Resimplify = false;
8472
8473 // Perform one round of simplifcation. Resimplify flag will be set if
8474 // another iteration is requested.
8475 Changed |= simplifyOnce(BB);
8476 } while (Resimplify);
8477
8478 return Changed;
8479}
8480
8483 ArrayRef<WeakVH> LoopHeaders) {
8484 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8485 Options)
8486 .run(BB);
8487}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with different constant values, replace the switch with lookup tables.
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' conditions to arrive at the common destination if that would be profitable.
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block, for one of the case destinations CaseDest of switch SI.
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL)
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases, size of the table, and the types of the results.
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{True|False}Weight variables.
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it.
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic block that this one can be merged with.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch, if that would mean that some of the destination blocks of the switch can be folded away.
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only two different constant values, try to replace the switch with a select (or a chain of selects).
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to speculate.
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into sets, with invokes in each set being merged.
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profitable and legal.
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value dominates the block.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are both set.
Definition APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition ArrayRef.h:156
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic, or any pseudo operation if SkipPseudoOp is true.
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return instruction of this basic block, if such a call is present.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:666
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isNegative() const
Definition Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the limit value.
Definition Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting needed in parts of the compiler.
Definition Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:256
static DebugLoc getTemporary()
Definition DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170
static DebugLoc getDropped()
Definition DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
iterator end()
Definition DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:205
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:112
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2345
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2100
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2637
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1931
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1805
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1847
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1860
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2068
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2277
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1077
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:115
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:359
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:201
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:330
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:843
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:307
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:256
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:646
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2078
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1779
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:157
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2130
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:336
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:203
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:420
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1652
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1741
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1427
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3081
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:337
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3339
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1956
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3839
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2032
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2122
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our successors, fold the setcc into the predecessor and use logical operations to pick the right destination.
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on dominating conditions.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1612
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:591
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2090
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value, returning the original object being addressed.
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:465
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can be loaded can also be stored.
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:378
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weights" metadata nodes.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming values of their successor PHI nodes.
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given value type T.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are taken from machine instruction.
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
Definition KnownBits.h:289
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:138
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:249