LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <optional>
88#include <set>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "simplifycfg"
97
99 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
100
101 cl::desc(
102 "Temporary development switch used to gradually uplift SimplifyCFG "
103 "into preserving DomTree,"));
104
105// Chosen as 2 so as to be cheap, but still to have enough power to fold
106// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
107// To catch this, we need to fold a compare and a select, hence '2' being the
108// minimum reasonable default.
110 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
111 cl::desc(
112 "Control the amount of phi node folding to perform (default = 2)"));
113
115 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
116 cl::desc("Control the maximal total instruction cost that we are willing "
117 "to speculatively execute to fold a 2-entry PHI node into a "
118 "select (default = 4)"));
119
120static cl::opt<bool>
121 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
122 cl::desc("Hoist common instructions up to the parent block"));
123
125 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
126 cl::desc("Hoist loads if the target supports conditional faulting"));
127
129 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
130 cl::desc("Hoist stores if the target supports conditional faulting"));
131
133 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
134 cl::desc("Control the maximal conditional load/store that we are willing "
135 "to speculatively execute to eliminate conditional branch "
136 "(default = 6)"));
137
139 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
140 cl::init(20),
141 cl::desc("Allow reordering across at most this many "
142 "instructions when hoisting"));
143
144static cl::opt<bool>
145 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
146 cl::desc("Sink common instructions down to the end block"));
147
149 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
150 cl::desc("Hoist conditional stores if an unconditional store precedes"));
151
153 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
154 cl::desc("Hoist conditional stores even if an unconditional store does not "
155 "precede - hoist multiple conditional stores into a single "
156 "predicated store"));
157
159 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
160 cl::desc("When merging conditional stores, do so even if the resultant "
161 "basic blocks are unlikely to be if-converted as a result"));
162
164 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
165 cl::desc("Allow exactly one expensive instruction to be speculatively "
166 "executed"));
167
169 "max-speculation-depth", cl::Hidden, cl::init(10),
170 cl::desc("Limit maximum recursion depth when calculating costs of "
171 "speculatively executed instructions"));
172
173static cl::opt<int>
174 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
175 cl::init(10),
176 cl::desc("Max size of a block which is still considered "
177 "small enough to thread through"));
178
179// Two is chosen to allow one negation and a logical combine.
181 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
182 cl::init(2),
183 cl::desc("Maximum cost of combining conditions when "
184 "folding branches"));
185
187 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
188 cl::init(2),
189 cl::desc("Multiplier to apply to threshold when determining whether or not "
190 "to fold branch to common destination when vector operations are "
191 "present"));
192
194 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
195 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
196
198 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
199 cl::desc("Limit cases to analyze when converting a switch to select"));
200
202 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
203 cl::desc("Limit number of blocks a define in a threaded block is allowed "
204 "to be live in"));
205
206STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
207STATISTIC(NumLinearMaps,
208 "Number of switch instructions turned into linear mapping");
209STATISTIC(NumLookupTables,
210 "Number of switch instructions turned into lookup tables");
212 NumLookupTablesHoles,
213 "Number of switch instructions turned into lookup tables (holes checked)");
214STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
215STATISTIC(NumFoldValueComparisonIntoPredecessors,
216 "Number of value comparisons folded into predecessor basic blocks");
217STATISTIC(NumFoldBranchToCommonDest,
218 "Number of branches folded into predecessor basic block");
220 NumHoistCommonCode,
221 "Number of common instruction 'blocks' hoisted up to the begin block");
222STATISTIC(NumHoistCommonInstrs,
223 "Number of common instructions hoisted up to the begin block");
224STATISTIC(NumSinkCommonCode,
225 "Number of common instruction 'blocks' sunk down to the end block");
226STATISTIC(NumSinkCommonInstrs,
227 "Number of common instructions sunk down to the end block");
228STATISTIC(NumSpeculations, "Number of speculative executed instructions");
229STATISTIC(NumInvokes,
230 "Number of invokes with empty resume blocks simplified into calls");
231STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
232STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
233
234namespace {
235
236// The first field contains the value that the switch produces when a certain
237// case group is selected, and the second field is a vector containing the
238// cases composing the case group.
239using SwitchCaseResultVectorTy =
241
242// The first field contains the phi node that generates a result of the switch
243// and the second field contains the value generated for a certain case in the
244// switch for that PHI.
245using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
246
247/// ValueEqualityComparisonCase - Represents a case of a switch.
248struct ValueEqualityComparisonCase {
250 BasicBlock *Dest;
251
252 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
253 : Value(Value), Dest(Dest) {}
254
255 bool operator<(ValueEqualityComparisonCase RHS) const {
256 // Comparing pointers is ok as we only rely on the order for uniquing.
257 return Value < RHS.Value;
258 }
259
260 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
261};
262
263class SimplifyCFGOpt {
265 DomTreeUpdater *DTU;
266 const DataLayout &DL;
267 ArrayRef<WeakVH> LoopHeaders;
268 const SimplifyCFGOptions &Options;
269 bool Resimplify;
270
271 Value *isValueEqualityComparison(Instruction *TI);
272 BasicBlock *getValueEqualityComparisonCases(
273 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
274 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
275 BasicBlock *Pred,
276 IRBuilder<> &Builder);
277 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
278 Instruction *PTI,
279 IRBuilder<> &Builder);
280 bool foldValueComparisonIntoPredecessors(Instruction *TI,
281 IRBuilder<> &Builder);
282
283 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
284 bool simplifySingleResume(ResumeInst *RI);
285 bool simplifyCommonResume(ResumeInst *RI);
286 bool simplifyCleanupReturn(CleanupReturnInst *RI);
287 bool simplifyUnreachable(UnreachableInst *UI);
288 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
289 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
290 bool simplifyIndirectBr(IndirectBrInst *IBI);
291 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
292 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
293 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
294 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
295
296 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
297 IRBuilder<> &Builder);
298
299 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
300 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
301 Instruction *TI, Instruction *I1,
302 SmallVectorImpl<Instruction *> &OtherSuccTIs);
303 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
304 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
305 BasicBlock *TrueBB, BasicBlock *FalseBB,
306 uint32_t TrueWeight, uint32_t FalseWeight);
307 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
308 const DataLayout &DL);
309 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
310 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
311 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
312
313public:
314 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
315 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
316 const SimplifyCFGOptions &Opts)
317 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
318 assert((!DTU || !DTU->hasPostDomTree()) &&
319 "SimplifyCFG is not yet capable of maintaining validity of a "
320 "PostDomTree, so don't ask for it.");
321 }
322
323 bool simplifyOnce(BasicBlock *BB);
324 bool run(BasicBlock *BB);
325
326 // Helper to set Resimplify and return change indication.
327 bool requestResimplify() {
328 Resimplify = true;
329 return true;
330 }
331};
332
333} // end anonymous namespace
334
335/// Return true if all the PHI nodes in the basic block \p BB
336/// receive compatible (identical) incoming values when coming from
337/// all of the predecessor blocks that are specified in \p IncomingBlocks.
338///
339/// Note that if the values aren't exactly identical, but \p EquivalenceSet
340/// is provided, and *both* of the values are present in the set,
341/// then they are considered equal.
343 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
344 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
345 assert(IncomingBlocks.size() == 2 &&
346 "Only for a pair of incoming blocks at the time!");
347
348 // FIXME: it is okay if one of the incoming values is an `undef` value,
349 // iff the other incoming value is guaranteed to be a non-poison value.
350 // FIXME: it is okay if one of the incoming values is a `poison` value.
351 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
352 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
353 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
354 if (IV0 == IV1)
355 return true;
356 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
357 EquivalenceSet->contains(IV1))
358 return true;
359 return false;
360 });
361}
362
363/// Return true if it is safe to merge these two
364/// terminator instructions together.
365static bool
367 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
368 if (SI1 == SI2)
369 return false; // Can't merge with self!
370
371 // It is not safe to merge these two switch instructions if they have a common
372 // successor, and if that successor has a PHI node, and if *that* PHI node has
373 // conflicting incoming values from the two switch blocks.
374 BasicBlock *SI1BB = SI1->getParent();
375 BasicBlock *SI2BB = SI2->getParent();
376
378 bool Fail = false;
379 for (BasicBlock *Succ : successors(SI2BB)) {
380 if (!SI1Succs.count(Succ))
381 continue;
382 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
383 continue;
384 Fail = true;
385 if (FailBlocks)
386 FailBlocks->insert(Succ);
387 else
388 break;
389 }
390
391 return !Fail;
392}
393
394/// Update PHI nodes in Succ to indicate that there will now be entries in it
395/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
396/// will be the same as those coming in from ExistPred, an existing predecessor
397/// of Succ.
398static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
399 BasicBlock *ExistPred,
400 MemorySSAUpdater *MSSAU = nullptr) {
401 for (PHINode &PN : Succ->phis())
402 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
403 if (MSSAU)
404 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
405 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
406}
407
408/// Compute an abstract "cost" of speculating the given instruction,
409/// which is assumed to be safe to speculate. TCC_Free means cheap,
410/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
411/// expensive.
413 const TargetTransformInfo &TTI) {
415}
416
417/// If we have a merge point of an "if condition" as accepted above,
418/// return true if the specified value dominates the block. We don't handle
419/// the true generality of domination here, just a special case which works
420/// well enough for us.
421///
422/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
423/// see if V (which must be an instruction) and its recursive operands
424/// that do not dominate BB have a combined cost lower than Budget and
425/// are non-trapping. If both are true, the instruction is inserted into the
426/// set and true is returned.
427///
428/// The cost for most non-trapping instructions is defined as 1 except for
429/// Select whose cost is 2.
430///
431/// After this function returns, Cost is increased by the cost of
432/// V plus its non-dominating operands. If that cost is greater than
433/// Budget, false is returned and Cost is undefined.
435 Value *V, BasicBlock *BB, Instruction *InsertPt,
438 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
439 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
440 // so limit the recursion depth.
441 // TODO: While this recursion limit does prevent pathological behavior, it
442 // would be better to track visited instructions to avoid cycles.
444 return false;
445
446 Instruction *I = dyn_cast<Instruction>(V);
447 if (!I) {
448 // Non-instructions dominate all instructions and can be executed
449 // unconditionally.
450 return true;
451 }
452 BasicBlock *PBB = I->getParent();
453
454 // We don't want to allow weird loops that might have the "if condition" in
455 // the bottom of this block.
456 if (PBB == BB)
457 return false;
458
459 // If this instruction is defined in a block that contains an unconditional
460 // branch to BB, then it must be in the 'conditional' part of the "if
461 // statement". If not, it definitely dominates the region.
462 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
463 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
464 return true;
465
466 // If we have seen this instruction before, don't count it again.
467 if (AggressiveInsts.count(I))
468 return true;
469
470 // Okay, it looks like the instruction IS in the "condition". Check to
471 // see if it's a cheap instruction to unconditionally compute, and if it
472 // only uses stuff defined outside of the condition. If so, hoist it out.
473 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
474 return false;
475
476 // Overflow arithmetic instruction plus extract value are usually generated
477 // when a division is being replaced. But, in this case, the zero check may
478 // still be kept in the code. In that case it would be worth to hoist these
479 // two instruction out of the basic block. Let's treat this pattern as one
480 // single cheap instruction here!
481 WithOverflowInst *OverflowInst;
482 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
483 ZeroCostInstructions.insert(OverflowInst);
484 Cost += 1;
485 } else if (!ZeroCostInstructions.contains(I))
487
488 // Allow exactly one instruction to be speculated regardless of its cost
489 // (as long as it is safe to do so).
490 // This is intended to flatten the CFG even if the instruction is a division
491 // or other expensive operation. The speculation of an expensive instruction
492 // is expected to be undone in CodeGenPrepare if the speculation has not
493 // enabled further IR optimizations.
494 if (Cost > Budget &&
495 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
496 !Cost.isValid()))
497 return false;
498
499 // Okay, we can only really hoist these out if their operands do
500 // not take us over the cost threshold.
501 for (Use &Op : I->operands())
502 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
503 TTI, AC, ZeroCostInstructions, Depth + 1))
504 return false;
505 // Okay, it's safe to do this! Remember this instruction.
506 AggressiveInsts.insert(I);
507 return true;
508}
509
510/// Extract ConstantInt from value, looking through IntToPtr
511/// and PointerNullValue. Return NULL if value is not a constant int.
513 // Normal constant int.
514 ConstantInt *CI = dyn_cast<ConstantInt>(V);
515 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
516 DL.isNonIntegralPointerType(V->getType()))
517 return CI;
518
519 // This is some kind of pointer constant. Turn it into a pointer-sized
520 // ConstantInt if possible.
521 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
522
523 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
524 if (isa<ConstantPointerNull>(V))
525 return ConstantInt::get(PtrTy, 0);
526
527 // IntToPtr const int.
528 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
529 if (CE->getOpcode() == Instruction::IntToPtr)
530 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
531 // The constant is very likely to have the right type already.
532 if (CI->getType() == PtrTy)
533 return CI;
534 else
535 return cast<ConstantInt>(
536 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
537 }
538 return nullptr;
539}
540
541namespace {
542
543/// Given a chain of or (||) or and (&&) comparison of a value against a
544/// constant, this will try to recover the information required for a switch
545/// structure.
546/// It will depth-first traverse the chain of comparison, seeking for patterns
547/// like %a == 12 or %a < 4 and combine them to produce a set of integer
548/// representing the different cases for the switch.
549/// Note that if the chain is composed of '||' it will build the set of elements
550/// that matches the comparisons (i.e. any of this value validate the chain)
551/// while for a chain of '&&' it will build the set elements that make the test
552/// fail.
553struct ConstantComparesGatherer {
554 const DataLayout &DL;
555
556 /// Value found for the switch comparison
557 Value *CompValue = nullptr;
558
559 /// Extra clause to be checked before the switch
560 Value *Extra = nullptr;
561
562 /// Set of integers to match in switch
564
565 /// Number of comparisons matched in the and/or chain
566 unsigned UsedICmps = 0;
567
568 /// If the elements in Vals matches the comparisons
569 bool IsEq = false;
570
571 // Used to check if the first matched CompValue shall be the Extra check.
572 bool IgnoreFirstMatch = false;
573 bool MultipleMatches = false;
574
575 /// Construct and compute the result for the comparison instruction Cond
576 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
577 gather(Cond);
578 if (CompValue || !MultipleMatches)
579 return;
580 Extra = nullptr;
581 Vals.clear();
582 UsedICmps = 0;
583 IgnoreFirstMatch = true;
584 gather(Cond);
585 }
586
587 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
588 ConstantComparesGatherer &
589 operator=(const ConstantComparesGatherer &) = delete;
590
591private:
592 /// Try to set the current value used for the comparison, it succeeds only if
593 /// it wasn't set before or if the new value is the same as the old one
594 bool setValueOnce(Value *NewVal) {
595 if (IgnoreFirstMatch) {
596 IgnoreFirstMatch = false;
597 return false;
598 }
599 if (CompValue && CompValue != NewVal) {
600 MultipleMatches = true;
601 return false;
602 }
603 CompValue = NewVal;
604 return true;
605 }
606
607 /// Try to match Instruction "I" as a comparison against a constant and
608 /// populates the array Vals with the set of values that match (or do not
609 /// match depending on isEQ).
610 /// Return false on failure. On success, the Value the comparison matched
611 /// against is placed in CompValue.
612 /// If CompValue is already set, the function is expected to fail if a match
613 /// is found but the value compared to is different.
614 bool matchInstruction(Instruction *I, bool isEQ) {
615 // If this is an icmp against a constant, handle this as one of the cases.
616 ICmpInst *ICI;
617 ConstantInt *C;
618 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
619 (C = getConstantInt(I->getOperand(1), DL)))) {
620 return false;
621 }
622
623 Value *RHSVal;
624 const APInt *RHSC;
625
626 // Pattern match a special case
627 // (x & ~2^z) == y --> x == y || x == y|2^z
628 // This undoes a transformation done by instcombine to fuse 2 compares.
629 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
630 // It's a little bit hard to see why the following transformations are
631 // correct. Here is a CVC3 program to verify them for 64-bit values:
632
633 /*
634 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
635 x : BITVECTOR(64);
636 y : BITVECTOR(64);
637 z : BITVECTOR(64);
638 mask : BITVECTOR(64) = BVSHL(ONE, z);
639 QUERY( (y & ~mask = y) =>
640 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
641 );
642 QUERY( (y | mask = y) =>
643 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
644 );
645 */
646
647 // Please note that each pattern must be a dual implication (<--> or
648 // iff). One directional implication can create spurious matches. If the
649 // implication is only one-way, an unsatisfiable condition on the left
650 // side can imply a satisfiable condition on the right side. Dual
651 // implication ensures that satisfiable conditions are transformed to
652 // other satisfiable conditions and unsatisfiable conditions are
653 // transformed to other unsatisfiable conditions.
654
655 // Here is a concrete example of a unsatisfiable condition on the left
656 // implying a satisfiable condition on the right:
657 //
658 // mask = (1 << z)
659 // (x & ~mask) == y --> (x == y || x == (y | mask))
660 //
661 // Substituting y = 3, z = 0 yields:
662 // (x & -2) == 3 --> (x == 3 || x == 2)
663
664 // Pattern match a special case:
665 /*
666 QUERY( (y & ~mask = y) =>
667 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
668 );
669 */
670 if (match(ICI->getOperand(0),
671 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
672 APInt Mask = ~*RHSC;
673 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
674 // If we already have a value for the switch, it has to match!
675 if (!setValueOnce(RHSVal))
676 return false;
677
678 Vals.push_back(C);
679 Vals.push_back(
680 ConstantInt::get(C->getContext(),
681 C->getValue() | Mask));
682 UsedICmps++;
683 return true;
684 }
685 }
686
687 // Pattern match a special case:
688 /*
689 QUERY( (y | mask = y) =>
690 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
691 );
692 */
693 if (match(ICI->getOperand(0),
694 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
695 APInt Mask = *RHSC;
696 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
697 // If we already have a value for the switch, it has to match!
698 if (!setValueOnce(RHSVal))
699 return false;
700
701 Vals.push_back(C);
702 Vals.push_back(ConstantInt::get(C->getContext(),
703 C->getValue() & ~Mask));
704 UsedICmps++;
705 return true;
706 }
707 }
708
709 // If we already have a value for the switch, it has to match!
710 if (!setValueOnce(ICI->getOperand(0)))
711 return false;
712
713 UsedICmps++;
714 Vals.push_back(C);
715 return true;
716 }
717
718 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
719 ConstantRange Span =
721
722 // Shift the range if the compare is fed by an add. This is the range
723 // compare idiom as emitted by instcombine.
724 Value *CandidateVal = I->getOperand(0);
725 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
726 Span = Span.subtract(*RHSC);
727 CandidateVal = RHSVal;
728 }
729
730 // If this is an and/!= check, then we are looking to build the set of
731 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
732 // x != 0 && x != 1.
733 if (!isEQ)
734 Span = Span.inverse();
735
736 // If there are a ton of values, we don't want to make a ginormous switch.
737 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
738 return false;
739 }
740
741 // If we already have a value for the switch, it has to match!
742 if (!setValueOnce(CandidateVal))
743 return false;
744
745 // Add all values from the range to the set
746 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
747 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
748
749 UsedICmps++;
750 return true;
751 }
752
753 /// Given a potentially 'or'd or 'and'd together collection of icmp
754 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
755 /// the value being compared, and stick the list constants into the Vals
756 /// vector.
757 /// One "Extra" case is allowed to differ from the other.
758 void gather(Value *V) {
759 Value *Op0, *Op1;
760 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
761 IsEq = true;
762 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
763 IsEq = false;
764 else
765 return;
766 // Keep a stack (SmallVector for efficiency) for depth-first traversal
767 SmallVector<Value *, 8> DFT{Op0, Op1};
768 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
769
770 while (!DFT.empty()) {
771 V = DFT.pop_back_val();
772
773 if (Instruction *I = dyn_cast<Instruction>(V)) {
774 // If it is a || (or && depending on isEQ), process the operands.
775 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
776 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
777 if (Visited.insert(Op1).second)
778 DFT.push_back(Op1);
779 if (Visited.insert(Op0).second)
780 DFT.push_back(Op0);
781
782 continue;
783 }
784
785 // Try to match the current instruction
786 if (matchInstruction(I, IsEq))
787 // Match succeed, continue the loop
788 continue;
789 }
790
791 // One element of the sequence of || (or &&) could not be match as a
792 // comparison against the same value as the others.
793 // We allow only one "Extra" case to be checked before the switch
794 if (!Extra) {
795 Extra = V;
796 continue;
797 }
798 // Failed to parse a proper sequence, abort now
799 CompValue = nullptr;
800 break;
801 }
802 }
803};
804
805} // end anonymous namespace
806
808 MemorySSAUpdater *MSSAU = nullptr) {
809 Instruction *Cond = nullptr;
810 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
811 Cond = dyn_cast<Instruction>(SI->getCondition());
812 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
813 if (BI->isConditional())
814 Cond = dyn_cast<Instruction>(BI->getCondition());
815 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
816 Cond = dyn_cast<Instruction>(IBI->getAddress());
817 }
818
819 TI->eraseFromParent();
820 if (Cond)
822}
823
824/// Return true if the specified terminator checks
825/// to see if a value is equal to constant integer value.
826Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
827 Value *CV = nullptr;
828 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
829 // Do not permit merging of large switch instructions into their
830 // predecessors unless there is only one predecessor.
831 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
832 CV = SI->getCondition();
833 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
834 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
835 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
836 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
837 CV = ICI->getOperand(0);
838 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
839 if (Trunc->hasNoUnsignedWrap())
840 CV = Trunc->getOperand(0);
841 }
842 }
843
844 // Unwrap any lossless ptrtoint cast.
845 if (CV) {
846 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
847 Value *Ptr = PTII->getPointerOperand();
848 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
849 CV = Ptr;
850 }
851 }
852 return CV;
853}
854
855/// Given a value comparison instruction,
856/// decode all of the 'cases' that it represents and return the 'default' block.
857BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
858 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
859 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
860 Cases.reserve(SI->getNumCases());
861 for (auto Case : SI->cases())
862 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
863 Case.getCaseSuccessor()));
864 return SI->getDefaultDest();
865 }
866
867 BranchInst *BI = cast<BranchInst>(TI);
868 Value *Cond = BI->getCondition();
870 ConstantInt *C;
871 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
872 Pred = ICI->getPredicate();
873 C = getConstantInt(ICI->getOperand(1), DL);
874 } else {
875 Pred = ICmpInst::ICMP_NE;
876 auto *Trunc = cast<TruncInst>(Cond);
877 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
878 }
879 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
880 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
881 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
882}
883
884/// Given a vector of bb/value pairs, remove any entries
885/// in the list that match the specified block.
886static void
888 std::vector<ValueEqualityComparisonCase> &Cases) {
889 llvm::erase(Cases, BB);
890}
891
892/// Return true if there are any keys in C1 that exist in C2 as well.
893static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
894 std::vector<ValueEqualityComparisonCase> &C2) {
895 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
896
897 // Make V1 be smaller than V2.
898 if (V1->size() > V2->size())
899 std::swap(V1, V2);
900
901 if (V1->empty())
902 return false;
903 if (V1->size() == 1) {
904 // Just scan V2.
905 ConstantInt *TheVal = (*V1)[0].Value;
906 for (const ValueEqualityComparisonCase &VECC : *V2)
907 if (TheVal == VECC.Value)
908 return true;
909 }
910
911 // Otherwise, just sort both lists and compare element by element.
912 array_pod_sort(V1->begin(), V1->end());
913 array_pod_sort(V2->begin(), V2->end());
914 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
915 while (i1 != e1 && i2 != e2) {
916 if ((*V1)[i1].Value == (*V2)[i2].Value)
917 return true;
918 if ((*V1)[i1].Value < (*V2)[i2].Value)
919 ++i1;
920 else
921 ++i2;
922 }
923 return false;
924}
925
926// Set branch weights on SwitchInst. This sets the metadata if there is at
927// least one non-zero weight.
929 bool IsExpected) {
930 // Check that there is at least one non-zero weight. Otherwise, pass
931 // nullptr to setMetadata which will erase the existing metadata.
932 MDNode *N = nullptr;
933 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
934 N = MDBuilder(SI->getParent()->getContext())
935 .createBranchWeights(Weights, IsExpected);
936 SI->setMetadata(LLVMContext::MD_prof, N);
937}
938
939// Similar to the above, but for branch and select instructions that take
940// exactly 2 weights.
941static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
942 uint32_t FalseWeight, bool IsExpected) {
943 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
944 // Check that there is at least one non-zero weight. Otherwise, pass
945 // nullptr to setMetadata which will erase the existing metadata.
946 MDNode *N = nullptr;
947 if (TrueWeight || FalseWeight)
948 N = MDBuilder(I->getParent()->getContext())
949 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
950 I->setMetadata(LLVMContext::MD_prof, N);
951}
952
953/// If TI is known to be a terminator instruction and its block is known to
954/// only have a single predecessor block, check to see if that predecessor is
955/// also a value comparison with the same value, and if that comparison
956/// determines the outcome of this comparison. If so, simplify TI. This does a
957/// very limited form of jump threading.
958bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
959 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
960 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
961 if (!PredVal)
962 return false; // Not a value comparison in predecessor.
963
964 Value *ThisVal = isValueEqualityComparison(TI);
965 assert(ThisVal && "This isn't a value comparison!!");
966 if (ThisVal != PredVal)
967 return false; // Different predicates.
968
969 // TODO: Preserve branch weight metadata, similarly to how
970 // foldValueComparisonIntoPredecessors preserves it.
971
972 // Find out information about when control will move from Pred to TI's block.
973 std::vector<ValueEqualityComparisonCase> PredCases;
974 BasicBlock *PredDef =
975 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
976 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
977
978 // Find information about how control leaves this block.
979 std::vector<ValueEqualityComparisonCase> ThisCases;
980 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
981 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
982
983 // If TI's block is the default block from Pred's comparison, potentially
984 // simplify TI based on this knowledge.
985 if (PredDef == TI->getParent()) {
986 // If we are here, we know that the value is none of those cases listed in
987 // PredCases. If there are any cases in ThisCases that are in PredCases, we
988 // can simplify TI.
989 if (!valuesOverlap(PredCases, ThisCases))
990 return false;
991
992 if (isa<BranchInst>(TI)) {
993 // Okay, one of the successors of this condbr is dead. Convert it to a
994 // uncond br.
995 assert(ThisCases.size() == 1 && "Branch can only have one case!");
996 // Insert the new branch.
997 Instruction *NI = Builder.CreateBr(ThisDef);
998 (void)NI;
999
1000 // Remove PHI node entries for the dead edge.
1001 ThisCases[0].Dest->removePredecessor(PredDef);
1002
1003 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1004 << "Through successor TI: " << *TI << "Leaving: " << *NI
1005 << "\n");
1006
1008
1009 if (DTU)
1010 DTU->applyUpdates(
1011 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1012
1013 return true;
1014 }
1015
1016 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1017 // Okay, TI has cases that are statically dead, prune them away.
1019 for (const ValueEqualityComparisonCase &Case : PredCases)
1020 DeadCases.insert(Case.Value);
1021
1022 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1023 << "Through successor TI: " << *TI);
1024
1025 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1026 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1027 --i;
1028 auto *Successor = i->getCaseSuccessor();
1029 if (DTU)
1030 ++NumPerSuccessorCases[Successor];
1031 if (DeadCases.count(i->getCaseValue())) {
1032 Successor->removePredecessor(PredDef);
1033 SI.removeCase(i);
1034 if (DTU)
1035 --NumPerSuccessorCases[Successor];
1036 }
1037 }
1038
1039 if (DTU) {
1040 std::vector<DominatorTree::UpdateType> Updates;
1041 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1042 if (I.second == 0)
1043 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1044 DTU->applyUpdates(Updates);
1045 }
1046
1047 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1048 return true;
1049 }
1050
1051 // Otherwise, TI's block must correspond to some matched value. Find out
1052 // which value (or set of values) this is.
1053 ConstantInt *TIV = nullptr;
1054 BasicBlock *TIBB = TI->getParent();
1055 for (const auto &[Value, Dest] : PredCases)
1056 if (Dest == TIBB) {
1057 if (TIV)
1058 return false; // Cannot handle multiple values coming to this block.
1059 TIV = Value;
1060 }
1061 assert(TIV && "No edge from pred to succ?");
1062
1063 // Okay, we found the one constant that our value can be if we get into TI's
1064 // BB. Find out which successor will unconditionally be branched to.
1065 BasicBlock *TheRealDest = nullptr;
1066 for (const auto &[Value, Dest] : ThisCases)
1067 if (Value == TIV) {
1068 TheRealDest = Dest;
1069 break;
1070 }
1071
1072 // If not handled by any explicit cases, it is handled by the default case.
1073 if (!TheRealDest)
1074 TheRealDest = ThisDef;
1075
1076 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1077
1078 // Remove PHI node entries for dead edges.
1079 BasicBlock *CheckEdge = TheRealDest;
1080 for (BasicBlock *Succ : successors(TIBB))
1081 if (Succ != CheckEdge) {
1082 if (Succ != TheRealDest)
1083 RemovedSuccs.insert(Succ);
1084 Succ->removePredecessor(TIBB);
1085 } else
1086 CheckEdge = nullptr;
1087
1088 // Insert the new branch.
1089 Instruction *NI = Builder.CreateBr(TheRealDest);
1090 (void)NI;
1091
1092 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1093 << "Through successor TI: " << *TI << "Leaving: " << *NI
1094 << "\n");
1095
1097 if (DTU) {
1099 Updates.reserve(RemovedSuccs.size());
1100 for (auto *RemovedSucc : RemovedSuccs)
1101 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1102 DTU->applyUpdates(Updates);
1103 }
1104 return true;
1105}
1106
1107namespace {
1108
1109/// This class implements a stable ordering of constant
1110/// integers that does not depend on their address. This is important for
1111/// applications that sort ConstantInt's to ensure uniqueness.
1112struct ConstantIntOrdering {
1113 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1114 return LHS->getValue().ult(RHS->getValue());
1115 }
1116};
1117
1118} // end anonymous namespace
1119
1121 ConstantInt *const *P2) {
1122 const ConstantInt *LHS = *P1;
1123 const ConstantInt *RHS = *P2;
1124 if (LHS == RHS)
1125 return 0;
1126 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1127}
1128
1129/// Get Weights of a given terminator, the default weight is at the front
1130/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1131/// metadata.
1133 SmallVectorImpl<uint64_t> &Weights) {
1134 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1135 assert(MD && "Invalid branch-weight metadata");
1136 extractFromBranchWeightMD64(MD, Weights);
1137
1138 // If TI is a conditional eq, the default case is the false case,
1139 // and the corresponding branch-weight data is at index 2. We swap the
1140 // default weight to be the first entry.
1141 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1142 assert(Weights.size() == 2);
1143 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1144 if (!ICI)
1145 return;
1146
1147 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1148 std::swap(Weights.front(), Weights.back());
1149 }
1150}
1151
1152/// Keep halving the weights until all can fit in uint32_t.
1154 uint64_t Max = *llvm::max_element(Weights);
1155 if (Max > UINT_MAX) {
1156 unsigned Offset = 32 - llvm::countl_zero(Max);
1157 for (uint64_t &I : Weights)
1158 I >>= Offset;
1159 }
1160}
1161
1163 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1164 Instruction *PTI = PredBlock->getTerminator();
1165
1166 // If we have bonus instructions, clone them into the predecessor block.
1167 // Note that there may be multiple predecessor blocks, so we cannot move
1168 // bonus instructions to a predecessor block.
1169 for (Instruction &BonusInst : *BB) {
1170 if (BonusInst.isTerminator())
1171 continue;
1172
1173 Instruction *NewBonusInst = BonusInst.clone();
1174
1175 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1176 // Unless the instruction has the same !dbg location as the original
1177 // branch, drop it. When we fold the bonus instructions we want to make
1178 // sure we reset their debug locations in order to avoid stepping on
1179 // dead code caused by folding dead branches.
1180 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1181 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1182 mapAtomInstance(DL, VMap);
1183 }
1184
1185 RemapInstruction(NewBonusInst, VMap,
1187
1188 // If we speculated an instruction, we need to drop any metadata that may
1189 // result in undefined behavior, as the metadata might have been valid
1190 // only given the branch precondition.
1191 // Similarly strip attributes on call parameters that may cause UB in
1192 // location the call is moved to.
1193 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1194
1195 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1196 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1197 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1199
1200 NewBonusInst->takeName(&BonusInst);
1201 BonusInst.setName(NewBonusInst->getName() + ".old");
1202 VMap[&BonusInst] = NewBonusInst;
1203
1204 // Update (liveout) uses of bonus instructions,
1205 // now that the bonus instruction has been cloned into predecessor.
1206 // Note that we expect to be in a block-closed SSA form for this to work!
1207 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1208 auto *UI = cast<Instruction>(U.getUser());
1209 auto *PN = dyn_cast<PHINode>(UI);
1210 if (!PN) {
1211 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1212 "If the user is not a PHI node, then it should be in the same "
1213 "block as, and come after, the original bonus instruction.");
1214 continue; // Keep using the original bonus instruction.
1215 }
1216 // Is this the block-closed SSA form PHI node?
1217 if (PN->getIncomingBlock(U) == BB)
1218 continue; // Great, keep using the original bonus instruction.
1219 // The only other alternative is an "use" when coming from
1220 // the predecessor block - here we should refer to the cloned bonus instr.
1221 assert(PN->getIncomingBlock(U) == PredBlock &&
1222 "Not in block-closed SSA form?");
1223 U.set(NewBonusInst);
1224 }
1225 }
1226
1227 // Key Instructions: We may have propagated atom info into the pred. If the
1228 // pred's terminator already has atom info do nothing as merging would drop
1229 // one atom group anyway. If it doesn't, propagte the remapped atom group
1230 // from BB's terminator.
1231 if (auto &PredDL = PTI->getDebugLoc()) {
1232 auto &DL = BB->getTerminator()->getDebugLoc();
1233 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1234 PredDL.isSameSourceLocation(DL)) {
1235 PTI->setDebugLoc(DL);
1236 RemapSourceAtom(PTI, VMap);
1237 }
1238 }
1239}
1240
1241bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1242 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1243 BasicBlock *BB = TI->getParent();
1244 BasicBlock *Pred = PTI->getParent();
1245
1247
1248 // Figure out which 'cases' to copy from SI to PSI.
1249 std::vector<ValueEqualityComparisonCase> BBCases;
1250 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1251
1252 std::vector<ValueEqualityComparisonCase> PredCases;
1253 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1254
1255 // Based on whether the default edge from PTI goes to BB or not, fill in
1256 // PredCases and PredDefault with the new switch cases we would like to
1257 // build.
1259
1260 // Update the branch weight metadata along the way
1262 bool PredHasWeights = hasBranchWeightMD(*PTI);
1263 bool SuccHasWeights = hasBranchWeightMD(*TI);
1264
1265 if (PredHasWeights) {
1266 getBranchWeights(PTI, Weights);
1267 // branch-weight metadata is inconsistent here.
1268 if (Weights.size() != 1 + PredCases.size())
1269 PredHasWeights = SuccHasWeights = false;
1270 } else if (SuccHasWeights)
1271 // If there are no predecessor weights but there are successor weights,
1272 // populate Weights with 1, which will later be scaled to the sum of
1273 // successor's weights
1274 Weights.assign(1 + PredCases.size(), 1);
1275
1276 SmallVector<uint64_t, 8> SuccWeights;
1277 if (SuccHasWeights) {
1278 getBranchWeights(TI, SuccWeights);
1279 // branch-weight metadata is inconsistent here.
1280 if (SuccWeights.size() != 1 + BBCases.size())
1281 PredHasWeights = SuccHasWeights = false;
1282 } else if (PredHasWeights)
1283 SuccWeights.assign(1 + BBCases.size(), 1);
1284
1285 if (PredDefault == BB) {
1286 // If this is the default destination from PTI, only the edges in TI
1287 // that don't occur in PTI, or that branch to BB will be activated.
1288 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1289 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1290 if (PredCases[i].Dest != BB)
1291 PTIHandled.insert(PredCases[i].Value);
1292 else {
1293 // The default destination is BB, we don't need explicit targets.
1294 std::swap(PredCases[i], PredCases.back());
1295
1296 if (PredHasWeights || SuccHasWeights) {
1297 // Increase weight for the default case.
1298 Weights[0] += Weights[i + 1];
1299 std::swap(Weights[i + 1], Weights.back());
1300 Weights.pop_back();
1301 }
1302
1303 PredCases.pop_back();
1304 --i;
1305 --e;
1306 }
1307
1308 // Reconstruct the new switch statement we will be building.
1309 if (PredDefault != BBDefault) {
1310 PredDefault->removePredecessor(Pred);
1311 if (DTU && PredDefault != BB)
1312 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1313 PredDefault = BBDefault;
1314 ++NewSuccessors[BBDefault];
1315 }
1316
1317 unsigned CasesFromPred = Weights.size();
1318 uint64_t ValidTotalSuccWeight = 0;
1319 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1320 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1321 PredCases.push_back(BBCases[i]);
1322 ++NewSuccessors[BBCases[i].Dest];
1323 if (SuccHasWeights || PredHasWeights) {
1324 // The default weight is at index 0, so weight for the ith case
1325 // should be at index i+1. Scale the cases from successor by
1326 // PredDefaultWeight (Weights[0]).
1327 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1328 ValidTotalSuccWeight += SuccWeights[i + 1];
1329 }
1330 }
1331
1332 if (SuccHasWeights || PredHasWeights) {
1333 ValidTotalSuccWeight += SuccWeights[0];
1334 // Scale the cases from predecessor by ValidTotalSuccWeight.
1335 for (unsigned i = 1; i < CasesFromPred; ++i)
1336 Weights[i] *= ValidTotalSuccWeight;
1337 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1338 Weights[0] *= SuccWeights[0];
1339 }
1340 } else {
1341 // If this is not the default destination from PSI, only the edges
1342 // in SI that occur in PSI with a destination of BB will be
1343 // activated.
1344 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1345 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1346 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1347 if (PredCases[i].Dest == BB) {
1348 PTIHandled.insert(PredCases[i].Value);
1349
1350 if (PredHasWeights || SuccHasWeights) {
1351 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1352 std::swap(Weights[i + 1], Weights.back());
1353 Weights.pop_back();
1354 }
1355
1356 std::swap(PredCases[i], PredCases.back());
1357 PredCases.pop_back();
1358 --i;
1359 --e;
1360 }
1361
1362 // Okay, now we know which constants were sent to BB from the
1363 // predecessor. Figure out where they will all go now.
1364 for (const ValueEqualityComparisonCase &Case : BBCases)
1365 if (PTIHandled.count(Case.Value)) {
1366 // If this is one we are capable of getting...
1367 if (PredHasWeights || SuccHasWeights)
1368 Weights.push_back(WeightsForHandled[Case.Value]);
1369 PredCases.push_back(Case);
1370 ++NewSuccessors[Case.Dest];
1371 PTIHandled.erase(Case.Value); // This constant is taken care of
1372 }
1373
1374 // If there are any constants vectored to BB that TI doesn't handle,
1375 // they must go to the default destination of TI.
1376 for (ConstantInt *I : PTIHandled) {
1377 if (PredHasWeights || SuccHasWeights)
1378 Weights.push_back(WeightsForHandled[I]);
1379 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1380 ++NewSuccessors[BBDefault];
1381 }
1382 }
1383
1384 // Okay, at this point, we know which new successor Pred will get. Make
1385 // sure we update the number of entries in the PHI nodes for these
1386 // successors.
1387 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1388 if (DTU) {
1389 SuccsOfPred = {llvm::from_range, successors(Pred)};
1390 Updates.reserve(Updates.size() + NewSuccessors.size());
1391 }
1392 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1393 NewSuccessors) {
1394 for (auto I : seq(NewSuccessor.second)) {
1395 (void)I;
1396 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1397 }
1398 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1399 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1400 }
1401
1402 Builder.SetInsertPoint(PTI);
1403 // Convert pointer to int before we switch.
1404 if (CV->getType()->isPointerTy()) {
1405 CV =
1406 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1407 }
1408
1409 // Now that the successors are updated, create the new Switch instruction.
1410 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1411 NewSI->setDebugLoc(PTI->getDebugLoc());
1412 for (ValueEqualityComparisonCase &V : PredCases)
1413 NewSI->addCase(V.Value, V.Dest);
1414
1415 if (PredHasWeights || SuccHasWeights) {
1416 // Halve the weights if any of them cannot fit in an uint32_t
1417 fitWeights(Weights);
1418
1419 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1420
1421 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1422 }
1423
1425
1426 // Okay, last check. If BB is still a successor of PSI, then we must
1427 // have an infinite loop case. If so, add an infinitely looping block
1428 // to handle the case to preserve the behavior of the code.
1429 BasicBlock *InfLoopBlock = nullptr;
1430 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1431 if (NewSI->getSuccessor(i) == BB) {
1432 if (!InfLoopBlock) {
1433 // Insert it at the end of the function, because it's either code,
1434 // or it won't matter if it's hot. :)
1435 InfLoopBlock =
1436 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1437 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1438 if (DTU)
1439 Updates.push_back(
1440 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1441 }
1442 NewSI->setSuccessor(i, InfLoopBlock);
1443 }
1444
1445 if (DTU) {
1446 if (InfLoopBlock)
1447 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1448
1449 Updates.push_back({DominatorTree::Delete, Pred, BB});
1450
1451 DTU->applyUpdates(Updates);
1452 }
1453
1454 ++NumFoldValueComparisonIntoPredecessors;
1455 return true;
1456}
1457
1458/// The specified terminator is a value equality comparison instruction
1459/// (either a switch or a branch on "X == c").
1460/// See if any of the predecessors of the terminator block are value comparisons
1461/// on the same value. If so, and if safe to do so, fold them together.
1462bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1463 IRBuilder<> &Builder) {
1464 BasicBlock *BB = TI->getParent();
1465 Value *CV = isValueEqualityComparison(TI); // CondVal
1466 assert(CV && "Not a comparison?");
1467
1468 bool Changed = false;
1469
1471 while (!Preds.empty()) {
1472 BasicBlock *Pred = Preds.pop_back_val();
1473 Instruction *PTI = Pred->getTerminator();
1474
1475 // Don't try to fold into itself.
1476 if (Pred == BB)
1477 continue;
1478
1479 // See if the predecessor is a comparison with the same value.
1480 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1481 if (PCV != CV)
1482 continue;
1483
1485 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1486 for (auto *Succ : FailBlocks) {
1487 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1488 return false;
1489 }
1490 }
1491
1492 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1493 Changed = true;
1494 }
1495 return Changed;
1496}
1497
1498// If we would need to insert a select that uses the value of this invoke
1499// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1500// need to do this), we can't hoist the invoke, as there is nowhere to put the
1501// select in this case.
1503 Instruction *I1, Instruction *I2) {
1504 for (BasicBlock *Succ : successors(BB1)) {
1505 for (const PHINode &PN : Succ->phis()) {
1506 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1507 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1508 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1509 return false;
1510 }
1511 }
1512 }
1513 return true;
1514}
1515
1516// Get interesting characteristics of instructions that
1517// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1518// instructions can be reordered across.
1524
1526 unsigned Flags = 0;
1527 if (I->mayReadFromMemory())
1528 Flags |= SkipReadMem;
1529 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1530 // inalloca) across stacksave/stackrestore boundaries.
1531 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1532 Flags |= SkipSideEffect;
1534 Flags |= SkipImplicitControlFlow;
1535 return Flags;
1536}
1537
1538// Returns true if it is safe to reorder an instruction across preceding
1539// instructions in a basic block.
1540static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1541 // Don't reorder a store over a load.
1542 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1543 return false;
1544
1545 // If we have seen an instruction with side effects, it's unsafe to reorder an
1546 // instruction which reads memory or itself has side effects.
1547 if ((Flags & SkipSideEffect) &&
1548 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1549 return false;
1550
1551 // Reordering across an instruction which does not necessarily transfer
1552 // control to the next instruction is speculation.
1554 return false;
1555
1556 // Hoisting of llvm.deoptimize is only legal together with the next return
1557 // instruction, which this pass is not always able to do.
1558 if (auto *CB = dyn_cast<CallBase>(I))
1559 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1560 return false;
1561
1562 // It's also unsafe/illegal to hoist an instruction above its instruction
1563 // operands
1564 BasicBlock *BB = I->getParent();
1565 for (Value *Op : I->operands()) {
1566 if (auto *J = dyn_cast<Instruction>(Op))
1567 if (J->getParent() == BB)
1568 return false;
1569 }
1570
1571 return true;
1572}
1573
1574static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1575
1576/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1577/// instructions \p I1 and \p I2 can and should be hoisted.
1579 const TargetTransformInfo &TTI) {
1580 // If we're going to hoist a call, make sure that the two instructions
1581 // we're commoning/hoisting are both marked with musttail, or neither of
1582 // them is marked as such. Otherwise, we might end up in a situation where
1583 // we hoist from a block where the terminator is a `ret` to a block where
1584 // the terminator is a `br`, and `musttail` calls expect to be followed by
1585 // a return.
1586 auto *C1 = dyn_cast<CallInst>(I1);
1587 auto *C2 = dyn_cast<CallInst>(I2);
1588 if (C1 && C2)
1589 if (C1->isMustTailCall() != C2->isMustTailCall())
1590 return false;
1591
1593 return false;
1594
1595 // If any of the two call sites has nomerge or convergent attribute, stop
1596 // hoisting.
1597 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1598 if (CB1->cannotMerge() || CB1->isConvergent())
1599 return false;
1600 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1601 if (CB2->cannotMerge() || CB2->isConvergent())
1602 return false;
1603
1604 return true;
1605}
1606
1607/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1608/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1609/// hoistCommonCodeFromSuccessors. e.g. The input:
1610/// I1 DVRs: { x, z },
1611/// OtherInsts: { I2 DVRs: { x, y, z } }
1612/// would result in hoisting only DbgVariableRecord x.
1614 Instruction *TI, Instruction *I1,
1615 SmallVectorImpl<Instruction *> &OtherInsts) {
1616 if (!I1->hasDbgRecords())
1617 return;
1618 using CurrentAndEndIt =
1619 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1620 // Vector of {Current, End} iterators.
1622 Itrs.reserve(OtherInsts.size() + 1);
1623 // Helper lambdas for lock-step checks:
1624 // Return true if this Current == End.
1625 auto atEnd = [](const CurrentAndEndIt &Pair) {
1626 return Pair.first == Pair.second;
1627 };
1628 // Return true if all Current are identical.
1629 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1630 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1632 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1633 });
1634 };
1635
1636 // Collect the iterators.
1637 Itrs.push_back(
1638 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1639 for (Instruction *Other : OtherInsts) {
1640 if (!Other->hasDbgRecords())
1641 return;
1642 Itrs.push_back(
1643 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1644 }
1645
1646 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1647 // the lock-step DbgRecord are identical, hoist all of them to TI.
1648 // This replicates the dbg.* intrinsic behaviour in
1649 // hoistCommonCodeFromSuccessors.
1650 while (none_of(Itrs, atEnd)) {
1651 bool HoistDVRs = allIdentical(Itrs);
1652 for (CurrentAndEndIt &Pair : Itrs) {
1653 // Increment Current iterator now as we may be about to move the
1654 // DbgRecord.
1655 DbgRecord &DR = *Pair.first++;
1656 if (HoistDVRs) {
1657 DR.removeFromParent();
1658 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1659 }
1660 }
1661 }
1662}
1663
1665 const Instruction *I2) {
1666 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1667 return true;
1668
1669 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1670 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1671 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1672 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1673 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1674
1675 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1676 return I1->getOperand(0) == I2->getOperand(1) &&
1677 I1->getOperand(1) == I2->getOperand(0) &&
1678 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1679 }
1680
1681 return false;
1682}
1683
1684/// If the target supports conditional faulting,
1685/// we look for the following pattern:
1686/// \code
1687/// BB:
1688/// ...
1689/// %cond = icmp ult %x, %y
1690/// br i1 %cond, label %TrueBB, label %FalseBB
1691/// FalseBB:
1692/// store i32 1, ptr %q, align 4
1693/// ...
1694/// TrueBB:
1695/// %maskedloadstore = load i32, ptr %b, align 4
1696/// store i32 %maskedloadstore, ptr %p, align 4
1697/// ...
1698/// \endcode
1699///
1700/// and transform it into:
1701///
1702/// \code
1703/// BB:
1704/// ...
1705/// %cond = icmp ult %x, %y
1706/// %maskedloadstore = cload i32, ptr %b, %cond
1707/// cstore i32 %maskedloadstore, ptr %p, %cond
1708/// cstore i32 1, ptr %q, ~%cond
1709/// br i1 %cond, label %TrueBB, label %FalseBB
1710/// FalseBB:
1711/// ...
1712/// TrueBB:
1713/// ...
1714/// \endcode
1715///
1716/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1717/// e.g.
1718///
1719/// \code
1720/// %vcond = bitcast i1 %cond to <1 x i1>
1721/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1722/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1723/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1724/// call void @llvm.masked.store.v1i32.p0
1725/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1726/// %cond.not = xor i1 %cond, true
1727/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1728/// call void @llvm.masked.store.v1i32.p0
1729/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1730/// \endcode
1731///
1732/// So we need to turn hoisted load/store into cload/cstore.
1733///
1734/// \param BI The branch instruction.
1735/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1736/// will be speculated.
1737/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
/// \param Sel If non-null, used as the insertion point for the mask
/// computation in the triangle case (NOTE(review): inferred from the use
/// below — confirm against the declaration, which is elided from this view).
1739    BranchInst *BI,
1740    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1741    std::optional<bool> Invert, Instruction *Sel) {
1742  auto &Context = BI->getParent()->getContext();
  // Masked load/store intrinsics take a <1 x i1> mask for the scalar case.
1743  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1744  auto *Cond = BI->getOperand(0);
1745  // Construct the condition if needed.
1746  BasicBlock *BB = BI->getParent();
1747  Value *Mask = nullptr;
1748  Value *MaskFalse = nullptr;
1749  Value *MaskTrue = nullptr;
1750  if (Invert.has_value()) {
    // Triangle CFG: one mask derived from the branch condition, negated when
    // we speculate the false successor.
1751    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1752    Mask = Builder.CreateBitCast(
1753        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1754        VCondTy);
1755  } else {
    // Diamond CFG: both arms are speculated, so build one mask per arm.
1756    IRBuilder<> Builder(BI);
1757    MaskFalse = Builder.CreateBitCast(
1758        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1759    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1760  }
  // Strip chains of bitcasts so we cast from the underlying value directly.
1761  auto PeekThroughBitcasts = [](Value *V) {
1762    while (auto *BitCast = dyn_cast<BitCastInst>(V))
1763      V = BitCast->getOperand(0);
1764    return V;
1765  };
1766  for (auto *I : SpeculatedConditionalLoadsStores) {
1767    IRBuilder<> Builder(Invert.has_value() ? I : BI);
1768    if (!Invert.has_value())
1769      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1770    // We currently assume conditional faulting load/store is supported for
1771    // scalar types only when creating new instructions. This can be easily
1772    // extended for vector types in the future.
1773    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1774    auto *Op0 = I->getOperand(0);
1775    CallInst *MaskedLoadStore = nullptr;
1776    if (auto *LI = dyn_cast<LoadInst>(I)) {
1777      // Handle Load.
1778      auto *Ty = I->getType();
1779      PHINode *PN = nullptr;
1780      Value *PassThru = nullptr;
1781      if (Invert.has_value())
1782        for (User *U : I->users()) {
1783          if ((PN = dyn_cast<PHINode>(U))) {
            // The PHI's incoming value from BB becomes the masked load's
            // pass-through, i.e. the value observed when the mask is false.
1784            PassThru = Builder.CreateBitCast(
1785                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1786                FixedVectorType::get(Ty, 1));
1787          } else if (auto *Ins = cast<Instruction>(U);
1788                     Sel && Ins->getParent() == BB) {
1789            // This happens when store or/and a speculative instruction between
1790            // load and store were hoisted to the BB. Make sure the masked load
1791            // inserted before its use.
1792            // We assume there's one of such use.
1793            Builder.SetInsertPoint(Ins);
1794          }
1795        }
1796      MaskedLoadStore = Builder.CreateMaskedLoad(
1797          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1798      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1799      if (PN)
1800        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1801      I->replaceAllUsesWith(NewLoadStore);
1802    } else {
1803      // Handle Store.
1804      auto *StoredVal = Builder.CreateBitCast(
1805          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1806      MaskedLoadStore = Builder.CreateMaskedStore(
1807          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1808    }
1809    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1810    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1811    //
1812    // !nonnull, !align : Not support pointer type, no need to keep.
1813    // !range: Load type is changed from scalar to vector, but the metadata on
1814    //         vector specifies a per-element range, so the semantics stay the
1815    //         same. Keep it.
1816    // !annotation: Not impact semantics. Keep it.
1817    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1818      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1819    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1820    // FIXME: DIAssignID is not supported for masked store yet.
1821    // (Verifier::visitDIAssignIDMetadata)
1823    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1824      return Node->getMetadataID() == Metadata::DIAssignIDKind;
1825    });
1826    MaskedLoadStore->copyMetadata(*I);
1827    I->eraseFromParent();
1828  }
1829}
1830
// Returns true if the visible checks pass: I is a simple (non-volatile,
// non-atomic) load/store and the corresponding HoistLoadsWithCondFaulting /
// HoistStoresWithCondFaulting option is enabled.
// NOTE(review): the signature line (1831, presumably
// "static bool isSafeCheapLoadStore(const Instruction *I,") is elided from
// this view.
1832                                 const TargetTransformInfo &TTI) {
1833  // Not handle volatile or atomic.
1834  bool IsStore = false;
1835  if (auto *L = dyn_cast<LoadInst>(I)) {
1836    if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1837      return false;
1838  } else if (auto *S = dyn_cast<StoreInst>(I)) {
1839    if (!S->isSimple() || !HoistStoresWithCondFaulting)
1840      return false;
1841    IsStore = true;
1842  } else
1843    return false;
1844
1845  // llvm.masked.load/store use i32 for alignment while load/store use i64.
1846  // That's why we have the alignment limitation.
1847  // FIXME: Update the prototype of the intrinsics?
  // NOTE(review): the final return expression (lines 1848-1849) is elided from
  // this view; presumably it queries TTI for conditional load/store support
  // for the accessed type — confirm against the original source.
1850}
1851
1852/// Hoist any common code in the successor blocks up into the block. This
1853/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1854/// given, only perform hoisting in case all successors blocks contain matching
1855/// instructions only. In that case, all instructions can be hoisted and the
1856/// original branch will be replaced and selects for PHIs are added.
/// \returns true if any hoisting was performed.
1857bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1858                                                   bool AllInstsEqOnly) {
1859  // This does very trivial matching, with limited scanning, to find identical
1860  // instructions in the two blocks. In particular, we don't want to get into
1861  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1862  // such, we currently just scan for obviously identical instructions in an
1863  // identical order, possibly separated by the same number of non-identical
1864  // instructions.
1865  BasicBlock *BB = TI->getParent();
1866  unsigned int SuccSize = succ_size(BB);
1867  if (SuccSize < 2)
1868    return false;
1869
1870  // If either of the blocks has it's address taken, then we can't do this fold,
1871  // because the code we'd hoist would no longer run when we jump into the block
1872  // by it's address.
1873  for (auto *Succ : successors(BB))
1874    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1875      return false;
1876
1877  // The second of pair is a SkipFlags bitmask.
1878  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1879  SmallVector<SuccIterPair, 8> SuccIterPairs;
1880  for (auto *Succ : successors(BB)) {
1881    BasicBlock::iterator SuccItr = Succ->begin();
    // Hoisting with PHIs at the successor start is not handled here.
1882    if (isa<PHINode>(*SuccItr))
1883      return false;
1884    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1885  }
1886
1887  if (AllInstsEqOnly) {
1888    // Check if all instructions in the successor blocks match. This allows
1889    // hoisting all instructions and removing the blocks we are hoisting from,
1890    // so does not add any new instructions.
    // NOTE(review): the declaration of `Succs` (line 1891) is elided from this
    // view; presumably it is the list of BB's successor blocks.
1892    // Check if sizes and terminators of all successors match.
1893    bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1894      Instruction *Term0 = Succs[0]->getTerminator();
1895      Instruction *Term = Succ->getTerminator();
1896      return !Term->isSameOperationAs(Term0) ||
1897             !equal(Term->operands(), Term0->operands()) ||
1898             Succs[0]->size() != Succ->size();
1899    });
1900    if (!AllSame)
1901      return false;
1902    if (AllSame) {
      // NOTE(review): the declaration of `LRI` (line 1903) is elided from this
      // view; it is a lockstep reverse iterator over the successor blocks.
1904      while (LRI.isValid()) {
1905        Instruction *I0 = (*LRI)[0];
1906        if (any_of(*LRI, [I0](Instruction *I) {
1907              return !areIdenticalUpToCommutativity(I0, I);
1908            })) {
1909          return false;
1910        }
1911        --LRI;
1912      }
1913    }
1914    // Now we know that all instructions in all successors can be hoisted. Let
1915    // the loop below handle the hoisting.
1916  }
1917
1918  // Count how many instructions were not hoisted so far. There's a limit on how
1919  // many instructions we skip, serving as a compilation time control as well as
1920  // preventing excessive increase of life ranges.
1921  unsigned NumSkipped = 0;
1922  // If we find an unreachable instruction at the beginning of a basic block, we
1923  // can still hoist instructions from the rest of the basic blocks.
1924  if (SuccIterPairs.size() > 2) {
1925    erase_if(SuccIterPairs,
1926             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1927    if (SuccIterPairs.size() < 2)
1928      return false;
1929  }
1930
1931  bool Changed = false;
1932
  // Walk all successors in lockstep, hoisting runs of identical instructions.
1933  for (;;) {
1934    auto *SuccIterPairBegin = SuccIterPairs.begin();
1935    auto &BB1ItrPair = *SuccIterPairBegin++;
1936    auto OtherSuccIterPairRange =
1937        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1938    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1939
1940    Instruction *I1 = &*BB1ItrPair.first;
1941
1942    bool AllInstsAreIdentical = true;
1943    bool HasTerminator = I1->isTerminator();
1944    for (auto &SuccIter : OtherSuccIterRange) {
1945      Instruction *I2 = &*SuccIter;
1946      HasTerminator |= I2->isTerminator();
1947      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1948                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1949        AllInstsAreIdentical = false;
1950    }
1951
    // NOTE(review): the declaration of `OtherInsts` (line 1952) is elided from
    // this view; it collects the lockstep instructions of the other blocks.
1953    for (auto &SuccIter : OtherSuccIterRange)
1954      OtherInsts.push_back(&*SuccIter);
1955
1956    // If we are hoisting the terminator instruction, don't move one (making a
1957    // broken BB), instead clone it, and remove BI.
1958    if (HasTerminator) {
1959      // Even if BB, which contains only one unreachable instruction, is ignored
1960      // at the beginning of the loop, we can hoist the terminator instruction.
1961      // If any instructions remain in the block, we cannot hoist terminators.
1962      if (NumSkipped || !AllInstsAreIdentical) {
1963        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1964        return Changed;
1965      }
1966
1967      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1968             Changed;
1969    }
1970
1971    if (AllInstsAreIdentical) {
1972      unsigned SkipFlagsBB1 = BB1ItrPair.second;
1973      AllInstsAreIdentical =
1974          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1975          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1976            Instruction *I2 = &*Pair.first;
1977            unsigned SkipFlagsBB2 = Pair.second;
1978            // Even if the instructions are identical, it may not
1979            // be safe to hoist them if we have skipped over
1980            // instructions with side effects or their operands
1981            // weren't hoisted.
1982            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
            // NOTE(review): the second conjunct of this return (line 1983) is
            // elided from this view.
1984          });
1985    }
1986
1987    if (AllInstsAreIdentical) {
1988      BB1ItrPair.first++;
1989      // For a normal instruction, we just move one to right before the
1990      // branch, then replace all uses of the other with the first. Finally,
1991      // we remove the now redundant second instruction.
1992      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1993      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1994      // and leave any that were not hoisted behind (by calling moveBefore
1995      // rather than moveBeforePreserving).
1996      I1->moveBefore(TI->getIterator());
1997      for (auto &SuccIter : OtherSuccIterRange) {
1998        Instruction *I2 = &*SuccIter++;
1999        assert(I2 != I1);
2000        if (!I2->use_empty())
2001          I2->replaceAllUsesWith(I1);
2002        I1->andIRFlags(I2);
2003        if (auto *CB = dyn_cast<CallBase>(I1)) {
2004          bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2005          assert(Success && "We should not be trying to hoist callbases "
2006                            "with non-intersectable attributes");
2007          // For NDEBUG Compile.
2008          (void)Success;
2009        }
2010
2011        combineMetadataForCSE(I1, I2, true);
2012        // I1 and I2 are being combined into a single instruction. Its debug
2013        // location is the merged locations of the original instructions.
2014        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2015        I2->eraseFromParent();
2016      }
2017      if (!Changed)
2018        NumHoistCommonCode += SuccIterPairs.size();
2019      Changed = true;
2020      NumHoistCommonInstrs += SuccIterPairs.size();
2021    } else {
2022      if (NumSkipped >= HoistCommonSkipLimit) {
2023        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2024        return Changed;
2025      }
2026      // We are about to skip over a pair of non-identical instructions. Record
2027      // if any have characteristics that would prevent reordering instructions
2028      // across them.
2029      for (auto &SuccIterPair : SuccIterPairs) {
2030        Instruction *I = &*SuccIterPair.first++;
2031        SuccIterPair.second |= skippedInstrFlags(I);
2032      }
2033      ++NumSkipped;
2034    }
2035  }
2036}
2037
// Hoist the identical terminators I1 / OtherSuccTIs of TI's successors up into
// TI's block: clone one terminator before TI, RAUW the originals, and insert
// selects for PHI incoming values that disagree between the two arms of an if.
// \returns true if the terminator was hoisted (Changed).
2038bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2039    Instruction *TI, Instruction *I1,
2040    SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2041
2042  auto *BI = dyn_cast<BranchInst>(TI);
2043
2044  bool Changed = false;
2045  BasicBlock *TIParent = TI->getParent();
2046  BasicBlock *BB1 = I1->getParent();
2047
2048  // Use only for an if statement.
2049  auto *I2 = *OtherSuccTIs.begin();
2050  auto *BB2 = I2->getParent();
2051  if (BI) {
2052    assert(OtherSuccTIs.size() == 1);
2053    assert(BI->getSuccessor(0) == I1->getParent());
2054    assert(BI->getSuccessor(1) == I2->getParent());
2055  }
2056
2057  // In the case of an if statement, we try to hoist an invoke.
2058  // FIXME: Can we define a safety predicate for CallBr?
2059  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2060  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2061  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2062    return false;
2063
2064  // TODO: callbr hoisting currently disabled pending further study.
2065  if (isa<CallBrInst>(I1))
2066    return false;
2067
2068  for (BasicBlock *Succ : successors(BB1)) {
2069    for (PHINode &PN : Succ->phis()) {
2070      Value *BB1V = PN.getIncomingValueForBlock(BB1);
2071      for (Instruction *OtherSuccTI : OtherSuccTIs) {
2072        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2073        if (BB1V == BB2V)
2074          continue;
2075
2076        // In the case of an if statement, check for
2077        // passingValueIsAlwaysUndefined here because we would rather eliminate
2078        // undefined control flow then converting it to a select.
2079        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
            // NOTE(review): the remaining disjuncts of this condition
            // (line 2080) are elided from this view.
2081          return false;
2082      }
2083    }
2084  }
2085
2086  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2087  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2088  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2089  // Clone the terminator and hoist it into the pred, without any debug info.
2090  Instruction *NT = I1->clone();
2091  NT->insertInto(TIParent, TI->getIterator());
2092  if (!NT->getType()->isVoidTy()) {
2093    I1->replaceAllUsesWith(NT);
2094    for (Instruction *OtherSuccTI : OtherSuccTIs)
2095      OtherSuccTI->replaceAllUsesWith(NT);
2096    NT->takeName(I1);
2097  }
2098  Changed = true;
2099  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2100
2101  // Ensure terminator gets a debug location, even an unknown one, in case
2102  // it involves inlinable calls.
  // NOTE(review): the declaration of `Locs` (line 2103, presumably a
  // SmallVector of DebugLoc) is elided from this view.
2104  Locs.push_back(I1->getDebugLoc());
2105  for (auto *OtherSuccTI : OtherSuccTIs)
2106    Locs.push_back(OtherSuccTI->getDebugLoc());
2107  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2108
2109  // PHIs created below will adopt NT's merged DebugLoc.
2110  IRBuilder<NoFolder> Builder(NT);
2111
2112  // In the case of an if statement, hoisting one of the terminators from our
2113  // successor is a great thing. Unfortunately, the successors of the if/else
2114  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2115  // must agree for all PHI nodes, so we insert select instruction to compute
2116  // the final result.
2117  if (BI) {
    // Deduplicate selects for repeated (BB1V, BB2V) pairs across PHIs.
2118    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2119    for (BasicBlock *Succ : successors(BB1)) {
2120      for (PHINode &PN : Succ->phis()) {
2121        Value *BB1V = PN.getIncomingValueForBlock(BB1);
2122        Value *BB2V = PN.getIncomingValueForBlock(BB2);
2123        if (BB1V == BB2V)
2124          continue;
2125
2126        // These values do not agree. Insert a select instruction before NT
2127        // that determines the right value.
2128        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2129        if (!SI) {
2130          // Propagate fast-math-flags from phi node to its replacement select.
2131          SI = cast<SelectInst>(Builder.CreateSelectFMF(
2132              BI->getCondition(), BB1V, BB2V,
2133              isa<FPMathOperator>(PN) ? &PN : nullptr,
2134              BB1V->getName() + "." + BB2V->getName(), BI));
2135        }
2136
2137        // Make the PHI node use the select for all incoming values for BB1/BB2
2138        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2139          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2140            PN.setIncomingValue(i, SI);
2141      }
2142    }
2143  }
2144
  // NOTE(review): the declaration of `Updates` (line 2145, presumably a
  // SmallVector of DominatorTree::UpdateType) is elided from this view.
2146
2147  // Update any PHI nodes in our new successors.
2148  for (BasicBlock *Succ : successors(BB1)) {
2149    addPredecessorToBlock(Succ, TIParent, BB1);
2150    if (DTU)
2151      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2152  }
2153
2154  if (DTU)
2155    for (BasicBlock *Succ : successors(TI))
2156      Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2157
  // NOTE(review): line 2158 is elided from this view; it presumably removes
  // the now-dead terminator TI before the DT updates are applied — confirm.
2159  if (DTU)
2160    DTU->applyUpdates(Updates);
2161  return Changed;
2162}
2163
2164// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2165// into variables.
// Returns true if replacing operand OpIdx of I by a variable (PHI) is not
// expected to pessimize the instruction.
// NOTE(review): the signature line (2166, presumably "static bool
// replacingOperandWithVariableIsCheap(const Instruction *I,") is elided from
// this view.
2167                                             int OpIdx) {
2168  // Divide/Remainder by constant is typically much cheaper than by variable.
2169  if (I->isIntDivRem())
2170    return OpIdx != 1;
  // Intrinsic operands are often required to be immediates; everything else
  // is considered cheap to variable-ize.
2171  return !isa<IntrinsicInst>(I);
2172}
2173
2174// All instructions in Insts belong to different blocks that all unconditionally
2175// branch to a common successor. Analyze each instruction and return true if it
2176// would be possible to sink them into their successor, creating one common
2177// instruction instead. For every value that would be required to be provided by
2178// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): the signature lines (2179-2180, presumably "static bool
// canSinkInstructions(ArrayRef<Instruction *> Insts,") are elided from this
// view.
2181    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2182  // Prune out obviously bad instructions to move. Each instruction must have
2183  // the same number of uses, and we check later that the uses are consistent.
2184  std::optional<unsigned> NumUses;
2185  for (auto *I : Insts) {
2186    // These instructions may change or break semantics if moved.
2187    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2188        I->getType()->isTokenTy())
2189      return false;
2190
2191    // Do not try to sink an instruction in an infinite loop - it can cause
2192    // this algorithm to infinite loop.
2193    if (I->getParent()->getSingleSuccessor() == I->getParent())
2194      return false;
2195
2196    // Conservatively return false if I is an inline-asm instruction. Sinking
2197    // and merging inline-asm instructions can potentially create arguments
2198    // that cannot satisfy the inline-asm constraints.
2199    // If the instruction has nomerge or convergent attribute, return false.
2200    if (const auto *C = dyn_cast<CallBase>(I))
2201      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2202        return false;
2203
2204    if (!NumUses)
2205      NumUses = I->getNumUses();
2206    else if (NumUses != I->getNumUses())
2207      return false;
2208  }
2209
2210  const Instruction *I0 = Insts.front();
2211  const auto I0MMRA = MMRAMetadata(*I0);
2212  for (auto *I : Insts) {
2213    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2214      return false;
2215
2216    // Treat MMRAs conservatively. This pass can be quite aggressive and
2217    // could drop a lot of MMRAs otherwise.
2218    if (MMRAMetadata(*I) != I0MMRA)
2219      return false;
2220  }
2221
2222  // Uses must be consistent: If I0 is used in a phi node in the sink target,
2223  // then the other phi operands must match the instructions from Insts. This
2224  // also has to hold true for any phi nodes that would be created as a result
2225  // of sinking. Both of these cases are represented by PhiOperands.
2226  for (const Use &U : I0->uses()) {
2227    auto It = PHIOperands.find(&U);
2228    if (It == PHIOperands.end())
2229      // There may be uses in other blocks when sinking into a loop header.
2230      return false;
2231    if (!equal(Insts, It->second))
2232      return false;
2233  }
2234
2235  // For calls to be sinkable, they must all be indirect, or have same callee.
2236  // I.e. if we have two direct calls to different callees, we don't want to
2237  // turn that into an indirect call. Likewise, if we have an indirect call,
2238  // and a direct call, we don't actually want to have a single indirect call.
2239  if (isa<CallBase>(I0)) {
2240    auto IsIndirectCall = [](const Instruction *I) {
2241      return cast<CallBase>(I)->isIndirectCall();
2242    };
2243    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2244    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2245    if (HaveIndirectCalls) {
2246      if (!AllCallsAreIndirect)
2247        return false;
2248    } else {
2249      // All callees must be identical.
2250      Value *Callee = nullptr;
2251      for (const Instruction *I : Insts) {
2252        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2253        if (!Callee)
2254          Callee = CurrCallee;
2255        else if (Callee != CurrCallee)
2256          return false;
2257      }
2258    }
2259  }
2260
  // Operands that differ across Insts are recorded in PHIOperands; they would
  // need a PHI in the sink target.
2261  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2262    Value *Op = I0->getOperand(OI);
2263    auto SameAsI0 = [&I0, OI](const Instruction *I) {
2264      assert(I->getNumOperands() == I0->getNumOperands());
2265      return I->getOperand(OI) == I0->getOperand(OI);
2266    };
2267    if (!all_of(Insts, SameAsI0)) {
2268      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
          // NOTE(review): the second half of this condition (line 2269) is
          // elided from this view.
2270        // We can't create a PHI from this GEP.
2271        return false;
2272      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2273      for (auto *I : Insts)
2274        Ops.push_back(I->getOperand(OI));
2275    }
2276  }
2277  return true;
2278}
2279
2280// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2281// instruction of every block in Blocks to their common successor, commoning
2282// into one instruction.
// NOTE(review): the signature line (2283, presumably
// "static void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks) {") is
// elided from this view.
2284  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2285
2286  // canSinkInstructions returning true guarantees that every block has at
2287  // least one non-terminator instruction.
  // NOTE(review): the declaration of `Insts` (line 2288) is elided from this
  // view; it collects the last non-terminator instruction of each block.
2289  for (auto *BB : Blocks) {
2290    Instruction *I = BB->getTerminator();
2291    I = I->getPrevNode();
2292    Insts.push_back(I);
2293  }
2294
2295  // We don't need to do any more checking here; canSinkInstructions should
2296  // have done it all for us.
2297  SmallVector<Value*, 4> NewOperands;
2298  Instruction *I0 = Insts.front();
2299  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2300    // This check is different to that in canSinkInstructions. There, we
2301    // cared about the global view once simplifycfg (and instcombine) have
2302    // completed - it takes into account PHIs that become trivially
2303    // simplifiable. However here we need a more local view; if an operand
2304    // differs we create a PHI and rely on instcombine to clean up the very
2305    // small mess we may make.
2306    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2307      return I->getOperand(O) != I0->getOperand(O);
2308    });
2309    if (!NeedPHI) {
2310      NewOperands.push_back(I0->getOperand(O));
2311      continue;
2312    }
2313
2314    // Create a new PHI in the successor block and populate it.
2315    auto *Op = I0->getOperand(O);
2316    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2317    auto *PN =
2318        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2319    PN->insertBefore(BBEnd->begin());
2320    for (auto *I : Insts)
2321      PN->addIncoming(I->getOperand(O), I->getParent());
2322    NewOperands.push_back(PN);
2323  }
2324
2325  // Arbitrarily use I0 as the new "common" instruction; remap its operands
2326  // and move it to the start of the successor block.
2327  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2328    I0->getOperandUse(O).set(NewOperands[O]);
2329
2330  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2331
2332  // Update metadata and IR flags, and merge debug locations.
2333  for (auto *I : Insts)
2334    if (I != I0) {
2335      // The debug location for the "common" instruction is the merged locations
2336      // of all the commoned instructions. We start with the original location
2337      // of the "common" instruction and iteratively merge each location in the
2338      // loop below.
2339      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2340      // However, as N-way merge for CallInst is rare, so we use simplified API
2341      // instead of using complex API for N-way merge.
2342      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2343      combineMetadataForCSE(I0, I, true);
2344      I0->andIRFlags(I);
2345      if (auto *CB = dyn_cast<CallBase>(I0)) {
2346        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2347        assert(Success && "We should not be trying to sink callbases "
2348                          "with non-intersectable attributes");
2349        // For NDEBUG Compile.
2350        (void)Success;
2351      }
2352    }
2353
2354  for (User *U : make_early_inc_range(I0->users())) {
2355    // canSinkLastInstruction checked that all instructions are only used by
2356    // phi nodes in a way that allows replacing the phi node with the common
2357    // instruction.
2358    auto *PN = cast<PHINode>(U);
2359    PN->replaceAllUsesWith(I0);
2360    PN->eraseFromParent();
2361  }
2362
2363  // Finally nuke all instructions apart from the common instruction.
2364  for (auto *I : Insts) {
2365    if (I == I0)
2366      continue;
2367    // The remaining uses are debug users, replace those with the common inst.
2368    // In most (all?) cases this just introduces a use-before-def.
2369    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2370    I->replaceAllUsesWith(I0);
2371    I->eraseFromParent();
2372  }
2373}
2374
2375/// Check whether BB's predecessors end with unconditional branches. If it is
2376/// true, sink any common code from the predecessors to BB.
/// \returns true if any sinking (or edge splitting) was performed.
// NOTE(review): the signature line (2377, presumably "static bool
// sinkCommonCodeFromPredecessors(BasicBlock *BB,") is elided from this view.
2378                                           DomTreeUpdater *DTU) {
2379  // We support two situations:
2380  //   (1) all incoming arcs are unconditional
2381  //   (2) there are non-unconditional incoming arcs
2382  //
2383  // (2) is very common in switch defaults and
2384  // else-if patterns;
2385  //
2386  //   if (a) f(1);
2387  //   else if (b) f(2);
2388  //
2389  // produces:
2390  //
2391  //       [if]
2392  //      /    \
2393  //    [f(1)] [if]
2394  //      |     | \
2395  //      |     |  |
2396  //      |  [f(2)]|
2397  //       \    | /
2398  //        [ end ]
2399  //
2400  // [end] has two unconditional predecessor arcs and one conditional. The
2401  // conditional refers to the implicit empty 'else' arc. This conditional
2402  // arc can also be caused by an empty default block in a switch.
2403  //
2404  // In this case, we attempt to sink code from all *unconditional* arcs.
2405  // If we can sink instructions from these arcs (determined during the scan
2406  // phase below) we insert a common successor for all unconditional arcs and
2407  // connect that to [end], to enable sinking:
2408  //
2409  //       [if]
2410  //      /    \
2411  //    [x(1)] [if]
2412  //      |     | \
2413  //      |     |  \
2414  //      |  [x(2)] |
2415  //       \   /    |
2416  //   [sink.split] |
2417  //         \     /
2418  //         [ end ]
2419  //
2420  SmallVector<BasicBlock*,4> UnconditionalPreds;
2421  bool HaveNonUnconditionalPredecessors = false;
2422  for (auto *PredBB : predecessors(BB)) {
2423    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2424    if (PredBr && PredBr->isUnconditional())
2425      UnconditionalPreds.push_back(PredBB);
2426    else
2427      HaveNonUnconditionalPredecessors = true;
2428  }
2429  if (UnconditionalPreds.size() < 2)
2430    return false;
2431
2432  // We take a two-step approach to tail sinking. First we scan from the end of
2433  // each block upwards in lockstep. If the n'th instruction from the end of each
2434  // block can be sunk, those instructions are added to ValuesToSink and we
2435  // carry on. If we can sink an instruction but need to PHI-merge some operands
2436  // (because they're not identical in each instruction) we add these to
2437  // PHIOperands.
2438  // We prepopulate PHIOperands with the phis that already exist in BB.
  // NOTE(review): the declaration of `PHIOperands` (line 2439) is elided from
  // this view; per canSinkInstructions it maps a Use to the per-predecessor
  // operand values.
2440  for (PHINode &PN : BB->phis()) {
    // NOTE(review): the declaration of `IncomingVals` (line 2442) is elided
    // from this view; it maps each incoming block to its incoming Use.
2443    for (const Use &U : PN.incoming_values())
2444      IncomingVals.insert({PN.getIncomingBlock(U), &U});
2445    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2446    for (BasicBlock *Pred : UnconditionalPreds)
2447      Ops.push_back(*IncomingVals[Pred]);
2448  }
2449
2450  int ScanIdx = 0;
2451  SmallPtrSet<Value*,4> InstructionsToSink;
2452  LockstepReverseIterator<true> LRI(UnconditionalPreds);
2453  while (LRI.isValid() &&
2454         canSinkInstructions(*LRI, PHIOperands)) {
2455    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2456                      << "\n");
2457    InstructionsToSink.insert_range(*LRI);
2458    ++ScanIdx;
2459    --LRI;
2460  }
2461
2462  // If no instructions can be sunk, early-return.
2463  if (ScanIdx == 0)
2464    return false;
2465
2466  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2467
  // Profitability checks are skipped when the block leads to deopt/unreachable.
2468  if (!followedByDeoptOrUnreachable) {
2469    // Check whether this is the pointer operand of a load/store.
2470    auto IsMemOperand = [](Use &U) {
2471      auto *I = cast<Instruction>(U.getUser());
2472      if (isa<LoadInst>(I))
2473        return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2474      if (isa<StoreInst>(I))
2475        return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2476      return false;
2477    };
2478
2479    // Okay, we *could* sink last ScanIdx instructions. But how many can we
2480    // actually sink before encountering instruction that is unprofitable to
2481    // sink?
2482    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2483      unsigned NumPHIInsts = 0;
2484      for (Use &U : (*LRI)[0]->operands()) {
2485        auto It = PHIOperands.find(&U);
2486        if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2487              return InstructionsToSink.contains(V);
2488            })) {
2489          ++NumPHIInsts;
2490          // Do not separate a load/store from the gep producing the address.
2491          // The gep can likely be folded into the load/store as an addressing
2492          // mode. Additionally, a load of a gep is easier to analyze than a
2493          // load of a phi.
2494          if (IsMemOperand(U) &&
2495              any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2496            return false;
2497          // FIXME: this check is overly optimistic. We may end up not sinking
2498          // said instruction, due to the very same profitability check.
2499          // See @creating_too_many_phis in sink-common-code.ll.
2500        }
2501      }
2502      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2503      return NumPHIInsts <= 1;
2504    };
2505
2506    // We've determined that we are going to sink last ScanIdx instructions,
2507    // and recorded them in InstructionsToSink. Now, some instructions may be
2508    // unprofitable to sink. But that determination depends on the instructions
2509    // that we are going to sink.
2510
2511    // First, forward scan: find the first instruction unprofitable to sink,
2512    // recording all the ones that are profitable to sink.
2513    // FIXME: would it be better, after we detect that not all are profitable.
2514    // to either record the profitable ones, or erase the unprofitable ones?
2515    // Maybe we need to choose (at runtime) the one that will touch least
2516    // instrs?
2517    LRI.reset();
2518    int Idx = 0;
2519    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2520    while (Idx < ScanIdx) {
2521      if (!ProfitableToSinkInstruction(LRI)) {
2522        // Too many PHIs would be created.
2523        LLVM_DEBUG(
2524            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2525        break;
2526      }
2527      InstructionsProfitableToSink.insert_range(*LRI);
2528      --LRI;
2529      ++Idx;
2530    }
2531
2532    // If no instructions can be sunk, early-return.
2533    if (Idx == 0)
2534      return false;
2535
2536    // Did we determine that (only) some instructions are unprofitable to sink?
2537    if (Idx < ScanIdx) {
2538      // Okay, some instructions are unprofitable.
2539      ScanIdx = Idx;
2540      InstructionsToSink = InstructionsProfitableToSink;
2541
2542      // But, that may make other instructions unprofitable, too.
2543      // So, do a backward scan, do any earlier instructions become
2544      // unprofitable?
2545      assert(
2546          !ProfitableToSinkInstruction(LRI) &&
2547          "We already know that the last instruction is unprofitable to sink");
2548      ++LRI;
2549      --Idx;
2550      while (Idx >= 0) {
2551        // If we detect that an instruction becomes unprofitable to sink,
2552        // all earlier instructions won't be sunk either,
2553        // so preemptively keep InstructionsProfitableToSink in sync.
2554        // FIXME: is this the most performant approach?
2555        for (auto *I : *LRI)
2556          InstructionsProfitableToSink.erase(I);
2557        if (!ProfitableToSinkInstruction(LRI)) {
2558          // Everything starting with this instruction won't be sunk.
2559          ScanIdx = Idx;
2560          InstructionsToSink = InstructionsProfitableToSink;
2561        }
2562        ++LRI;
2563        --Idx;
2564      }
2565    }
2566
2567    // If no instructions can be sunk, early-return.
2568    if (ScanIdx == 0)
2569      return false;
2570  }
2571
2572  bool Changed = false;
2573
2574  if (HaveNonUnconditionalPredecessors) {
2575    if (!followedByDeoptOrUnreachable) {
2576      // It is always legal to sink common instructions from unconditional
2577      // predecessors. However, if not all predecessors are unconditional,
2578      // this transformation might be pessimizing. So as a rule of thumb,
2579      // don't do it unless we'd sink at least one non-speculatable instruction.
2580      // See https://bugs.llvm.org/show_bug.cgi?id=30244
2581      LRI.reset();
2582      int Idx = 0;
2583      bool Profitable = false;
2584      while (Idx < ScanIdx) {
2585        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2586          Profitable = true;
2587          break;
2588        }
2589        --LRI;
2590        ++Idx;
2591      }
2592      if (!Profitable)
2593        return false;
2594    }
2595
2596    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2597    // We have a conditional edge and we're going to sink some instructions.
2598    // Insert a new block postdominating all blocks we're going to sink from.
2599    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2600      // Edges couldn't be split.
2601      return false;
2602    Changed = true;
2603  }
2604
2605  // Now that we've analyzed all potential sinking candidates, perform the
2606  // actual sink. We iteratively sink the last non-terminator of the source
2607  // blocks into their common successor unless doing so would require too
2608  // many PHI instructions to be generated (currently only one PHI is allowed
2609  // per sunk instruction).
2610  //
2611  // We can use InstructionsToSink to discount values needing PHI-merging that will
2612  // actually be sunk in a later iteration. This allows us to be more
2613  // aggressive in what we sink. This does allow a false positive where we
2614  // sink presuming a later value will also be sunk, but stop half way through
2615  // and never actually sink it which means we produce more PHIs than intended.
2616  // This is unlikely in practice though.
2617  int SinkIdx = 0;
2618  for (; SinkIdx != ScanIdx; ++SinkIdx) {
2619    LLVM_DEBUG(dbgs() << "SINK: Sink: "
2620                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2621                      << "\n");
2622
2623    // Because we've sunk every instruction in turn, the current instruction to
2624    // sink is always at index 0.
2625    LRI.reset();
2626
2627    sinkLastInstruction(UnconditionalPreds);
2628    NumSinkCommonInstrs++;
2629    Changed = true;
2630  }
2631  if (SinkIdx != 0)
2632    ++NumSinkCommonCode;
2633  return Changed;
2634}
2634
2635namespace {
2636
2637struct CompatibleSets {
2638 using SetTy = SmallVector<InvokeInst *, 2>;
2639
2641
2642 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2643
2644 SetTy &getCompatibleSet(InvokeInst *II);
2645
2646 void insert(InvokeInst *II);
2647};
2648
2649CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2650 // Perform a linear scan over all the existing sets, see if the new `invoke`
2651 // is compatible with any particular set. Since we know that all the `invokes`
2652 // within a set are compatible, only check the first `invoke` in each set.
2653 // WARNING: at worst, this has quadratic complexity.
2654 for (CompatibleSets::SetTy &Set : Sets) {
2655 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2656 return Set;
2657 }
2658
2659 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2660 return Sets.emplace_back();
2661}
2662
2663void CompatibleSets::insert(InvokeInst *II) {
2664 getCompatibleSet(II).emplace_back(II);
2665}
2666
/// Check whether two `invoke`s may be merged into one: both mergeable, both
/// direct (to one callee) or both indirect, agreeing normal/unwind
/// destinations and PHI incoming values, and identical modulo arguments.
/// Always called with exactly two candidates, so pairwise checks suffice.
bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Invokes, IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand.");
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination,
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Invokes, HasNormalDest)) {
    // Do not merge `invoke` that does not have a normal destination with one
    // that does have a normal destination, even though doing so would be legal.
    if (!all_of(Invokes, HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible.
    SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
    // NOTE(review): the call line (presumably
    // `if (!incomingValuesAreCompatible(`) was dropped by the docs
    // extraction here - confirm against upstream SimplifyCFG.cpp.
        NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
        &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  // NOTE(review): the call line (presumably
  // `if (!incomingValuesAreCompatible(`) was dropped by the docs extraction
  // here - confirm against upstream SimplifyCFG.cpp.
      Invokes.front()->getUnwindDest(),
      {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Use &U0 = std::get<0>(Ops);
    Use &U1 = std::get<1>(Ops);
    // Identical operands need no PHI and are always fine.
    if (U0 == U1)
      return false;
    // Differing operands must be replaceable by a variable (a PHI).
    return !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
                                          U0.getOperandNo());
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
  if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
             IsIllegalToMergeArguments))
    return false;

  return true;
}
2773
2774} // namespace
2775
// Merge all invokes in the provided set, all of which are compatible
// as per the `CompatibleSets::shouldBelongToSameSet()`.
// NOTE(review): the signature's first line (presumably `static void
// mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,`) and the
// `Updates` declaration (presumably
// `SmallVector<DominatorTree::UpdateType, 8> Updates;`) were dropped by the
// docs extraction - confirm against upstream SimplifyCFG.cpp.
                                       DomTreeUpdater *DTU) {
  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");

  if (DTU)
    Updates.reserve(2 + 3 * Invokes.size());

  // Whether this set's normal destination is a real block (as opposed to an
  // immediately-unreachable one); compatible invokes agree on this.
  bool HasNormalDest =
      !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());

  // Clone one of the invokes into a new basic block.
  // Since they are all compatible, it doesn't matter which invoke is cloned.
  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
    InvokeInst *II0 = Invokes.front();
    BasicBlock *II0BB = II0->getParent();
    BasicBlock *InsertBeforeBlock =
        II0->getParent()->getIterator()->getNextNode();
    Function *Func = II0BB->getParent();
    LLVMContext &Ctx = II0->getContext();

    BasicBlock *MergedInvokeBB = BasicBlock::Create(
        Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);

    auto *MergedInvoke = cast<InvokeInst>(II0->clone());
    // NOTE: all invokes have the same attributes, so no handling needed.
    MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());

    if (!HasNormalDest) {
      // This set does not have a normal destination,
      // so just form a new block with unreachable terminator.
      BasicBlock *MergedNormalDest = BasicBlock::Create(
          Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
      auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
      UI->setDebugLoc(DebugLoc::getTemporary());
      MergedInvoke->setNormalDest(MergedNormalDest);
    }

    // The unwind destination, however, remains identical for all invokes here.

    return MergedInvoke;
  }();

  if (DTU) {
    // Predecessor blocks that contained these invokes will now branch to
    // the new block that contains the merged invoke, ...
    for (InvokeInst *II : Invokes)
      Updates.push_back(
          {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});

    // ... which has the new `unreachable` block as normal destination,
    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
    for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
      Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
                         SuccBBOfMergedInvoke});

    // Since predecessor blocks now unconditionally branch to a new block,
    // they no longer branch to their original successors.
    for (InvokeInst *II : Invokes)
      for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
        Updates.push_back(
            {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
  }

  bool IsIndirectCall = Invokes[0]->isIndirectCall();

  // Form the merged operands for the merged invoke.
  for (Use &U : MergedInvoke->operands()) {
    // Only PHI together the indirect callees and data operands.
    if (MergedInvoke->isCallee(&U)) {
      if (!IsIndirectCall)
        continue;
    } else if (!MergedInvoke->isDataOperand(&U))
      continue;

    // Don't create trivial PHI's with all-identical incoming values.
    bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
      return II->getOperand(U.getOperandNo()) != U.get();
    });
    if (!NeedPHI)
      continue;

    // Form a PHI out of all the data ops under this index.
    // NOTE(review): the `PHINode *PN = PHINode::Create(` line was dropped by
    // the docs extraction here - confirm against upstream SimplifyCFG.cpp.
        U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
    for (InvokeInst *II : Invokes)
      PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());

    U.set(PN);
  }

  // We've ensured that each PHI node has compatible (identical) incoming values
  // when coming from each of the `invoke`s in the current merge set,
  // so update the PHI nodes accordingly.
  for (BasicBlock *Succ : successors(MergedInvoke))
    addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
                          /*ExistPred=*/Invokes.front()->getParent());

  // And finally, replace the original `invoke`s with an unconditional branch
  // to the block with the merged `invoke`. Also, give that merged `invoke`
  // the merged debugloc of all the original `invoke`s.
  DILocation *MergedDebugLoc = nullptr;
  for (InvokeInst *II : Invokes) {
    // Compute the debug location common to all the original `invoke`s.
    if (!MergedDebugLoc)
      MergedDebugLoc = II->getDebugLoc();
    else
      MergedDebugLoc =
          DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());

    // And replace the old `invoke` with an unconditional branch
    // to the block with the merged `invoke`.
    for (BasicBlock *OrigSuccBB : successors(II->getParent()))
      OrigSuccBB->removePredecessor(II->getParent());
    auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
    // The unconditional branch is part of the replacement for the original
    // invoke, so should use its DebugLoc.
    BI->setDebugLoc(II->getDebugLoc());
    bool Success = MergedInvoke->tryIntersectAttributes(II);
    assert(Success && "Merged invokes with incompatible attributes");
    // For NDEBUG Compile
    (void)Success;
    II->replaceAllUsesWith(MergedInvoke);
    II->eraseFromParent();
    ++NumInvokesMerged;
  }
  MergedInvoke->setDebugLoc(MergedDebugLoc);
  ++NumInvokeSetsFormed;

  if (DTU)
    DTU->applyUpdates(Updates);
}
2910
/// If this block is a `landingpad` exception handling block, categorize all
/// the predecessor `invoke`s into sets, with all `invoke`s in each set
/// being "mergeable" together, and then merge invokes in each set together.
///
/// This is a weird mix of hoisting and sinking. Visually, it goes from:
/// [...] [...]
/// | |
/// [invoke0] [invoke1]
/// / \ / \
/// [cont0] [landingpad] [cont1]
/// to:
/// [...] [...]
/// \ /
/// [invoke]
/// / \
/// [cont] [landingpad]
///
/// But of course we can only do that if the invokes share the `landingpad`,
/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
/// and the invoked functions are "compatible".
// NOTE(review): the function's signature line and the opening option check
// (presumably `static bool mergeCompatibleInvokes(BasicBlock *BB,
// DomTreeUpdater *DTU) { if (!EnableMergeCompatibleInvokes)`) were dropped
// by the docs extraction - confirm against upstream SimplifyCFG.cpp.
    return false;

  bool Changed = false;

  // FIXME: generalize to all exception handling blocks?
  if (!BB->isLandingPad())
    return Changed;

  CompatibleSets Grouper;

  // Record all the predecessors of this `landingpad`. As per verifier,
  // the only allowed predecessor is the unwind edge of an `invoke`.
  // We want to group "compatible" `invokes` into the same set to be merged.
  for (BasicBlock *PredBB : predecessors(BB))
    Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));

  // And now, merge `invoke`s that were grouped together.
  for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
    if (Invokes.size() < 2)
      continue;
    Changed = true;
    mergeCompatibleInvokesImpl(Invokes, DTU);
  }

  return Changed;
}
2959
2960namespace {
2961/// Track ephemeral values, which should be ignored for cost-modelling
2962/// purposes. Requires walking instructions in reverse order.
2963class EphemeralValueTracker {
2965
2966 bool isEphemeral(const Instruction *I) {
2967 if (isa<AssumeInst>(I))
2968 return true;
2969 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2970 all_of(I->users(), [&](const User *U) {
2971 return EphValues.count(cast<Instruction>(U));
2972 });
2973 }
2974
2975public:
2976 bool track(const Instruction *I) {
2977 if (isEphemeral(I)) {
2978 EphValues.insert(I);
2979 return true;
2980 }
2981 return false;
2982 }
2983
2984 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2985};
2986} // namespace
2987
/// Determine if we can hoist sink a sole store instruction out of a
/// conditional block.
///
/// We are looking for code like the following:
/// BrBB:
/// store i32 %add, i32* %arrayidx2
/// ... // No other stores or function calls (we could be calling a memory
/// ... // function).
/// %cmp = icmp ult %x, %y
/// br i1 %cmp, label %EndBB, label %ThenBB
/// ThenBB:
/// store i32 %add5, i32* %arrayidx2
/// br label EndBB
/// EndBB:
/// ...
/// We are going to transform this into:
/// BrBB:
/// store i32 %add, i32* %arrayidx2
/// ... //
/// %cmp = icmp ult %x, %y
/// %add.add5 = select i1 %cmp, i32 %add, %add5
/// store i32 %add.add5, i32* %arrayidx2
/// ...
///
/// \return The pointer to the value of the previous store if the store can be
/// hoisted into the predecessor block. nullptr otherwise.
// NOTE(review): the signature's first line (presumably `static Value
// *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,`) was dropped by
// the docs extraction - confirm against upstream SimplifyCFG.cpp.
                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
  if (!StoreToHoist)
    return nullptr;

  // Volatile or atomic.
  if (!StoreToHoist->isSimple())
    return nullptr;

  Value *StorePtr = StoreToHoist->getPointerOperand();
  Type *StoreTy = StoreToHoist->getValueOperand()->getType();

  // Look for a store to the same pointer in BrBB.
  unsigned MaxNumInstToLookAt = 9;
  // Skip pseudo probe intrinsic calls which are not really killing any memory
  // accesses.
  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
    if (!MaxNumInstToLookAt)
      break;
    --MaxNumInstToLookAt;

    // Could be calling an instruction that affects memory like free().
    if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
      return nullptr;

    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
      // Found the previous store to same location and type. Make sure it is
      // simple, to avoid introducing a spurious non-atomic write after an
      // atomic write.
      if (SI->getPointerOperand() == StorePtr &&
          SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
          SI->getAlign() >= StoreToHoist->getAlign())
        // Found the previous store, return its value operand.
        return SI->getValueOperand();
      return nullptr; // Unknown store.
    }

    if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
      // A simple, sufficiently-aligned load from the same pointer also proves
      // the location is writable/dereferenceable, subject to the checks below.
      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
          LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
        Value *Obj = getUnderlyingObject(StorePtr);
        bool ExplicitlyDereferenceableOnly;
        if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
        // NOTE(review): one line of the capture check (presumably
        // `!capturesAnything(`) was dropped by the docs extraction here -
        // confirm against upstream SimplifyCFG.cpp.
             PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
                                  CaptureComponents::Provenance)) &&
            (!ExplicitlyDereferenceableOnly ||
             isDereferenceablePointer(StorePtr, StoreTy,
                                      LI->getDataLayout()))) {
          // Found a previous load, return it.
          return LI;
        }
      }
      // The load didn't work out, but we may still find a store.
    }
  }

  return nullptr;
}
3074
/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
/// converted to selects.
// NOTE(review): the signature's opening line(s) (presumably `static bool
// validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,` and an
// `InstructionCost &Cost,` parameter) were dropped by the docs extraction -
// confirm against upstream SimplifyCFG.cpp.
                                            BasicBlock *EndBB,
                                            unsigned &SpeculatedInstructions,
                                            const TargetTransformInfo &TTI) {
  // NOTE(review): the cost-kind selection around hasMinSize() lost its
  // surrounding lines in this listing (presumably a
  // `TargetTransformInfo::TargetCostKind CostKind = ... ? ... : ...;`).
      BB->getParent()->hasMinSize()

  bool HaveRewritablePHIs = false;
  for (PHINode &PN : EndBB->phis()) {
    Value *OrigV = PN.getIncomingValueForBlock(BB);
    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);

    // FIXME: Try to remove some of the duplication with
    // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
    if (ThenV == OrigV)
      continue;

    Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
    // NOTE(review): the remaining getCmpSelInstrCost() arguments were dropped
    // by the docs extraction here.

    // Don't convert to selects if we could remove undefined behavior instead.
    if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
    // NOTE(review): the symmetric check on ThenV was dropped by the docs
    // extraction here.
      return false;

    HaveRewritablePHIs = true;
    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
    if (!OrigCE && !ThenCE)
      continue; // Known cheap (FIXME: Maybe not true for aggregates).

    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
    InstructionCost MaxCost =
    // NOTE(review): the MaxCost expression was dropped by the docs
    // extraction here.
    if (OrigCost + ThenCost > MaxCost)
      return false;

    // Account for the cost of an unfolded ConstantExpr which could end up
    // getting expanded into Instructions.
    // FIXME: This doesn't account for how many operations are combined in the
    // constant expression.
    ++SpeculatedInstructions;
    if (SpeculatedInstructions > 1)
      return false;
  }

  return HaveRewritablePHIs;
}
3130
// NOTE(review): the signature's first line (presumably `static bool
// isProfitableToSpeculate(const BranchInst *BI,`) was dropped by the docs
// extraction - confirm against upstream SimplifyCFG.cpp.
                                    std::optional<bool> Invert,
                                    const TargetTransformInfo &TTI) {
  // If the branch is non-unpredictable, and is predicted to *not* branch to
  // the `then` block, then avoid speculating it.
  if (BI->getMetadata(LLVMContext::MD_unpredictable))
    return true;

  // Without branch weights we cannot prove unprofitability; assume profitable.
  uint64_t TWeight, FWeight;
  if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
    return true;

  if (!Invert.has_value())
    return false;

  // Probability of reaching the end block directly (i.e. NOT taking ThenBB).
  uint64_t EndWeight = *Invert ? TWeight : FWeight;
  BranchProbability BIEndProb =
      BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
  // NOTE(review): the `Likely` threshold definition was dropped by the docs
  // extraction here - confirm against upstream SimplifyCFG.cpp.
  return BIEndProb < Likely;
}
3152
3153/// Speculate a conditional basic block flattening the CFG.
3154///
3155/// Note that this is a very risky transform currently. Speculating
3156/// instructions like this is most often not desirable. Instead, there is an MI
3157/// pass which can do it with full awareness of the resource constraints.
3158/// However, some cases are "obvious" and we should do directly. An example of
3159/// this is speculating a single, reasonably cheap instruction.
3160///
3161/// There is only one distinct advantage to flattening the CFG at the IR level:
3162/// it makes very common but simplistic optimizations such as are common in
3163/// instcombine and the DAG combiner more powerful by removing CFG edges and
3164/// modeling their effects with easier to reason about SSA value graphs.
3165///
3166///
3167/// An illustration of this transform is turning this IR:
3168/// \code
3169/// BB:
3170/// %cmp = icmp ult %x, %y
3171/// br i1 %cmp, label %EndBB, label %ThenBB
3172/// ThenBB:
3173/// %sub = sub %x, %y
3174/// br label BB2
3175/// EndBB:
3176/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3177/// ...
3178/// \endcode
3179///
3180/// Into this IR:
3181/// \code
3182/// BB:
3183/// %cmp = icmp ult %x, %y
3184/// %sub = sub %x, %y
3185/// %cond = select i1 %cmp, 0, %sub
3186/// ...
3187/// \endcode
3188///
3189/// \returns true if the conditional block is removed.
bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
                                            BasicBlock *ThenBB) {
  if (!Options.SpeculateBlocks)
    return false;

  // Be conservative for now. FP select instruction can often be expensive.
  Value *BrCond = BI->getCondition();
  if (isa<FCmpInst>(BrCond))
    return false;

  BasicBlock *BB = BI->getParent();
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
  InstructionCost Budget =
  // NOTE(review): the budget expression was dropped by the docs extraction
  // here - confirm against upstream SimplifyCFG.cpp.

  // If ThenBB is actually on the false edge of the conditional branch, remember
  // to swap the select operands later.
  bool Invert = false;
  if (ThenBB != BI->getSuccessor(0)) {
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
    Invert = true;
  }
  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");

  if (!isProfitableToSpeculate(BI, Invert, TTI))
    return false;

  // Keep a count of how many times instructions are used within ThenBB when
  // they are candidates for sinking into ThenBB. Specifically:
  // - They are defined in BB, and
  // - They have no side effects, and
  // - All of their uses are in ThenBB.
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;

  SmallVector<Instruction *, 4> SpeculatedPseudoProbes;

  unsigned SpeculatedInstructions = 0;
  bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
  SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
  Value *SpeculatedStoreValue = nullptr;
  StoreInst *SpeculatedStore = nullptr;
  EphemeralValueTracker EphTracker;
  // Walk ThenBB in reverse (excluding the terminator) so ephemeral values can
  // be recognized before their producers.
  for (Instruction &I : reverse(drop_end(*ThenBB))) {
    // Skip pseudo probes. The consequence is we lose track of the branch
    // probability for ThenBB, which is fine since the optimization here takes
    // place regardless of the branch probability.
    if (isa<PseudoProbeInst>(I)) {
      // The probe should be deleted so that it will not be over-counted when
      // the samples collected on the non-conditional path are counted towards
      // the conditional path. We leave it for the counts inference algorithm to
      // figure out a proper count for an unknown probe.
      SpeculatedPseudoProbes.push_back(&I);
      continue;
    }

    // Ignore ephemeral values, they will be dropped by the transform.
    if (EphTracker.track(&I))
      continue;

    // Only speculatively execute a single instruction (not counting the
    // terminator) for now.
    bool IsSafeCheapLoadStore = HoistLoadsStores &&
    // NOTE(review): a condition line was dropped by the docs extraction here.
                                SpeculatedConditionalLoadsStores.size() <
    // NOTE(review): the threshold operand was dropped by the docs extraction
    // here.
    // Not count load/store into cost if target supports conditional faulting
    // b/c it's cheap to speculate it.
    if (IsSafeCheapLoadStore)
      SpeculatedConditionalLoadsStores.push_back(&I);
    else
      ++SpeculatedInstructions;

    if (SpeculatedInstructions > 1)
      return false;

    // Don't hoist the instruction if it's unsafe or expensive.
    if (!IsSafeCheapLoadStore &&
    // NOTE(review): a safety-check line was dropped by the docs extraction
    // here.
        !(HoistCondStores && !SpeculatedStoreValue &&
          (SpeculatedStoreValue =
               isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
      return false;
    if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
    // NOTE(review): the speculation-cost comparison was dropped by the docs
    // extraction here.
      return false;

    // Store the store speculation candidate.
    if (!SpeculatedStore && SpeculatedStoreValue)
      SpeculatedStore = cast<StoreInst>(&I);

    // Do not hoist the instruction if any of its operands are defined but not
    // used in BB. The transformation will prevent the operand from
    // being sunk into the use block.
    for (Use &Op : I.operands()) {
      Instruction *OpI = dyn_cast<Instruction>(Op);
      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
        continue; // Not a candidate for sinking.

      ++SinkCandidateUseCounts[OpI];
    }
  }

  // Consider any sink candidates which are only used in ThenBB as costs for
  // speculation. Note, while we iterate over a DenseMap here, we are summing
  // and so iteration order isn't significant.
  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
    if (Inst->hasNUses(Count)) {
      ++SpeculatedInstructions;
      if (SpeculatedInstructions > 1)
        return false;
    }

  // Check that we can insert the selects and that it's not too expensive to do
  // so.
  bool Convert =
      SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
  // NOTE(review): the `Cost` initialization was dropped by the docs
  // extraction here.
  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
                                            SpeculatedInstructions, Cost, TTI);
  if (!Convert || Cost > Budget)
    return false;

  // If we get here, we can hoist the instruction and if-convert.
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

  Instruction *Sel = nullptr;
  // Insert a select of the value of the speculated store.
  if (SpeculatedStoreValue) {
    IRBuilder<NoFolder> Builder(BI);
    Value *OrigV = SpeculatedStore->getValueOperand();
    Value *TrueV = SpeculatedStore->getValueOperand();
    Value *FalseV = SpeculatedStoreValue;
    if (Invert)
      std::swap(TrueV, FalseV);
    Value *S = Builder.CreateSelect(
        BrCond, TrueV, FalseV, "spec.store.select", BI);
    Sel = cast<Instruction>(S);
    SpeculatedStore->setOperand(0, S);
    SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
                                         SpeculatedStore->getDebugLoc());
    // The value stored is still conditional, but the store itself is now
    // unconditionally executed, so we must be sure that any linked dbg.assign
    // intrinsics are tracking the new stored value (the result of the
    // select). If we don't, and the store were to be removed by another pass
    // (e.g. DSE), then we'd eventually end up emitting a location describing
    // the conditional value, unconditionally.
    //
    // === Before this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1, ...
    //   br %cond if.then
    //
    // if.then:
    //   store %two, %x.dest, !DIAssignID !2
    //   dbg.assign %two, "x", ..., !2, ...
    //
    // === After this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1
    //   ...
    //   %merge = select %cond, %two, %one
    //   store %merge, %x.dest, !DIAssignID !2
    //   dbg.assign %merge, "x", ..., !2
    for (DbgVariableRecord *DbgAssign :
         at::getDVRAssignmentMarkers(SpeculatedStore))
      if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
        DbgAssign->replaceVariableLocationOp(OrigV, S);
  }

  // Metadata can be dependent on the condition we are hoisting above.
  // Strip all UB-implying metadata on the instruction. Drop the debug loc
  // to avoid making it appear as if the condition is a constant, which would
  // be misleading while debugging.
  // Similarly strip attributes that maybe dependent on condition we are
  // hoisting above.
  for (auto &I : make_early_inc_range(*ThenBB)) {
    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
      I.setDebugLoc(DebugLoc::getDropped());
    }
    I.dropUBImplyingAttrsAndMetadata();

    // Drop ephemeral values.
    if (EphTracker.contains(&I)) {
      I.replaceAllUsesWith(PoisonValue::get(I.getType()));
      I.eraseFromParent();
    }
  }

  // Hoist the instructions.
  // Drop DbgVariableRecords attached to these instructions.
  for (auto &It : *ThenBB)
    for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
      // equivalent).
      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
          !DVR || !DVR->isDbgAssign())
        It.dropOneDbgRecord(&DR);
  BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
             std::prev(ThenBB->end()));

  if (!SpeculatedConditionalLoadsStores.empty())
    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
                                Sel);

  // Insert selects and rewrite the PHI operands.
  IRBuilder<NoFolder> Builder(BI);
  for (PHINode &PN : EndBB->phis()) {
    unsigned OrigI = PN.getBasicBlockIndex(BB);
    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
    Value *OrigV = PN.getIncomingValue(OrigI);
    Value *ThenV = PN.getIncomingValue(ThenI);

    // Skip PHIs which are trivial.
    if (OrigV == ThenV)
      continue;

    // Create a select whose true value is the speculatively executed value and
    // false value is the pre-existing value. Swap them if the branch
    // destinations were inverted.
    Value *TrueV = ThenV, *FalseV = OrigV;
    if (Invert)
      std::swap(TrueV, FalseV);
    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
    PN.setIncomingValue(OrigI, V);
    PN.setIncomingValue(ThenI, V);
  }

  // Remove speculated pseudo probes.
  for (Instruction *I : SpeculatedPseudoProbes)
    I->eraseFromParent();

  ++NumSpeculations;
  return true;
}
3427
3429
3430// Return false if number of blocks searched is too much.
3431static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3432 BlocksSet &ReachesNonLocalUses) {
3433 if (BB == DefBB)
3434 return true;
3435 if (!ReachesNonLocalUses.insert(BB).second)
3436 return true;
3437
3438 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3439 return false;
3440 for (BasicBlock *Pred : predecessors(BB))
3441 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3442 return false;
3443 return true;
3444}
3445
/// Return true if we can thread a branch across this block.
// NOTE(review): the signature's first line (presumably `static bool
// blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,`) was dropped by the
// docs extraction - confirm against upstream SimplifyCFG.cpp.
                                            BlocksSet &NonLocalUseBlocks) {
  int Size = 0;
  EphemeralValueTracker EphTracker;

  // Walk the loop in reverse so that we can identify ephemeral values properly
  // (values only feeding assumes).
  for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
    // Can't fold blocks that contain noduplicate or convergent calls.
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (CI->cannotDuplicate() || CI->isConvergent())
        return false;

    // Ignore ephemeral values which are deleted during codegen.
    // We will delete Phis while threading, so Phis should not be accounted in
    // block's size.
    if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
      if (Size++ > MaxSmallBlockSize)
        return false; // Don't clone large BB's.
    }

    // Record blocks with non-local uses of values defined in the current basic
    // block.
    for (User *U : I.users()) {
      Instruction *UI = cast<Instruction>(U);
      BasicBlock *UsedInBB = UI->getParent();
      if (UsedInBB == BB) {
        // A same-block PHI user means the value feeds back into this block's
        // PHIs, which threading cannot handle.
        if (isa<PHINode>(UI))
          return false;
      } else
        NonLocalUseBlocks.insert(UsedInBB);
    }

    // Looks ok, continue checking.
  }

  return true;
}
3485
// NOTE(review): the signature's first line (presumably `static ConstantInt
// *getKnownValueOnEdge(Value *V, BasicBlock *From,`) was dropped by the docs
// extraction - confirm against upstream SimplifyCFG.cpp.
                                          BasicBlock *To) {
  // Don't look past the block defining the value, we might get the value from
  // a previous loop iteration.
  auto *I = dyn_cast<Instruction>(V);
  if (I && I->getParent() == To)
    return nullptr;

  // We know the value if the From block branches on it.
  auto *BI = dyn_cast<BranchInst>(From->getTerminator());
  if (BI && BI->isConditional() && BI->getCondition() == V &&
      BI->getSuccessor(0) != BI->getSuccessor(1))
    return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
    // NOTE(review): the false-arm of this ternary (presumably
    // `: ConstantInt::getFalse(BI->getContext());`) was dropped by the docs
    // extraction here.

  return nullptr;
}
3503
3504/// If we have a conditional branch on something for which we know the constant
3505/// value in predecessors (e.g. a phi node in the current block), thread edges
3506/// from the predecessor to their ultimate destination.
3507static std::optional<bool>
// NOTE(review): the first line of the parameter list (source line 3508,
// presumably "foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI,
// DomTreeUpdater *DTU,") is missing from this extract.
3509 const DataLayout &DL,
3510 AssumptionCache *AC) {
// NOTE(review): source line 3511 is missing; presumably the declaration of
// the KnownValues map (constant -> set of predecessors) used below -- confirm.
3512 BasicBlock *BB = BI->getParent();
3513 Value *Cond = BI->getCondition();
3514 PHINode *PN = dyn_cast<PHINode>(Cond);
3515 if (PN && PN->getParent() == BB) {
3516 // Degenerate case of a single entry PHI.
3517 if (PN->getNumIncomingValues() == 1) {
// NOTE(review): source line 3518 is missing; presumably the call that folds
// the single-entry PHI (e.g. FoldSingleEntryPHINodes) -- confirm upstream.
3519 return true;
3520 }
3521
// Each constant incoming value of the PHI tells us Cond's value on the edge
// from the corresponding predecessor.
3522 for (Use &U : PN->incoming_values())
3523 if (auto *CB = dyn_cast<ConstantInt>(U))
3524 KnownValues[CB].insert(PN->getIncomingBlock(U));
3525 } else {
// Otherwise, try to deduce Cond's value per-edge from predecessor branches.
3526 for (BasicBlock *Pred : predecessors(BB)) {
3527 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3528 KnownValues[CB].insert(Pred);
3529 }
3530 }
3531
3532 if (KnownValues.empty())
3533 return false;
3534
3535 // Now we know that this block has multiple preds and two succs.
3536 // Check that the block is small enough and record which non-local blocks use
3537 // values defined in the block.
3538
3539 BlocksSet NonLocalUseBlocks;
3540 BlocksSet ReachesNonLocalUseBlocks;
3541 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3542 return false;
3543
3544 // Jump-threading can only be done to destinations where no values defined
3545 // in BB are live.
3546
3547 // Quickly check if both destinations have uses. If so, jump-threading cannot
3548 // be done.
3549 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3550 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3551 return false;
3552
3553 // Search backward from NonLocalUseBlocks to find which blocks
3554 // reach non-local uses.
3555 for (BasicBlock *UseBB : NonLocalUseBlocks)
3556 // Give up if too many blocks are searched.
3557 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3558 return false;
3559
3560 for (const auto &Pair : KnownValues) {
3561 ConstantInt *CB = Pair.first;
3562 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
// CB is i1: value 1 selects successor 0, value 0 selects successor 1.
3563 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3564
3565 // Okay, we now know that all edges from PredBB should be revectored to
3566 // branch to RealDest.
3567 if (RealDest == BB)
3568 continue; // Skip self loops.
3569
3570 // Skip if the predecessor's terminator is an indirect branch.
3571 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3572 return isa<IndirectBrInst>(PredBB->getTerminator());
3573 }))
3574 continue;
3575
3576 // Only revector to RealDest if no values defined in BB are live.
3577 if (ReachesNonLocalUseBlocks.contains(RealDest))
3578 continue;
3579
3580 LLVM_DEBUG({
3581 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3582 << " has value " << *Pair.first << " in predecessors:\n";
3583 for (const BasicBlock *PredBB : Pair.second)
3584 dbgs() << " " << PredBB->getName() << "\n";
3585 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3586 });
3587
3588 // Split the predecessors we are threading into a new edge block. We'll
3589 // clone the instructions into this block, and then redirect it to RealDest.
3590 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3591
3592 // TODO: These just exist to reduce test diff, we can drop them if we like.
3593 EdgeBB->setName(RealDest->getName() + ".critedge");
3594 EdgeBB->moveBefore(RealDest);
3595
3596 // Update PHI nodes.
3597 addPredecessorToBlock(RealDest, EdgeBB, BB);
3598
3599 // BB may have instructions that are being threaded over. Clone these
3600 // instructions into EdgeBB. We know that there will be no uses of the
3601 // cloned instructions outside of EdgeBB.
3602 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3603 ValueToValueMapTy TranslateMap; // Track translated values.
// Within the clone, the branch condition is known to be the constant CB.
3604 TranslateMap[Cond] = CB;
3605
3606 // RemoveDIs: track instructions that we optimise away while folding, so
3607 // that we can copy DbgVariableRecords from them later.
3608 BasicBlock::iterator SrcDbgCursor = BB->begin();
3609 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
// PHIs are not cloned: on this edge they resolve to the incoming value.
3610 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3611 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3612 continue;
3613 }
3614 // Clone the instruction.
3615 Instruction *N = BBI->clone();
3616 // Insert the new instruction into its new home.
3617 N->insertInto(EdgeBB, InsertPt);
3618
3619 if (BBI->hasName())
3620 N->setName(BBI->getName() + ".c");
3621
3622 // Update operands due to translation.
3623 // Key Instructions: Remap all the atom groups.
3624 if (const DebugLoc &DL = BBI->getDebugLoc())
3625 mapAtomInstance(DL, TranslateMap);
3626 RemapInstruction(N, TranslateMap,
// NOTE(review): source line 3627 (the RemapInstruction flags argument,
// presumably RF_IgnoreMissingLocals | RF_NoModuleLevelChanges) is missing
// from this extract.
3628
3629 // Check for trivial simplification.
3630 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3631 if (!BBI->use_empty())
3632 TranslateMap[&*BBI] = V;
3633 if (!N->mayHaveSideEffects()) {
3634 N->eraseFromParent(); // Instruction folded away, don't need actual
3635 // inst
3636 N = nullptr;
3637 }
3638 } else {
3639 if (!BBI->use_empty())
3640 TranslateMap[&*BBI] = N;
3641 }
3642 if (N) {
3643 // Copy all debug-info attached to instructions from the last we
3644 // successfully clone, up to this instruction (they might have been
3645 // folded away).
3646 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3647 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3648 SrcDbgCursor = std::next(BBI);
3649 // Clone debug-info on this instruction too.
3650 N->cloneDebugInfoFrom(&*BBI);
3651
3652 // Register the new instruction with the assumption cache if necessary.
3653 if (auto *Assume = dyn_cast<AssumeInst>(N))
3654 if (AC)
3655 AC->registerAssumption(Assume);
3656 }
3657 }
3658
// Carry over any remaining debug records up to (and on) the branch itself.
3659 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3660 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3661 InsertPt->cloneDebugInfoFrom(BI);
3662
// Redirect EdgeBB to jump straight to RealDest instead of BB.
3663 BB->removePredecessor(EdgeBB);
3664 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3665 EdgeBI->setSuccessor(0, RealDest);
3666 EdgeBI->setDebugLoc(BI->getDebugLoc());
3667
3668 if (DTU) {
// NOTE(review): source line 3669 (presumably the declaration of the local
// "Updates" vector of DominatorTree::UpdateType) is missing from this
// extract.
3670 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3671 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3672 DTU->applyUpdates(Updates);
3673 }
3674
3675 // For simplicity, we created a separate basic block for the edge. Merge
3676 // it back into the predecessor if possible. This not only avoids
3677 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3678 // bypass the check for trivial cycles above.
3679 MergeBlockIntoPredecessor(EdgeBB, DTU);
3680
3681 // Signal repeat, simplifying any other constants.
3682 return std::nullopt;
3683 }
3684
3685 return false;
3686}
3687
// Driver: repeatedly run the threading transform until it reaches a fixed
// point, returning whether anything changed.
3688bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3689 // Note: If BB is a loop header then there is a risk that threading introduces
3690 // a non-canonical loop by moving a back edge. So we avoid this optimization
3691 // for loop headers if NeedCanonicalLoop is set.
3692 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3693 return false;
3694
3695 std::optional<bool> Result;
3696 bool EverChanged = false;
3697 do {
3698 // Note that None means "we changed things, but recurse further."
3699 Result =
// NOTE(review): source line 3700 is missing; presumably the call
// "foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, Options.AC);"
// -- confirm against the upstream file.
3701 EverChanged |= Result == std::nullopt || *Result;
3702 } while (Result == std::nullopt);
3703 return EverChanged;
3704}
3705
3706/// Given a BB that starts with the specified two-entry PHI node,
3707/// see if we can eliminate it.
// NOTE(review): the declaration lines carrying this function's name and
// leading parameters (source lines 3708-3709, presumably
// "static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo
// &TTI, DomTreeUpdater *DTU, AssumptionCache *AC,") are missing from this
// extract -- confirm against the upstream file.
3710 const DataLayout &DL,
3711 bool SpeculateUnpredictables) {
3712 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3713 // statement", which has a very simple dominance structure. Basically, we
3714 // are trying to find the condition that is being branched on, which
3715 // subsequently causes this merge to happen. We really want control
3716 // dependence information for this check, but simplifycfg can't keep it up
3717 // to date, and this catches most of the cases we care about anyway.
3718 BasicBlock *BB = PN->getParent();
3719
3720 BasicBlock *IfTrue, *IfFalse;
3721 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3722 if (!DomBI)
3723 return false;
3724 Value *IfCond = DomBI->getCondition();
3725 // Don't bother if the branch will be constant folded trivially.
3726 if (isa<ConstantInt>(IfCond))
3727 return false;
3728
3729 BasicBlock *DomBlock = DomBI->getParent();
// NOTE(review): source lines 3730-3731 are missing; presumably the
// declaration of the IfBlocks vector and the llvm::copy_if call that fills
// it with the 'then'/'else' blocks ending in unconditional branches.
3732 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3733 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3734 });
3735 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3736 "Will have either one or two blocks to speculate.");
3737
3738 // If the branch is non-unpredictable, see if we either predictably jump to
3739 // the merge bb (if we have only a single 'then' block), or if we predictably
3740 // jump to one specific 'then' block (if we have two of them).
3741 // It isn't beneficial to speculatively execute the code
3742 // from the block that we know is predictably not entered.
3743 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3744 if (!IsUnpredictable) {
3745 uint64_t TWeight, FWeight;
3746 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3747 (TWeight + FWeight) != 0) {
3748 BranchProbability BITrueProb =
3749 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
// NOTE(review): source line 3750 is missing; presumably the computation of
// the "Likely" probability threshold used in the comparisons below.
3751 BranchProbability BIFalseProb = BITrueProb.getCompl();
3752 if (IfBlocks.size() == 1) {
3753 BranchProbability BIBBProb =
3754 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3755 if (BIBBProb >= Likely)
3756 return false;
3757 } else {
3758 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3759 return false;
3760 }
3761 }
3762 }
3763
3764 // Don't try to fold an unreachable block. For example, the phi node itself
3765 // can't be the candidate if-condition for a select that we want to form.
3766 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3767 if (IfCondPhiInst->getParent() == BB)
3768 return false;
3769
3770 // Okay, we found that we can merge this two-entry phi node into a select.
3771 // Doing so would require us to fold *all* two entry phi nodes in this block.
3772 // At some point this becomes non-profitable (particularly if the target
3773 // doesn't support cmov's). Only do this transformation if there are two or
3774 // fewer PHI nodes in this block.
3775 unsigned NumPhis = 0;
3776 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3777 if (NumPhis > 2)
3778 return false;
3779
3780 // Loop over the PHI's seeing if we can promote them all to select
3781 // instructions. While we are at it, keep track of the instructions
3782 // that need to be moved to the dominating block.
3783 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3784 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
// NOTE(review): source lines 3785 and 3787 are missing; presumably the
// declaration of the running Cost and the base Budget expression
// (folding-threshold times a basic-cost constant) -- confirm upstream.
3786 InstructionCost Budget =
3788 if (SpeculateUnpredictables && IsUnpredictable)
3789 Budget += TTI.getBranchMispredictPenalty();
3790
3791 bool Changed = false;
3792 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3793 PHINode *PN = cast<PHINode>(II++);
3794 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3795 PN->replaceAllUsesWith(V);
3796 PN->eraseFromParent();
3797 Changed = true;
3798 continue;
3799 }
3800
// Both incoming values must be computable above the branch within budget.
3801 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3802 AggressiveInsts, Cost, Budget, TTI, AC,
3803 ZeroCostInstructions) ||
3804 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3805 AggressiveInsts, Cost, Budget, TTI, AC,
3806 ZeroCostInstructions))
3807 return Changed;
3808 }
3809
3810 // If we folded the first phi, PN dangles at this point. Refresh it. If
3811 // we ran out of PHIs then we simplified them all.
3812 PN = dyn_cast<PHINode>(BB->begin());
3813 if (!PN)
3814 return true;
3815
3816 // Return true if at least one of these is a 'not', and another is either
3817 // a 'not' too, or a constant.
3818 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3819 if (!match(V0, m_Not(m_Value())))
3820 std::swap(V0, V1);
3821 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3822 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3823 };
3824
3825 // Don't fold i1 branches on PHIs which contain binary operators or
3826 // (possibly inverted) select form of or/ands, unless one of
3827 // the incoming values is an 'not' and another one is freely invertible.
3828 // These can often be turned into switches and other things.
3829 auto IsBinOpOrAnd = [](Value *V) {
3830 return match(
// NOTE(review): source line 3831 is missing; presumably the matcher
// arguments (V against a binop / logical-and / logical-or pattern).
3832 };
3833 if (PN->getType()->isIntegerTy(1) &&
3834 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3835 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3836 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3837 PN->getIncomingValue(1)))
3838 return Changed;
3839
3840 // If all PHI nodes are promotable, check to make sure that all instructions
3841 // in the predecessor blocks can be promoted as well. If not, we won't be able
3842 // to get rid of the control flow, so it's not worth promoting to select
3843 // instructions.
3844 for (BasicBlock *IfBlock : IfBlocks)
3845 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3846 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3847 // This is not an aggressive instruction that we can promote.
3848 // Because of this, we won't be able to get rid of the control flow, so
3849 // the xform is not worth it.
3850 return Changed;
3851 }
3852
3853 // If either of the blocks has it's address taken, we can't do this fold.
3854 if (any_of(IfBlocks,
3855 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3856 return Changed;
3857
3858 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3859 if (IsUnpredictable) dbgs() << " (unpredictable)";
3860 dbgs() << " T: " << IfTrue->getName()
3861 << " F: " << IfFalse->getName() << "\n");
3862
3863 // If we can still promote the PHI nodes after this gauntlet of tests,
3864 // do all of the PHI's now.
3865
3866 // Move all 'aggressive' instructions, which are defined in the
3867 // conditional parts of the if's up to the dominating block.
3868 for (BasicBlock *IfBlock : IfBlocks)
3869 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3870
3871 IRBuilder<NoFolder> Builder(DomBI);
3872 // Propagate fast-math-flags from phi nodes to replacement selects.
3873 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3874 // Change the PHI node into a select instruction.
3875 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3876 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3877
3878 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3879 isa<FPMathOperator>(PN) ? PN : nullptr,
3880 "", DomBI);
3881 PN->replaceAllUsesWith(Sel);
3882 Sel->takeName(PN);
3883 PN->eraseFromParent();
3884 }
3885
3886 // At this point, all IfBlocks are empty, so our if statement
3887 // has been flattened. Change DomBlock to jump directly to our new block to
3888 // avoid other simplifycfg's kicking in on the diamond.
3889 Builder.CreateBr(BB);
3890
// NOTE(review): source line 3891 is missing; presumably the declaration of
// the local "Updates" vector of DominatorTree::UpdateType used below.
3892 if (DTU) {
3893 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3894 for (auto *Successor : successors(DomBlock))
3895 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3896 }
3897
3898 DomBI->eraseFromParent();
3899 if (DTU)
3900 DTU->applyUpdates(Updates);
3901
3902 return true;
3903}
3904
// Create an and/or of LHS and RHS, preferring the plain bitwise form when it
// is poison-safe, and otherwise falling back to the select-based logical form.
// NOTE(review): the declaration lines carrying this function's name and
// leading parameters (source lines 3905-3906, presumably
// "static Value *createLogicalOp(IRBuilderBase &Builder,
// Instruction::BinaryOps Opc, Value *LHS,") are missing from this extract.
3907 Value *RHS, const Twine &Name = "") {
3908 // Try to relax logical op to binary op.
// Safe only when RHS being poison implies LHS already was: then evaluating
// RHS unconditionally cannot introduce new poison.
3909 if (impliesPoison(RHS, LHS))
3910 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3911 if (Opc == Instruction::And)
3912 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3913 if (Opc == Instruction::Or)
3914 return Builder.CreateLogicalOr(LHS, RHS, Name);
3915 llvm_unreachable("Invalid logical opcode");
3916}
3917
3918/// Return true if either PBI or BI has branch weight available, and store
3919/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3920/// not have branch weight, use 1:1 as its weight.
// NOTE(review): the declaration line carrying this function's name and first
// parameters (source line 3921, presumably
// "static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,") is
// missing from this extract.
3922 uint64_t &PredTrueWeight,
3923 uint64_t &PredFalseWeight,
3924 uint64_t &SuccTrueWeight,
3925 uint64_t &SuccFalseWeight) {
3926 bool PredHasWeights =
3927 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3928 bool SuccHasWeights =
3929 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3930 if (PredHasWeights || SuccHasWeights) {
// Fill in neutral 1:1 weights for whichever branch lacked metadata, so the
// caller can always combine the four values uniformly.
3931 if (!PredHasWeights)
3932 PredTrueWeight = PredFalseWeight = 1;
3933 if (!SuccHasWeights)
3934 SuccTrueWeight = SuccFalseWeight = 1;
3935 return true;
3936 } else {
3937 return false;
3938 }
3939}
3940
3941/// Determine if the two branches share a common destination and deduce a glue
3942/// that joins the branches' conditions to arrive at the common destination if
3943/// that would be profitable.
3944static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
// NOTE(review): the first line of the parameter list (source line 3945,
// presumably "shouldFoldCondBranchesToCommonDestination(BranchInst *BI,
// BranchInst *PBI,") is missing from this extract. The returned tuple is
// (common successor, glue opcode, whether PBI's condition must be inverted).
3946 const TargetTransformInfo *TTI) {
3947 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3948 "Both blocks must end with a conditional branches.");
// NOTE(review): source line 3949 is missing; presumably the body of the
// second assert checking that PBI's block is a predecessor of BI's block.
3950 "PredBB must be a predecessor of BB.");
3951
3952 // We have the potential to fold the conditions together, but if the
3953 // predecessor branch is predictable, we may not want to merge them.
3954 uint64_t PTWeight, PFWeight;
// Likely stays default-constructed (unknown) when no usable weights exist,
// which makes every profitability check below pass.
3955 BranchProbability PBITrueProb, Likely;
3956 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3957 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3958 (PTWeight + PFWeight) != 0) {
3959 PBITrueProb =
3960 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
// NOTE(review): source line 3961 is missing; presumably the assignment of
// Likely from a TTI-provided threshold -- confirm upstream.
3962 }
3963
// The four cases below enumerate which successors coincide; the chosen glue
// opcode (Or/And) and the invert flag make PBI's path condition feed BI's.
3964 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3965 // Speculate the 2nd condition unless the 1st is probably true.
3966 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3967 return {{BI->getSuccessor(0), Instruction::Or, false}};
3968 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3969 // Speculate the 2nd condition unless the 1st is probably false.
3970 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3971 return {{BI->getSuccessor(1), Instruction::And, false}};
3972 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3973 // Speculate the 2nd condition unless the 1st is probably true.
3974 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3975 return {{BI->getSuccessor(1), Instruction::And, true}};
3976 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3977 // Speculate the 2nd condition unless the 1st is probably false.
3978 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3979 return {{BI->getSuccessor(0), Instruction::Or, true}};
3980 }
3981 return std::nullopt;
3982}
3983
// Fold BI's conditional branch into its predecessor PBI by cloning BB's
// bonus instructions into PredBlock and gluing the two conditions together.
// NOTE(review): the declaration line carrying this function's name and first
// parameters (source line 3984, presumably
// "static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst
// *PBI,") is missing from this extract.
3985 DomTreeUpdater *DTU,
3986 MemorySSAUpdater *MSSAU,
3987 const TargetTransformInfo *TTI) {
3988 BasicBlock *BB = BI->getParent();
3989 BasicBlock *PredBlock = PBI->getParent();
3990
3991 // Determine if the two branches share a common destination.
3992 BasicBlock *CommonSucc;
// NOTE(review): source lines 3993 and 3996 are missing; presumably the
// declaration of Opc (Instruction::BinaryOps) and the dereference of
// *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI) -- confirm.
3994 bool InvertPredCond;
3995 std::tie(CommonSucc, Opc, InvertPredCond) =
3997
3998 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3999
4000 IRBuilder<> Builder(PBI);
4001 // The builder is used to create instructions to eliminate the branch in BB.
4002 // If BB's terminator has !annotation metadata, add it to the new
4003 // instructions.
// NOTE(review): source line 4004 is missing; presumably
// "Builder.CollectMetadataToCopy(BB->getTerminator(), ...".
4005 {LLVMContext::MD_annotation});
4006
4007 // If we need to invert the condition in the pred block to match, do so now.
4008 if (InvertPredCond) {
4009 InvertBranch(PBI, Builder);
4010 }
4011
4012 BasicBlock *UniqueSucc =
4013 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4014
4015 // Before cloning instructions, notify the successor basic block that it
4016 // is about to have a new predecessor. This will update PHI nodes,
4017 // which will allow us to update live-out uses of bonus instructions.
4018 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4019
4020 // Try to update branch weights.
4021 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4022 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4023 SuccTrueWeight, SuccFalseWeight)) {
4024 SmallVector<uint64_t, 8> NewWeights;
4025
4026 if (PBI->getSuccessor(0) == BB) {
4027 // PBI: br i1 %x, BB, FalseDest
4028 // BI: br i1 %y, UniqueSucc, FalseDest
4029 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4030 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4031 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4032 // TrueWeight for PBI * FalseWeight for BI.
4033 // We assume that total weights of a BranchInst can fit into 32 bits.
4034 // Therefore, we will not have overflow using 64-bit arithmetic.
4035 NewWeights.push_back(PredFalseWeight *
4036 (SuccFalseWeight + SuccTrueWeight) +
4037 PredTrueWeight * SuccFalseWeight);
4038 } else {
4039 // PBI: br i1 %x, TrueDest, BB
4040 // BI: br i1 %y, TrueDest, UniqueSucc
4041 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4042 // FalseWeight for PBI * TrueWeight for BI.
4043 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4044 PredFalseWeight * SuccTrueWeight);
4045 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4046 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4047 }
4048
4049 // Halve the weights if any of them cannot fit in an uint32_t
4050 fitWeights(NewWeights);
4051
4052 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
4053 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4054
4055 // TODO: If BB is reachable from all paths through PredBlock, then we
4056 // could replace PBI's branch probabilities with BI's.
4057 } else
// Without weights on either branch, any stale !prof on PBI is now invalid.
4058 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4059
4060 // Now, update the CFG.
4061 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4062
4063 if (DTU)
4064 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4065 {DominatorTree::Delete, PredBlock, BB}});
4066
4067 // If BI was a loop latch, it may have had associated loop metadata.
4068 // We need to copy it to the new latch, that is, PBI.
4069 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4070 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4071
4072 ValueToValueMapTy VMap; // maps original values to cloned values
// NOTE(review): source line 4073 is missing; presumably the call that clones
// BB's bonus instructions into PredBlock and fills VMap (used for BICond
// below) -- confirm against the upstream file.
4074
4075 Module *M = BB->getModule();
4076
4077 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4078 for (DbgVariableRecord &DVR :
// NOTE(review): source lines 4079 and 4081 are missing; presumably the
// filtered debug-record range on PredBlock's terminator and the remap flags.
4080 RemapDbgRecord(M, &DVR, VMap,
4082 }
4083
4084 // Now that the Cond was cloned into the predecessor basic block,
4085 // or/and the two conditions together.
4086 Value *BICond = VMap[BI->getCondition()];
4087 PBI->setCondition(
4088 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4089
4090 ++NumFoldBranchToCommonDest;
4091 return true;
4092}
4093
4094/// Return if an instruction's type or any of its operands' types are a vector
4095/// type.
4096static bool isVectorOp(Instruction &I) {
4097 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4098 return U->getType()->isVectorTy();
4099 });
4100}
4101
4102/// If this basic block is simple enough, and if a predecessor branches to us
4103/// and one of our successors, fold the block into the predecessor and use
4104/// logical operations to pick the right destination.
// NOTE(review): the declaration line carrying this function's name and first
// parameters (source line 4105, presumably
// "bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,")
// is missing from this extract.
4106 MemorySSAUpdater *MSSAU,
4107 const TargetTransformInfo *TTI,
4108 unsigned BonusInstThreshold) {
4109 // If this block ends with an unconditional branch,
4110 // let speculativelyExecuteBB() deal with it.
4111 if (!BI->isConditional())
4112 return false;
4113
4114 BasicBlock *BB = BI->getParent();
// NOTE(review): source lines 4115-4117 are missing; presumably the CostKind
// declaration used in the TTI queries below -- confirm upstream.
4118
4119 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4120
// Only single-use conditions of a few cheap forms, defined in BB itself, are
// worth duplicating into predecessors.
4121 if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Cond) ||
4122 Cond->getParent() != BB || !Cond->hasOneUse())
4123 return false;
4124
4125 // Finally, don't infinitely unroll conditional loops.
4126 if (is_contained(successors(BB), BB))
4127 return false;
4128
4129 // With which predecessors will we want to deal with?
// NOTE(review): source line 4130 is missing; presumably the declaration of
// the Preds vector populated below.
4131 for (BasicBlock *PredBlock : predecessors(BB)) {
4132 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4133
4134 // Check that we have two conditional branches. If there is a PHI node in
4135 // the common successor, verify that the same value flows in from both
4136 // blocks.
4137 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4138 continue;
4139
4140 // Determine if the two branches share a common destination.
4141 BasicBlock *CommonSucc;
// NOTE(review): source line 4142 is missing; presumably the declaration of
// Opc (Instruction::BinaryOps).
4143 bool InvertPredCond;
4144 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4145 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4146 else
4147 continue;
4148
4149 // Check the cost of inserting the necessary logic before performing the
4150 // transformation.
4151 if (TTI) {
4152 Type *Ty = BI->getCondition()->getType();
// NOTE(review): source lines 4153 and 4158 are missing; presumably the
// initial Cost expression and the threshold comparison guarding the
// "continue" below -- confirm upstream.
4154 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4155 !isa<CmpInst>(PBI->getCondition())))
4156 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4157
4159 continue;
4160 }
4161
4162 // Ok, we do want to deal with this predecessor. Record it.
4163 Preds.emplace_back(PredBlock);
4164 }
4165
4166 // If there aren't any predecessors into which we can fold,
4167 // don't bother checking the cost.
4168 if (Preds.empty())
4169 return false;
4170
4171 // Only allow this transformation if computing the condition doesn't involve
4172 // too many instructions and these involved instructions can be executed
4173 // unconditionally. We denote all involved instructions except the condition
4174 // as "bonus instructions", and only allow this transformation when the
4175 // number of the bonus instructions we'll need to create when cloning into
4176 // each predecessor does not exceed a certain threshold.
4177 unsigned NumBonusInsts = 0;
4178 bool SawVectorOp = false;
4179 const unsigned PredCount = Preds.size();
4180 for (Instruction &I : *BB) {
4181 // Don't check the branch condition comparison itself.
4182 if (&I == Cond)
4183 continue;
4184 // Ignore the terminator.
4185 if (isa<BranchInst>(I))
4186 continue;
4187 // I must be safe to execute unconditionally.
// NOTE(review): source line 4188 is missing; presumably the
// speculation-safety check on I guarding this "return false".
4189 return false;
4190 SawVectorOp |= isVectorOp(I);
4191
4192 // Account for the cost of duplicating this instruction into each
4193 // predecessor. Ignore free instructions.
4194 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
// NOTE(review): source line 4195 is missing; presumably the
// TargetTransformInfo::TCC_Free comparand of this inequality.
4196 NumBonusInsts += PredCount;
4197
4198 // Early exits once we reach the limit.
4199 if (NumBonusInsts >
4200 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4201 return false;
4202 }
4203
// Every use of I must either be a PHI incoming from BB or a later
// instruction in BB itself; only then can uses be rewritten after cloning.
4204 auto IsBCSSAUse = [BB, &I](Use &U) {
4205 auto *UI = cast<Instruction>(U.getUser());
4206 if (auto *PN = dyn_cast<PHINode>(UI))
4207 return PN->getIncomingBlock(U) == BB;
4208 return UI->getParent() == BB && I.comesBefore(UI);
4209 };
4210
4211 // Does this instruction require rewriting of uses?
4212 if (!all_of(I.uses(), IsBCSSAUse))
4213 return false;
4214 }
4215 if (NumBonusInsts >
4216 BonusInstThreshold *
4217 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4218 return false;
4219
4220 // Ok, we have the budget. Perform the transformation.
// Folds into the first recorded predecessor only; SimplifyCFG re-runs and
// picks up the remaining predecessors on later iterations.
4221 for (BasicBlock *PredBlock : Preds) {
4222 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4223 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4224 }
4225 return false;
4226}
4227
4228// If there is only one store in BB1 and BB2, return it, otherwise return
4229// nullptr.
// NOTE(review): the declaration line carrying this function's name (source
// line 4230, presumably "static StoreInst *findUniqueStoreInBlocks(BasicBlock
// *BB1, BasicBlock *BB2) {") is missing from this extract. Either block
// pointer may be null; a null block is simply skipped.
4231 StoreInst *S = nullptr;
4232 for (auto *BB : {BB1, BB2}) {
4233 if (!BB)
4234 continue;
4235 for (auto &I : *BB)
4236 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4237 if (S)
4238 // Multiple stores seen.
4239 return nullptr;
4240 else
4241 S = SI;
4242 }
4243 }
4244 return S;
4245}
4246
// Make V usable in BB's single successor by finding or creating a suitable
// PHI node there; returns V unchanged when no PHI is needed.
// NOTE(review): the declaration line carrying this function's name and first
// parameters (source line 4247, presumably
// "static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,")
// is missing from this extract.
4248 Value *AlternativeV = nullptr) {
4249 // PHI is going to be a PHI node that allows the value V that is defined in
4250 // BB to be referenced in BB's only successor.
4251 //
4252 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4253 // doesn't matter to us what the other operand is (it'll never get used). We
4254 // could just create a new PHI with an undef incoming value, but that could
4255 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4256 // other PHI. So here we directly look for some PHI in BB's successor with V
4257 // as an incoming operand. If we find one, we use it, else we create a new
4258 // one.
4259 //
4260 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4261 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4262 // where OtherBB is the single other predecessor of BB's only successor.
4263 PHINode *PHI = nullptr;
4264 BasicBlock *Succ = BB->getSingleSuccessor();
4265
4266 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4267 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4268 PHI = cast<PHINode>(I);
4269 if (!AlternativeV)
4270 break;
4271
4272 assert(Succ->hasNPredecessors(2));
4273 auto PredI = pred_begin(Succ);
4274 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4275 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4276 break;
// This PHI matches V but not AlternativeV; keep scanning for an exact match.
4277 PHI = nullptr;
4278 }
4279 if (PHI)
4280 return PHI;
4281
4282 // If V is not an instruction defined in BB, just return it.
4283 if (!AlternativeV &&
4284 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4285 return V;
4286
// No reusable PHI found: materialize one, feeding AlternativeV (or poison)
// from every predecessor other than BB.
4287 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4288 PHI->insertBefore(Succ->begin());
4289 PHI->addIncoming(V, BB);
4290 for (BasicBlock *PredBB : predecessors(Succ))
4291 if (PredBB != BB)
4292 PHI->addIncoming(
4293 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4294 return PHI;
4295}
4296
4298 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4299 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4300 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4301 // For every pointer, there must be exactly two stores, one coming from
4302 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4303 // store (to any address) in PTB,PFB or QTB,QFB.
4304 // FIXME: We could relax this restriction with a bit more work and performance
4305 // testing.
4306 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4307 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4308 if (!PStore || !QStore)
4309 return false;
4310
4311 // Now check the stores are compatible.
4312 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4313 PStore->getValueOperand()->getType() !=
4314 QStore->getValueOperand()->getType())
4315 return false;
4316
4317 // Check that sinking the store won't cause program behavior changes. Sinking
4318 // the store out of the Q blocks won't change any behavior as we're sinking
4319 // from a block to its unconditional successor. But we're moving a store from
4320 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4321 // So we need to check that there are no aliasing loads or stores in
4322 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4323 // operations between PStore and the end of its parent block.
4324 //
4325 // The ideal way to do this is to query AliasAnalysis, but we don't
4326 // preserve AA currently so that is dangerous. Be super safe and just
4327 // check there are no other memory operations at all.
4328 for (auto &I : *QFB->getSinglePredecessor())
4329 if (I.mayReadOrWriteMemory())
4330 return false;
4331 for (auto &I : *QFB)
4332 if (&I != QStore && I.mayReadOrWriteMemory())
4333 return false;
4334 if (QTB)
4335 for (auto &I : *QTB)
4336 if (&I != QStore && I.mayReadOrWriteMemory())
4337 return false;
4338 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4339 I != E; ++I)
4340 if (&*I != PStore && I->mayReadOrWriteMemory())
4341 return false;
4342
4343 // If we're not in aggressive mode, we only optimize if we have some
4344 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4345 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4346 if (!BB)
4347 return true;
4348 // Heuristic: if the block can be if-converted/phi-folded and the
4349 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4350 // thread this store.
4352 InstructionCost Budget =
4354 for (auto &I : BB->instructionsWithoutDebug(false)) {
4355 // Consider terminator instruction to be free.
4356 if (I.isTerminator())
4357 continue;
4358 // If this is one the stores that we want to speculate out of this BB,
4359 // then don't count it's cost, consider it to be free.
4360 if (auto *S = dyn_cast<StoreInst>(&I))
4361 if (llvm::find(FreeStores, S))
4362 continue;
4363 // Else, we have a white-list of instructions that we are ak speculating.
4364 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4365 return false; // Not in white-list - not worthwhile folding.
4366 // And finally, if this is a non-free instruction that we are okay
4367 // speculating, ensure that we consider the speculation budget.
4368 Cost +=
4370 if (Cost > Budget)
4371 return false; // Eagerly refuse to fold as soon as we're out of budget.
4372 }
4373 assert(Cost <= Budget &&
4374 "When we run out of budget we will eagerly return from within the "
4375 "per-instruction loop.");
4376 return true;
4377 };
4378
4379 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4381 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4382 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4383 return false;
4384
4385 // If PostBB has more than two predecessors, we need to split it so we can
4386 // sink the store.
4387 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4388 // We know that QFB's only successor is PostBB. And QFB has a single
4389 // predecessor. If QTB exists, then its only successor is also PostBB.
4390 // If QTB does not exist, then QFB's only predecessor has a conditional
4391 // branch to QFB and PostBB.
4392 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4393 BasicBlock *NewBB =
4394 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4395 if (!NewBB)
4396 return false;
4397 PostBB = NewBB;
4398 }
4399
4400 // OK, we're going to sink the stores to PostBB. The store has to be
4401 // conditional though, so first create the predicate.
4402 BranchInst *PBranch =
4403 cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator());
4404 BranchInst *QBranch =
4405 cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator());
4406 Value *PCond = PBranch->getCondition();
4407 Value *QCond = QBranch->getCondition();
4408
4410 PStore->getParent());
4412 QStore->getParent(), PPHI);
4413
4414 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4415 IRBuilder<> QB(PostBB, PostBBFirst);
4416 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4417
4418 InvertPCond ^= (PStore->getParent() != PTB);
4419 InvertQCond ^= (QStore->getParent() != QTB);
4420 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4421 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4422
4423 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4424
4425 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4426 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4427 /*Unreachable=*/false,
4428 /*BranchWeights=*/nullptr, DTU);
4429
4430 QB.SetInsertPoint(T);
4431 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4432 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4433 // Choose the minimum alignment. If we could prove both stores execute, we
4434 // could use biggest one. In this case, though, we only know that one of the
4435 // stores executes. And we don't know it's safe to take the alignment from a
4436 // store that doesn't execute.
4437 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4438
4439 QStore->eraseFromParent();
4440 PStore->eraseFromParent();
4441
4442 return true;
4443}
4444
4446 DomTreeUpdater *DTU, const DataLayout &DL,
4447 const TargetTransformInfo &TTI) {
4448 // The intention here is to find diamonds or triangles (see below) where each
4449 // conditional block contains a store to the same address. Both of these
4450 // stores are conditional, so they can't be unconditionally sunk. But it may
4451 // be profitable to speculatively sink the stores into one merged store at the
4452 // end, and predicate the merged store on the union of the two conditions of
4453 // PBI and QBI.
4454 //
4455 // This can reduce the number of stores executed if both of the conditions are
4456 // true, and can allow the blocks to become small enough to be if-converted.
4457 // This optimization will also chain, so that ladders of test-and-set
4458 // sequences can be if-converted away.
4459 //
4460 // We only deal with simple diamonds or triangles:
4461 //
4462 // PBI or PBI or a combination of the two
4463 // / \ | \
4464 // PTB PFB | PFB
4465 // \ / | /
4466 // QBI QBI
4467 // / \ | \
4468 // QTB QFB | QFB
4469 // \ / | /
4470 // PostBB PostBB
4471 //
4472 // We model triangles as a type of diamond with a nullptr "true" block.
4473 // Triangles are canonicalized so that the fallthrough edge is represented by
4474 // a true condition, as in the diagram above.
4475 BasicBlock *PTB = PBI->getSuccessor(0);
4476 BasicBlock *PFB = PBI->getSuccessor(1);
4477 BasicBlock *QTB = QBI->getSuccessor(0);
4478 BasicBlock *QFB = QBI->getSuccessor(1);
4479 BasicBlock *PostBB = QFB->getSingleSuccessor();
4480
4481 // Make sure we have a good guess for PostBB. If QTB's only successor is
4482 // QFB, then QFB is a better PostBB.
4483 if (QTB->getSingleSuccessor() == QFB)
4484 PostBB = QFB;
4485
4486 // If we couldn't find a good PostBB, stop.
4487 if (!PostBB)
4488 return false;
4489
4490 bool InvertPCond = false, InvertQCond = false;
4491 // Canonicalize fallthroughs to the true branches.
4492 if (PFB == QBI->getParent()) {
4493 std::swap(PFB, PTB);
4494 InvertPCond = true;
4495 }
4496 if (QFB == PostBB) {
4497 std::swap(QFB, QTB);
4498 InvertQCond = true;
4499 }
4500
4501 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4502 // and QFB may not. Model fallthroughs as a nullptr block.
4503 if (PTB == QBI->getParent())
4504 PTB = nullptr;
4505 if (QTB == PostBB)
4506 QTB = nullptr;
4507
4508 // Legality bailouts. We must have at least the non-fallthrough blocks and
4509 // the post-dominating block, and the non-fallthroughs must only have one
4510 // predecessor.
4511 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4512 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4513 };
4514 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4515 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4516 return false;
4517 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4518 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4519 return false;
4520 if (!QBI->getParent()->hasNUses(2))
4521 return false;
4522
4523 // OK, this is a sequence of two diamonds or triangles.
4524 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4525 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4526 for (auto *BB : {PTB, PFB}) {
4527 if (!BB)
4528 continue;
4529 for (auto &I : *BB)
4530 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4531 PStoreAddresses.insert(SI->getPointerOperand());
4532 }
4533 for (auto *BB : {QTB, QFB}) {
4534 if (!BB)
4535 continue;
4536 for (auto &I : *BB)
4537 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4538 QStoreAddresses.insert(SI->getPointerOperand());
4539 }
4540
4541 set_intersect(PStoreAddresses, QStoreAddresses);
4542 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4543 // clear what it contains.
4544 auto &CommonAddresses = PStoreAddresses;
4545
4546 bool Changed = false;
4547 for (auto *Address : CommonAddresses)
4548 Changed |=
4549 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4550 InvertPCond, InvertQCond, DTU, DL, TTI);
4551 return Changed;
4552}
4553
4554/// If the previous block ended with a widenable branch, determine if reusing
4555/// the target block is profitable and legal. This will have the effect of
4556/// "widening" PBI, but doesn't require us to reason about hosting safety.
4558 DomTreeUpdater *DTU) {
4559 // TODO: This can be generalized in two important ways:
4560 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4561 // values from the PBI edge.
4562 // 2) We can sink side effecting instructions into BI's fallthrough
4563 // successor provided they doesn't contribute to computation of
4564 // BI's condition.
4565 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4566 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4567 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4568 !BI->getParent()->getSinglePredecessor())
4569 return false;
4570 if (!IfFalseBB->phis().empty())
4571 return false; // TODO
4572 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4573 // may undo the transform done here.
4574 // TODO: There might be a more fine-grained solution to this.
4575 if (!llvm::succ_empty(IfFalseBB))
4576 return false;
4577 // Use lambda to lazily compute expensive condition after cheap ones.
4578 auto NoSideEffects = [](BasicBlock &BB) {
4579 return llvm::none_of(BB, [](const Instruction &I) {
4580 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4581 });
4582 };
4583 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4584 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4585 NoSideEffects(*BI->getParent())) {
4586 auto *OldSuccessor = BI->getSuccessor(1);
4587 OldSuccessor->removePredecessor(BI->getParent());
4588 BI->setSuccessor(1, IfFalseBB);
4589 if (DTU)
4590 DTU->applyUpdates(
4591 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4592 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4593 return true;
4594 }
4595 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4596 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4597 NoSideEffects(*BI->getParent())) {
4598 auto *OldSuccessor = BI->getSuccessor(0);
4599 OldSuccessor->removePredecessor(BI->getParent());
4600 BI->setSuccessor(0, IfFalseBB);
4601 if (DTU)
4602 DTU->applyUpdates(
4603 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4604 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4605 return true;
4606 }
4607 return false;
4608}
4609
4610/// If we have a conditional branch as a predecessor of another block,
4611/// this function tries to simplify it. We know
4612/// that PBI and BI are both conditional branches, and BI is in one of the
4613/// successor blocks of PBI - PBI branches to BI.
4615 DomTreeUpdater *DTU,
4616 const DataLayout &DL,
4617 const TargetTransformInfo &TTI) {
4618 assert(PBI->isConditional() && BI->isConditional());
4619 BasicBlock *BB = BI->getParent();
4620
4621 // If this block ends with a branch instruction, and if there is a
4622 // predecessor that ends on a branch of the same condition, make
4623 // this conditional branch redundant.
4624 if (PBI->getCondition() == BI->getCondition() &&
4625 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4626 // Okay, the outcome of this conditional branch is statically
4627 // knowable. If this block had a single pred, handle specially, otherwise
4628 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4629 if (BB->getSinglePredecessor()) {
4630 // Turn this into a branch on constant.
4631 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4632 BI->setCondition(
4633 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4634 return true; // Nuke the branch on constant.
4635 }
4636 }
4637
4638 // If the previous block ended with a widenable branch, determine if reusing
4639 // the target block is profitable and legal. This will have the effect of
4640 // "widening" PBI, but doesn't require us to reason about hosting safety.
4641 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4642 return true;
4643
4644 // If both branches are conditional and both contain stores to the same
4645 // address, remove the stores from the conditionals and create a conditional
4646 // merged store at the end.
4647 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4648 return true;
4649
4650 // If this is a conditional branch in an empty block, and if any
4651 // predecessors are a conditional branch to one of our destinations,
4652 // fold the conditions into logical ops and one cond br.
4653
4654 // Ignore dbg intrinsics.
4655 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4656 return false;
4657
4658 int PBIOp, BIOp;
4659 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4660 PBIOp = 0;
4661 BIOp = 0;
4662 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4663 PBIOp = 0;
4664 BIOp = 1;
4665 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4666 PBIOp = 1;
4667 BIOp = 0;
4668 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4669 PBIOp = 1;
4670 BIOp = 1;
4671 } else {
4672 return false;
4673 }
4674
4675 // Check to make sure that the other destination of this branch
4676 // isn't BB itself. If so, this is an infinite loop that will
4677 // keep getting unwound.
4678 if (PBI->getSuccessor(PBIOp) == BB)
4679 return false;
4680
4681 // If predecessor's branch probability to BB is too low don't merge branches.
4682 SmallVector<uint32_t, 2> PredWeights;
4683 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4684 extractBranchWeights(*PBI, PredWeights) &&
4685 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4686
4688 PredWeights[PBIOp],
4689 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4690
4692 if (CommonDestProb >= Likely)
4693 return false;
4694 }
4695
4696 // Do not perform this transformation if it would require
4697 // insertion of a large number of select instructions. For targets
4698 // without predication/cmovs, this is a big pessimization.
4699
4700 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4701 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4702 unsigned NumPhis = 0;
4703 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4704 ++II, ++NumPhis) {
4705 if (NumPhis > 2) // Disable this xform.
4706 return false;
4707 }
4708
4709 // Finally, if everything is ok, fold the branches to logical ops.
4710 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4711
4712 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4713 << "AND: " << *BI->getParent());
4714
4716
4717 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4718 // branch in it, where one edge (OtherDest) goes back to itself but the other
4719 // exits. We don't *know* that the program avoids the infinite loop
4720 // (even though that seems likely). If we do this xform naively, we'll end up
4721 // recursively unpeeling the loop. Since we know that (after the xform is
4722 // done) that the block *is* infinite if reached, we just make it an obviously
4723 // infinite loop with no cond branch.
4724 if (OtherDest == BB) {
4725 // Insert it at the end of the function, because it's either code,
4726 // or it won't matter if it's hot. :)
4727 BasicBlock *InfLoopBlock =
4728 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4729 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4730 if (DTU)
4731 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4732 OtherDest = InfLoopBlock;
4733 }
4734
4735 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4736
4737 // BI may have other predecessors. Because of this, we leave
4738 // it alone, but modify PBI.
4739
4740 // Make sure we get to CommonDest on True&True directions.
4741 Value *PBICond = PBI->getCondition();
4742 IRBuilder<NoFolder> Builder(PBI);
4743 if (PBIOp)
4744 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4745
4746 Value *BICond = BI->getCondition();
4747 if (BIOp)
4748 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4749
4750 // Merge the conditions.
4751 Value *Cond =
4752 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4753
4754 // Modify PBI to branch on the new condition to the new dests.
4755 PBI->setCondition(Cond);
4756 PBI->setSuccessor(0, CommonDest);
4757 PBI->setSuccessor(1, OtherDest);
4758
4759 if (DTU) {
4760 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4761 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4762
4763 DTU->applyUpdates(Updates);
4764 }
4765
4766 // Update branch weight for PBI.
4767 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4768 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4769 bool HasWeights =
4770 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4771 SuccTrueWeight, SuccFalseWeight);
4772 if (HasWeights) {
4773 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4774 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4775 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4776 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4777 // The weight to CommonDest should be PredCommon * SuccTotal +
4778 // PredOther * SuccCommon.
4779 // The weight to OtherDest should be PredOther * SuccOther.
4780 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4781 PredOther * SuccCommon,
4782 PredOther * SuccOther};
4783 // Halve the weights if any of them cannot fit in an uint32_t
4784 fitWeights(NewWeights);
4785
4786 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4787 }
4788
4789 // OtherDest may have phi nodes. If so, add an entry from PBI's
4790 // block that are identical to the entries for BI's block.
4791 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4792
4793 // We know that the CommonDest already had an edge from PBI to
4794 // it. If it has PHIs though, the PHIs may have different
4795 // entries for BB and PBI's BB. If so, insert a select to make
4796 // them agree.
4797 for (PHINode &PN : CommonDest->phis()) {
4798 Value *BIV = PN.getIncomingValueForBlock(BB);
4799 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4800 Value *PBIV = PN.getIncomingValue(PBBIdx);
4801 if (BIV != PBIV) {
4802 // Insert a select in PBI to pick the right value.
4803 SelectInst *NV = cast<SelectInst>(
4804 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4805 PN.setIncomingValue(PBBIdx, NV);
4806 // Although the select has the same condition as PBI, the original branch
4807 // weights for PBI do not apply to the new select because the select's
4808 // 'logical' edges are incoming edges of the phi that is eliminated, not
4809 // the outgoing edges of PBI.
4810 if (HasWeights) {
4811 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4812 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4813 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4814 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4815 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4816 // The weight to PredOtherDest should be PredOther * SuccCommon.
4817 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4818 PredOther * SuccCommon};
4819
4820 fitWeights(NewWeights);
4821
4822 setBranchWeights(NV, NewWeights[0], NewWeights[1],
4823 /*IsExpected=*/false);
4824 }
4825 }
4826 }
4827
4828 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4829 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4830
4831 // This basic block is probably dead. We know it has at least
4832 // one fewer predecessor.
4833 return true;
4834}
4835
4836// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4837// true or to FalseBB if Cond is false.
4838// Takes care of updating the successors and removing the old terminator.
4839// Also makes sure not to introduce new successors by assuming that edges to
4840// non-successor TrueBBs and FalseBBs aren't reachable.
4841bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4842 Value *Cond, BasicBlock *TrueBB,
4843 BasicBlock *FalseBB,
4844 uint32_t TrueWeight,
4845 uint32_t FalseWeight) {
4846 auto *BB = OldTerm->getParent();
4847 // Remove any superfluous successor edges from the CFG.
4848 // First, figure out which successors to preserve.
4849 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4850 // successor.
4851 BasicBlock *KeepEdge1 = TrueBB;
4852 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4853
4854 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4855
4856 // Then remove the rest.
4857 for (BasicBlock *Succ : successors(OldTerm)) {
4858 // Make sure only to keep exactly one copy of each edge.
4859 if (Succ == KeepEdge1)
4860 KeepEdge1 = nullptr;
4861 else if (Succ == KeepEdge2)
4862 KeepEdge2 = nullptr;
4863 else {
4864 Succ->removePredecessor(BB,
4865 /*KeepOneInputPHIs=*/true);
4866
4867 if (Succ != TrueBB && Succ != FalseBB)
4868 RemovedSuccessors.insert(Succ);
4869 }
4870 }
4871
4872 IRBuilder<> Builder(OldTerm);
4873 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4874
4875 // Insert an appropriate new terminator.
4876 if (!KeepEdge1 && !KeepEdge2) {
4877 if (TrueBB == FalseBB) {
4878 // We were only looking for one successor, and it was present.
4879 // Create an unconditional branch to it.
4880 Builder.CreateBr(TrueBB);
4881 } else {
4882 // We found both of the successors we were looking for.
4883 // Create a conditional branch sharing the condition of the select.
4884 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4885 if (TrueWeight != FalseWeight)
4886 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4887 }
4888 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4889 // Neither of the selected blocks were successors, so this
4890 // terminator must be unreachable.
4891 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4892 } else {
4893 // One of the selected values was a successor, but the other wasn't.
4894 // Insert an unconditional branch to the one that was found;
4895 // the edge to the one that wasn't must be unreachable.
4896 if (!KeepEdge1) {
4897 // Only TrueBB was found.
4898 Builder.CreateBr(TrueBB);
4899 } else {
4900 // Only FalseBB was found.
4901 Builder.CreateBr(FalseBB);
4902 }
4903 }
4904
4906
4907 if (DTU) {
4909 Updates.reserve(RemovedSuccessors.size());
4910 for (auto *RemovedSuccessor : RemovedSuccessors)
4911 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4912 DTU->applyUpdates(Updates);
4913 }
4914
4915 return true;
4916}
4917
4918// Replaces
4919// (switch (select cond, X, Y)) on constant X, Y
4920// with a branch - conditional if X and Y lead to distinct BBs,
4921// unconditional otherwise.
4922bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4923 SelectInst *Select) {
4924 // Check for constant integer values in the select.
4925 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4926 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4927 if (!TrueVal || !FalseVal)
4928 return false;
4929
4930 // Find the relevant condition and destinations.
4931 Value *Condition = Select->getCondition();
4932 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4933 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4934
4935 // Get weight for TrueBB and FalseBB.
4936 uint32_t TrueWeight = 0, FalseWeight = 0;
4938 bool HasWeights = hasBranchWeightMD(*SI);
4939 if (HasWeights) {
4940 getBranchWeights(SI, Weights);
4941 if (Weights.size() == 1 + SI->getNumCases()) {
4942 TrueWeight =
4943 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4944 FalseWeight =
4945 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4946 }
4947 }
4948
4949 // Perform the actual simplification.
4950 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4951 FalseWeight);
4952}
4953
4954// Replaces
4955// (indirectbr (select cond, blockaddress(@fn, BlockA),
4956// blockaddress(@fn, BlockB)))
4957// with
4958// (br cond, BlockA, BlockB).
4959bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4960 SelectInst *SI) {
4961 // Check that both operands of the select are block addresses.
4962 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4963 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4964 if (!TBA || !FBA)
4965 return false;
4966
4967 // Extract the actual blocks.
4968 BasicBlock *TrueBB = TBA->getBasicBlock();
4969 BasicBlock *FalseBB = FBA->getBasicBlock();
4970
4971 // Perform the actual simplification.
4972 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4973 0);
4974}
4975
4976/// This is called when we find an icmp instruction
4977/// (a seteq/setne with a constant) as the only instruction in a
4978/// block that ends with an uncond branch. We are looking for a very specific
4979/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4980/// this case, we merge the first two "or's of icmp" into a switch, but then the
4981/// default value goes to an uncond block with a seteq in it, we get something
4982/// like:
4983///
4984/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4985/// DEFAULT:
4986/// %tmp = icmp eq i8 %A, 92
4987/// br label %end
4988/// end:
4989/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4990///
4991/// We prefer to split the edge to 'end' so that there is a true/false entry to
4992/// the PHI, merging the third icmp into the switch.
4993bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4994 ICmpInst *ICI, IRBuilder<> &Builder) {
4995 BasicBlock *BB = ICI->getParent();
4996
4997 // If the block has any PHIs in it or the icmp has multiple uses, it is too
4998 // complex.
4999 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5000 return false;
5001
5002 Value *V = ICI->getOperand(0);
5003 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5004
5005 // The pattern we're looking for is where our only predecessor is a switch on
5006 // 'V' and this block is the default case for the switch. In this case we can
5007 // fold the compared value into the switch to simplify things.
5008 BasicBlock *Pred = BB->getSinglePredecessor();
5009 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5010 return false;
5011
5012 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5013 if (SI->getCondition() != V)
5014 return false;
5015
5016 // If BB is reachable on a non-default case, then we simply know the value of
5017 // V in this block. Substitute it and constant fold the icmp instruction
5018 // away.
5019 if (SI->getDefaultDest() != BB) {
5020 ConstantInt *VVal = SI->findCaseDest(BB);
5021 assert(VVal && "Should have a unique destination value");
5022 ICI->setOperand(0, VVal);
5023
5024 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5025 ICI->replaceAllUsesWith(V);
5026 ICI->eraseFromParent();
5027 }
5028 // BB is now empty, so it is likely to simplify away.
5029 return requestResimplify();
5030 }
5031
5032 // Ok, the block is reachable from the default dest. If the constant we're
5033 // comparing exists in one of the other edges, then we can constant fold ICI
5034 // and zap it.
5035 if (SI->findCaseValue(Cst) != SI->case_default()) {
5036 Value *V;
5037 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5039 else
5041
5042 ICI->replaceAllUsesWith(V);
5043 ICI->eraseFromParent();
5044 // BB is now empty, so it is likely to simplify away.
5045 return requestResimplify();
5046 }
5047
5048 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5049 // the block.
5050 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5051 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5052 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5053 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
5054 return false;
5055
5056 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5057 // true in the PHI.
5058 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5059 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5060
5061 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5062 std::swap(DefaultCst, NewCst);
5063
5064 // Replace ICI (which is used by the PHI for the default value) with true or
5065 // false depending on if it is EQ or NE.
5066 ICI->replaceAllUsesWith(DefaultCst);
5067 ICI->eraseFromParent();
5068
5070
5071 // Okay, the switch goes to this block on a default value. Add an edge from
5072 // the switch to the merge point on the compared value.
5073 BasicBlock *NewBB =
5074 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5075 {
5077 auto W0 = SIW.getSuccessorWeight(0);
5079 if (W0) {
5080 NewW = ((uint64_t(*W0) + 1) >> 1);
5081 SIW.setSuccessorWeight(0, *NewW);
5082 }
5083 SIW.addCase(Cst, NewBB, NewW);
5084 if (DTU)
5085 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5086 }
5087
5088 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5089 Builder.SetInsertPoint(NewBB);
5090 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5091 Builder.CreateBr(SuccBlock);
5092 PHIUse->addIncoming(NewCst, NewBB);
5093 if (DTU) {
5094 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5095 DTU->applyUpdates(Updates);
5096 }
5097 return true;
5098}
5099
5100/// The specified branch is a conditional branch.
5101/// Check to see if it is branching on an or/and chain of icmp instructions, and
5102/// fold it into a switch instruction if so.
/// Returns true iff the branch was rewritten into a switch (possibly preceded
/// by an "switch.early.test" block that evaluates values which could not be
/// folded into the switch).
5103 bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5104 IRBuilder<> &Builder,
5105 const DataLayout &DL) {
5106 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
5107 if (!Cond)
5108 return false;
5109
5110 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5111 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5112 // 'setne's and'ed together, collect them.
5113
5114 // Try to gather values from a chain of and/or to be turned into a switch
5115 ConstantComparesGatherer ConstantCompare(Cond, DL);
5116 // Unpack the result
5117 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5118 Value *CompVal = ConstantCompare.CompValue;
5119 unsigned UsedICmps = ConstantCompare.UsedICmps;
5120 Value *ExtraCase = ConstantCompare.Extra;
5121 bool TrueWhenEqual = ConstantCompare.IsEq;
5122
5123 // If we didn't have a multiply compared value, fail.
5124 if (!CompVal)
5125 return false;
5126
5127 // Avoid turning single icmps into a switch.
5128 if (UsedICmps <= 1)
5129 return false;
5130
5131 // There might be duplicate constants in the list, which the switch
5132 // instruction can't handle, remove them now.
5133 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5134 Values.erase(llvm::unique(Values), Values.end());
5135
5136 // If Extra was used, we require at least two switch values to do the
5137 // transformation. A switch with one value is just a conditional branch.
5138 if (ExtraCase && Values.size() < 2)
5139 return false;
5140
5141 // TODO: Preserve branch weight metadata, similarly to how
5142 // foldValueComparisonIntoPredecessors preserves it.
5143
5144 // Figure out which block is which destination.
5145 BasicBlock *DefaultBB = BI->getSuccessor(1);
5146 BasicBlock *EdgeBB = BI->getSuccessor(0);
5147 if (!TrueWhenEqual)
5148 std::swap(DefaultBB, EdgeBB);
5149
5150 BasicBlock *BB = BI->getParent();
5151
5152 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5153 << " cases into SWITCH. BB is:\n"
5154 << *BB);
5155
// NOTE(review): a line (5156) appears to be dropped from this listing here --
// presumably the declaration of the `Updates` DT-update vector used below.
// Confirm against upstream.
5157
5158 // If there are any extra values that couldn't be folded into the switch
5159 // then we evaluate them with an explicit branch first. Split the block
5160 // right before the condbr to handle it.
5161 if (ExtraCase) {
5162 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5163 /*MSSAU=*/nullptr, "switch.early.test");
5164
5165 // Remove the uncond branch added to the old block.
5166 Instruction *OldTI = BB->getTerminator();
5167 Builder.SetInsertPoint(OldTI);
5168
5169 // There can be an unintended UB if extra values are Poison. Before the
5170 // transformation, extra values may not be evaluated according to the
5171 // condition, and it will not raise UB. But after transformation, we are
5172 // evaluating extra values before checking the condition, and it will raise
5173 // UB. It can be solved by adding freeze instruction to extra values.
5174 AssumptionCache *AC = Options.AC;
5175
5176 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5177 ExtraCase = Builder.CreateFreeze(ExtraCase);
5178
5179 if (TrueWhenEqual)
5180 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
5181 else
5182 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5183
5184 OldTI->eraseFromParent();
5185
5186 if (DTU)
5187 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5188
5189 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5190 // for the edge we just added.
5191 addPredecessorToBlock(EdgeBB, BB, NewBB);
5192
5193 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5194 << "\nEXTRABB = " << *BB);
5195 BB = NewBB;
5196 }
5197
5198 Builder.SetInsertPoint(BI);
5199 // Convert pointer to int before we switch.
5200 if (CompVal->getType()->isPointerTy()) {
5201 CompVal = Builder.CreatePtrToInt(
5202 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5203 }
5204
5205 // Create the new switch instruction now.
5206 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5207
5208 // Add all of the 'cases' to the switch instruction.
5209 for (ConstantInt *Val : Values)
5210 New->addCase(Val, EdgeBB);
5211
5212 // We added edges from PI to the EdgeBB. As such, if there were any
5213 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5214 // the number of edges added.
5215 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5216 PHINode *PN = cast<PHINode>(BBI);
5217 Value *InVal = PN->getIncomingValueForBlock(BB);
// Values.size() - 1 because BB already has one edge into EdgeBB accounted
// for in the PHI; the switch adds Values.size() edges in total.
5218 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5219 PN->addIncoming(InVal, BB);
5220 }
5221
5222 // Erase the old branch instruction.
// NOTE(review): the statement that actually erases/DCEs BI (line 5223)
// appears to be missing from this listing -- confirm against upstream.
5224 if (DTU)
5225 DTU->applyUpdates(Updates);
5226
5227 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5228 return true;
5229}
5230
5231bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5232 if (isa<PHINode>(RI->getValue()))
5233 return simplifyCommonResume(RI);
5234 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5235 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5236 // The resume must unwind the exception that caused control to branch here.
5237 return simplifySingleResume(RI);
5238
5239 return false;
5240}
5241
5242 // Check if cleanup block is empty
// Returns true iff every instruction in the range R is an intrinsic from the
// benign set below (debug-info or lifetime.end), i.e. the range contains no
// "real" work.
// NOTE(review): the function signature line (5243) is missing from this
// listing -- confirm it against upstream (it takes an instruction range R).
5244 for (Instruction &I : R) {
5245 auto *II = dyn_cast<IntrinsicInst>(&I);
// Any non-intrinsic instruction means the cleanup does real work.
5246 if (!II)
5247 return false;
5248
5249 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5250 switch (IntrinsicID) {
5251 case Intrinsic::dbg_declare:
5252 case Intrinsic::dbg_value:
5253 case Intrinsic::dbg_label:
5254 case Intrinsic::lifetime_end:
5255 break;
5256 default:
5257 return false;
5258 }
5259 }
5260 return true;
5261}
5262
5263 // Simplify resume that is shared by several landing pads (phi of landing pad).
// For each "trivial" predecessor (a block that contains only a landing pad
// plus benign intrinsics and branches uniquely to this resume block), rewrite
// the invokes unwinding to it into calls and cut its edge to the resume
// block; the resume block itself is deleted once it loses all predecessors.
5264 bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5265 BasicBlock *BB = RI->getParent();
5266
5267 // Check that there are no other instructions except for debug and lifetime
5268 // intrinsics between the phi's and resume instruction.
5269 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5270 BB->getTerminator()->getIterator())))
5271 return false;
5272
5273 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5274 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5275
5276 // Check incoming blocks to see if any of them are trivial.
5277 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5278 Idx++) {
5279 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5280 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5281
5282 // If the block has other successors, we can not delete it because
5283 // it has other dependents.
5284 if (IncomingBB->getUniqueSuccessor() != BB)
5285 continue;
5286
5287 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5288 // Not the landing pad that caused the control to branch here.
5289 if (IncomingValue != LandingPad)
5290 continue;
5291
// NOTE(review): a line (5292) is missing from this listing here -- it is the
// opening of the emptiness check (likely `if (isCleanupBlockEmpty(`) whose
// argument continues on the next line. Confirm against upstream.
5293 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5294 TrivialUnwindBlocks.insert(IncomingBB);
5295 }
5296
5297 // If no trivial unwind blocks, don't do any simplifications.
5298 if (TrivialUnwindBlocks.empty())
5299 return false;
5300
5301 // Turn all invokes that unwind here into calls.
5302 for (auto *TrivialBB : TrivialUnwindBlocks) {
5303 // Blocks that will be simplified should be removed from the phi node.
5304 // Note there could be multiple edges to the resume block, and we need
5305 // to remove them all.
5306 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5307 BB->removePredecessor(TrivialBB, true);
5308
5309 for (BasicBlock *Pred :
// NOTE(review): the continuation line (5310) is missing from this listing --
// presumably an early-inc range over predecessors(TrivialBB), since the loop
// body removes unwind edges. Confirm against upstream.
5311 removeUnwindEdge(Pred, DTU);
5312 ++NumInvokes;
5313 }
5314
5315 // In each SimplifyCFG run, only the current processed block can be erased.
5316 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5317 // of erasing TrivialBB, we only remove the branch to the common resume
5318 // block so that we can later erase the resume block since it has no
5319 // predecessors.
5320 TrivialBB->getTerminator()->eraseFromParent();
5321 new UnreachableInst(RI->getContext(), TrivialBB);
5322 if (DTU)
5323 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5324 }
5325
5326 // Delete the resume block if all its predecessors have been removed.
5327 if (pred_empty(BB))
5328 DeleteDeadBlock(BB, DTU);
5329
// Non-empty by the early-exit above, so this always reports a change.
5330 return !TrivialUnwindBlocks.empty();
5331}
5332
5333 // Simplify resume that is only used by a single (non-phi) landing pad.
// If the block is just landingpad + benign intrinsics + resume, every invoke
// unwinding here can be turned into a plain call and the block deleted.
5334 bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5335 BasicBlock *BB = RI->getParent();
5336 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5337 assert(RI->getValue() == LPInst &&
5338 "Resume must unwind the exception that caused control to here");
5339
5340 // Check that there are no other instructions except for debug intrinsics.
// NOTE(review): the opening of the check (line 5341, likely
// `if (!isCleanupBlockEmpty(`) is missing from this listing -- confirm.
5342 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5343 return false;
5344
5345 // Turn all invokes that unwind here into calls and delete the basic block.
// NOTE(review): the loop header (line 5346, presumably an early-inc range
// over predecessors(BB)) is missing from this listing -- confirm.
5347 removeUnwindEdge(Pred, DTU);
5348 ++NumInvokes;
5349 }
5350
5351 // The landingpad is now unreachable. Zap it.
5352 DeleteDeadBlock(BB, DTU);
5353 return true;
5354}
5355
// NOTE(review): the function signature line (5356) is missing from this
// listing; from the body it takes the CleanupReturnInst *RI and a
// DomTreeUpdater *DTU. Confirm against upstream.
5357 // If this is a trivial cleanup pad that executes no instructions, it can be
5358 // eliminated. If the cleanup pad continues to the caller, any predecessor
5359 // that is an EH pad will be updated to continue to the caller and any
5360 // predecessor that terminates with an invoke instruction will have its invoke
5361 // instruction converted to a call instruction. If the cleanup pad being
5362 // simplified does not continue to the caller, each predecessor will be
5363 // updated to continue to the unwind destination of the cleanup pad being
5364 // simplified.
5365 BasicBlock *BB = RI->getParent();
5366 CleanupPadInst *CPInst = RI->getCleanupPad();
5367 if (CPInst->getParent() != BB)
5368 // This isn't an empty cleanup.
5369 return false;
5370
5371 // We cannot kill the pad if it has multiple uses. This typically arises
5372 // from unreachable basic blocks.
5373 if (!CPInst->hasOneUse())
5374 return false;
5375
5376 // Check that there are no other instructions except for benign intrinsics.
// NOTE(review): the opening of the check (line 5377, likely
// `if (!isCleanupBlockEmpty(`) is missing from this listing -- confirm.
5378 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5379 return false;
5380
5381 // If the cleanup return we are simplifying unwinds to the caller, this will
5382 // set UnwindDest to nullptr.
5383 BasicBlock *UnwindDest = RI->getUnwindDest();
5384
5385 // We're about to remove BB from the control flow. Before we do, sink any
5386 // PHINodes into the unwind destination. Doing this before changing the
5387 // control flow avoids some potentially slow checks, since we can currently
5388 // be certain that UnwindDest and BB have no common predecessors (since they
5389 // are both EH pads).
5390 if (UnwindDest) {
5391 // First, go through the PHI nodes in UnwindDest and update any nodes that
5392 // reference the block we are removing
5393 for (PHINode &DestPN : UnwindDest->phis()) {
5394 int Idx = DestPN.getBasicBlockIndex(BB);
5395 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5396 assert(Idx != -1);
5397 // This PHI node has an incoming value that corresponds to a control
5398 // path through the cleanup pad we are removing. If the incoming
5399 // value is in the cleanup pad, it must be a PHINode (because we
5400 // verified above that the block is otherwise empty). Otherwise, the
5401 // value is either a constant or a value that dominates the cleanup
5402 // pad being removed.
5403 //
5404 // Because BB and UnwindDest are both EH pads, all of their
5405 // predecessors must unwind to these blocks, and since no instruction
5406 // can have multiple unwind destinations, there will be no overlap in
5407 // incoming blocks between SrcPN and DestPN.
5408 Value *SrcVal = DestPN.getIncomingValue(Idx);
5409 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5410
5411 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5412 for (auto *Pred : predecessors(BB)) {
5413 Value *Incoming =
5414 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5415 DestPN.addIncoming(Incoming, Pred);
5416 }
5417 }
5418
5419 // Sink any remaining PHI nodes directly into UnwindDest.
5420 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5421 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5422 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5423 // If the PHI node has no uses or all of its uses are in this basic
5424 // block (meaning they are debug or lifetime intrinsics), just leave
5425 // it. It will be erased when we erase BB below.
5426 continue;
5427
5428 // Otherwise, sink this PHI node into UnwindDest.
5429 // Any predecessors to UnwindDest which are not already represented
5430 // must be back edges which inherit the value from the path through
5431 // BB. In this case, the PHI value must reference itself.
5432 for (auto *pred : predecessors(UnwindDest))
5433 if (pred != BB)
5434 PN.addIncoming(&PN, pred);
5435 PN.moveBefore(InsertPt);
5436 // Also, add a dummy incoming value for the original BB itself,
5437 // so that the PHI is well-formed until we drop said predecessor.
5438 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5439 }
5440 }
5441
5442 std::vector<DominatorTree::UpdateType> Updates;
5443
5444 // We use make_early_inc_range here because we will remove all predecessors.
// NOTE(review): the loop header over predecessors(BB) (line 5445) is missing
// from this listing; the body below uses a `PredBB` iteration variable.
// Confirm against upstream.
5446 if (UnwindDest == nullptr) {
5447 if (DTU) {
// Flush pending updates first: removeUnwindEdge mutates the CFG under DTU.
5448 DTU->applyUpdates(Updates);
5449 Updates.clear();
5450 }
5451 removeUnwindEdge(PredBB, DTU);
5452 ++NumInvokes;
5453 } else {
5454 BB->removePredecessor(PredBB);
5455 Instruction *TI = PredBB->getTerminator();
5456 TI->replaceUsesOfWith(BB, UnwindDest);
5457 if (DTU) {
5458 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5459 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5460 }
5461 }
5462 }
5463
5464 if (DTU)
5465 DTU->applyUpdates(Updates);
5466
5467 DeleteDeadBlock(BB, DTU);
5468
5469 return true;
5470}
5471
5472 // Try to merge two cleanuppads together.
// Succeeds when this cleanupret is the unique predecessor of another
// cleanuppad: the successor pad is folded into the predecessor pad and the
// cleanupret becomes a plain branch to the old unwind destination.
// NOTE(review): the function signature line (5473) is missing from this
// listing; from the body it takes the CleanupReturnInst *RI. Confirm.
5474 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5475 // with.
5476 BasicBlock *UnwindDest = RI->getUnwindDest();
5477 if (!UnwindDest)
5478 return false;
5479
5480 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5481 // be safe to merge without code duplication.
5482 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5483 return false;
5484
5485 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5486 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5487 if (!SuccessorCleanupPad)
5488 return false;
5489
5490 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5491 // Replace any uses of the successor cleanupad with the predecessor pad
5492 // The only cleanuppad uses should be this cleanupret, its cleanupret and
5493 // funclet bundle operands.
5494 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5495 // Remove the old cleanuppad.
5496 SuccessorCleanupPad->eraseFromParent();
5497 // Now, we simply replace the cleanupret with a branch to the unwind
5498 // destination.
5499 BranchInst::Create(UnwindDest, RI->getParent());
5500 RI->eraseFromParent();
5501
5502 return true;
5503}
5504
5505bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5506 // It is possible to transiantly have an undef cleanuppad operand because we
5507 // have deleted some, but not all, dead blocks.
5508 // Eventually, this block will be deleted.
5509 if (isa<UndefValue>(RI->getOperand(0)))
5510 return false;
5511
5512 if (mergeCleanupPad(RI))
5513 return true;
5514
5515 if (removeEmptyCleanup(RI, DTU))
5516 return true;
5517
5518 return false;
5519}
5520
5521 // WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Simplify around an `unreachable` terminator: delete removable instructions
// preceding it, then (if the block is otherwise empty) redirect or rewrite
// every predecessor terminator so the dead block can be removed.
5522 bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5523 BasicBlock *BB = UI->getParent();
5524
5525 bool Changed = false;
5526
5527 // Ensure that any debug-info records that used to occur after the Unreachable
5528 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5529 // the block.
// NOTE(review): the statement performing that move (line 5530) is missing
// from this listing -- confirm against upstream.
5531
5532 // Debug-info records on the unreachable inst itself should be deleted, as
5533 // below we delete everything past the final executable instruction.
5534 UI->dropDbgRecords();
5535
5536 // If there are any instructions immediately before the unreachable that can
5537 // be removed, do so.
5538 while (UI->getIterator() != BB->begin()) {
// NOTE(review): the iterator declaration (line 5539, presumably
// `BasicBlock::iterator BBI = UI->getIterator();`) is missing -- confirm.
5540 --BBI;
5541
// NOTE(review): the guard condition for this break (line 5542) is missing
// from this listing -- confirm against upstream.
5543 break; // Can not drop any more instructions. We're done here.
5544 // Otherwise, this instruction can be freely erased,
5545 // even if it is not side-effect free.
5546
5547 // Note that deleting EH's here is in fact okay, although it involves a bit
5548 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5549 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5550 // and we can therefore guarantee this block will be erased.
5551
5552 // If we're deleting this, we're deleting any subsequent debug info, so
5553 // delete DbgRecords.
5554 BBI->dropDbgRecords();
5555
5556 // Delete this instruction (any uses are guaranteed to be dead)
5557 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5558 BBI->eraseFromParent();
5559 Changed = true;
5560 }
5561
5562 // If the unreachable instruction is the first in the block, take a gander
5563 // at all of the predecessors of this instruction, and simplify them.
5564 if (&BB->front() != UI)
5565 return Changed;
5566
5567 std::vector<DominatorTree::UpdateType> Updates;
5568
// NOTE(review): the snapshot of predecessors(BB) into `Preds` (line 5569) is
// missing from this listing -- confirm against upstream.
5570 for (BasicBlock *Predecessor : Preds) {
5571 Instruction *TI = Predecessor->getTerminator();
5572 IRBuilder<> Builder(TI);
5573 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5574 // We could either have a proper unconditional branch,
5575 // or a degenerate conditional branch with matching destinations.
5576 if (all_of(BI->successors(),
5577 [BB](auto *Successor) { return Successor == BB; })) {
5578 new UnreachableInst(TI->getContext(), TI->getIterator());
5579 TI->eraseFromParent();
5580 Changed = true;
5581 } else {
5582 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5583 Value* Cond = BI->getCondition();
5584 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5585 "The destinations are guaranteed to be different here.");
// Record the branch condition as an assumption: the taken-to-BB direction
// can never execute, so the other direction's condition must hold.
5586 CallInst *Assumption;
5587 if (BI->getSuccessor(0) == BB) {
5588 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5589 Builder.CreateBr(BI->getSuccessor(1));
5590 } else {
5591 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5592 Assumption = Builder.CreateAssumption(Cond);
5593 Builder.CreateBr(BI->getSuccessor(0));
5594 }
5595 if (Options.AC)
5596 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5597
// NOTE(review): the statement erasing the old conditional branch (line 5598)
// is missing from this listing -- confirm against upstream.
5599 Changed = true;
5600 }
5601 if (DTU)
5602 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5603 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
// NOTE(review): the declaration of the profile-update wrapper `SU` around SI
// (line 5604) is missing from this listing -- confirm against upstream.
5605 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5606 if (i->getCaseSuccessor() != BB) {
5607 ++i;
5608 continue;
5609 }
5610 BB->removePredecessor(SU->getParent());
5611 i = SU.removeCase(i);
5612 e = SU->case_end();
5613 Changed = true;
5614 }
5615 // Note that the default destination can't be removed!
5616 if (DTU && SI->getDefaultDest() != BB)
5617 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5618 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5619 if (II->getUnwindDest() == BB) {
5620 if (DTU) {
5621 DTU->applyUpdates(Updates);
5622 Updates.clear();
5623 }
5624 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
// Unwinding to unreachable means the call cannot throw.
5625 if (!CI->doesNotThrow())
5626 CI->setDoesNotThrow();
5627 Changed = true;
5628 }
5629 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5630 if (CSI->getUnwindDest() == BB) {
5631 if (DTU) {
5632 DTU->applyUpdates(Updates);
5633 Updates.clear();
5634 }
5635 removeUnwindEdge(TI->getParent(), DTU);
5636 Changed = true;
5637 continue;
5638 }
5639
5640 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5641 E = CSI->handler_end();
5642 I != E; ++I) {
5643 if (*I == BB) {
5644 CSI->removeHandler(I);
5645 --I;
5646 --E;
5647 Changed = true;
5648 }
5649 }
5650 if (DTU)
5651 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5652 if (CSI->getNumHandlers() == 0) {
5653 if (CSI->hasUnwindDest()) {
5654 // Redirect all predecessors of the block containing CatchSwitchInst
5655 // to instead branch to the CatchSwitchInst's unwind destination.
5656 if (DTU) {
5657 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5658 Updates.push_back({DominatorTree::Insert,
5659 PredecessorOfPredecessor,
5660 CSI->getUnwindDest()});
5661 Updates.push_back({DominatorTree::Delete,
5662 PredecessorOfPredecessor, Predecessor});
5663 }
5664 }
5665 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5666 } else {
5667 // Rewrite all preds to unwind to caller (or from invoke to call).
5668 if (DTU) {
5669 DTU->applyUpdates(Updates);
5670 Updates.clear();
5671 }
5672 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5673 for (BasicBlock *EHPred : EHPreds)
5674 removeUnwindEdge(EHPred, DTU);
5675 }
5676 // The catchswitch is no longer reachable.
5677 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5678 CSI->eraseFromParent();
5679 Changed = true;
5680 }
5681 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5682 (void)CRI;
5683 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5684 "Expected to always have an unwind to BB.");
5685 if (DTU)
5686 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5687 new UnreachableInst(TI->getContext(), TI->getIterator());
5688 TI->eraseFromParent();
5689 Changed = true;
5690 }
5691 }
5692
5693 if (DTU)
5694 DTU->applyUpdates(Updates);
5695
5696 // If this block is now dead, remove it.
5697 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5698 DeleteDeadBlock(BB, DTU);
5699 return true;
5700 }
5701
5702 return Changed;
5703}
5704
// Returns true iff the case constants form a contiguous descending run
// (each value is exactly one less than its predecessor in the list).
// NOTE(review): the function signature (line 5705, taking the Cases vector)
// is missing from this listing -- confirm against upstream.
5706 assert(Cases.size() >= 1);
5707
// NOTE(review): a line (5708) is missing here -- presumably the sort that
// establishes the ordering the loop below relies on. Confirm.
5709 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5710 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5711 return false;
5712 }
5713 return true;
5714}
5715
// Replace the default destination of `Switch` with a fresh block containing
// only `unreachable`, optionally detaching the old default block first.
// NOTE(review): the first signature line (5716, taking the SwitchInst) is
// missing from this listing -- confirm against upstream.
5717 DomTreeUpdater *DTU,
5718 bool RemoveOrigDefaultBlock = true) {
5719 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5720 auto *BB = Switch->getParent();
5721 auto *OrigDefaultBlock = Switch->getDefaultDest();
5722 if (RemoveOrigDefaultBlock)
5723 OrigDefaultBlock->removePredecessor(BB);
5724 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5725 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5726 OrigDefaultBlock);
5727 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
// NOTE(review): a line (5728) using `UI` appears to be missing from this
// listing (UI is otherwise unused) -- confirm against upstream.
5729 Switch->setDefaultDest(&*NewDefaultBlock);
5730 if (DTU) {
// NOTE(review): the declaration of `Updates` (line 5731) is missing from
// this listing -- confirm against upstream.
5732 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
// Only report the edge to the old default as deleted if no case still
// targets it.
5733 if (RemoveOrigDefaultBlock &&
5734 !is_contained(successors(BB), OrigDefaultBlock))
5735 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5736 DTU->applyUpdates(Updates);
5737 }
5738}
5739
5740/// Turn a switch into an integer range comparison and branch.
5741/// Switches with more than 2 destinations are ignored.
5742/// Switches with 1 destination are also ignored.
/// Requires one destination's cases to form a contiguous value range; the
/// switch is then replaced by `(cond - offset) ult numCases` feeding a
/// conditional branch, with branch weights and PHIs updated accordingly.
5743 bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5744 IRBuilder<> &Builder) {
5745 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5746
5747 bool HasDefault = !SI->defaultDestUnreachable();
5748
5749 auto *BB = SI->getParent();
5750
5751 // Partition the cases into two sets with different destinations.
5752 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5753 BasicBlock *DestB = nullptr;
// NOTE(review): two lines (5754-5755) are missing from this listing --
// presumably the declarations of the CasesA/CasesB vectors filled below.
5756
5757 for (auto Case : SI->cases()) {
5758 BasicBlock *Dest = Case.getCaseSuccessor();
5759 if (!DestA)
5760 DestA = Dest;
5761 if (Dest == DestA) {
5762 CasesA.push_back(Case.getCaseValue());
5763 continue;
5764 }
5765 if (!DestB)
5766 DestB = Dest;
5767 if (Dest == DestB) {
5768 CasesB.push_back(Case.getCaseValue());
5769 continue;
5770 }
5771 return false; // More than two destinations.
5772 }
5773 if (!DestB)
5774 return false; // All destinations are the same and the default is unreachable
5775
5776 assert(DestA && DestB &&
5777 "Single-destination switch should have been folded.");
5778 assert(DestA != DestB);
5779 assert(DestB != SI->getDefaultDest());
5780 assert(!CasesB.empty() && "There must be non-default cases.");
5781 assert(!CasesA.empty() || HasDefault);
5782
5783 // Figure out if one of the sets of cases form a contiguous range.
5784 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5785 BasicBlock *ContiguousDest = nullptr;
5786 BasicBlock *OtherDest = nullptr;
5787 if (!CasesA.empty() && casesAreContiguous(CasesA)) {
5788 ContiguousCases = &CasesA;
5789 ContiguousDest = DestA;
5790 OtherDest = DestB;
5791 } else if (casesAreContiguous(CasesB)) {
5792 ContiguousCases = &CasesB;
5793 ContiguousDest = DestB;
5794 OtherDest = DestA;
5795 } else
5796 return false;
5797
5798 // Start building the compare and branch.
5799
// back() is the smallest case value here; subtracting it rebases the
// contiguous range to start at zero.
5800 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5801 Constant *NumCases =
5802 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5803
5804 Value *Sub = SI->getCondition();
5805 if (!Offset->isNullValue())
5806 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5807
5808 Value *Cmp;
5809 // If NumCases overflowed, then all possible values jump to the successor.
5810 if (NumCases->isNullValue() && !ContiguousCases->empty())
5811 Cmp = ConstantInt::getTrue(SI->getContext());
5812 else
5813 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5814 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5815
5816 // Update weight for the newly-created conditional branch.
5817 if (hasBranchWeightMD(*SI)) {
// NOTE(review): the declaration of `Weights` (line 5818) is missing from
// this listing -- confirm against upstream.
5819 getBranchWeights(SI, Weights);
5820 if (Weights.size() == 1 + SI->getNumCases()) {
5821 uint64_t TrueWeight = 0;
5822 uint64_t FalseWeight = 0;
5823 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5824 if (SI->getSuccessor(I) == ContiguousDest)
5825 TrueWeight += Weights[I];
5826 else
5827 FalseWeight += Weights[I];
5828 }
// Scale both weights down together until each fits in 32 bits.
5829 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5830 TrueWeight /= 2;
5831 FalseWeight /= 2;
5832 }
5833 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5834 }
5835 }
5836
5837 // Prune obsolete incoming values off the successors' PHI nodes.
5838 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5839 unsigned PreviousEdges = ContiguousCases->size();
5840 if (ContiguousDest == SI->getDefaultDest())
5841 ++PreviousEdges;
// Keep exactly one incoming entry for the new conditional branch edge.
5842 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5843 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5844 }
5845 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5846 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5847 if (OtherDest == SI->getDefaultDest())
5848 ++PreviousEdges;
5849 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5850 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5851 }
5852
5853 // Clean up the default block - it may have phis or other instructions before
5854 // the unreachable terminator.
5855 if (!HasDefault)
// NOTE(review): the statement guarded by this `if` (line 5856) is missing
// from this listing -- confirm against upstream.
5857
5858 auto *UnreachableDefault = SI->getDefaultDest();
5859
5860 // Drop the switch.
5861 SI->eraseFromParent();
5862
5863 if (!HasDefault && DTU)
5864 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5865
5866 return true;
5867}
5868
5869/// Compute masked bits for the condition of a switch
5870/// and use it to remove dead cases.
/// Also, when known bits prove the cases cover every possible condition
/// value, the (live) default destination is rewritten to unreachable, or --
/// when exactly one value is missing -- the default is replaced by a case
/// for that value.
// NOTE(review): the function signature line (5871) is missing from this
// listing; from the body it takes SwitchInst *SI and a DomTreeUpdater *DTU
// in addition to AC and DL. Confirm against upstream.
5872 AssumptionCache *AC,
5873 const DataLayout &DL) {
5874 Value *Cond = SI->getCondition();
5875 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
5876
5877 // We can also eliminate cases by determining that their values are outside of
5878 // the limited range of the condition based on how many significant (non-sign)
5879 // bits are in the condition value.
5880 unsigned MaxSignificantBitsInCond =
// NOTE(review): the initializer expression (line 5881) is missing from this
// listing -- confirm against upstream.
5882
5883 // Gather dead cases.
// NOTE(review): the declaration of `DeadCases` (line 5884) is missing from
// this listing -- confirm against upstream.
5885 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5886 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5887 for (const auto &Case : SI->cases()) {
5888 auto *Successor = Case.getCaseSuccessor();
// Per-successor case counts are only needed to emit DT edge deletions.
5889 if (DTU) {
5890 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
5891 if (Inserted)
5892 UniqueSuccessors.push_back(Successor);
5893 ++It->second;
5894 }
5895 const APInt &CaseVal = Case.getCaseValue()->getValue();
// A case is dead if it conflicts with a known-zero/known-one bit or lies
// outside the significant-bit range of the condition.
5896 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5897 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5898 DeadCases.push_back(Case.getCaseValue());
5899 if (DTU)
5900 --NumPerSuccessorCases[Successor];
5901 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5902 << " is dead.\n");
5903 }
5904 }
5905
5906 // If we can prove that the cases must cover all possible values, the
5907 // default destination becomes dead and we can remove it. If we know some
5908 // of the bits in the value, we can use that to more precisely compute the
5909 // number of possible unique case values.
5910 bool HasDefault = !SI->defaultDestUnreachable();
5911 const unsigned NumUnknownBits =
5912 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5913 assert(NumUnknownBits <= Known.getBitWidth());
5914 if (HasDefault && DeadCases.empty() &&
5915 NumUnknownBits < 64 /* avoid overflow */) {
5916 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5917 if (SI->getNumCases() == AllNumCases) {
// NOTE(review): the statement here (line 5918, presumably the call that
// makes the default destination unreachable) is missing -- confirm.
5919 return true;
5920 }
5921 // When only one case value is missing, replace default with that case.
5922 // Eliminating the default branch will provide more opportunities for
5923 // optimization, such as lookup tables.
5924 if (SI->getNumCases() == AllNumCases - 1) {
5925 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5926 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
5927 if (CondTy->getIntegerBitWidth() > 64 ||
5928 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5929 return false;
5930
// XOR of all present case values against an implicit full set leaves
// exactly the missing value (all other values cancel pairwise).
5931 uint64_t MissingCaseVal = 0;
5932 for (const auto &Case : SI->cases())
5933 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5934 auto *MissingCase =
5935 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
// NOTE(review): the declaration of the profile wrapper `SIW` (line 5936)
// is missing from this listing -- confirm against upstream.
5937 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
5938 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5939 SIW.setSuccessorWeight(0, 0);
5940 return true;
5941 }
5942 }
5943
5944 if (DeadCases.empty())
5945 return false;
5946
// NOTE(review): the declaration of `SIW` (line 5947) is missing from this
// listing -- confirm against upstream.
5948 for (ConstantInt *DeadCase : DeadCases) {
5949 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5950 assert(CaseI != SI->case_default() &&
5951 "Case was not found. Probably mistake in DeadCases forming.");
5952 // Prune unused values from PHI nodes.
5953 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5954 SIW.removeCase(CaseI);
5955 }
5956
5957 if (DTU) {
5958 std::vector<DominatorTree::UpdateType> Updates;
// Only delete CFG edges to successors that lost their last case.
5959 for (auto *Successor : UniqueSuccessors)
5960 if (NumPerSuccessorCases[Successor] == 0)
5961 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5962 DTU->applyUpdates(Updates);
5963 }
5964
5965 return true;
5966}
5967
5968/// If BB would be eligible for simplification by
5969/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5970/// by an unconditional branch), look at the phi node for BB in the successor
5971/// block and see if the incoming value is equal to CaseValue. If so, return
5972/// the phi node, and set PhiIndex to BB's index in the phi node.
// NOTE(review): the first signature line (5973, declaring the function and
// its CaseValue parameter) is missing from this listing -- confirm.
5974 BasicBlock *BB, int *PhiIndex) {
5975 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
5976 return nullptr; // BB must be empty to be a candidate for simplification.
5977 if (!BB->getSinglePredecessor())
5978 return nullptr; // BB must be dominated by the switch.
5979
5980 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
5981 if (!Branch || !Branch->isUnconditional())
5982 return nullptr; // Terminator must be unconditional branch.
5983
5984 BasicBlock *Succ = Branch->getSuccessor(0);
5985
// Return the first PHI in the successor whose value for BB matches the
// case constant; only one match is needed for forwarding to apply.
5986 for (PHINode &PHI : Succ->phis()) {
5987 int Idx = PHI.getBasicBlockIndex(BB);
5988 assert(Idx >= 0 && "PHI has no entry for predecessor?");
5989
5990 Value *InValue = PHI.getIncomingValue(Idx);
5991 if (InValue != CaseValue)
5992 continue;
5993
5994 *PhiIndex = Idx;
5995 return &PHI;
5996 }
5997
5998 return nullptr;
5999}
6000
6001/// Try to forward the condition of a switch instruction to a phi node
6002/// dominated by the switch, if that would mean that some of the destination
6003/// blocks of the switch can be folded away. Return true if a change is made.
// NOTE(review): the function signature line (6004) is missing from this
// listing; from the body it takes the SwitchInst *SI. Confirm upstream.
6005 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6006
6007 ForwardingNodesMap ForwardingNodes;
6008 BasicBlock *SwitchBlock = SI->getParent();
6009 bool Changed = false;
6010 for (const auto &Case : SI->cases()) {
6011 ConstantInt *CaseValue = Case.getCaseValue();
6012 BasicBlock *CaseDest = Case.getCaseSuccessor();
6013
6014 // Replace phi operands in successor blocks that are using the constant case
6015 // value rather than the switch condition variable:
6016 // switchbb:
6017 // switch i32 %x, label %default [
6018 // i32 17, label %succ
6019 // ...
6020 // succ:
6021 // %r = phi i32 ... [ 17, %switchbb ] ...
6022 // -->
6023 // %r = phi i32 ... [ %x, %switchbb ] ...
6024
6025 for (PHINode &Phi : CaseDest->phis()) {
6026 // This only works if there is exactly 1 incoming edge from the switch to
6027 // a phi. If there is >1, that means multiple cases of the switch map to 1
6028 // value in the phi, and that phi value is not the switch condition. Thus,
6029 // this transform would not make sense (the phi would be invalid because
6030 // a phi can't have different incoming values from the same block).
6031 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6032 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6033 count(Phi.blocks(), SwitchBlock) == 1) {
6034 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6035 Changed = true;
6036 }
6037 }
6038
6039 // Collect phi nodes that are indirectly using this switch's case constants.
6040 int PhiIdx;
6041 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6042 ForwardingNodes[Phi].push_back(PhiIdx);
6043 }
6044
6045 for (auto &ForwardingNode : ForwardingNodes) {
6046 PHINode *Phi = ForwardingNode.first;
6047 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6048 // Check if it helps to fold PHI.
// Forward when it either collapses >=2 incoming values or the phi already
// carries the switch condition (so one more use unifies them).
6049 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6050 continue;
6051
6052 for (int Index : Indexes)
6053 Phi->setIncomingValue(Index, SI->getCondition());
6054 Changed = true;
6055 }
6056
6057 return Changed;
6058}
6059
6060/// Return true if the backend will be able to handle
6061/// initializing an array of constants like C.
6063 if (C->isThreadDependent())
6064 return false;
6065 if (C->isDLLImportDependent())
6066 return false;
6067
6068 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6069 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
6070 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
6071 return false;
6072
6073 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
6074 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6075 // materializing the array of constants.
6076 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6077 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6078 return false;
6079 }
6080
6082 return false;
6083
6084 return true;
6085}
6086
6087/// If V is a Constant, return it. Otherwise, try to look up
6088/// its constant value in ConstantPool, returning 0 if it's not there.
6089static Constant *
6092 if (Constant *C = dyn_cast<Constant>(V))
6093 return C;
6094 return ConstantPool.lookup(V);
6095}
6096
6097/// Try to fold instruction I into a constant. This works for
6098/// simple instructions such as binary operations where both operands are
6099/// constant or can be replaced by constants from the ConstantPool. Returns the
6100/// resulting constant on success, 0 otherwise.
6101static Constant *
6104 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
6105 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6106 if (!A)
6107 return nullptr;
6108 if (A->isAllOnesValue())
6109 return lookupConstant(Select->getTrueValue(), ConstantPool);
6110 if (A->isNullValue())
6111 return lookupConstant(Select->getFalseValue(), ConstantPool);
6112 return nullptr;
6113 }
6114
6116 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6117 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6118 COps.push_back(A);
6119 else
6120 return nullptr;
6121 }
6122
6123 return ConstantFoldInstOperands(I, COps, DL);
6124}
6125
6126/// Try to determine the resulting constant values in phi nodes
6127/// at the common destination basic block, *CommonDest, for one of the case
6128/// destionations CaseDest corresponding to value CaseVal (0 for the default
6129/// case), of a switch instruction SI.
6130static bool
6132 BasicBlock **CommonDest,
6133 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6134 const DataLayout &DL, const TargetTransformInfo &TTI) {
6135 // The block from which we enter the common destination.
6136 BasicBlock *Pred = SI->getParent();
6137
6138 // If CaseDest is empty except for some side-effect free instructions through
6139 // which we can constant-propagate the CaseVal, continue to its successor.
6141 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6142 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6143 if (I.isTerminator()) {
6144 // If the terminator is a simple branch, continue to the next block.
6145 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6146 return false;
6147 Pred = CaseDest;
6148 CaseDest = I.getSuccessor(0);
6149 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6150 // Instruction is side-effect free and constant.
6151
6152 // If the instruction has uses outside this block or a phi node slot for
6153 // the block, it is not safe to bypass the instruction since it would then
6154 // no longer dominate all its uses.
6155 for (auto &Use : I.uses()) {
6156 User *User = Use.getUser();
6157 if (Instruction *I = dyn_cast<Instruction>(User))
6158 if (I->getParent() == CaseDest)
6159 continue;
6160 if (PHINode *Phi = dyn_cast<PHINode>(User))
6161 if (Phi->getIncomingBlock(Use) == CaseDest)
6162 continue;
6163 return false;
6164 }
6165
6166 ConstantPool.insert(std::make_pair(&I, C));
6167 } else {
6168 break;
6169 }
6170 }
6171
6172 // If we did not have a CommonDest before, use the current one.
6173 if (!*CommonDest)
6174 *CommonDest = CaseDest;
6175 // If the destination isn't the common one, abort.
6176 if (CaseDest != *CommonDest)
6177 return false;
6178
6179 // Get the values for this case from phi nodes in the destination block.
6180 for (PHINode &PHI : (*CommonDest)->phis()) {
6181 int Idx = PHI.getBasicBlockIndex(Pred);
6182 if (Idx == -1)
6183 continue;
6184
6185 Constant *ConstVal =
6186 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6187 if (!ConstVal)
6188 return false;
6189
6190 // Be conservative about which kinds of constants we support.
6191 if (!validLookupTableConstant(ConstVal, TTI))
6192 return false;
6193
6194 Res.push_back(std::make_pair(&PHI, ConstVal));
6195 }
6196
6197 return Res.size() > 0;
6198}
6199
6200// Helper function used to add CaseVal to the list of cases that generate
6201// Result. Returns the updated number of cases that generate this result.
6202static size_t mapCaseToResult(ConstantInt *CaseVal,
6203 SwitchCaseResultVectorTy &UniqueResults,
6204 Constant *Result) {
6205 for (auto &I : UniqueResults) {
6206 if (I.first == Result) {
6207 I.second.push_back(CaseVal);
6208 return I.second.size();
6209 }
6210 }
6211 UniqueResults.push_back(
6212 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6213 return 1;
6214}
6215
6216// Helper function that initializes a map containing
6217// results for the PHI node of the common destination block for a switch
6218// instruction. Returns false if multiple PHI nodes have been found or if
6219// there is not a common destination block for the switch.
6221 BasicBlock *&CommonDest,
6222 SwitchCaseResultVectorTy &UniqueResults,
6223 Constant *&DefaultResult,
6224 const DataLayout &DL,
6225 const TargetTransformInfo &TTI,
6226 uintptr_t MaxUniqueResults) {
6227 for (const auto &I : SI->cases()) {
6228 ConstantInt *CaseVal = I.getCaseValue();
6229
6230 // Resulting value at phi nodes for this case value.
6231 SwitchCaseResultsTy Results;
6232 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6233 DL, TTI))
6234 return false;
6235
6236 // Only one value per case is permitted.
6237 if (Results.size() > 1)
6238 return false;
6239
6240 // Add the case->result mapping to UniqueResults.
6241 const size_t NumCasesForResult =
6242 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6243
6244 // Early out if there are too many cases for this result.
6245 if (NumCasesForResult > MaxSwitchCasesPerResult)
6246 return false;
6247
6248 // Early out if there are too many unique results.
6249 if (UniqueResults.size() > MaxUniqueResults)
6250 return false;
6251
6252 // Check the PHI consistency.
6253 if (!PHI)
6254 PHI = Results[0].first;
6255 else if (PHI != Results[0].first)
6256 return false;
6257 }
6258 // Find the default result value.
6260 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6261 DL, TTI);
6262 // If the default value is not found abort unless the default destination
6263 // is unreachable.
6264 DefaultResult =
6265 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6266
6267 return DefaultResult || SI->defaultDestUnreachable();
6268}
6269
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// \p ResultVector maps each distinct result constant to the case values that
// produce it; \p DefaultResult is the default destination's result constant
// (may be null). Returns the select (chain) computing the phi value, or
// nullptr if no transform applies.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      // With a live default, the inner select picks between the second case's
      // result and the default result.
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where all cases have the same result value
  // and there is a default result.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          return Builder.CreateSelect(Cmp, ResultVector[0].first,
                                      DefaultResult);
        }
      }

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where exactly two case values yield the same
    // result: compare against each and OR the comparisons together.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  return nullptr;
}
6369
6370// Helper function to cleanup a switch instruction that has been converted into
6371// a select, fixing up PHI nodes and basic blocks.
6373 Value *SelectValue,
6374 IRBuilder<> &Builder,
6375 DomTreeUpdater *DTU) {
6376 std::vector<DominatorTree::UpdateType> Updates;
6377
6378 BasicBlock *SelectBB = SI->getParent();
6379 BasicBlock *DestBB = PHI->getParent();
6380
6381 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6382 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6383 Builder.CreateBr(DestBB);
6384
6385 // Remove the switch.
6386
6387 PHI->removeIncomingValueIf(
6388 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6389 PHI->addIncoming(SelectValue, SelectBB);
6390
6391 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6392 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6393 BasicBlock *Succ = SI->getSuccessor(i);
6394
6395 if (Succ == DestBB)
6396 continue;
6397 Succ->removePredecessor(SelectBB);
6398 if (DTU && RemovedSuccessors.insert(Succ).second)
6399 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6400 }
6401 SI->eraseFromParent();
6402 if (DTU)
6403 DTU->applyUpdates(Updates);
6404}
6405
6406/// If a switch is only used to initialize one or more phi nodes in a common
6407/// successor block with only two different constant values, try to replace the
6408/// switch with a select. Returns true if the fold was made.
6409static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6410 DomTreeUpdater *DTU, const DataLayout &DL,
6411 const TargetTransformInfo &TTI) {
6412 Value *const Cond = SI->getCondition();
6413 PHINode *PHI = nullptr;
6414 BasicBlock *CommonDest = nullptr;
6415 Constant *DefaultResult;
6416 SwitchCaseResultVectorTy UniqueResults;
6417 // Collect all the cases that will deliver the same value from the switch.
6418 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6419 DL, TTI, /*MaxUniqueResults*/ 2))
6420 return false;
6421
6422 assert(PHI != nullptr && "PHI for value select not found");
6423 Builder.SetInsertPoint(SI);
6424 Value *SelectValue =
6425 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder, DL);
6426 if (!SelectValue)
6427 return false;
6428
6429 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6430 return true;
6431}
6432
namespace {

/// This class represents a lookup table that can be used to replace a switch.
/// The constructor analyzes the table contents and chooses the cheapest
/// representation (see Kind below); buildLookup then emits the matching IR.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *buildLookup(Value *Index, IRBuilder<> &Builder, const DataLayout &DL);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap packed into one wide integer, and the
  // element type used when unpacking entries.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value
  // (entry = LinearOffset + Index * LinearMultiplier).
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True when the linear map may wrap, in which case nsw must not be set on
  // the generated add/mul.
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6494
// Populate the table from (case value, result) pairs, then pick the cheapest
// representation: single value, linear map, packed bitmap, or global array.
SwitchLookupTable::SwitchLookupTable(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  Type *ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Table slots are indexed relative to the smallest case value (Offset).
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Track whether all (non-poison) results are the same value; poison
    // entries are compatible with any single value.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  if (isa<IntegerType>(ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        // Detect a direction change, which means signed wrap occurred.
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      // Check whether Multiplier * (TableSize - 1) can overflow signedly; if
      // so the generated arithmetic must not carry nsw.
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      ++NumLinearMaps;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    IntegerType *IT = cast<IntegerType>(ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Pack entries starting from the last so entry 0 ends up in the low bits.
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    ++NumBitMaps;
    return;
  }

  // Store the table in an array.
  ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);

  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
                             GlobalVariable::PrivateLinkage, Initializer,
                             "switch.table." + FuncName);
  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  // Set the alignment to that of the array's elements. We will only be
  // loading one value out of it.
  Array->setAlignment(DL.getPrefTypeAlign(ValueType));
  Kind = ArrayKind;
}
6632
// Emit the IR that retrieves the table entry at Index, according to the
// representation chosen by the constructor (Kind).
Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder,
                                      const DataLayout &DL) {
  switch (Kind) {
  case SingleValueKind:
    // Every entry is the same constant; no instructions are needed.
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt", /*HasNUW =*/true, /*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off the desired element by truncating to the element width.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case ArrayKind: {
    Type *IndexTy = DL.getIndexType(Array->getType());
    auto *ArrayTy = cast<ArrayType>(Array->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // Mark the zext non-negative when the largest valid table index fits in
      // OldBitWidth - 1 bits, i.e. an in-range index had a clear sign bit.
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Array, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown lookup table kind!");
}
6695
6696bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL,
6697 uint64_t TableSize,
6698 Type *ElementType) {
6699 auto *IT = dyn_cast<IntegerType>(ElementType);
6700 if (!IT)
6701 return false;
6702 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6703 // are <= 15, we could try to narrow the type.
6704
6705 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6706 if (TableSize >= UINT_MAX / IT->getBitWidth())
6707 return false;
6708 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6709}
6710
6712 const DataLayout &DL) {
6713 // Allow any legal type.
6714 if (TTI.isTypeLegal(Ty))
6715 return true;
6716
6717 auto *IT = dyn_cast<IntegerType>(Ty);
6718 if (!IT)
6719 return false;
6720
6721 // Also allow power of 2 integer types that have at least 8 bits and fit in
6722 // a register. These types are common in frontend languages and targets
6723 // usually support loads of these types.
6724 // TODO: We could relax this to any integer that fits in a register and rely
6725 // on ABI alignment and padding in the table to allow the load to be widened.
6726 // Or we could widen the constants and truncate the load.
6727 unsigned BitWidth = IT->getBitWidth();
6728 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6729 DL.fitsInLegalInteger(IT->getBitWidth());
6730}
6731
/// Return true if NumCases cases spread over a value range of CaseRange are
/// dense enough to justify a table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Guard the multiplications below against overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Equivalent to: NumCases / CaseRange >= MinDensity%.
  return NumCases * 100 >= CaseRange * MinDensity;
}
6743
6745 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6746 uint64_t Range = Diff + 1;
6747 if (Range < Diff)
6748 return false; // Overflow.
6749
6750 return isSwitchDense(Values.size(), Range);
6751}
6752
6753/// Determine whether a lookup table should be built for this switch, based on
6754/// the number of cases, size of the table, and the types of the results.
6755// TODO: We could support larger than legal types by limiting based on the
6756// number of loads required and/or table size. If the constants are small we
6757// could use smaller table entries and extend after the load.
6758static bool
6760 const TargetTransformInfo &TTI, const DataLayout &DL,
6761 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6762 if (SI->getNumCases() > TableSize)
6763 return false; // TableSize overflowed.
6764
6765 bool AllTablesFitInRegister = true;
6766 bool HasIllegalType = false;
6767 for (const auto &I : ResultTypes) {
6768 Type *Ty = I.second;
6769
6770 // Saturate this flag to true.
6771 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6772
6773 // Saturate this flag to false.
6774 AllTablesFitInRegister =
6775 AllTablesFitInRegister &&
6776 SwitchLookupTable::wouldFitInRegister(DL, TableSize, Ty);
6777
6778 // If both flags saturate, we're done. NOTE: This *only* works with
6779 // saturating flags, and all flags have to saturate first due to the
6780 // non-deterministic behavior of iterating over a dense map.
6781 if (HasIllegalType && !AllTablesFitInRegister)
6782 break;
6783 }
6784
6785 // If each table would fit in a register, we should build it anyway.
6786 if (AllTablesFitInRegister)
6787 return true;
6788
6789 // Don't build a table that doesn't fit in-register if it has illegal types.
6790 if (HasIllegalType)
6791 return false;
6792
6793 return isSwitchDense(SI->getNumCases(), TableSize);
6794}
6795
6797 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6798 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6799 const DataLayout &DL, const TargetTransformInfo &TTI) {
6800 if (MinCaseVal.isNullValue())
6801 return true;
6802 if (MinCaseVal.isNegative() ||
6803 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6804 !HasDefaultResults)
6805 return false;
6806 return all_of(ResultTypes, [&](const auto &KV) {
6807 return SwitchLookupTable::wouldFitInRegister(
6808 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6809 KV.second /* ResultType */);
6810 });
6811}
6812
/// Try to reuse the switch table index compare. Following pattern:
/// \code
///     if (idx < tablesize)
///        r = table[idx]; // table does not contain default_value
///     else
///        r = default_value;
///     if (r != default_value)
///        ...
/// \endcode
/// Is optimized to:
/// \code
///     cond = idx < tablesize;
///     if (cond)
///        r = table[idx];
///     else
///        r = default_value;
///     if (cond)
///        ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
///
/// \p PhiUser is a user of the lookup-result phi; only ICmpInst users are
/// handled. \p RangeCheckBranch is the conditional branch doing the table
/// bounds check, \p DefaultValue the phi's constant on the default path, and
/// \p Values the (case value, phi result) pairs for all cases.
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  // Only compares of the phi result are candidates for reuse.
  ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // The compared-against operand must be a constant so the compares below can
  // be constant-folded.
  Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
      CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result; every case must fold to the same constant, opposite of
  // the default's.
  for (auto ValuePair : Values) {
        CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
        RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
6894
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
                                DomTreeUpdater *DTU, const DataLayout &DL,
                                const TargetTransformInfo &TTI) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();
  // Only build lookup table when we have a target that supports it or the
  // attribute is not set.
      (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;


  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    // Track the (signed) min/max case values; they determine the table range.
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result from this case to the list for each phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(PHI);
      if (Inserted)
        PHIs.push_back(PHI);
      It->second.push_back(std::make_pair(CaseVal, Value));
    }
  }

  // Keep track of the result types.
  for (PHINode *PHI : PHIs) {
    ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
  }

  // Every phi has one result per case, so any result list's size gives the
  // number of covered case values.
  uint64_t NumResults = ResultLists[PHIs[0]].size();

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);

  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  if (UseSwitchConditionAsTableIndex)
    TableSize = MaxCaseVal->getLimitedValue() + 1;
  else
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  // Compute the table index value.
  Builder.SetInsertPoint(SI);
  Value *TableIndex;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
    TableIndex = SI->getCondition();
  } else {
    TableIndexOffset = MinCaseVal;
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }

    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  BranchInst *RangeCheckBranch = nullptr;

  // Grow the table to cover all possible index values to avoid the range check.
  // It will use the default result to fill in the table hole later, so make
  // sure it exists.
  if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
    ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
    // Growing the table shouldn't have any size impact by checking
    // wouldFitInRegister.
    // TODO: Consider growing the table also when it doesn't fit in a register
    // if no optsize is specified.
    const uint64_t UpperBound = CR.getUpper().getLimitedValue();
    if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
          return SwitchLookupTable::wouldFitInRegister(
              DL, UpperBound, KV.second /* ResultType */);
        })) {
      // There may be some case index larger than the UpperBound (unreachable
      // case), so make sure the table size does not get smaller.
      TableSize = std::max(UpperBound, TableSize);
      // The default branch is unreachable after we enlarge the lookup table.
      // Adjust DefaultIsReachable to reuse code path.
      DefaultIsReachable = false;
    }
  }

  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    // Default is reachable and the table doesn't cover the whole index range:
    // emit an unsigned bounds check before entering the lookup block.
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  // Build one lookup table per phi and wire its result into the phi.
  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();

    // Use any value to fill the lookup table holes.
    Constant *DV =
        AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
                            DL, FuncName);

    Value *Result = Table.buildLookup(TableIndex, Builder, DL);

    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == SI->getDefaultDest())
      continue;
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
  }
  SI->eraseFromParent();

  if (DTU)
    DTU->applyUpdates(Updates);

  ++NumLookupTables;
  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7205
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  // Only conditions up to 64 bits wide and legal for the target are handled.
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values.
  // We can treat the case values as signed or unsigned. We can optimize more
  // common cases such as a sequence crossing zero {-4,0,4,8} if we interpret
  // case values as signed.
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  // Rebase the condition to zero, then rotate right by Shift (emitted as
  // fshl with the complementary amount).
  Value *Sub =
      Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  // Rewrite each case value to its rebased, shifted counterpart.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
  }
  return true;
}
7292
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
                                      const DataLayout &DL,
                                      const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Condition->getType());

  // Only conditions up to 64 bits wide and legal for the target are handled.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // We perform this optimization only for switches with
  // unreachable default case.
  // This assumption will save us from checking if `Condition` is a power of
  // two.
  if (!SI->defaultDestUnreachable())
    return false;

  // Check that switch cases are powers of two.
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(CaseValue))
      Values.push_back(CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted.
  llvm::sort(Values);
  if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
                                        llvm::countr_zero(Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
                                   OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7366
/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
/// the same destination.
                                         DomTreeUpdater *DTU) {
  // The switch condition must be a single-use ucmp/scmp intrinsic result.
  auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
  if (!Cmp || !Cmp->hasOneUse())
    return false;

  bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
  if (!HasWeights)
    Weights.resize(4); // Avoid checking HasWeights everywhere.

  // Normalize to [us]cmp == Res ? Succ : OtherSucc.
  int64_t Res;
  BasicBlock *Succ, *OtherSucc;
  uint32_t SuccWeight = 0, OtherSuccWeight = 0;
  BasicBlock *Unreachable = nullptr;

  if (SI->getNumCases() == 2) {
    // Find which of 1, 0 or -1 is missing (handled by default dest).
    SmallSet<int64_t, 3> Missing;
    Missing.insert(1);
    Missing.insert(0);
    Missing.insert(-1);

    // Weight index 0 corresponds to the default destination.
    Succ = SI->getDefaultDest();
    SuccWeight = Weights[0];
    OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val)
        return false;
      if (!Missing.erase(*Val))
        return false;
      // Both explicit cases must share one successor.
      if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
        return false;
      OtherSucc = Case.getCaseSuccessor();
      OtherSuccWeight += Weights[Case.getSuccessorIndex()];
    }

    assert(Missing.size() == 1 && "Should have one case left");
    Res = *Missing.begin();
  } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
    // Normalize so that Succ is taken once and OtherSucc twice.
    Unreachable = SI->getDefaultDest();
    Succ = OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      BasicBlock *NewSucc = Case.getCaseSuccessor();
      uint32_t Weight = Weights[Case.getSuccessorIndex()];
      if (!OtherSucc || OtherSucc == NewSucc) {
        OtherSucc = NewSucc;
        OtherSuccWeight += Weight;
      } else if (!Succ) {
        Succ = NewSucc;
        SuccWeight = Weight;
      } else if (Succ == NewSucc) {
        // Seen twice: it is actually the "other" successor; swap roles.
        std::swap(Succ, OtherSucc);
        std::swap(SuccWeight, OtherSuccWeight);
      } else
        return false;
    }
    // All case values must be in {-1, 0, 1}; Res is the value routed to Succ.
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val || (Val != 1 && Val != 0 && Val != -1))
        return false;
      if (Case.getCaseSuccessor() == Succ) {
        Res = *Val;
        break;
      }
    }
  } else {
    return false;
  }

  // Determine predicate for the missing case.
  switch (Res) {
  case 1:
    Pred = ICmpInst::ICMP_UGT;
    break;
  case 0:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case -1:
    Pred = ICmpInst::ICMP_ULT;
    break;
  }
  if (Cmp->isSigned())
    Pred = ICmpInst::getSignedPredicate(Pred);

  MDNode *NewWeights = nullptr;
  if (HasWeights)
    NewWeights = MDBuilder(SI->getContext())
                     .createBranchWeights(SuccWeight, OtherSuccWeight);

  // Replace the switch with an icmp on the original operands plus a
  // conditional branch, then clean up the dead instructions and CFG edges.
  BasicBlock *BB = SI->getParent();
  Builder.SetInsertPoint(SI->getIterator());
  Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
  Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
                       SI->getMetadata(LLVMContext::MD_unpredictable));
  OtherSucc->removePredecessor(BB);
  if (Unreachable)
    Unreachable->removePredecessor(BB);
  SI->eraseFromParent();
  Cmp->eraseFromParent();
  if (DTU && Unreachable)
    DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
  return true;
}
7479
7480/// Checking whether two cases of SI are equal depends on the contents of the
7481/// BasicBlock and the incoming values of their successor PHINodes.
7482/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7483/// calling this function on each BasicBlock every time isEqual is called,
7484/// especially since the same BasicBlock may be passed as an argument multiple
7485/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7486/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7487/// of the incoming values.
7491};
7492
namespace llvm {
// DenseMapInfo specialization that lets SwitchSuccWrapper pointers key a
// DenseSet while hashing/comparing by the *contents* of the wrapped case
// block (its unconditional branch target plus the successor PHIs' incoming
// values), rather than by pointer identity.
template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
    return static_cast<SwitchSuccWrapper *>(
  }
    return static_cast<SwitchSuccWrapper *>(
  }
  static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
    BasicBlock *Succ = SSW->Dest;
    BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
    assert(BI->isUnconditional() &&
           "Only supporting unconditional branches for now");
    assert(BI->getNumSuccessors() == 1 &&
           "Expected unconditional branches to have one successor");
    assert(Succ->size() == 1 && "Expected just a single branch in the BB");

    // Since we assume the BB is just a single BranchInst with a single
    // successor, we hash as the BB and the incoming Values of its successor
    // PHIs. Initially, we tried to just use the successor BB as the hash, but
    // including the incoming PHI values leads to better performance.
    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
    // time and passing it in SwitchSuccWrapper, but this slowed down the
    // average compile time without having any impact on the worst case compile
    // time.
    BasicBlock *BB = BI->getSuccessor(0);
    SmallVector<Value *> PhiValsForBB;
    for (PHINode &Phi : BB->phis())
      PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);

    return hash_combine(BB, hash_combine_range(PhiValsForBB));
  }
  static bool isEqual(const SwitchSuccWrapper *LHS,
                      const SwitchSuccWrapper *RHS) {
    // Sentinel (empty/tombstone) keys only compare equal to themselves.
    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
      return LHS == RHS;

    BasicBlock *A = LHS->Dest;
    BasicBlock *B = RHS->Dest;

    // FIXME: we checked that the size of A and B are both 1 in
    // simplifyDuplicateSwitchArms to make the Case list smaller to
    // improve performance. If we decide to support BasicBlocks with more
    // than just a single instruction, we need to check that A.size() ==
    // B.size() here, and we need to check more than just the BranchInsts
    // for equality.

    BranchInst *ABI = cast<BranchInst>(A->getTerminator());
    BranchInst *BBI = cast<BranchInst>(B->getTerminator());
    assert(ABI->isUnconditional() && BBI->isUnconditional() &&
           "Only supporting unconditional branches for now");
    if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
      return false;

    // Need to check that PHIs in successor have matching values
    BasicBlock *Succ = ABI->getSuccessor(0);
    for (PHINode &Phi : Succ->phis()) {
      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
      if (PredIVs[A] != PredIVs[B])
        return false;
    }

    return true;
  }
};
} // namespace llvm
7563
bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
                                                 DomTreeUpdater *DTU) {
  // Build Cases. Skip BBs that are not candidates for simplification. Mark
  // PHINodes which need to be processed into PhiPredIVs. We decide to process
  // an entire PHI at once after the loop, opposed to calling
  // getIncomingValueForBlock inside this loop, since each call to
  // getIncomingValueForBlock is O(|Preds|).
  Cases.reserve(SI->getNumSuccessors());

  for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
    BasicBlock *BB = SI->getSuccessor(I);

    // FIXME: Support more than just a single BranchInst. One way we could do
    // this is by taking a hashing approach of all insts in BB.
    if (BB->size() != 1)
      continue;

    // FIXME: Relax that the terminator is a BranchInst by checking for equality
    // on other kinds of terminators. We decide to only support unconditional
    // branches for now for compile time reasons.
    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || BI->isConditional())
      continue;

    // Already-seen candidate block reached via another case: just record the
    // extra successor index so it gets rewritten alongside the first one.
    if (!Seen.insert(BB).second) {
      auto It = BBToSuccessorIndexes.find(BB);
      if (It != BBToSuccessorIndexes.end())
        It->second.emplace_back(I);
      continue;
    }

    // FIXME: This case needs some extra care because the terminators other than
    // SI need to be updated. For now, consider only backedges to the SI.
    if (BB->getUniquePredecessor() != SI->getParent())
      continue;

    // Keep track of which PHIs we need as keys in PhiPredIVs below.
    for (BasicBlock *Succ : BI->successors())

    // Add the successor only if not previously visited.
    Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
    BBToSuccessorIndexes[BB].emplace_back(I);
  }

  // Precompute a data structure to improve performance of isEqual for
  // SwitchSuccWrapper.
  PhiPredIVs.reserve(Phis.size());
  for (PHINode *Phi : Phis) {
    auto &IVs =
        PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
    for (auto &IV : Phi->incoming_values())
      IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
  }

  // Build a set such that if the SwitchSuccWrapper exists in the set and
  // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
  // which is not in the set should be replaced with the one in the set. If the
  // SwitchSuccWrapper is not in the set, then it should be added to the set so
  // other SwitchSuccWrappers can check against it in the same manner. We use
  // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
  // around information to isEquality, getHashValue, and when doing the
  // replacement with better performance.
  ReplaceWith.reserve(Cases.size());

  Updates.reserve(ReplaceWith.size());
  bool MadeChange = false;
  for (auto &SSW : Cases) {
    // SSW is a candidate for simplification. If we find a duplicate BB,
    // replace it.
    const auto [It, Inserted] = ReplaceWith.insert(&SSW);
    if (!Inserted) {
      // We know that SI's parent BB no longer dominates the old case successor
      // since we are making it dead.
      Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
      // Retarget every case index that pointed at the duplicate block to the
      // canonical (first-seen) equivalent block.
      const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
      for (unsigned Idx : Successors)
        SI->setSuccessor(Idx, (*It)->Dest);
      MadeChange = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  return MadeChange;
}
7658
7659bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7660 BasicBlock *BB = SI->getParent();
7661
7662 if (isValueEqualityComparison(SI)) {
7663 // If we only have one predecessor, and if it is a branch on this value,
7664 // see if that predecessor totally determines the outcome of this switch.
7665 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7666 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7667 return requestResimplify();
7668
7669 Value *Cond = SI->getCondition();
7670 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7671 if (simplifySwitchOnSelect(SI, Select))
7672 return requestResimplify();
7673
7674 // If the block only contains the switch, see if we can fold the block
7675 // away into any preds.
7676 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7677 if (foldValueComparisonIntoPredecessors(SI, Builder))
7678 return requestResimplify();
7679 }
7680
7681 // Try to transform the switch into an icmp and a branch.
7682 // The conversion from switch to comparison may lose information on
7683 // impossible switch values, so disable it early in the pipeline.
7684 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7685 return requestResimplify();
7686
7687 // Remove unreachable cases.
7688 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7689 return requestResimplify();
7690
7691 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7692 return requestResimplify();
7693
7694 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7695 return requestResimplify();
7696
7697 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7698 return requestResimplify();
7699
7700 // The conversion from switch to lookup tables results in difficult-to-analyze
7701 // code and makes pruning branches much harder. This is a problem if the
7702 // switch expression itself can still be restricted as a result of inlining or
7703 // CVP. Therefore, only apply this transformation during late stages of the
7704 // optimisation pipeline.
7705 if (Options.ConvertSwitchToLookupTable &&
7706 switchToLookupTable(SI, Builder, DTU, DL, TTI))
7707 return requestResimplify();
7708
7709 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7710 return requestResimplify();
7711
7712 if (reduceSwitchRange(SI, Builder, DL, TTI))
7713 return requestResimplify();
7714
7715 if (HoistCommon &&
7716 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7717 return requestResimplify();
7718
7719 if (simplifyDuplicateSwitchArms(SI, DTU))
7720 return requestResimplify();
7721
7722 return false;
7723}
7724
7725bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7726 BasicBlock *BB = IBI->getParent();
7727 bool Changed = false;
7728
7729 // Eliminate redundant destinations.
7732 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7733 BasicBlock *Dest = IBI->getDestination(i);
7734 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7735 if (!Dest->hasAddressTaken())
7736 RemovedSuccs.insert(Dest);
7737 Dest->removePredecessor(BB);
7738 IBI->removeDestination(i);
7739 --i;
7740 --e;
7741 Changed = true;
7742 }
7743 }
7744
7745 if (DTU) {
7746 std::vector<DominatorTree::UpdateType> Updates;
7747 Updates.reserve(RemovedSuccs.size());
7748 for (auto *RemovedSucc : RemovedSuccs)
7749 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7750 DTU->applyUpdates(Updates);
7751 }
7752
7753 if (IBI->getNumDestinations() == 0) {
7754 // If the indirectbr has no successors, change it to unreachable.
7755 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7757 return true;
7758 }
7759
7760 if (IBI->getNumDestinations() == 1) {
7761 // If the indirectbr has one successor, change it to a direct branch.
7764 return true;
7765 }
7766
7767 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7768 if (simplifyIndirectBrOnSelect(IBI, SI))
7769 return requestResimplify();
7770 }
7771 return Changed;
7772}
7773
7774/// Given an block with only a single landing pad and a unconditional branch
7775/// try to find another basic block which this one can be merged with. This
7776/// handles cases where we have multiple invokes with unique landing pads, but
7777/// a shared handler.
7778///
7779/// We specifically choose to not worry about merging non-empty blocks
7780/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7781/// practice, the optimizer produces empty landing pad blocks quite frequently
7782/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7783/// sinking in this file)
7784///
7785/// This is primarily a code size optimization. We need to avoid performing
7786/// any transform which might inhibit optimization (such as our ability to
7787/// specialize a particular handler via tail commoning). We do this by not
7788/// merging any blocks which require us to introduce a phi. Since the same
7789/// values are flowing through both blocks, we don't lose any ability to
7790/// specialize. If anything, we make such specialization more likely.
7791///
7792/// TODO - This transformation could remove entries from a phi in the target
7793/// block when the inputs in the phi are the same for the two blocks being
7794/// merged. In some cases, this could result in removal of the PHI entirely.
7796 BasicBlock *BB, DomTreeUpdater *DTU) {
7797 auto Succ = BB->getUniqueSuccessor();
7798 assert(Succ);
7799 // If there's a phi in the successor block, we'd likely have to introduce
7800 // a phi into the merged landing pad block.
7801 if (isa<PHINode>(*Succ->begin()))
7802 return false;
7803
7804 for (BasicBlock *OtherPred : predecessors(Succ)) {
7805 if (BB == OtherPred)
7806 continue;
7807 BasicBlock::iterator I = OtherPred->begin();
7808 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7809 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7810 continue;
7811 ++I;
7812 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7813 if (!BI2 || !BI2->isIdenticalTo(BI))
7814 continue;
7815
7816 std::vector<DominatorTree::UpdateType> Updates;
7817
7818 // We've found an identical block. Update our predecessors to take that
7819 // path instead and make ourselves dead.
7821 for (BasicBlock *Pred : UniquePreds) {
7822 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7823 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7824 "unexpected successor");
7825 II->setUnwindDest(OtherPred);
7826 if (DTU) {
7827 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7828 Updates.push_back({DominatorTree::Delete, Pred, BB});
7829 }
7830 }
7831
7833 for (BasicBlock *Succ : UniqueSuccs) {
7834 Succ->removePredecessor(BB);
7835 if (DTU)
7836 Updates.push_back({DominatorTree::Delete, BB, Succ});
7837 }
7838
7839 IRBuilder<> Builder(BI);
7840 Builder.CreateUnreachable();
7841 BI->eraseFromParent();
7842 if (DTU)
7843 DTU->applyUpdates(Updates);
7844 return true;
7845 }
7846 return false;
7847}
7848
7849bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7850 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7851 : simplifyCondBranch(Branch, Builder);
7852}
7853
7854bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7855 IRBuilder<> &Builder) {
7856 BasicBlock *BB = BI->getParent();
7857 BasicBlock *Succ = BI->getSuccessor(0);
7858
7859 // If the Terminator is the only non-phi instruction, simplify the block.
7860 // If LoopHeader is provided, check if the block or its successor is a loop
7861 // header. (This is for early invocations before loop simplify and
7862 // vectorization to keep canonical loop forms for nested loops. These blocks
7863 // can be eliminated when the pass is invoked later in the back-end.)
7864 // Note that if BB has only one predecessor then we do not introduce new
7865 // backedge, so we can eliminate BB.
7866 bool NeedCanonicalLoop =
7867 Options.NeedCanonicalLoop &&
7868 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7869 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7871 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7872 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7873 return true;
7874
7875 // If the only instruction in the block is a seteq/setne comparison against a
7876 // constant, try to simplify the block.
7877 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7878 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7879 ++I;
7880 if (I->isTerminator() &&
7881 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7882 return true;
7883 }
7884
7885 // See if we can merge an empty landing pad block with another which is
7886 // equivalent.
7887 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7888 ++I;
7889 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7890 return true;
7891 }
7892
7893 // If this basic block is ONLY a compare and a branch, and if a predecessor
7894 // branches to us and our successor, fold the comparison into the
7895 // predecessor and use logical operations to update the incoming value
7896 // for PHI nodes in common successor.
7897 if (Options.SpeculateBlocks &&
7898 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7899 Options.BonusInstThreshold))
7900 return requestResimplify();
7901 return false;
7902}
7903
7905 BasicBlock *PredPred = nullptr;
7906 for (auto *P : predecessors(BB)) {
7907 BasicBlock *PPred = P->getSinglePredecessor();
7908 if (!PPred || (PredPred && PredPred != PPred))
7909 return nullptr;
7910 PredPred = PPred;
7911 }
7912 return PredPred;
7913}
7914
7915/// Fold the following pattern:
7916/// bb0:
7917/// br i1 %cond1, label %bb1, label %bb2
7918/// bb1:
7919/// br i1 %cond2, label %bb3, label %bb4
7920/// bb2:
7921/// br i1 %cond2, label %bb4, label %bb3
7922/// bb3:
7923/// ...
7924/// bb4:
7925/// ...
7926/// into
7927/// bb0:
7928/// %cond = xor i1 %cond1, %cond2
7929/// br i1 %cond, label %bb4, label %bb3
7930/// bb3:
7931/// ...
7932/// bb4:
7933/// ...
7934/// NOTE: %cond2 always dominates the terminator of bb0.
7936 BasicBlock *BB = BI->getParent();
7937 BasicBlock *BB1 = BI->getSuccessor(0);
7938 BasicBlock *BB2 = BI->getSuccessor(1);
7939 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7940 if (Succ == BB)
7941 return false;
7942 if (&Succ->front() != Succ->getTerminator())
7943 return false;
7944 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7945 if (!SuccBI || !SuccBI->isConditional())
7946 return false;
7947 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7948 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7949 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7950 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7951 };
7952 BranchInst *BB1BI, *BB2BI;
7953 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7954 return false;
7955
7956 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7957 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7958 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7959 return false;
7960
7961 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7962 BasicBlock *BB4 = BB1BI->getSuccessor(1);
7963 IRBuilder<> Builder(BI);
7964 BI->setCondition(
7965 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
7966 BB1->removePredecessor(BB);
7967 BI->setSuccessor(0, BB4);
7968 BB2->removePredecessor(BB);
7969 BI->setSuccessor(1, BB3);
7970 if (DTU) {
7972 Updates.push_back({DominatorTree::Delete, BB, BB1});
7973 Updates.push_back({DominatorTree::Insert, BB, BB4});
7974 Updates.push_back({DominatorTree::Delete, BB, BB2});
7975 Updates.push_back({DominatorTree::Insert, BB, BB3});
7976
7977 DTU->applyUpdates(Updates);
7978 }
7979 bool HasWeight = false;
7980 uint64_t BBTWeight, BBFWeight;
7981 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
7982 HasWeight = true;
7983 else
7984 BBTWeight = BBFWeight = 1;
7985 uint64_t BB1TWeight, BB1FWeight;
7986 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
7987 HasWeight = true;
7988 else
7989 BB1TWeight = BB1FWeight = 1;
7990 uint64_t BB2TWeight, BB2FWeight;
7991 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
7992 HasWeight = true;
7993 else
7994 BB2TWeight = BB2FWeight = 1;
7995 if (HasWeight) {
7996 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
7997 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
7998 fitWeights(Weights);
7999 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8000 }
8001 return true;
8002}
8003
8004bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8005 assert(
8006 !isa<ConstantInt>(BI->getCondition()) &&
8007 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8008 "Tautological conditional branch should have been eliminated already.");
8009
8010 BasicBlock *BB = BI->getParent();
8011 if (!Options.SimplifyCondBranch ||
8012 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8013 return false;
8014
8015 // Conditional branch
8016 if (isValueEqualityComparison(BI)) {
8017 // If we only have one predecessor, and if it is a branch on this value,
8018 // see if that predecessor totally determines the outcome of this
8019 // switch.
8020 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8021 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8022 return requestResimplify();
8023
8024 // This block must be empty, except for the setcond inst, if it exists.
8025 // Ignore dbg and pseudo intrinsics.
8026 auto I = BB->instructionsWithoutDebug(true).begin();
8027 if (&*I == BI) {
8028 if (foldValueComparisonIntoPredecessors(BI, Builder))
8029 return requestResimplify();
8030 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8031 ++I;
8032 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8033 return requestResimplify();
8034 }
8035 }
8036
8037 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8038 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8039 return true;
8040
8041 // If this basic block has dominating predecessor blocks and the dominating
8042 // blocks' conditions imply BI's condition, we know the direction of BI.
8043 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8044 if (Imp) {
8045 // Turn this into a branch on constant.
8046 auto *OldCond = BI->getCondition();
8047 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8048 : ConstantInt::getFalse(BB->getContext());
8049 BI->setCondition(TorF);
8051 return requestResimplify();
8052 }
8053
8054 // If this basic block is ONLY a compare and a branch, and if a predecessor
8055 // branches to us and one of our successors, fold the comparison into the
8056 // predecessor and use logical operations to pick the right destination.
8057 if (Options.SpeculateBlocks &&
8058 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8059 Options.BonusInstThreshold))
8060 return requestResimplify();
8061
8062 // We have a conditional branch to two blocks that are only reachable
8063 // from BI. We know that the condbr dominates the two blocks, so see if
8064 // there is any identical code in the "then" and "else" blocks. If so, we
8065 // can hoist it up to the branching block.
8066 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8067 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8068 if (HoistCommon &&
8069 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8070 return requestResimplify();
8071
8072 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8073 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8074 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8075 auto CanSpeculateConditionalLoadsStores = [&]() {
8076 for (auto *Succ : successors(BB)) {
8077 for (Instruction &I : *Succ) {
8078 if (I.isTerminator()) {
8079 if (I.getNumSuccessors() > 1)
8080 return false;
8081 continue;
8082 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8083 SpeculatedConditionalLoadsStores.size() ==
8085 return false;
8086 }
8087 SpeculatedConditionalLoadsStores.push_back(&I);
8088 }
8089 }
8090 return !SpeculatedConditionalLoadsStores.empty();
8091 };
8092
8093 if (CanSpeculateConditionalLoadsStores()) {
8094 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8095 std::nullopt, nullptr);
8096 return requestResimplify();
8097 }
8098 }
8099 } else {
8100 // If Successor #1 has multiple preds, we may be able to conditionally
8101 // execute Successor #0 if it branches to Successor #1.
8102 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8103 if (Succ0TI->getNumSuccessors() == 1 &&
8104 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8105 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8106 return requestResimplify();
8107 }
8108 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8109 // If Successor #0 has multiple preds, we may be able to conditionally
8110 // execute Successor #1 if it branches to Successor #0.
8111 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8112 if (Succ1TI->getNumSuccessors() == 1 &&
8113 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8114 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8115 return requestResimplify();
8116 }
8117
8118 // If this is a branch on something for which we know the constant value in
8119 // predecessors (e.g. a phi node in the current block), thread control
8120 // through this block.
8121 if (foldCondBranchOnValueKnownInPredecessor(BI))
8122 return requestResimplify();
8123
8124 // Scan predecessor blocks for conditional branches.
8125 for (BasicBlock *Pred : predecessors(BB))
8126 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8127 if (PBI != BI && PBI->isConditional())
8128 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8129 return requestResimplify();
8130
8131 // Look for diamond patterns.
8132 if (MergeCondStores)
8134 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8135 if (PBI != BI && PBI->isConditional())
8136 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8137 return requestResimplify();
8138
8139 // Look for nested conditional branches.
8140 if (mergeNestedCondBranch(BI, DTU))
8141 return requestResimplify();
8142
8143 return false;
8144}
8145
8146/// Check if passing a value to an instruction will cause undefined behavior.
8147static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8148 assert(V->getType() == I->getType() && "Mismatched types");
8149 Constant *C = dyn_cast<Constant>(V);
8150 if (!C)
8151 return false;
8152
8153 if (I->use_empty())
8154 return false;
8155
8156 if (C->isNullValue() || isa<UndefValue>(C)) {
8157 // Only look at the first use we can handle, avoid hurting compile time with
8158 // long uselists
8159 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8160 auto *Use = cast<Instruction>(U.getUser());
8161 // Change this list when we want to add new instructions.
8162 switch (Use->getOpcode()) {
8163 default:
8164 return false;
8165 case Instruction::GetElementPtr:
8166 case Instruction::Ret:
8167 case Instruction::BitCast:
8168 case Instruction::Load:
8169 case Instruction::Store:
8170 case Instruction::Call:
8171 case Instruction::CallBr:
8172 case Instruction::Invoke:
8173 case Instruction::UDiv:
8174 case Instruction::URem:
8175 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8176 // implemented to avoid code complexity as it is unclear how useful such
8177 // logic is.
8178 case Instruction::SDiv:
8179 case Instruction::SRem:
8180 return true;
8181 }
8182 });
8183 if (FindUse == I->use_end())
8184 return false;
8185 auto &Use = *FindUse;
8186 auto *User = cast<Instruction>(Use.getUser());
8187 // Bail out if User is not in the same BB as I or User == I or User comes
8188 // before I in the block. The latter two can be the case if User is a
8189 // PHI node.
8190 if (User->getParent() != I->getParent() || User == I ||
8191 User->comesBefore(I))
8192 return false;
8193
8194 // Now make sure that there are no instructions in between that can alter
8195 // control flow (eg. calls)
8196 auto InstrRange =
8197 make_range(std::next(I->getIterator()), User->getIterator());
8198 if (any_of(InstrRange, [](Instruction &I) {
8200 }))
8201 return false;
8202
8203 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8204 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User))
8205 if (GEP->getPointerOperand() == I) {
8206 // The type of GEP may differ from the type of base pointer.
8207 // Bail out on vector GEPs, as they are not handled by other checks.
8208 if (GEP->getType()->isVectorTy())
8209 return false;
8210 // The current base address is null, there are four cases to consider:
8211 // getelementptr (TY, null, 0) -> null
8212 // getelementptr (TY, null, not zero) -> may be modified
8213 // getelementptr inbounds (TY, null, 0) -> null
8214 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8215 // undefined?
8216 if (!GEP->hasAllZeroIndices() &&
8217 (!GEP->isInBounds() ||
8218 NullPointerIsDefined(GEP->getFunction(),
8219 GEP->getPointerAddressSpace())))
8220 PtrValueMayBeModified = true;
8221 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8222 }
8223
8224 // Look through return.
8225 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8226 bool HasNoUndefAttr =
8227 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8228 // Return undefined to a noundef return value is undefined.
8229 if (isa<UndefValue>(C) && HasNoUndefAttr)
8230 return true;
8231 // Return null to a nonnull+noundef return value is undefined.
8232 if (C->isNullValue() && HasNoUndefAttr &&
8233 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8234 return !PtrValueMayBeModified;
8235 }
8236 }
8237
8238 // Load from null is undefined.
8239 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8240 if (!LI->isVolatile())
8241 return !NullPointerIsDefined(LI->getFunction(),
8242 LI->getPointerAddressSpace());
8243
8244 // Store to null is undefined.
8245 if (StoreInst *SI = dyn_cast<StoreInst>(User))
8246 if (!SI->isVolatile())
8247 return (!NullPointerIsDefined(SI->getFunction(),
8248 SI->getPointerAddressSpace())) &&
8249 SI->getPointerOperand() == I;
8250
8251 // llvm.assume(false/undef) always triggers immediate UB.
8252 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8253 // Ignore assume operand bundles.
8254 if (I == Assume->getArgOperand(0))
8255 return true;
8256 }
8257
8258 if (auto *CB = dyn_cast<CallBase>(User)) {
8259 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8260 return false;
8261 // A call to null is undefined.
8262 if (CB->getCalledOperand() == I)
8263 return true;
8264
8265 if (CB->isArgOperand(&Use)) {
8266 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8267 // Passing null to a nonnnull+noundef argument is undefined.
8268 if (isa<ConstantPointerNull>(C) &&
8269 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8270 return !PtrValueMayBeModified;
8271 // Passing undef to a noundef argument is undefined.
8272 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8273 return true;
8274 }
8275 }
8276 // Div/Rem by zero is immediate UB
8277 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8278 return true;
8279 }
8280 return false;
8281}
8282
8283/// If BB has an incoming value that will always trigger undefined behavior
8284/// (eg. null pointer dereference), remove the branch leading here.
8286 DomTreeUpdater *DTU,
8287 AssumptionCache *AC) {
8288 for (PHINode &PHI : BB->phis())
8289 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8290 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8291 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8292 Instruction *T = Predecessor->getTerminator();
8293 IRBuilder<> Builder(T);
8294 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8295 BB->removePredecessor(Predecessor);
8296 // Turn unconditional branches into unreachables and remove the dead
8297 // destination from conditional branches.
8298 if (BI->isUnconditional())
8299 Builder.CreateUnreachable();
8300 else {
8301 // Preserve guarding condition in assume, because it might not be
8302 // inferrable from any dominating condition.
8303 Value *Cond = BI->getCondition();
8304 CallInst *Assumption;
8305 if (BI->getSuccessor(0) == BB)
8306 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8307 else
8308 Assumption = Builder.CreateAssumption(Cond);
8309 if (AC)
8310 AC->registerAssumption(cast<AssumeInst>(Assumption));
8311 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8312 : BI->getSuccessor(0));
8313 }
8314 BI->eraseFromParent();
8315 if (DTU)
8316 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8317 return true;
8318 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8319 // Redirect all branches leading to UB into
8320 // a newly created unreachable block.
8321 BasicBlock *Unreachable = BasicBlock::Create(
8322 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8323 Builder.SetInsertPoint(Unreachable);
8324 // The new block contains only one instruction: Unreachable
8325 Builder.CreateUnreachable();
8326 for (const auto &Case : SI->cases())
8327 if (Case.getCaseSuccessor() == BB) {
8328 BB->removePredecessor(Predecessor);
8329 Case.setSuccessor(Unreachable);
8330 }
8331 if (SI->getDefaultDest() == BB) {
8332 BB->removePredecessor(Predecessor);
8333 SI->setDefaultDest(Unreachable);
8334 }
8335
8336 if (DTU)
8337 DTU->applyUpdates(
8338 { { DominatorTree::Insert, Predecessor, Unreachable },
8339 { DominatorTree::Delete, Predecessor, BB } });
8340 return true;
8341 }
8342 }
8343
8344 return false;
8345}
8346
8347bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8348 bool Changed = false;
8349
8350 assert(BB && BB->getParent() && "Block not embedded in function!");
8351 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8352
8353 // Remove basic blocks that have no predecessors (except the entry block)...
8354 // or that just have themself as a predecessor. These are unreachable.
8355 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8356 BB->getSinglePredecessor() == BB) {
8357 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8358 DeleteDeadBlock(BB, DTU);
8359 return true;
8360 }
8361
8362 // Check to see if we can constant propagate this terminator instruction
8363 // away...
8364 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8365 /*TLI=*/nullptr, DTU);
8366
8367 // Check for and eliminate duplicate PHI nodes in this block.
8368 Changed |= EliminateDuplicatePHINodes(BB);
8369
8370 // Check for and remove branches that will always cause undefined behavior.
8372 return requestResimplify();
8373
8374 // Merge basic blocks into their predecessor if there is only one distinct
8375 // pred, and if there is only one distinct successor of the predecessor, and
8376 // if there are no PHI nodes.
8377 if (MergeBlockIntoPredecessor(BB, DTU))
8378 return true;
8379
8380 if (SinkCommon && Options.SinkCommonInsts)
8381 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8382 mergeCompatibleInvokes(BB, DTU)) {
8383 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8384 // so we may now how duplicate PHI's.
8385 // Let's rerun EliminateDuplicatePHINodes() first,
8386 // before foldTwoEntryPHINode() potentially converts them into select's,
8387 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8388 return true;
8389 }
8390
8391 IRBuilder<> Builder(BB);
8392
8393 if (Options.SpeculateBlocks &&
8394 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8395 // If there is a trivial two-entry PHI node in this basic block, and we can
8396 // eliminate it, do so now.
8397 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8398 if (PN->getNumIncomingValues() == 2)
8399 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8400 Options.SpeculateUnpredictables))
8401 return true;
8402 }
8403
8405 Builder.SetInsertPoint(Terminator);
8406 switch (Terminator->getOpcode()) {
8407 case Instruction::Br:
8408 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8409 break;
8410 case Instruction::Resume:
8411 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8412 break;
8413 case Instruction::CleanupRet:
8414 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8415 break;
8416 case Instruction::Switch:
8417 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8418 break;
8419 case Instruction::Unreachable:
8420 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8421 break;
8422 case Instruction::IndirectBr:
8423 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8424 break;
8425 }
8426
8427 return Changed;
8428}
8429
8430bool SimplifyCFGOpt::run(BasicBlock *BB) {
8431 bool Changed = false;
8432
8433 // Repeated simplify BB as long as resimplification is requested.
8434 do {
8435 Resimplify = false;
8436
8437 // Perform one round of simplifcation. Resimplify flag will be set if
8438 // another iteration is requested.
8439 Changed |= simplifyOnce(BB);
8440 } while (Resimplify);
8441
8442 return Changed;
8443}
8444
8447 ArrayRef<WeakVH> LoopHeaders) {
8448 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8449 Options)
8450 .run(BB);
8451}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL)
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:83
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition: APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:156
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:386
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:206
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:337
const Instruction & front() const
Definition: BasicBlock.h:482
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:354
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:459
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:475
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:437
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:287
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:445
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:467
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:699
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:252
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:131
size_t size() const
Definition: BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:661
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:463
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:248
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:494
The address of a basic block.
Definition: Constants.h:899
BasicBlock * getBasicBlock() const
Definition: Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
Definition: InstrTypes.h:1586
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:767
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1314
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2635
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isNegative() const
Definition: Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:875
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154
This class represents a range of values.
Definition: ConstantRange.h:47
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:403
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition: DebugLoc.h:256
static DebugLoc getTemporary()
Definition: DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition: DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition: DebugLoc.cpp:170
static DebugLoc getDropped()
Definition: DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
unsigned size() const
Definition: DenseMap.h:120
iterator end()
Definition: DenseMap.h:87
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:221
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition: DenseMap.h:124
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
iterator begin()
Definition: Function.h:851
size_t size() const
Definition: Function.h:856
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2345
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2100
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1339
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1010
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:463
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:488
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1005
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1931
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:262
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1805
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1220
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2329
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1420
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2204
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1847
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1551
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1860
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:508
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1708
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1191
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1725
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2277
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1599
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2439
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1731
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:105
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:78
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:171
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:428
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:315
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
Definition: Instruction.h:930
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1789
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:897
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:42
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:180
static unsigned getPointerOperandIndex()
Definition: Instructions.h:261
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:38
Metadata node.
Definition: Metadata.h:1077
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:115
size_type size() const
Definition: MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
size_type size() const
Definition: SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
void insert_range(Range &&R)
Definition: SmallPtrSet.h:490
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:705
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
Align getAlign() const
Definition: Instructions.h:338
bool isSimple() const
Definition: Instructions.h:375
Value * getValueOperand()
Definition: Instructions.h:383
bool isUnordered() const
Definition: Instructions.h:377
static unsigned getPointerOperandIndex()
Definition: Instructions.h:388
Value * getPointerOperand()
Definition: Instructions.h:386
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
LLVM_ABI BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
LLVM_ABI bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const
LLVM_ABI InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
LLVM_ABI bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI unsigned getIntegerBitWidth() const
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
LLVM_ABI void set(Value *Val)
Definition: Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:35
op_range operands()
Definition: User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:245
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
static constexpr uint64_t MaximumAlignment
Definition: Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
iterator_range< user_iterator > users()
Definition: Value.h:426
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
iterator_range< use_iterator > uses()
Definition: Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition: DenseSet.h:96
size_type size() const
Definition: DenseSet.h:87
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:359
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
Definition: PatternMatch.h:876
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:931
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition: DebugInfo.h:201
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1899
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:860
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1770
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:307
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
bool succ_empty(const Instruction *I)
Definition: CFG.h:256
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:134
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:252
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2095
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1796
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:157
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2147
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:203
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:317
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1172
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1444
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:2845
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1300
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3081
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3339
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:289
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:3839
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2049
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:306
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:252
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:257
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1629
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:595
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2107
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition: ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:469
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:378
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
BasicBlock * Dest
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:54
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:289
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:138
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:249