LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <optional>
88#include <set>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "simplifycfg"
97
99 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
100
101 cl::desc(
102 "Temporary development switch used to gradually uplift SimplifyCFG "
103 "into preserving DomTree,"));
104
105// Chosen as 2 so as to be cheap, but still to have enough power to fold
106// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
107// To catch this, we need to fold a compare and a select, hence '2' being the
108// minimum reasonable default.
110 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
111 cl::desc(
112 "Control the amount of phi node folding to perform (default = 2)"));
113
115 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
116 cl::desc("Control the maximal total instruction cost that we are willing "
117 "to speculatively execute to fold a 2-entry PHI node into a "
118 "select (default = 4)"));
119
120static cl::opt<bool>
121 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
122 cl::desc("Hoist common instructions up to the parent block"));
123
125 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
126 cl::desc("Hoist loads if the target supports conditional faulting"));
127
129 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
130 cl::desc("Hoist stores if the target supports conditional faulting"));
131
133 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
134 cl::desc("Control the maximal conditional load/store that we are willing "
135 "to speculatively execute to eliminate conditional branch "
136 "(default = 6)"));
137
139 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
140 cl::init(20),
141 cl::desc("Allow reordering across at most this many "
142 "instructions when hoisting"));
143
144static cl::opt<bool>
145 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
146 cl::desc("Sink common instructions down to the end block"));
147
149 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
150 cl::desc("Hoist conditional stores if an unconditional store precedes"));
151
153 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
154 cl::desc("Hoist conditional stores even if an unconditional store does not "
155 "precede - hoist multiple conditional stores into a single "
156 "predicated store"));
157
159 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
160 cl::desc("When merging conditional stores, do so even if the resultant "
161 "basic blocks are unlikely to be if-converted as a result"));
162
164 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
165 cl::desc("Allow exactly one expensive instruction to be speculatively "
166 "executed"));
167
169 "max-speculation-depth", cl::Hidden, cl::init(10),
170 cl::desc("Limit maximum recursion depth when calculating costs of "
171 "speculatively executed instructions"));
172
173static cl::opt<int>
174 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
175 cl::init(10),
176 cl::desc("Max size of a block which is still considered "
177 "small enough to thread through"));
178
179// Two is chosen to allow one negation and a logical combine.
181 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
182 cl::init(2),
183 cl::desc("Maximum cost of combining conditions when "
184 "folding branches"));
185
187 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
188 cl::init(2),
189 cl::desc("Multiplier to apply to threshold when determining whether or not "
190 "to fold branch to common destination when vector operations are "
191 "present"));
192
194 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
195 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
196
198 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
199 cl::desc("Limit cases to analyze when converting a switch to select"));
200
202 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
203 cl::desc("Limit number of blocks a define in a threaded block is allowed "
204 "to be live in"));
205
206STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
207STATISTIC(NumLinearMaps,
208 "Number of switch instructions turned into linear mapping");
209STATISTIC(NumLookupTables,
210 "Number of switch instructions turned into lookup tables");
212 NumLookupTablesHoles,
213 "Number of switch instructions turned into lookup tables (holes checked)");
214STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
215STATISTIC(NumFoldValueComparisonIntoPredecessors,
216 "Number of value comparisons folded into predecessor basic blocks");
217STATISTIC(NumFoldBranchToCommonDest,
218 "Number of branches folded into predecessor basic block");
220 NumHoistCommonCode,
221 "Number of common instruction 'blocks' hoisted up to the begin block");
222STATISTIC(NumHoistCommonInstrs,
223 "Number of common instructions hoisted up to the begin block");
224STATISTIC(NumSinkCommonCode,
225 "Number of common instruction 'blocks' sunk down to the end block");
226STATISTIC(NumSinkCommonInstrs,
227 "Number of common instructions sunk down to the end block");
228STATISTIC(NumSpeculations, "Number of speculative executed instructions");
229STATISTIC(NumInvokes,
230 "Number of invokes with empty resume blocks simplified into calls");
231STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
232STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
233
234namespace {
235
236// The first field contains the value that the switch produces when a certain
237// case group is selected, and the second field is a vector containing the
238// cases composing the case group.
239using SwitchCaseResultVectorTy =
241
242// The first field contains the phi node that generates a result of the switch
243// and the second field contains the value generated for a certain case in the
244// switch for that PHI.
245using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
246
247/// ValueEqualityComparisonCase - Represents a case of a switch.
248struct ValueEqualityComparisonCase {
250 BasicBlock *Dest;
251
252 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
253 : Value(Value), Dest(Dest) {}
254
255 bool operator<(ValueEqualityComparisonCase RHS) const {
256 // Comparing pointers is ok as we only rely on the order for uniquing.
257 return Value < RHS.Value;
258 }
259
260 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
261};
262
263class SimplifyCFGOpt {
265 DomTreeUpdater *DTU;
266 const DataLayout &DL;
267 ArrayRef<WeakVH> LoopHeaders;
268 const SimplifyCFGOptions &Options;
269 bool Resimplify;
270
271 Value *isValueEqualityComparison(Instruction *TI);
272 BasicBlock *getValueEqualityComparisonCases(
273 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
274 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
275 BasicBlock *Pred,
276 IRBuilder<> &Builder);
277 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
278 Instruction *PTI,
279 IRBuilder<> &Builder);
280 bool foldValueComparisonIntoPredecessors(Instruction *TI,
281 IRBuilder<> &Builder);
282
283 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
284 bool simplifySingleResume(ResumeInst *RI);
285 bool simplifyCommonResume(ResumeInst *RI);
286 bool simplifyCleanupReturn(CleanupReturnInst *RI);
287 bool simplifyUnreachable(UnreachableInst *UI);
288 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
289 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
290 bool simplifyIndirectBr(IndirectBrInst *IBI);
291 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
292 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
293 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
294 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
295
296 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
297 IRBuilder<> &Builder);
298
299 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
300 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
301 Instruction *TI, Instruction *I1,
302 SmallVectorImpl<Instruction *> &OtherSuccTIs);
303 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
304 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
305 BasicBlock *TrueBB, BasicBlock *FalseBB,
306 uint32_t TrueWeight, uint32_t FalseWeight);
307 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
308 const DataLayout &DL);
309 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
310 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
311 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
312
313public:
314 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
315 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
316 const SimplifyCFGOptions &Opts)
317 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
318 assert((!DTU || !DTU->hasPostDomTree()) &&
319 "SimplifyCFG is not yet capable of maintaining validity of a "
320 "PostDomTree, so don't ask for it.");
321 }
322
323 bool simplifyOnce(BasicBlock *BB);
324 bool run(BasicBlock *BB);
325
326 // Helper to set Resimplify and return change indication.
327 bool requestResimplify() {
328 Resimplify = true;
329 return true;
330 }
331};
332
333} // end anonymous namespace
334
335/// Return true if all the PHI nodes in the basic block \p BB
336/// receive compatible (identical) incoming values when coming from
337/// all of the predecessor blocks that are specified in \p IncomingBlocks.
338///
339/// Note that if the values aren't exactly identical, but \p EquivalenceSet
340/// is provided, and *both* of the values are present in the set,
341/// then they are considered equal.
343 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
344 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
345 assert(IncomingBlocks.size() == 2 &&
346 "Only for a pair of incoming blocks at the time!");
347
348 // FIXME: it is okay if one of the incoming values is an `undef` value,
349 // iff the other incoming value is guaranteed to be a non-poison value.
350 // FIXME: it is okay if one of the incoming values is a `poison` value.
351 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
352 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
353 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
354 if (IV0 == IV1)
355 return true;
356 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
357 EquivalenceSet->contains(IV1))
358 return true;
359 return false;
360 });
361}
362
363/// Return true if it is safe to merge these two
364/// terminator instructions together.
365static bool
367 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
368 if (SI1 == SI2)
369 return false; // Can't merge with self!
370
371 // It is not safe to merge these two switch instructions if they have a common
372 // successor, and if that successor has a PHI node, and if *that* PHI node has
373 // conflicting incoming values from the two switch blocks.
374 BasicBlock *SI1BB = SI1->getParent();
375 BasicBlock *SI2BB = SI2->getParent();
376
378 bool Fail = false;
379 for (BasicBlock *Succ : successors(SI2BB)) {
380 if (!SI1Succs.count(Succ))
381 continue;
382 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
383 continue;
384 Fail = true;
385 if (FailBlocks)
386 FailBlocks->insert(Succ);
387 else
388 break;
389 }
390
391 return !Fail;
392}
393
394/// Update PHI nodes in Succ to indicate that there will now be entries in it
395/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
396/// will be the same as those coming in from ExistPred, an existing predecessor
397/// of Succ.
398static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
399 BasicBlock *ExistPred,
400 MemorySSAUpdater *MSSAU = nullptr) {
401 for (PHINode &PN : Succ->phis())
402 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
403 if (MSSAU)
404 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
405 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
406}
407
408/// Compute an abstract "cost" of speculating the given instruction,
409/// which is assumed to be safe to speculate. TCC_Free means cheap,
410/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
411/// expensive.
413 const TargetTransformInfo &TTI) {
415}
416
417/// If we have a merge point of an "if condition" as accepted above,
418/// return true if the specified value dominates the block. We don't handle
419/// the true generality of domination here, just a special case which works
420/// well enough for us.
421///
422/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
423/// see if V (which must be an instruction) and its recursive operands
424/// that do not dominate BB have a combined cost lower than Budget and
425/// are non-trapping. If both are true, the instruction is inserted into the
426/// set and true is returned.
427///
428/// The cost for most non-trapping instructions is defined as 1 except for
429/// Select whose cost is 2.
430///
431/// After this function returns, Cost is increased by the cost of
432/// V plus its non-dominating operands. If that cost is greater than
433/// Budget, false is returned and Cost is undefined.
435 Value *V, BasicBlock *BB, Instruction *InsertPt,
438 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
439 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
440 // so limit the recursion depth.
441 // TODO: While this recursion limit does prevent pathological behavior, it
442 // would be better to track visited instructions to avoid cycles.
444 return false;
445
446 Instruction *I = dyn_cast<Instruction>(V);
447 if (!I) {
448 // Non-instructions dominate all instructions and can be executed
449 // unconditionally.
450 return true;
451 }
452 BasicBlock *PBB = I->getParent();
453
454 // We don't want to allow weird loops that might have the "if condition" in
455 // the bottom of this block.
456 if (PBB == BB)
457 return false;
458
459 // If this instruction is defined in a block that contains an unconditional
460 // branch to BB, then it must be in the 'conditional' part of the "if
461 // statement". If not, it definitely dominates the region.
462 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
463 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
464 return true;
465
466 // If we have seen this instruction before, don't count it again.
467 if (AggressiveInsts.count(I))
468 return true;
469
470 // Okay, it looks like the instruction IS in the "condition". Check to
471 // see if it's a cheap instruction to unconditionally compute, and if it
472 // only uses stuff defined outside of the condition. If so, hoist it out.
473 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
474 return false;
475
476 // Overflow arithmetic instruction plus extract value are usually generated
477 // when a division is being replaced. But, in this case, the zero check may
478 // still be kept in the code. In that case it would be worth to hoist these
479 // two instruction out of the basic block. Let's treat this pattern as one
480 // single cheap instruction here!
481 WithOverflowInst *OverflowInst;
482 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
483 ZeroCostInstructions.insert(OverflowInst);
484 Cost += 1;
485 } else if (!ZeroCostInstructions.contains(I))
487
488 // Allow exactly one instruction to be speculated regardless of its cost
489 // (as long as it is safe to do so).
490 // This is intended to flatten the CFG even if the instruction is a division
491 // or other expensive operation. The speculation of an expensive instruction
492 // is expected to be undone in CodeGenPrepare if the speculation has not
493 // enabled further IR optimizations.
494 if (Cost > Budget &&
495 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
496 !Cost.isValid()))
497 return false;
498
499 // Okay, we can only really hoist these out if their operands do
500 // not take us over the cost threshold.
501 for (Use &Op : I->operands())
502 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
503 TTI, AC, ZeroCostInstructions, Depth + 1))
504 return false;
505 // Okay, it's safe to do this! Remember this instruction.
506 AggressiveInsts.insert(I);
507 return true;
508}
509
510/// Extract ConstantInt from value, looking through IntToPtr
511/// and PointerNullValue. Return NULL if value is not a constant int.
513 // Normal constant int.
514 ConstantInt *CI = dyn_cast<ConstantInt>(V);
515 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
516 DL.isNonIntegralPointerType(V->getType()))
517 return CI;
518
519 // This is some kind of pointer constant. Turn it into a pointer-sized
520 // ConstantInt if possible.
521 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
522
523 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
524 if (isa<ConstantPointerNull>(V))
525 return ConstantInt::get(PtrTy, 0);
526
527 // IntToPtr const int.
528 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
529 if (CE->getOpcode() == Instruction::IntToPtr)
530 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
531 // The constant is very likely to have the right type already.
532 if (CI->getType() == PtrTy)
533 return CI;
534 else
535 return cast<ConstantInt>(
536 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
537 }
538 return nullptr;
539}
540
541namespace {
542
543/// Given a chain of or (||) or and (&&) comparison of a value against a
544/// constant, this will try to recover the information required for a switch
545/// structure.
546/// It will depth-first traverse the chain of comparison, seeking for patterns
547/// like %a == 12 or %a < 4 and combine them to produce a set of integer
548/// representing the different cases for the switch.
549/// Note that if the chain is composed of '||' it will build the set of elements
550/// that matches the comparisons (i.e. any of this value validate the chain)
551/// while for a chain of '&&' it will build the set elements that make the test
552/// fail.
553struct ConstantComparesGatherer {
554 const DataLayout &DL;
555
556 /// Value found for the switch comparison
557 Value *CompValue = nullptr;
558
559 /// Extra clause to be checked before the switch
560 Value *Extra = nullptr;
561
562 /// Set of integers to match in switch
564
565 /// Number of comparisons matched in the and/or chain
566 unsigned UsedICmps = 0;
567
568 /// If the elements in Vals matches the comparisons
569 bool IsEq = false;
570
571 // Used to check if the first matched CompValue shall be the Extra check.
572 bool IgnoreFirstMatch = false;
573 bool MultipleMatches = false;
574
575 /// Construct and compute the result for the comparison instruction Cond
576 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
577 gather(Cond);
578 if (CompValue || !MultipleMatches)
579 return;
580 Extra = nullptr;
581 Vals.clear();
582 UsedICmps = 0;
583 IgnoreFirstMatch = true;
584 gather(Cond);
585 }
586
587 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
588 ConstantComparesGatherer &
589 operator=(const ConstantComparesGatherer &) = delete;
590
591private:
592 /// Try to set the current value used for the comparison, it succeeds only if
593 /// it wasn't set before or if the new value is the same as the old one
594 bool setValueOnce(Value *NewVal) {
595 if (IgnoreFirstMatch) {
596 IgnoreFirstMatch = false;
597 return false;
598 }
599 if (CompValue && CompValue != NewVal) {
600 MultipleMatches = true;
601 return false;
602 }
603 CompValue = NewVal;
604 return true;
605 }
606
607 /// Try to match Instruction "I" as a comparison against a constant and
608 /// populates the array Vals with the set of values that match (or do not
609 /// match depending on isEQ).
610 /// Return false on failure. On success, the Value the comparison matched
611 /// against is placed in CompValue.
612 /// If CompValue is already set, the function is expected to fail if a match
613 /// is found but the value compared to is different.
614 bool matchInstruction(Instruction *I, bool isEQ) {
615 Value *Val;
616 if (match(I, m_NUWTrunc(m_Value(Val)))) {
617 // If we already have a value for the switch, it has to match!
618 if (!setValueOnce(Val))
619 return false;
620 UsedICmps++;
621 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
622 return true;
623 }
624 // If this is an icmp against a constant, handle this as one of the cases.
625 ICmpInst *ICI;
626 ConstantInt *C;
627 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
628 (C = getConstantInt(I->getOperand(1), DL)))) {
629 return false;
630 }
631
632 Value *RHSVal;
633 const APInt *RHSC;
634
635 // Pattern match a special case
636 // (x & ~2^z) == y --> x == y || x == y|2^z
637 // This undoes a transformation done by instcombine to fuse 2 compares.
638 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
639 // It's a little bit hard to see why the following transformations are
640 // correct. Here is a CVC3 program to verify them for 64-bit values:
641
642 /*
643 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
644 x : BITVECTOR(64);
645 y : BITVECTOR(64);
646 z : BITVECTOR(64);
647 mask : BITVECTOR(64) = BVSHL(ONE, z);
648 QUERY( (y & ~mask = y) =>
649 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
650 );
651 QUERY( (y | mask = y) =>
652 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
653 );
654 */
655
656 // Please note that each pattern must be a dual implication (<--> or
657 // iff). One directional implication can create spurious matches. If the
658 // implication is only one-way, an unsatisfiable condition on the left
659 // side can imply a satisfiable condition on the right side. Dual
660 // implication ensures that satisfiable conditions are transformed to
661 // other satisfiable conditions and unsatisfiable conditions are
662 // transformed to other unsatisfiable conditions.
663
664 // Here is a concrete example of a unsatisfiable condition on the left
665 // implying a satisfiable condition on the right:
666 //
667 // mask = (1 << z)
668 // (x & ~mask) == y --> (x == y || x == (y | mask))
669 //
670 // Substituting y = 3, z = 0 yields:
671 // (x & -2) == 3 --> (x == 3 || x == 2)
672
673 // Pattern match a special case:
674 /*
675 QUERY( (y & ~mask = y) =>
676 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
677 );
678 */
679 if (match(ICI->getOperand(0),
680 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
681 APInt Mask = ~*RHSC;
682 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
683 // If we already have a value for the switch, it has to match!
684 if (!setValueOnce(RHSVal))
685 return false;
686
687 Vals.push_back(C);
688 Vals.push_back(
689 ConstantInt::get(C->getContext(),
690 C->getValue() | Mask));
691 UsedICmps++;
692 return true;
693 }
694 }
695
696 // Pattern match a special case:
697 /*
698 QUERY( (y | mask = y) =>
699 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
700 );
701 */
702 if (match(ICI->getOperand(0),
703 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
704 APInt Mask = *RHSC;
705 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
706 // If we already have a value for the switch, it has to match!
707 if (!setValueOnce(RHSVal))
708 return false;
709
710 Vals.push_back(C);
711 Vals.push_back(ConstantInt::get(C->getContext(),
712 C->getValue() & ~Mask));
713 UsedICmps++;
714 return true;
715 }
716 }
717
718 // If we already have a value for the switch, it has to match!
719 if (!setValueOnce(ICI->getOperand(0)))
720 return false;
721
722 UsedICmps++;
723 Vals.push_back(C);
724 return true;
725 }
726
727 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
728 ConstantRange Span =
730
731 // Shift the range if the compare is fed by an add. This is the range
732 // compare idiom as emitted by instcombine.
733 Value *CandidateVal = I->getOperand(0);
734 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
735 Span = Span.subtract(*RHSC);
736 CandidateVal = RHSVal;
737 }
738
739 // If this is an and/!= check, then we are looking to build the set of
740 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
741 // x != 0 && x != 1.
742 if (!isEQ)
743 Span = Span.inverse();
744
745 // If there are a ton of values, we don't want to make a ginormous switch.
746 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
747 return false;
748 }
749
750 // If we already have a value for the switch, it has to match!
751 if (!setValueOnce(CandidateVal))
752 return false;
753
754 // Add all values from the range to the set
755 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
756 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
757
758 UsedICmps++;
759 return true;
760 }
761
762 /// Given a potentially 'or'd or 'and'd together collection of icmp
763 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
764 /// the value being compared, and stick the list constants into the Vals
765 /// vector.
766 /// One "Extra" case is allowed to differ from the other.
767 void gather(Value *V) {
768 Value *Op0, *Op1;
769 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
770 IsEq = true;
771 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
772 IsEq = false;
773 else
774 return;
775 // Keep a stack (SmallVector for efficiency) for depth-first traversal
776 SmallVector<Value *, 8> DFT{Op0, Op1};
777 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
778
779 while (!DFT.empty()) {
780 V = DFT.pop_back_val();
781
782 if (Instruction *I = dyn_cast<Instruction>(V)) {
783 // If it is a || (or && depending on isEQ), process the operands.
784 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
785 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
786 if (Visited.insert(Op1).second)
787 DFT.push_back(Op1);
788 if (Visited.insert(Op0).second)
789 DFT.push_back(Op0);
790
791 continue;
792 }
793
794 // Try to match the current instruction
795 if (matchInstruction(I, IsEq))
796 // Match succeed, continue the loop
797 continue;
798 }
799
800 // One element of the sequence of || (or &&) could not be match as a
801 // comparison against the same value as the others.
802 // We allow only one "Extra" case to be checked before the switch
803 if (!Extra) {
804 Extra = V;
805 continue;
806 }
807 // Failed to parse a proper sequence, abort now
808 CompValue = nullptr;
809 break;
810 }
811 }
812};
813
814} // end anonymous namespace
815
817 MemorySSAUpdater *MSSAU = nullptr) {
818 Instruction *Cond = nullptr;
819 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
820 Cond = dyn_cast<Instruction>(SI->getCondition());
821 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
822 if (BI->isConditional())
823 Cond = dyn_cast<Instruction>(BI->getCondition());
824 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
825 Cond = dyn_cast<Instruction>(IBI->getAddress());
826 }
827
828 TI->eraseFromParent();
829 if (Cond)
831}
832
833/// Return true if the specified terminator checks
834/// to see if a value is equal to constant integer value.
835Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
836 Value *CV = nullptr;
837 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
838 // Do not permit merging of large switch instructions into their
839 // predecessors unless there is only one predecessor.
840 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
841 CV = SI->getCondition();
842 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
843 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
844 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
845 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
846 CV = ICI->getOperand(0);
847 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
848 if (Trunc->hasNoUnsignedWrap())
849 CV = Trunc->getOperand(0);
850 }
851 }
852
853 // Unwrap any lossless ptrtoint cast.
854 if (CV) {
855 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
856 Value *Ptr = PTII->getPointerOperand();
857 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
858 CV = Ptr;
859 }
860 }
861 return CV;
862}
863
864/// Given a value comparison instruction,
865/// decode all of the 'cases' that it represents and return the 'default' block.
866BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
867 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
868 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
869 Cases.reserve(SI->getNumCases());
870 for (auto Case : SI->cases())
871 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
872 Case.getCaseSuccessor()));
873 return SI->getDefaultDest();
874 }
875
876 BranchInst *BI = cast<BranchInst>(TI);
877 Value *Cond = BI->getCondition();
879 ConstantInt *C;
880 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
881 Pred = ICI->getPredicate();
882 C = getConstantInt(ICI->getOperand(1), DL);
883 } else {
884 Pred = ICmpInst::ICMP_NE;
885 auto *Trunc = cast<TruncInst>(Cond);
886 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
887 }
888 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
889 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
890 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
891}
892
893/// Given a vector of bb/value pairs, remove any entries
894/// in the list that match the specified block.
895static void
897 std::vector<ValueEqualityComparisonCase> &Cases) {
898 llvm::erase(Cases, BB);
899}
900
901/// Return true if there are any keys in C1 that exist in C2 as well.
902static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
903 std::vector<ValueEqualityComparisonCase> &C2) {
904 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
905
906 // Make V1 be smaller than V2.
907 if (V1->size() > V2->size())
908 std::swap(V1, V2);
909
910 if (V1->empty())
911 return false;
912 if (V1->size() == 1) {
913 // Just scan V2.
914 ConstantInt *TheVal = (*V1)[0].Value;
915 for (const ValueEqualityComparisonCase &VECC : *V2)
916 if (TheVal == VECC.Value)
917 return true;
918 }
919
920 // Otherwise, just sort both lists and compare element by element.
921 array_pod_sort(V1->begin(), V1->end());
922 array_pod_sort(V2->begin(), V2->end());
923 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
924 while (i1 != e1 && i2 != e2) {
925 if ((*V1)[i1].Value == (*V2)[i2].Value)
926 return true;
927 if ((*V1)[i1].Value < (*V2)[i2].Value)
928 ++i1;
929 else
930 ++i2;
931 }
932 return false;
933}
934
935// Set branch weights on SwitchInst. This sets the metadata if there is at
936// least one non-zero weight.
938 bool IsExpected) {
939 // Check that there is at least one non-zero weight. Otherwise, pass
940 // nullptr to setMetadata which will erase the existing metadata.
941 MDNode *N = nullptr;
942 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
943 N = MDBuilder(SI->getParent()->getContext())
944 .createBranchWeights(Weights, IsExpected);
945 SI->setMetadata(LLVMContext::MD_prof, N);
946}
947
948// Similar to the above, but for branch and select instructions that take
949// exactly 2 weights.
950static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
951 uint32_t FalseWeight, bool IsExpected) {
952 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
953 // Check that there is at least one non-zero weight. Otherwise, pass
954 // nullptr to setMetadata which will erase the existing metadata.
955 MDNode *N = nullptr;
956 if (TrueWeight || FalseWeight)
957 N = MDBuilder(I->getParent()->getContext())
958 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
959 I->setMetadata(LLVMContext::MD_prof, N);
960}
961
962/// If TI is known to be a terminator instruction and its block is known to
963/// only have a single predecessor block, check to see if that predecessor is
964/// also a value comparison with the same value, and if that comparison
965/// determines the outcome of this comparison. If so, simplify TI. This does a
966/// very limited form of jump threading.
967bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
968 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
969 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
970 if (!PredVal)
971 return false; // Not a value comparison in predecessor.
972
973 Value *ThisVal = isValueEqualityComparison(TI);
974 assert(ThisVal && "This isn't a value comparison!!");
975 if (ThisVal != PredVal)
976 return false; // Different predicates.
977
978 // TODO: Preserve branch weight metadata, similarly to how
979 // foldValueComparisonIntoPredecessors preserves it.
980
981 // Find out information about when control will move from Pred to TI's block.
982 std::vector<ValueEqualityComparisonCase> PredCases;
983 BasicBlock *PredDef =
984 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
985 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
986
987 // Find information about how control leaves this block.
988 std::vector<ValueEqualityComparisonCase> ThisCases;
989 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
990 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
991
992 // If TI's block is the default block from Pred's comparison, potentially
993 // simplify TI based on this knowledge.
994 if (PredDef == TI->getParent()) {
995 // If we are here, we know that the value is none of those cases listed in
996 // PredCases. If there are any cases in ThisCases that are in PredCases, we
997 // can simplify TI.
998 if (!valuesOverlap(PredCases, ThisCases))
999 return false;
1000
1001 if (isa<BranchInst>(TI)) {
1002 // Okay, one of the successors of this condbr is dead. Convert it to a
1003 // uncond br.
1004 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1005 // Insert the new branch.
1006 Instruction *NI = Builder.CreateBr(ThisDef);
1007 (void)NI;
1008
1009 // Remove PHI node entries for the dead edge.
1010 ThisCases[0].Dest->removePredecessor(PredDef);
1011
1012 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1013 << "Through successor TI: " << *TI << "Leaving: " << *NI
1014 << "\n");
1015
1017
1018 if (DTU)
1019 DTU->applyUpdates(
1020 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1021
1022 return true;
1023 }
1024
1025 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1026 // Okay, TI has cases that are statically dead, prune them away.
1028 for (const ValueEqualityComparisonCase &Case : PredCases)
1029 DeadCases.insert(Case.Value);
1030
1031 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1032 << "Through successor TI: " << *TI);
1033
1034 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1035 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1036 --i;
1037 auto *Successor = i->getCaseSuccessor();
1038 if (DTU)
1039 ++NumPerSuccessorCases[Successor];
1040 if (DeadCases.count(i->getCaseValue())) {
1041 Successor->removePredecessor(PredDef);
1042 SI.removeCase(i);
1043 if (DTU)
1044 --NumPerSuccessorCases[Successor];
1045 }
1046 }
1047
1048 if (DTU) {
1049 std::vector<DominatorTree::UpdateType> Updates;
1050 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1051 if (I.second == 0)
1052 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1053 DTU->applyUpdates(Updates);
1054 }
1055
1056 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1057 return true;
1058 }
1059
1060 // Otherwise, TI's block must correspond to some matched value. Find out
1061 // which value (or set of values) this is.
1062 ConstantInt *TIV = nullptr;
1063 BasicBlock *TIBB = TI->getParent();
1064 for (const auto &[Value, Dest] : PredCases)
1065 if (Dest == TIBB) {
1066 if (TIV)
1067 return false; // Cannot handle multiple values coming to this block.
1068 TIV = Value;
1069 }
1070 assert(TIV && "No edge from pred to succ?");
1071
1072 // Okay, we found the one constant that our value can be if we get into TI's
1073 // BB. Find out which successor will unconditionally be branched to.
1074 BasicBlock *TheRealDest = nullptr;
1075 for (const auto &[Value, Dest] : ThisCases)
1076 if (Value == TIV) {
1077 TheRealDest = Dest;
1078 break;
1079 }
1080
1081 // If not handled by any explicit cases, it is handled by the default case.
1082 if (!TheRealDest)
1083 TheRealDest = ThisDef;
1084
1085 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1086
1087 // Remove PHI node entries for dead edges.
1088 BasicBlock *CheckEdge = TheRealDest;
1089 for (BasicBlock *Succ : successors(TIBB))
1090 if (Succ != CheckEdge) {
1091 if (Succ != TheRealDest)
1092 RemovedSuccs.insert(Succ);
1093 Succ->removePredecessor(TIBB);
1094 } else
1095 CheckEdge = nullptr;
1096
1097 // Insert the new branch.
1098 Instruction *NI = Builder.CreateBr(TheRealDest);
1099 (void)NI;
1100
1101 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1102 << "Through successor TI: " << *TI << "Leaving: " << *NI
1103 << "\n");
1104
1106 if (DTU) {
1108 Updates.reserve(RemovedSuccs.size());
1109 for (auto *RemovedSucc : RemovedSuccs)
1110 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1111 DTU->applyUpdates(Updates);
1112 }
1113 return true;
1114}
1115
1116namespace {
1117
1118/// This class implements a stable ordering of constant
1119/// integers that does not depend on their address. This is important for
1120/// applications that sort ConstantInt's to ensure uniqueness.
1121struct ConstantIntOrdering {
1122 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1123 return LHS->getValue().ult(RHS->getValue());
1124 }
1125};
1126
1127} // end anonymous namespace
1128
1130 ConstantInt *const *P2) {
1131 const ConstantInt *LHS = *P1;
1132 const ConstantInt *RHS = *P2;
1133 if (LHS == RHS)
1134 return 0;
1135 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1136}
1137
1138/// Get Weights of a given terminator, the default weight is at the front
1139/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1140/// metadata.
1142 SmallVectorImpl<uint64_t> &Weights) {
1143 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1144 assert(MD && "Invalid branch-weight metadata");
1145 extractFromBranchWeightMD64(MD, Weights);
1146
1147 // If TI is a conditional eq, the default case is the false case,
1148 // and the corresponding branch-weight data is at index 2. We swap the
1149 // default weight to be the first entry.
1150 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1151 assert(Weights.size() == 2);
1152 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1153 if (!ICI)
1154 return;
1155
1156 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1157 std::swap(Weights.front(), Weights.back());
1158 }
1159}
1160
1161/// Keep halving the weights until all can fit in uint32_t.
1163 uint64_t Max = *llvm::max_element(Weights);
1164 if (Max > UINT_MAX) {
1165 unsigned Offset = 32 - llvm::countl_zero(Max);
1166 for (uint64_t &I : Weights)
1167 I >>= Offset;
1168 }
1169}
1170
1172 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1173 Instruction *PTI = PredBlock->getTerminator();
1174
1175 // If we have bonus instructions, clone them into the predecessor block.
1176 // Note that there may be multiple predecessor blocks, so we cannot move
1177 // bonus instructions to a predecessor block.
1178 for (Instruction &BonusInst : *BB) {
1179 if (BonusInst.isTerminator())
1180 continue;
1181
1182 Instruction *NewBonusInst = BonusInst.clone();
1183
1184 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1185 // Unless the instruction has the same !dbg location as the original
1186 // branch, drop it. When we fold the bonus instructions we want to make
1187 // sure we reset their debug locations in order to avoid stepping on
1188 // dead code caused by folding dead branches.
1189 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1190 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1191 mapAtomInstance(DL, VMap);
1192 }
1193
1194 RemapInstruction(NewBonusInst, VMap,
1196
1197 // If we speculated an instruction, we need to drop any metadata that may
1198 // result in undefined behavior, as the metadata might have been valid
1199 // only given the branch precondition.
1200 // Similarly strip attributes on call parameters that may cause UB in
1201 // location the call is moved to.
1202 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1203
1204 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1205 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1206 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1208
1209 NewBonusInst->takeName(&BonusInst);
1210 BonusInst.setName(NewBonusInst->getName() + ".old");
1211 VMap[&BonusInst] = NewBonusInst;
1212
1213 // Update (liveout) uses of bonus instructions,
1214 // now that the bonus instruction has been cloned into predecessor.
1215 // Note that we expect to be in a block-closed SSA form for this to work!
1216 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1217 auto *UI = cast<Instruction>(U.getUser());
1218 auto *PN = dyn_cast<PHINode>(UI);
1219 if (!PN) {
1220 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1221 "If the user is not a PHI node, then it should be in the same "
1222 "block as, and come after, the original bonus instruction.");
1223 continue; // Keep using the original bonus instruction.
1224 }
1225 // Is this the block-closed SSA form PHI node?
1226 if (PN->getIncomingBlock(U) == BB)
1227 continue; // Great, keep using the original bonus instruction.
1228 // The only other alternative is an "use" when coming from
1229 // the predecessor block - here we should refer to the cloned bonus instr.
1230 assert(PN->getIncomingBlock(U) == PredBlock &&
1231 "Not in block-closed SSA form?");
1232 U.set(NewBonusInst);
1233 }
1234 }
1235
1236 // Key Instructions: We may have propagated atom info into the pred. If the
1237 // pred's terminator already has atom info do nothing as merging would drop
1238 // one atom group anyway. If it doesn't, propagte the remapped atom group
1239 // from BB's terminator.
1240 if (auto &PredDL = PTI->getDebugLoc()) {
1241 auto &DL = BB->getTerminator()->getDebugLoc();
1242 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1243 PredDL.isSameSourceLocation(DL)) {
1244 PTI->setDebugLoc(DL);
1245 RemapSourceAtom(PTI, VMap);
1246 }
1247 }
1248}
1249
1250bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1251 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1252 BasicBlock *BB = TI->getParent();
1253 BasicBlock *Pred = PTI->getParent();
1254
1256
1257 // Figure out which 'cases' to copy from SI to PSI.
1258 std::vector<ValueEqualityComparisonCase> BBCases;
1259 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1260
1261 std::vector<ValueEqualityComparisonCase> PredCases;
1262 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1263
1264 // Based on whether the default edge from PTI goes to BB or not, fill in
1265 // PredCases and PredDefault with the new switch cases we would like to
1266 // build.
1268
1269 // Update the branch weight metadata along the way
1271 bool PredHasWeights = hasBranchWeightMD(*PTI);
1272 bool SuccHasWeights = hasBranchWeightMD(*TI);
1273
1274 if (PredHasWeights) {
1275 getBranchWeights(PTI, Weights);
1276 // branch-weight metadata is inconsistent here.
1277 if (Weights.size() != 1 + PredCases.size())
1278 PredHasWeights = SuccHasWeights = false;
1279 } else if (SuccHasWeights)
1280 // If there are no predecessor weights but there are successor weights,
1281 // populate Weights with 1, which will later be scaled to the sum of
1282 // successor's weights
1283 Weights.assign(1 + PredCases.size(), 1);
1284
1285 SmallVector<uint64_t, 8> SuccWeights;
1286 if (SuccHasWeights) {
1287 getBranchWeights(TI, SuccWeights);
1288 // branch-weight metadata is inconsistent here.
1289 if (SuccWeights.size() != 1 + BBCases.size())
1290 PredHasWeights = SuccHasWeights = false;
1291 } else if (PredHasWeights)
1292 SuccWeights.assign(1 + BBCases.size(), 1);
1293
1294 if (PredDefault == BB) {
1295 // If this is the default destination from PTI, only the edges in TI
1296 // that don't occur in PTI, or that branch to BB will be activated.
1297 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1298 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1299 if (PredCases[i].Dest != BB)
1300 PTIHandled.insert(PredCases[i].Value);
1301 else {
1302 // The default destination is BB, we don't need explicit targets.
1303 std::swap(PredCases[i], PredCases.back());
1304
1305 if (PredHasWeights || SuccHasWeights) {
1306 // Increase weight for the default case.
1307 Weights[0] += Weights[i + 1];
1308 std::swap(Weights[i + 1], Weights.back());
1309 Weights.pop_back();
1310 }
1311
1312 PredCases.pop_back();
1313 --i;
1314 --e;
1315 }
1316
1317 // Reconstruct the new switch statement we will be building.
1318 if (PredDefault != BBDefault) {
1319 PredDefault->removePredecessor(Pred);
1320 if (DTU && PredDefault != BB)
1321 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1322 PredDefault = BBDefault;
1323 ++NewSuccessors[BBDefault];
1324 }
1325
1326 unsigned CasesFromPred = Weights.size();
1327 uint64_t ValidTotalSuccWeight = 0;
1328 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1329 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1330 PredCases.push_back(BBCases[i]);
1331 ++NewSuccessors[BBCases[i].Dest];
1332 if (SuccHasWeights || PredHasWeights) {
1333 // The default weight is at index 0, so weight for the ith case
1334 // should be at index i+1. Scale the cases from successor by
1335 // PredDefaultWeight (Weights[0]).
1336 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1337 ValidTotalSuccWeight += SuccWeights[i + 1];
1338 }
1339 }
1340
1341 if (SuccHasWeights || PredHasWeights) {
1342 ValidTotalSuccWeight += SuccWeights[0];
1343 // Scale the cases from predecessor by ValidTotalSuccWeight.
1344 for (unsigned i = 1; i < CasesFromPred; ++i)
1345 Weights[i] *= ValidTotalSuccWeight;
1346 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1347 Weights[0] *= SuccWeights[0];
1348 }
1349 } else {
1350 // If this is not the default destination from PSI, only the edges
1351 // in SI that occur in PSI with a destination of BB will be
1352 // activated.
1353 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1354 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1355 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1356 if (PredCases[i].Dest == BB) {
1357 PTIHandled.insert(PredCases[i].Value);
1358
1359 if (PredHasWeights || SuccHasWeights) {
1360 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1361 std::swap(Weights[i + 1], Weights.back());
1362 Weights.pop_back();
1363 }
1364
1365 std::swap(PredCases[i], PredCases.back());
1366 PredCases.pop_back();
1367 --i;
1368 --e;
1369 }
1370
1371 // Okay, now we know which constants were sent to BB from the
1372 // predecessor. Figure out where they will all go now.
1373 for (const ValueEqualityComparisonCase &Case : BBCases)
1374 if (PTIHandled.count(Case.Value)) {
1375 // If this is one we are capable of getting...
1376 if (PredHasWeights || SuccHasWeights)
1377 Weights.push_back(WeightsForHandled[Case.Value]);
1378 PredCases.push_back(Case);
1379 ++NewSuccessors[Case.Dest];
1380 PTIHandled.erase(Case.Value); // This constant is taken care of
1381 }
1382
1383 // If there are any constants vectored to BB that TI doesn't handle,
1384 // they must go to the default destination of TI.
1385 for (ConstantInt *I : PTIHandled) {
1386 if (PredHasWeights || SuccHasWeights)
1387 Weights.push_back(WeightsForHandled[I]);
1388 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1389 ++NewSuccessors[BBDefault];
1390 }
1391 }
1392
1393 // Okay, at this point, we know which new successor Pred will get. Make
1394 // sure we update the number of entries in the PHI nodes for these
1395 // successors.
1396 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1397 if (DTU) {
1398 SuccsOfPred = {llvm::from_range, successors(Pred)};
1399 Updates.reserve(Updates.size() + NewSuccessors.size());
1400 }
1401 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1402 NewSuccessors) {
1403 for (auto I : seq(NewSuccessor.second)) {
1404 (void)I;
1405 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1406 }
1407 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1408 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1409 }
1410
1411 Builder.SetInsertPoint(PTI);
1412 // Convert pointer to int before we switch.
1413 if (CV->getType()->isPointerTy()) {
1414 CV =
1415 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1416 }
1417
1418 // Now that the successors are updated, create the new Switch instruction.
1419 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1420 NewSI->setDebugLoc(PTI->getDebugLoc());
1421 for (ValueEqualityComparisonCase &V : PredCases)
1422 NewSI->addCase(V.Value, V.Dest);
1423
1424 if (PredHasWeights || SuccHasWeights) {
1425 // Halve the weights if any of them cannot fit in an uint32_t
1426 fitWeights(Weights);
1427
1428 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1429
1430 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1431 }
1432
1434
1435 // Okay, last check. If BB is still a successor of PSI, then we must
1436 // have an infinite loop case. If so, add an infinitely looping block
1437 // to handle the case to preserve the behavior of the code.
1438 BasicBlock *InfLoopBlock = nullptr;
1439 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1440 if (NewSI->getSuccessor(i) == BB) {
1441 if (!InfLoopBlock) {
1442 // Insert it at the end of the function, because it's either code,
1443 // or it won't matter if it's hot. :)
1444 InfLoopBlock =
1445 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1446 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1447 if (DTU)
1448 Updates.push_back(
1449 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1450 }
1451 NewSI->setSuccessor(i, InfLoopBlock);
1452 }
1453
1454 if (DTU) {
1455 if (InfLoopBlock)
1456 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1457
1458 Updates.push_back({DominatorTree::Delete, Pred, BB});
1459
1460 DTU->applyUpdates(Updates);
1461 }
1462
1463 ++NumFoldValueComparisonIntoPredecessors;
1464 return true;
1465}
1466
1467/// The specified terminator is a value equality comparison instruction
1468/// (either a switch or a branch on "X == c").
1469/// See if any of the predecessors of the terminator block are value comparisons
1470/// on the same value. If so, and if safe to do so, fold them together.
1471bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1472 IRBuilder<> &Builder) {
1473 BasicBlock *BB = TI->getParent();
1474 Value *CV = isValueEqualityComparison(TI); // CondVal
1475 assert(CV && "Not a comparison?");
1476
1477 bool Changed = false;
1478
1480 while (!Preds.empty()) {
1481 BasicBlock *Pred = Preds.pop_back_val();
1482 Instruction *PTI = Pred->getTerminator();
1483
1484 // Don't try to fold into itself.
1485 if (Pred == BB)
1486 continue;
1487
1488 // See if the predecessor is a comparison with the same value.
1489 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1490 if (PCV != CV)
1491 continue;
1492
1494 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1495 for (auto *Succ : FailBlocks) {
1496 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1497 return false;
1498 }
1499 }
1500
1501 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1502 Changed = true;
1503 }
1504 return Changed;
1505}
1506
1507// If we would need to insert a select that uses the value of this invoke
1508// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1509// need to do this), we can't hoist the invoke, as there is nowhere to put the
1510// select in this case.
1512 Instruction *I1, Instruction *I2) {
1513 for (BasicBlock *Succ : successors(BB1)) {
1514 for (const PHINode &PN : Succ->phis()) {
1515 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1516 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1517 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1518 return false;
1519 }
1520 }
1521 }
1522 return true;
1523}
1524
1525// Get interesting characteristics of instructions that
1526// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1527// instructions can be reordered across.
1533
1535 unsigned Flags = 0;
1536 if (I->mayReadFromMemory())
1537 Flags |= SkipReadMem;
1538 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1539 // inalloca) across stacksave/stackrestore boundaries.
1540 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1541 Flags |= SkipSideEffect;
1543 Flags |= SkipImplicitControlFlow;
1544 return Flags;
1545}
1546
1547// Returns true if it is safe to reorder an instruction across preceding
1548// instructions in a basic block.
1549static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1550 // Don't reorder a store over a load.
1551 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1552 return false;
1553
1554 // If we have seen an instruction with side effects, it's unsafe to reorder an
1555 // instruction which reads memory or itself has side effects.
1556 if ((Flags & SkipSideEffect) &&
1557 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1558 return false;
1559
1560 // Reordering across an instruction which does not necessarily transfer
1561 // control to the next instruction is speculation.
1563 return false;
1564
1565 // Hoisting of llvm.deoptimize is only legal together with the next return
1566 // instruction, which this pass is not always able to do.
1567 if (auto *CB = dyn_cast<CallBase>(I))
1568 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1569 return false;
1570
1571 // It's also unsafe/illegal to hoist an instruction above its instruction
1572 // operands
1573 BasicBlock *BB = I->getParent();
1574 for (Value *Op : I->operands()) {
1575 if (auto *J = dyn_cast<Instruction>(Op))
1576 if (J->getParent() == BB)
1577 return false;
1578 }
1579
1580 return true;
1581}
1582
1583static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1584
1585/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1586/// instructions \p I1 and \p I2 can and should be hoisted.
1588 const TargetTransformInfo &TTI) {
1589 // If we're going to hoist a call, make sure that the two instructions
1590 // we're commoning/hoisting are both marked with musttail, or neither of
1591 // them is marked as such. Otherwise, we might end up in a situation where
1592 // we hoist from a block where the terminator is a `ret` to a block where
1593 // the terminator is a `br`, and `musttail` calls expect to be followed by
1594 // a return.
1595 auto *C1 = dyn_cast<CallInst>(I1);
1596 auto *C2 = dyn_cast<CallInst>(I2);
1597 if (C1 && C2)
1598 if (C1->isMustTailCall() != C2->isMustTailCall())
1599 return false;
1600
1602 return false;
1603
1604 // If any of the two call sites has nomerge or convergent attribute, stop
1605 // hoisting.
1606 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1607 if (CB1->cannotMerge() || CB1->isConvergent())
1608 return false;
1609 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1610 if (CB2->cannotMerge() || CB2->isConvergent())
1611 return false;
1612
1613 return true;
1614}
1615
1616/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1617/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1618/// hoistCommonCodeFromSuccessors. e.g. The input:
1619/// I1 DVRs: { x, z },
1620/// OtherInsts: { I2 DVRs: { x, y, z } }
1621/// would result in hoisting only DbgVariableRecord x.
1623 Instruction *TI, Instruction *I1,
1624 SmallVectorImpl<Instruction *> &OtherInsts) {
1625 if (!I1->hasDbgRecords())
1626 return;
1627 using CurrentAndEndIt =
1628 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1629 // Vector of {Current, End} iterators.
1631 Itrs.reserve(OtherInsts.size() + 1);
1632 // Helper lambdas for lock-step checks:
1633 // Return true if this Current == End.
1634 auto atEnd = [](const CurrentAndEndIt &Pair) {
1635 return Pair.first == Pair.second;
1636 };
1637 // Return true if all Current are identical.
1638 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1639 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1641 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1642 });
1643 };
1644
1645 // Collect the iterators.
1646 Itrs.push_back(
1647 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1648 for (Instruction *Other : OtherInsts) {
1649 if (!Other->hasDbgRecords())
1650 return;
1651 Itrs.push_back(
1652 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1653 }
1654
1655 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1656 // the lock-step DbgRecord are identical, hoist all of them to TI.
1657 // This replicates the dbg.* intrinsic behaviour in
1658 // hoistCommonCodeFromSuccessors.
1659 while (none_of(Itrs, atEnd)) {
1660 bool HoistDVRs = allIdentical(Itrs);
1661 for (CurrentAndEndIt &Pair : Itrs) {
1662 // Increment Current iterator now as we may be about to move the
1663 // DbgRecord.
1664 DbgRecord &DR = *Pair.first++;
1665 if (HoistDVRs) {
1666 DR.removeFromParent();
1667 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1668 }
1669 }
1670 }
1671}
1672
1674 const Instruction *I2) {
1675 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1676 return true;
1677
1678 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1679 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1680 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1681 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1682 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1683
1684 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1685 return I1->getOperand(0) == I2->getOperand(1) &&
1686 I1->getOperand(1) == I2->getOperand(0) &&
1687 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1688 }
1689
1690 return false;
1691}
1692
1693/// If the target supports conditional faulting,
1694/// we look for the following pattern:
1695/// \code
1696/// BB:
1697/// ...
1698/// %cond = icmp ult %x, %y
1699/// br i1 %cond, label %TrueBB, label %FalseBB
1700/// FalseBB:
1701/// store i32 1, ptr %q, align 4
1702/// ...
1703/// TrueBB:
1704/// %maskedloadstore = load i32, ptr %b, align 4
1705/// store i32 %maskedloadstore, ptr %p, align 4
1706/// ...
1707/// \endcode
1708///
1709/// and transform it into:
1710///
1711/// \code
1712/// BB:
1713/// ...
1714/// %cond = icmp ult %x, %y
1715/// %maskedloadstore = cload i32, ptr %b, %cond
1716/// cstore i32 %maskedloadstore, ptr %p, %cond
1717/// cstore i32 1, ptr %q, ~%cond
1718/// br i1 %cond, label %TrueBB, label %FalseBB
1719/// FalseBB:
1720/// ...
1721/// TrueBB:
1722/// ...
1723/// \endcode
1724///
1725/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1726/// e.g.
1727///
1728/// \code
1729/// %vcond = bitcast i1 %cond to <1 x i1>
1730/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1731/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1732/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1733/// call void @llvm.masked.store.v1i32.p0
1734/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1735/// %cond.not = xor i1 %cond, true
1736/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1737/// call void @llvm.masked.store.v1i32.p0
1738/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1739/// \endcode
1740///
1741/// So we need to turn hoisted load/store into cload/cstore.
1742///
1743/// \param BI The branch instruction.
1744/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1745/// will be speculated.
1746/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
    BranchInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  // The masked load/store intrinsics take a <1 x i1> mask for scalar accesses.
  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
  auto *Cond = BI->getOperand(0);
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // Triangle CFG: a single (possibly negated) mask covers every hoisted
    // access. Insert it before the select (if any) so it dominates all uses.
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
        VCondTy);
  } else {
    // Diamond CFG: build both masks up front; each hoisted instruction picks
    // the mask matching the successor it came from (see loop below).
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
  }
  // Strip any chain of bitcasts to reach the underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(V))
      V = BitCast->getOperand(0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          // If the load feeds a PHI in the merge block, the value that flowed
          // in from BB becomes the pass-through operand of the masked load.
          if ((PN = dyn_cast<PHINode>(U))) {
            PassThru = Builder.CreateBitCast(
                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                FixedVectorType::get(Ty, 1));
          } else if (auto *Ins = cast<Instruction>(U);
                     Sel && Ins->getParent() == BB) {
            // This happens when a store and/or a speculative instruction
            // between the load and store were hoisted to the BB. Make sure the
            // masked load is inserted before its use.
            // We assume there's exactly one such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
      // Cast the <1 x Ty> result back to the scalar type the old users expect.
      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
      if (PN)
        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
      I->replaceAllUsesWith(NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not supported for pointer types, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Does not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(*I);
    I->eraseFromParent();
  }
}
1839
    const TargetTransformInfo &TTI) {
  // Not handling volatile or atomic accesses: only simple (non-volatile,
  // non-atomic, unordered) loads/stores are candidates, and each kind is
  // additionally gated by its own conditional-faulting knob.
  bool IsStore = false;
  if (auto *L = dyn_cast<LoadInst>(I)) {
    if (!L->isSimple() || !HoistLoadsWithCondFaulting)
      return false;
  } else if (auto *S = dyn_cast<StoreInst>(I)) {
    if (!S->isSimple() || !HoistStoresWithCondFaulting)
      return false;
    IsStore = true;
  } else
    // Anything other than a plain load/store is not a candidate.
    return false;

  // llvm.masked.load/store use i32 for alignment while load/store use i64.
  // That's why we have the alignment limitation.
  // FIXME: Update the prototype of the intrinsics?
}
1860
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  for (auto *Succ : successors(BB))
    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
      return false;

  // Track one iterator per successor; the second of the pair is a SkipFlags
  // bitmask recording characteristics of instructions we skipped over.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // PHIs in a successor mean its value depends on the edge taken; bail out.
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.
    // Check if sizes and terminators of all successors match.
    bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
      Instruction *Term0 = Succs[0]->getTerminator();
      Instruction *Term = Succ->getTerminator();
      return !Term->isSameOperationAs(Term0) ||
             !equal(Term->operands(), Term0->operands()) ||
             Succs[0]->size() != Succ->size();
    });
    if (!AllSame)
      return false;
    if (AllSame) {
      // Walk all successors in lockstep from the back, requiring every
      // instruction "row" to be identical (up to commutativity).
      while (LRI.isValid()) {
        Instruction *I0 = (*LRI)[0];
        if (any_of(*LRI, [I0](Instruction *I) {
              return !areIdenticalUpToCommutativity(I0, I);
            })) {
          return false;
        }
        --LRI;
      }
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of live ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  // Main lockstep scan: advance through all successors one instruction at a
  // time, hoisting identical rows and skipping (up to a limit) mismatches.
  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      // Treat MMRA metadata conservatively: differing MMRAs block hoisting.
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(I1);
        // Merge wrapping/fast-math flags so the survivor is valid for all paths.
        I1->andIRFlags(I2);
        if (auto *CB = dyn_cast<CallBase>(I1)) {
          bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(I1, I2, true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
        I2->eraseFromParent();
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2046
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs) {

  // Non-null only when TI is a conditional branch (the two-successor case).
  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  // Scan PHIs in the successors of BB1: hoisting is only possible if each
  // disagreeing PHI entry can later be materialized as a select (below).
  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow than converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    // All the original terminators produced a value; route every use to the
    // hoisted clone.
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache one select per distinct (BB1V, BB2V) pair so equal disagreements
    // share a single select instruction.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          SI = cast<SelectInst>(Builder.CreateSelectFMF(
              BI->getCondition(), BB1V, BB2V,
              isa<FPMathOperator>(PN) ? &PN : nullptr,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    addPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(TI))
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});

  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2172
// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
// into variables.
    int OpIdx) {
  // Divide/Remainder by constant is typically much cheaper than by variable.
  if (I->isIntDivRem())
    return OpIdx != 1;
  // Conservatively refuse for intrinsics: their operands are often required
  // to be immediates (and see the memcpy-size TODO above).
  return !isa<IntrinsicInst>(I);
}
2182
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  // All instructions must be the same operation (intersecting attributes for
  // calls) as the representative first instruction.
  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(&U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(Insts, It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // Check each operand position: either all instructions agree on the operand,
  // or the varying values are recorded in PHIOperands for later PHI creation.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
        // We can't create a PHI from this GEP.
        return false;
      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
      for (auto *I : Insts)
        Ops.push_back(I->getOperand(OI));
    }
  }
  return true;
}
2288
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    // The instruction to sink is the one just before each block's terminator.
    I = I->getPrevNode();
    Insts.push_back(I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
      if (auto *CB = dyn_cast<CallBase>(I0)) {
        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(U);
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }
}
2383
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
    DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //    [f(1)] [if]
  //      |     | \
  //      |     |  |
  //      |  [f(2)]|
  //       \    | /
  //        [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //       [if]
  //      /    \
  //    [x(1)] [if]
  //      |     | \
  //      |     |  \
  //      |  [x(2)] |
  //       \    /   |
  //   [sink.split] |
  //         \     /
  //         [ end ]
  //
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (PredBr && PredBr->isUnconditional())
      UnconditionalPreds.push_back(PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  // Sinking only commons code among two or more predecessors.
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of each
  // block can be sunk, those instructions are added to ValuesToSink and we
  // carry on. If we can sink an instruction but need to PHI-merge some operands
  // (because they're not identical in each instruction) we add these to
  // PHIOperands.
  // We prepopulate PHIOperands with the phis that already exist in BB.
  for (PHINode &PN : BB->phis()) {
    for (const Use &U : PN.incoming_values())
      IncomingVals.insert({PN.getIncomingBlock(U), &U});
    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
    for (BasicBlock *Pred : UnconditionalPreds)
      Ops.push_back(*IncomingVals[Pred]);
  }

  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  LockstepReverseIterator<true> LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(*LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert_range(*LRI);
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  // If the merge block is followed by deopt/unreachable, profitability checks
  // below are skipped: code size is all that matters on such cold paths.
  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Check whether this is the pointer operand of a load/store.
    auto IsMemOperand = [](Use &U) {
      auto *I = cast<Instruction>(U.getUser());
      if (isa<LoadInst>(I))
        return U.getOperandNo() == LoadInst::getPointerOperandIndex();
      if (isa<StoreInst>(I))
        return U.getOperandNo() == StoreInst::getPointerOperandIndex();
      return false;
    };

    // Okay, we *could* sink last ScanIdx instructions. But how many can we
    // actually sink before encountering instruction that is unprofitable to
    // sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
      unsigned NumPHIInsts = 0;
      for (Use &U : (*LRI)[0]->operands()) {
        auto It = PHIOperands.find(&U);
        if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
              return InstructionsToSink.contains(V);
            })) {
          ++NumPHIInsts;
          // Do not separate a load/store from the gep producing the address.
          // The gep can likely be folded into the load/store as an addressing
          // mode. Additionally, a load of a gep is easier to analyze than a
          // load of a phi.
          if (IsMemOperand(U) &&
              any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
            return false;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      }
      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink last ScanIdx instructions,
    // and recorded them in InstructionsToSink. Now, some instructions may be
    // unprofitable to sink. But that determination depends on the instructions
    // that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable.
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert_range(*LRI);
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan, do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that will
  // actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop half way through
  // and never actually sink it which means we produce more PHIs than intended.
  // This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    sinkLastInstruction(UnconditionalPreds);
    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
2643
2644namespace {
2645
// Groups `invoke` instructions that unwind to a common `landingpad` into
// sets of mutually-mergeable invokes, as decided by shouldBelongToSameSet().
2646struct CompatibleSets {
2647 using SetTy = SmallVector<InvokeInst *, 2>;
2648
 2650
 // NOTE(review): the declaration of the container of sets (named `Sets`,
 // judging by getCompatibleSet() and mergeCompatibleInvokes() below) was
 // lost in this extract — confirm against the original source.

 // Decide whether two invokes may belong to the same merge set. Always
 // called with exactly two candidates (asserted in the definition).
2651 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2652
 // Return the existing set `II` is compatible with, or a fresh empty set.
2653 SetTy &getCompatibleSet(InvokeInst *II);
2654
 // Append `II` to its compatible set (creating one if needed).
2655 void insert(InvokeInst *II);
2656};
2657
2658CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2659 // Perform a linear scan over all the existing sets, see if the new `invoke`
2660 // is compatible with any particular set. Since we know that all the `invokes`
2661 // within a set are compatible, only check the first `invoke` in each set.
2662 // WARNING: at worst, this has quadratic complexity.
2663 for (CompatibleSets::SetTy &Set : Sets) {
2664 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2665 return Set;
2666 }
2667
2668 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2669 return Sets.emplace_back();
2670}
2671
2672void CompatibleSets::insert(InvokeInst *II) {
2673 getCompatibleSet(II).emplace_back(II);
2674}
2675
// Returns true iff the two candidate `invoke`s (and only ever two — see the
// asserts) may legally and profitably be merged into a single `invoke`.
2676bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2677 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2678
2679 // Can we theoretically merge these `invoke`s?
2680 auto IsIllegalToMerge = [](InvokeInst *II) {
2681 return II->cannotMerge() || II->isInlineAsm();
2682 };
2683 if (any_of(Invokes, IsIllegalToMerge))
2684 return false;
2685
2686 // Either both `invoke`s must be direct,
2687 // or both `invoke`s must be indirect.
2688 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2689 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2690 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2691 if (HaveIndirectCalls) {
2692 if (!AllCallsAreIndirect)
2693 return false;
2694 } else {
2695 // All callees must be identical.
2696 Value *Callee = nullptr;
2697 for (InvokeInst *II : Invokes) {
2698 Value *CurrCallee = II->getCalledOperand();
2699 assert(CurrCallee && "There is always a called operand.");
2700 if (!Callee)
2701 Callee = CurrCallee;
2702 else if (Callee != CurrCallee)
2703 return false;
2704 }
2705 }
2706
2707 // Either both `invoke`s must not have a normal destination,
2708 // or both `invoke`s must have a normal destination,
2709 auto HasNormalDest = [](InvokeInst *II) {
2710 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2711 };
2712 if (any_of(Invokes, HasNormalDest)) {
2713 // Do not merge `invoke` that does not have a normal destination with one
2714 // that does have a normal destination, even though doing so would be legal.
2715 if (!all_of(Invokes, HasNormalDest))
2716 return false;
2717
2718 // All normal destinations must be identical.
2719 BasicBlock *NormalBB = nullptr;
2720 for (InvokeInst *II : Invokes) {
2721 BasicBlock *CurrNormalBB = II->getNormalDest();
2722 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2723 if (!NormalBB)
2724 NormalBB = CurrNormalBB;
2725 else if (NormalBB != CurrNormalBB)
2726 return false;
2727 }
2728
2729 // In the normal destination, the incoming values for these two `invoke`s
2730 // must be compatible.
2731 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
 // NOTE(review): the callee name of this call was lost in the extract
 // (presumably a PHI-incoming-value compatibility helper such as
 // `incomingValuesAreCompatible(`) — confirm against the original source.
2733 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2734 &EquivalenceSet))
2735 return false;
2736 }
2737
2738#ifndef NDEBUG
2739 // All unwind destinations must be identical.
2740 // We know that because we have started from said unwind destination.
2741 BasicBlock *UnwindBB = nullptr;
2742 for (InvokeInst *II : Invokes) {
2743 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2744 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2745 if (!UnwindBB)
2746 UnwindBB = CurrUnwindBB;
2747 else
2748 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2749 }
2750#endif
2751
2752 // In the unwind destination, the incoming values for these two `invoke`s
2753 // must be compatible.
 // NOTE(review): the callee name of this call was also lost in the extract
 // (same compatibility helper as above) — confirm against the original.
2755 Invokes.front()->getUnwindDest(),
2756 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2757 return false;
2758
2759 // Ignoring arguments, these `invoke`s must be identical,
2760 // including operand bundles.
2761 const InvokeInst *II0 = Invokes.front();
2762 for (auto *II : Invokes.drop_front())
2763 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2764 return false;
2765
2766 // Can we theoretically form the data operands for the merged `invoke`?
2767 auto IsIllegalToMergeArguments = [](auto Ops) {
2768 Use &U0 = std::get<0>(Ops);
2769 Use &U1 = std::get<1>(Ops);
 // Identical operands never need a PHI, so they are always mergeable.
2770 if (U0 == U1)
2771 return false;
 // Differing operands would need a PHI; only legal if the operand slot
 // tolerates a variable (non-constant) value.
2772 return !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2773 U0.getOperandNo());
2774 };
2775 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2776 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2777 IsIllegalToMergeArguments))
2778 return false;
2779
2780 return true;
2781}
2782
2783} // namespace
2784
2785// Merge all invokes in the provided set, all of which are compatible
2786// as per the `CompatibleSets::shouldBelongToSameSet()`.
 // NOTE(review): the first line of this function's signature was lost in the
 // extract — presumably `static void mergeCompatibleInvokesImpl(
 // ArrayRef<InvokeInst *> Invokes,` — confirm against the original source.
2788 DomTreeUpdater *DTU) {
2789 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2790
 // NOTE(review): the declaration of `Updates` (a DominatorTree update list,
 // given its uses below) was lost in the extract — confirm.
2792 if (DTU)
2793 Updates.reserve(2 + 3 * Invokes.size());
2794
2795 bool HasNormalDest =
2796 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2797
2798 // Clone one of the invokes into a new basic block.
2799 // Since they are all compatible, it doesn't matter which invoke is cloned.
2800 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2801 InvokeInst *II0 = Invokes.front();
2802 BasicBlock *II0BB = II0->getParent();
2803 BasicBlock *InsertBeforeBlock =
2804 II0->getParent()->getIterator()->getNextNode();
2805 Function *Func = II0BB->getParent();
2806 LLVMContext &Ctx = II0->getContext();
2807
2808 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2809 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2810
2811 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2812 // NOTE: all invokes have the same attributes, so no handling needed.
2813 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2814
2815 if (!HasNormalDest) {
2816 // This set does not have a normal destination,
2817 // so just form a new block with unreachable terminator.
2818 BasicBlock *MergedNormalDest = BasicBlock::Create(
2819 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2820 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2821 UI->setDebugLoc(DebugLoc::getTemporary());
2822 MergedInvoke->setNormalDest(MergedNormalDest);
2823 }
2824
 // The unwind destination, however, remains identical for all invokes here.
2826
2827 return MergedInvoke;
2828 }();
2829
2830 if (DTU) {
2831 // Predecessor blocks that contained these invokes will now branch to
2832 // the new block that contains the merged invoke, ...
2833 for (InvokeInst *II : Invokes)
2834 Updates.push_back(
2835 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2836
2837 // ... which has the new `unreachable` block as normal destination,
2838 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2839 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2840 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2841 SuccBBOfMergedInvoke});
2842
2843 // Since predecessor blocks now unconditionally branch to a new block,
2844 // they no longer branch to their original successors.
2845 for (InvokeInst *II : Invokes)
2846 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2847 Updates.push_back(
2848 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2849 }
2850
2851 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2852
2853 // Form the merged operands for the merged invoke.
2854 for (Use &U : MergedInvoke->operands()) {
2855 // Only PHI together the indirect callees and data operands.
2856 if (MergedInvoke->isCallee(&U)) {
2857 if (!IsIndirectCall)
2858 continue;
2859 } else if (!MergedInvoke->isDataOperand(&U))
2860 continue;
2861
2862 // Don't create trivial PHI's with all-identical incoming values.
2863 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2864 return II->getOperand(U.getOperandNo()) != U.get();
2865 });
2866 if (!NeedPHI)
2867 continue;
2868
2869 // Form a PHI out of all the data ops under this index.
 // NOTE(review): the first line of this statement (presumably
 // `PHINode *PN = PHINode::Create(`) was lost in the extract — confirm.
2871 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2872 for (InvokeInst *II : Invokes)
2873 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2874
2875 U.set(PN);
2876 }
2877
2878 // We've ensured that each PHI node has compatible (identical) incoming values
2879 // when coming from each of the `invoke`s in the current merge set,
2880 // so update the PHI nodes accordingly.
2881 for (BasicBlock *Succ : successors(MergedInvoke))
2882 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2883 /*ExistPred=*/Invokes.front()->getParent());
2884
2885 // And finally, replace the original `invoke`s with an unconditional branch
2886 // to the block with the merged `invoke`. Also, give that merged `invoke`
2887 // the merged debugloc of all the original `invoke`s.
2888 DILocation *MergedDebugLoc = nullptr;
2889 for (InvokeInst *II : Invokes) {
2890 // Compute the debug location common to all the original `invoke`s.
2891 if (!MergedDebugLoc)
2892 MergedDebugLoc = II->getDebugLoc();
2893 else
2894 MergedDebugLoc =
2895 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2896
 // And replace the old `invoke` with an unconditional branch
2898 // to the block with the merged `invoke`.
2899 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2900 OrigSuccBB->removePredecessor(II->getParent());
2901 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2902 // The unconditional branch is part of the replacement for the original
2903 // invoke, so should use its DebugLoc.
2904 BI->setDebugLoc(II->getDebugLoc());
2905 bool Success = MergedInvoke->tryIntersectAttributes(II);
2906 assert(Success && "Merged invokes with incompatible attributes");
2907 // For NDEBUG Compile
2908 (void)Success;
2909 II->replaceAllUsesWith(MergedInvoke);
2910 II->eraseFromParent();
2911 ++NumInvokesMerged;
2912 }
2913 MergedInvoke->setDebugLoc(MergedDebugLoc);
2914 ++NumInvokeSetsFormed;
2915
2916 if (DTU)
2917 DTU->applyUpdates(Updates);
2918}
2919
2920/// If this block is a `landingpad` exception handling block, categorize all
2921/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2922/// being "mergeable" together, and then merge invokes in each set together.
2923///
2924/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2925/// [...] [...]
2926/// | |
2927/// [invoke0] [invoke1]
2928/// / \ / \
2929/// [cont0] [landingpad] [cont1]
2930/// to:
2931/// [...] [...]
2932/// \ /
2933/// [invoke]
2934/// / \
2935/// [cont] [landingpad]
2936///
2937/// But of course we can only do that if the invokes share the `landingpad`,
2938/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2939/// and the invoked functions are "compatible".
 // NOTE(review): the signature of this function (presumably
 // `static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {`
 // guarded by an early bail-out on an enable flag) was lost in the extract —
 // confirm against the original source.
2942 return false;
2943
2944 bool Changed = false;
2945
2946 // FIXME: generalize to all exception handling blocks?
2947 if (!BB->isLandingPad())
2948 return Changed;
2949
2950 CompatibleSets Grouper;
2951
2952 // Record all the predecessors of this `landingpad`. As per verifier,
2953 // the only allowed predecessor is the unwind edge of an `invoke`.
2954 // We want to group "compatible" `invokes` into the same set to be merged.
2955 for (BasicBlock *PredBB : predecessors(BB))
2956 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2957
 // And now, merge `invoke`s that were grouped together.
2959 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
 // A singleton set has nothing to merge with.
2960 if (Invokes.size() < 2)
2961 continue;
2962 Changed = true;
2963 mergeCompatibleInvokesImpl(Invokes, DTU);
2964 }
2965
2966 return Changed;
2967}
2968
2969namespace {
2970/// Track ephemeral values, which should be ignored for cost-modelling
2971/// purposes. Requires walking instructions in reverse order.
2972class EphemeralValueTracker {
 // NOTE(review): the declaration of the `EphValues` member (a set of
 // `const Instruction *`, given its uses below) was lost in the extract —
 // confirm against the original source.
 2974
 // An instruction is ephemeral if it is an llvm.assume, or if it is
 // side-effect-free, non-terminator, and feeds only already-known-ephemeral
 // users (which works because callers walk the block bottom-up).
2975 bool isEphemeral(const Instruction *I) {
2976 if (isa<AssumeInst>(I))
2977 return true;
2978 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2979 all_of(I->users(), [&](const User *U) {
2980 return EphValues.count(cast<Instruction>(U));
2981 });
2982 }
2983
2984public:
 // Record `I` if it is ephemeral; returns true iff it was recorded.
2985 bool track(const Instruction *I) {
2986 if (isEphemeral(I)) {
2987 EphValues.insert(I);
2988 return true;
2989 }
2990 return false;
2991 }
2992
 // Whether `I` was previously recorded as ephemeral by track().
2993 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2994};
2995} // namespace
2996
2997/// Determine if we can hoist or sink a sole store instruction out of a
2998/// conditional block.
2999///
3000/// We are looking for code like the following:
3001/// BrBB:
3002/// store i32 %add, i32* %arrayidx2
3003/// ... // No other stores or function calls (we could be calling a memory
3004/// ... // function).
3005/// %cmp = icmp ult %x, %y
3006/// br i1 %cmp, label %EndBB, label %ThenBB
3007/// ThenBB:
3008/// store i32 %add5, i32* %arrayidx2
3009/// br label EndBB
3010/// EndBB:
3011/// ...
3012/// We are going to transform this into:
3013/// BrBB:
3014/// store i32 %add, i32* %arrayidx2
3015/// ... //
3016/// %cmp = icmp ult %x, %y
3017/// %add.add5 = select i1 %cmp, i32 %add, %add5
3018/// store i32 %add.add5, i32* %arrayidx2
3019/// ...
3020///
3021/// \return The pointer to the value of the previous store if the store can be
3022/// hoisted into the predecessor block. 0 otherwise.
 // NOTE(review): the first line of this function's signature (presumably
 // `static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,`)
 // was lost in the extract — confirm against the original source.
3024 BasicBlock *StoreBB, BasicBlock *EndBB) {
3025 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3026 if (!StoreToHoist)
3027 return nullptr;
3028
3029 // Volatile or atomic.
3030 if (!StoreToHoist->isSimple())
3031 return nullptr;
3032
3033 Value *StorePtr = StoreToHoist->getPointerOperand();
3034 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3035
3036 // Look for a store to the same pointer in BrBB.
3037 unsigned MaxNumInstToLookAt = 9;
3038 // Skip pseudo probe intrinsic calls which are not really killing any memory
3039 // accesses.
3040 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3041 if (!MaxNumInstToLookAt)
3042 break;
3043 --MaxNumInstToLookAt;
3044
3045 // Could be calling an instruction that affects memory like free().
3046 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3047 return nullptr;
3048
3049 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3050 // Found the previous store to same location and type. Make sure it is
3051 // simple, to avoid introducing a spurious non-atomic write after an
3052 // atomic write.
3053 if (SI->getPointerOperand() == StorePtr &&
3054 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3055 SI->getAlign() >= StoreToHoist->getAlign())
3056 // Found the previous store, return its value operand.
3057 return SI->getValueOperand();
3058 return nullptr; // Unknown store.
3059 }
3060
3061 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
 // A prior load from the same location can also justify the
 // speculation: the memory is known dereferenceable and writable.
3062 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3063 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3064 Value *Obj = getUnderlyingObject(StorePtr);
3065 bool ExplicitlyDereferenceableOnly;
3066 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
 // NOTE(review): the first line of this capture check (presumably a
 // `capturesNothing(`-style wrapper) was lost in the extract — confirm.
3068 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3069 CaptureComponents::Provenance)) &&
3070 (!ExplicitlyDereferenceableOnly ||
3071 isDereferenceablePointer(StorePtr, StoreTy,
3072 LI->getDataLayout()))) {
3073 // Found a previous load, return it.
3074 return LI;
3075 }
3076 }
3077 // The load didn't work out, but we may still find a store.
3078 }
3079 }
3080
3081 return nullptr;
3082}
3083
3084/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3085/// converted to selects.
 // NOTE(review): the first signature line (presumably
 // `static bool validateAndCostRequiredSelects(BasicBlock *BB,
 // BasicBlock *ThenBB,`) and an `InstructionCost &Cost` parameter line were
 // lost in the extract — confirm against the original source.
3087 BasicBlock *EndBB,
3088 unsigned &SpeculatedInstructions,
3090 const TargetTransformInfo &TTI) {
 // NOTE(review): the cost-kind selection based on hasMinSize() lost its
 // surrounding lines in the extract — confirm.
3092 BB->getParent()->hasMinSize()
3095
3096 bool HaveRewritablePHIs = false;
3097 for (PHINode &PN : EndBB->phis()) {
3098 Value *OrigV = PN.getIncomingValueForBlock(BB);
3099 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3100
3101 // FIXME: Try to remove some of the duplication with
3102 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3103 if (ThenV == OrigV)
3104 continue;
3105
3106 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3109
3110 // Don't convert to selects if we could remove undefined behavior instead.
3111 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
 // NOTE(review): the second operand of this disjunction (presumably the
 // symmetric check on ThenV) was lost in the extract — confirm.
3113 return false;
3114
3115 HaveRewritablePHIs = true;
3116 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3117 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3118 if (!OrigCE && !ThenCE)
3119 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3120
3121 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3122 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3123 InstructionCost MaxCost =
3125 if (OrigCost + ThenCost > MaxCost)
3126 return false;
3127
3128 // Account for the cost of an unfolded ConstantExpr which could end up
3129 // getting expanded into Instructions.
3130 // FIXME: This doesn't account for how many operations are combined in the
3131 // constant expression.
3132 ++SpeculatedInstructions;
3133 if (SpeculatedInstructions > 1)
3134 return false;
3135 }
3136
3137 return HaveRewritablePHIs;
3138}
3139
 // Heuristically decide whether speculating the `then` block of \p BI is
 // worthwhile, based on !unpredictable metadata and branch weights.
 // NOTE(review): the first signature line (presumably
 // `static bool isProfitableToSpeculate(const BranchInst *BI,`) was lost in
 // the extract — confirm against the original source.
3141 std::optional<bool> Invert,
3142 const TargetTransformInfo &TTI) {
3143 // If the branch is non-unpredictable, and is predicted to *not* branch to
3144 // the `then` block, then avoid speculating it.
3145 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3146 return true;
3147
 // Without usable branch weights we cannot conclude unprofitability.
3148 uint64_t TWeight, FWeight;
3149 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3150 return true;
3151
3152 if (!Invert.has_value())
3153 return false;
3154
 // Probability of taking the edge that bypasses the `then` block.
3155 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3156 BranchProbability BIEndProb =
3157 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
 // NOTE(review): the definition of `Likely` (a BranchProbability threshold)
 // was lost in the extract — confirm against the original source.
3159 return BIEndProb < Likely;
3160}
3161
3162/// Speculate a conditional basic block flattening the CFG.
3163///
3164/// Note that this is a very risky transform currently. Speculating
3165/// instructions like this is most often not desirable. Instead, there is an MI
3166/// pass which can do it with full awareness of the resource constraints.
3167/// However, some cases are "obvious" and we should do directly. An example of
3168/// this is speculating a single, reasonably cheap instruction.
3169///
3170/// There is only one distinct advantage to flattening the CFG at the IR level:
3171/// it makes very common but simplistic optimizations such as are common in
3172/// instcombine and the DAG combiner more powerful by removing CFG edges and
3173/// modeling their effects with easier to reason about SSA value graphs.
3174///
3175///
3176/// An illustration of this transform is turning this IR:
3177/// \code
3178/// BB:
3179/// %cmp = icmp ult %x, %y
3180/// br i1 %cmp, label %EndBB, label %ThenBB
3181/// ThenBB:
3182/// %sub = sub %x, %y
3183/// br label BB2
3184/// EndBB:
3185/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3186/// ...
3187/// \endcode
3188///
3189/// Into this IR:
3190/// \code
3191/// BB:
3192/// %cmp = icmp ult %x, %y
3193/// %sub = sub %x, %y
3194/// %cond = select i1 %cmp, 0, %sub
3195/// ...
3196/// \endcode
3197///
3198/// \returns true if the conditional block is removed.
3199bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3200 BasicBlock *ThenBB) {
3201 if (!Options.SpeculateBlocks)
3202 return false;
3203
3204 // Be conservative for now. FP select instruction can often be expensive.
3205 Value *BrCond = BI->getCondition();
3206 if (isa<FCmpInst>(BrCond))
3207 return false;
3208
3209 BasicBlock *BB = BI->getParent();
3210 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3211 InstructionCost Budget =
 // NOTE(review): the initializer of `Budget` (presumably a folding
 // threshold scaled by a TTI cost) was lost in the extract — confirm.
3213
3214 // If ThenBB is actually on the false edge of the conditional branch, remember
3215 // to swap the select operands later.
3216 bool Invert = false;
3217 if (ThenBB != BI->getSuccessor(0)) {
3218 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3219 Invert = true;
3220 }
3221 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3222
3223 if (!isProfitableToSpeculate(BI, Invert, TTI))
3224 return false;
3225
3226 // Keep a count of how many times instructions are used within ThenBB when
3227 // they are candidates for sinking into ThenBB. Specifically:
3228 // - They are defined in BB, and
3229 // - They have no side effects, and
3230 // - All of their uses are in ThenBB.
3231 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3232
3233 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3234
3235 unsigned SpeculatedInstructions = 0;
3236 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3237 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3238 Value *SpeculatedStoreValue = nullptr;
3239 StoreInst *SpeculatedStore = nullptr;
3240 EphemeralValueTracker EphTracker;
3241 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3242 // Skip pseudo probes. The consequence is we lose track of the branch
3243 // probability for ThenBB, which is fine since the optimization here takes
3244 // place regardless of the branch probability.
3245 if (isa<PseudoProbeInst>(I)) {
3246 // The probe should be deleted so that it will not be over-counted when
3247 // the samples collected on the non-conditional path are counted towards
3248 // the conditional path. We leave it for the counts inference algorithm to
3249 // figure out a proper count for an unknown probe.
3250 SpeculatedPseudoProbes.push_back(&I);
3251 continue;
3252 }
3253
3254 // Ignore ephemeral values, they will be dropped by the transform.
3255 if (EphTracker.track(&I))
3256 continue;
3257
3258 // Only speculatively execute a single instruction (not counting the
3259 // terminator) for now.
3260 bool IsSafeCheapLoadStore = HoistLoadsStores &&
 // NOTE(review): parts of this condition (a per-instruction safety check
 // and the load/store count threshold) were lost in the extract — confirm.
3262 SpeculatedConditionalLoadsStores.size() <
3264 // Not count load/store into cost if target supports conditional faulting
3265 // b/c it's cheap to speculate it.
3266 if (IsSafeCheapLoadStore)
3267 SpeculatedConditionalLoadsStores.push_back(&I);
3268 else
3269 ++SpeculatedInstructions;
3270
3271 if (SpeculatedInstructions > 1)
3272 return false;
3273
3274 // Don't hoist the instruction if it's unsafe or expensive.
3275 if (!IsSafeCheapLoadStore &&
 // NOTE(review): a speculation-safety operand of this condition was lost
 // in the extract — confirm against the original source.
3277 !(HoistCondStores && !SpeculatedStoreValue &&
3278 (SpeculatedStoreValue =
3279 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3280 return false;
3281 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
 // NOTE(review): the cost-comparison operand of this condition was lost
 // in the extract — confirm against the original source.
3284 return false;
3285
3286 // Store the store speculation candidate.
3287 if (!SpeculatedStore && SpeculatedStoreValue)
3288 SpeculatedStore = cast<StoreInst>(&I);
3289
3290 // Do not hoist the instruction if any of its operands are defined but not
3291 // used in BB. The transformation will prevent the operand from
3292 // being sunk into the use block.
3293 for (Use &Op : I.operands()) {
3294 Instruction *OpI = dyn_cast<Instruction>(Op);
3295 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3296 continue; // Not a candidate for sinking.
3297
3298 ++SinkCandidateUseCounts[OpI];
3299 }
3300 }
3301
3302 // Consider any sink candidates which are only used in ThenBB as costs for
3303 // speculation. Note, while we iterate over a DenseMap here, we are summing
3304 // and so iteration order isn't significant.
3305 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3306 if (Inst->hasNUses(Count)) {
3307 ++SpeculatedInstructions;
3308 if (SpeculatedInstructions > 1)
3309 return false;
3310 }
3311
3312 // Check that we can insert the selects and that it's not too expensive to do
3313 // so.
3314 bool Convert =
3315 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
 // NOTE(review): the declaration of `Cost` (an InstructionCost accumulator)
 // was lost in the extract — confirm against the original source.
3317 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3318 SpeculatedInstructions, Cost, TTI);
3319 if (!Convert || Cost > Budget)
3320 return false;
3321
3322 // If we get here, we can hoist the instruction and if-convert.
3323 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3324
3325 Instruction *Sel = nullptr;
3326 // Insert a select of the value of the speculated store.
3327 if (SpeculatedStoreValue) {
3328 IRBuilder<NoFolder> Builder(BI);
3329 Value *OrigV = SpeculatedStore->getValueOperand();
3330 Value *TrueV = SpeculatedStore->getValueOperand();
3331 Value *FalseV = SpeculatedStoreValue;
3332 if (Invert)
3333 std::swap(TrueV, FalseV);
3334 Value *S = Builder.CreateSelect(
3335 BrCond, TrueV, FalseV, "spec.store.select", BI);
3336 Sel = cast<Instruction>(S);
3337 SpeculatedStore->setOperand(0, S);
3338 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3339 SpeculatedStore->getDebugLoc());
3340 // The value stored is still conditional, but the store itself is now
 // unconditionally executed, so we must be sure that any linked dbg.assign
3342 // intrinsics are tracking the new stored value (the result of the
3343 // select). If we don't, and the store were to be removed by another pass
3344 // (e.g. DSE), then we'd eventually end up emitting a location describing
3345 // the conditional value, unconditionally.
3346 //
3347 // === Before this transformation ===
3348 // pred:
3349 // store %one, %x.dest, !DIAssignID !1
3350 // dbg.assign %one, "x", ..., !1, ...
3351 // br %cond if.then
3352 //
3353 // if.then:
3354 // store %two, %x.dest, !DIAssignID !2
3355 // dbg.assign %two, "x", ..., !2, ...
3356 //
3357 // === After this transformation ===
3358 // pred:
3359 // store %one, %x.dest, !DIAssignID !1
3360 // dbg.assign %one, "x", ..., !1
3361 /// ...
3362 // %merge = select %cond, %two, %one
3363 // store %merge, %x.dest, !DIAssignID !2
3364 // dbg.assign %merge, "x", ..., !2
3365 for (DbgVariableRecord *DbgAssign :
3366 at::getDVRAssignmentMarkers(SpeculatedStore))
3367 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3368 DbgAssign->replaceVariableLocationOp(OrigV, S);
3369 }
3370
3371 // Metadata can be dependent on the condition we are hoisting above.
3372 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3373 // to avoid making it appear as if the condition is a constant, which would
3374 // be misleading while debugging.
3375 // Similarly strip attributes that maybe dependent on condition we are
3376 // hoisting above.
3377 for (auto &I : make_early_inc_range(*ThenBB)) {
3378 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3379 I.setDebugLoc(DebugLoc::getDropped());
3380 }
3381 I.dropUBImplyingAttrsAndMetadata();
3382
3383 // Drop ephemeral values.
3384 if (EphTracker.contains(&I)) {
3385 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3386 I.eraseFromParent();
3387 }
3388 }
3389
3390 // Hoist the instructions.
3391 // Drop DbgVariableRecords attached to these instructions.
3392 for (auto &It : *ThenBB)
3393 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3394 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3395 // equivalent).
3396 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3397 !DVR || !DVR->isDbgAssign())
3398 It.dropOneDbgRecord(&DR);
3399 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3400 std::prev(ThenBB->end()));
3401
3402 if (!SpeculatedConditionalLoadsStores.empty())
3403 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3404 Sel);
3405
3406 // Insert selects and rewrite the PHI operands.
3407 IRBuilder<NoFolder> Builder(BI);
3408 for (PHINode &PN : EndBB->phis()) {
3409 unsigned OrigI = PN.getBasicBlockIndex(BB);
3410 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3411 Value *OrigV = PN.getIncomingValue(OrigI);
3412 Value *ThenV = PN.getIncomingValue(ThenI);
3413
3414 // Skip PHIs which are trivial.
3415 if (OrigV == ThenV)
3416 continue;
3417
3418 // Create a select whose true value is the speculatively executed value and
3419 // false value is the pre-existing value. Swap them if the branch
3420 // destinations were inverted.
3421 Value *TrueV = ThenV, *FalseV = OrigV;
3422 if (Invert)
3423 std::swap(TrueV, FalseV);
3424 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3425 PN.setIncomingValue(OrigI, V);
3426 PN.setIncomingValue(ThenI, V);
3427 }
3428
3429 // Remove speculated pseudo probes.
3430 for (Instruction *I : SpeculatedPseudoProbes)
3431 I->eraseFromParent();
3432
3433 ++NumSpeculations;
3434 return true;
3435}
3436
3438
3439// Return false if number of blocks searched is too much.
3440static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3441 BlocksSet &ReachesNonLocalUses) {
3442 if (BB == DefBB)
3443 return true;
3444 if (!ReachesNonLocalUses.insert(BB).second)
3445 return true;
3446
3447 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3448 return false;
3449 for (BasicBlock *Pred : predecessors(BB))
3450 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3451 return false;
3452 return true;
3453}
3454
3455/// Return true if we can thread a branch across this block.
 // NOTE(review): the first signature line (presumably
 // `static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,`) was
 // lost in the extract — confirm against the original source.
3457 BlocksSet &NonLocalUseBlocks) {
3458 int Size = 0;
3459 EphemeralValueTracker EphTracker;
3460
3461 // Walk the loop in reverse so that we can identify ephemeral values properly
3462 // (values only feeding assumes).
3463 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3464 // Can't fold blocks that contain noduplicate or convergent calls.
3465 if (CallInst *CI = dyn_cast<CallInst>(&I))
3466 if (CI->cannotDuplicate() || CI->isConvergent())
3467 return false;
3468
3469 // Ignore ephemeral values which are deleted during codegen.
3470 // We will delete Phis while threading, so Phis should not be accounted in
3471 // block's size.
3472 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3473 if (Size++ > MaxSmallBlockSize)
3474 return false; // Don't clone large BB's.
3475 }
3476
3477 // Record blocks with non-local uses of values defined in the current basic
3478 // block.
3479 for (User *U : I.users()) {
3480 Instruction *UI = cast<Instruction>(U);
3481 BasicBlock *UsedInBB = UI->getParent();
3482 if (UsedInBB == BB) {
 // A same-block PHI user means the value feeds back into this block's
 // PHIs, which threading cannot handle.
3483 if (isa<PHINode>(UI))
3484 return false;
3485 } else
3486 NonLocalUseBlocks.insert(UsedInBB);
3487 }
3488
3489 // Looks ok, continue checking.
3490 }
3491
3492 return true;
3493}
3494
3496 BasicBlock *To) {
3497 // Don't look past the block defining the value, we might get the value from
3498 // a previous loop iteration.
3499 auto *I = dyn_cast<Instruction>(V);
3500 if (I && I->getParent() == To)
3501 return nullptr;
3502
3503 // We know the value if the From block branches on it.
3504 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3505 if (BI && BI->isConditional() && BI->getCondition() == V &&
3506 BI->getSuccessor(0) != BI->getSuccessor(1))
3507 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3509
3510 return nullptr;
3511}
3512
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// Returns false for "no change", true for "changed", and std::nullopt for
/// "changed; run the analysis again" (see the caller's do/while loop).
///
/// NOTE(review): this extract appears to have dropped a handful of source
/// lines (marked inline below); reconcile against the authoritative file
/// before modifying the logic.
static std::optional<bool>
// NOTE(review): missing here: the line carrying the function's name and its
// leading parameters (a BranchInst* and a DomTreeUpdater*, judging from the
// uses of BI and DTU below — TODO confirm).
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  // NOTE(review): missing here: the declaration of KnownValues, a map from
  // ConstantInt* to a set of predecessor blocks (see the uses below).
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      // NOTE(review): missing here: the statement performing the actual fold
      // of the single-entry PHI before reporting a change.
      return true;
    }

    // Each constant incoming value pins the branch direction on that edge.
    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(U))
        KnownValues[CB].insert(PN->getIncomingBlock(U));
  } else {
    // No PHI: a predecessor that itself branches on Cond fixes Cond's value
    // along its edge into BB.
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
        KnownValues[CB].insert(Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and record which non-local blocks use
  // values defined in the block.

  BlocksSet NonLocalUseBlocks;
  BlocksSet ReachesNonLocalUseBlocks;
  if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
    return false;

  // Jump-threading can only be done to destinations where no values defined
  // in BB are live.

  // Quickly check if both destinations have uses. If so, jump-threading cannot
  // be done.
  if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
      NonLocalUseBlocks.contains(BI->getSuccessor(1)))
    return false;

  // Search backward from NonLocalUseBlocks to find which blocks
  // reach non-local uses.
  for (BasicBlock *UseBB : NonLocalUseBlocks)
    // Give up if too many blocks are searched.
    if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
      return false;

  for (const auto &Pair : KnownValues) {
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // A true condition selects successor 0, a false one successor 1.
    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());

    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(PredBBs, [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(PredBB->getTerminator());
        }))
      continue;

    // Only revector to RealDest if no values defined in BB are live.
    if (ReachesNonLocalUseBlocks.contains(RealDest))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n";
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << "  " << PredBB->getName() << "\n";
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge");
    EdgeBB->moveBefore(RealDest);

    // Update PHI nodes.
    addPredecessorToBlock(RealDest, EdgeBB, BB);

    // BB may have instructions that are being threaded over. Clone these
    // instructions into EdgeBB. We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    ValueToValueMapTy TranslateMap; // Track translated values.
    // Within the cloned code, the branch condition is the known constant.
    TranslateMap[Cond] = CB;

    // RemoveDIs: track instructions that we optimise away while folding, so
    // that we can copy DbgVariableRecords from them later.
    BasicBlock::iterator SrcDbgCursor = BB->begin();
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
        // PHIs are not cloned; on this edge they resolve to the EdgeBB value.
        TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      // Insert the new instruction into its new home.
      N->insertInto(EdgeBB, InsertPt);

      if (BBI->hasName())
        N->setName(BBI->getName() + ".c");

      // Update operands due to translation.
      // Key Instructions: Remap all the atom groups.
      if (const DebugLoc &DL = BBI->getDebugLoc())
        mapAtomInstance(DL, TranslateMap);
      RemapInstruction(N, TranslateMap,
      // NOTE(review): missing here: the remap-flags argument and the closing
      // parenthesis/semicolon of this call.

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away, don't need actual
                                // inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Copy all debug-info attached to instructions from the last we
        // successfully clone, up to this instruction (they might have been
        // folded away).
        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
          N->cloneDebugInfoFrom(&*SrcDbgCursor);
        SrcDbgCursor = std::next(BBI);
        // Clone debug-info on this instruction too.
        N->cloneDebugInfoFrom(&*BBI);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(N))
          if (AC)
            AC->registerAssumption(Assume);
      }
    }

    // Copy any remaining debug records trailing after the last cloned
    // instruction, plus those attached to the branch itself.
    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
      InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
    InsertPt->cloneDebugInfoFrom(BI);

    BB->removePredecessor(EdgeBB);
    BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
    EdgeBI->setSuccessor(0, RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      // NOTE(review): missing here: the declaration of Updates (a small
      // vector of DominatorTree update records).
      Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(EdgeBB, DTU);

    // Signal repeat, simplifying any other constants.
    return std::nullopt;
  }

  return false;
}
3696
3697bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3698 // Note: If BB is a loop header then there is a risk that threading introduces
3699 // a non-canonical loop by moving a back edge. So we avoid this optimization
3700 // for loop headers if NeedCanonicalLoop is set.
3701 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3702 return false;
3703
3704 std::optional<bool> Result;
3705 bool EverChanged = false;
3706 do {
3707 // Note that None means "we changed things, but recurse further."
3708 Result =
3710 EverChanged |= Result == std::nullopt || *Result;
3711 } while (Result == std::nullopt);
3712 return EverChanged;
3713}
3714
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
///
/// NOTE(review): this extract appears to have dropped several source lines
/// (marked inline below), including the first lines of the signature;
/// reconcile against the authoritative file before modifying the logic.
// NOTE(review): missing here: the signature's leading lines (the PHINode*,
// TargetTransformInfo, and DomTreeUpdater parameters, judging from the uses
// of PN, TTI and DTU below — TODO confirm).
                                const DataLayout &DL,
                                bool SpeculateUnpredictables) {
  // Ok, this is a two entry PHI node. Check to see if this is a simple "if
  // statement", which has a very simple dominance structure. Basically, we
  // are trying to find the condition that is being branched on, which
  // subsequently causes this merge to happen. We really want control
  // dependence information for this check, but simplifycfg can't keep it up
  // to date, and this catches most of the cases we care about anyway.
  BasicBlock *BB = PN->getParent();

  BasicBlock *IfTrue, *IfFalse;
  BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *IfCond = DomBI->getCondition();
  // Don't bother if the branch will be constant folded trivially.
  if (isa<ConstantInt>(IfCond))
    return false;

  BasicBlock *DomBlock = DomBI->getParent();
  // NOTE(review): missing here: the declaration of IfBlocks and the head of
  // the copy_if call that fills it with the speculatable arms.
      PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
        return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
      });
  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
         "Will have either one or two blocks to speculate.");

  // If the branch is non-unpredictable, see if we either predictably jump to
  // the merge bb (if we have only a single 'then' block), or if we predictably
  // jump to one specific 'then' block (if we have two of them).
  // It isn't beneficial to speculatively execute the code
  // from the block that we know is predictably not entered.
  bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
  if (!IsUnpredictable) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
        (TWeight + FWeight) != 0) {
      BranchProbability BITrueProb =
          BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
      // NOTE(review): missing here: the definition of Likely (the
      // predictable-branch probability threshold used below).
      BranchProbability BIFalseProb = BITrueProb.getCompl();
      if (IfBlocks.size() == 1) {
        BranchProbability BIBBProb =
            DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
        if (BIBBProb >= Likely)
          return false;
      } else {
        if (BITrueProb >= Likely || BIFalseProb >= Likely)
          return false;
      }
    }
  }

  // Don't try to fold an unreachable block. For example, the phi node itself
  // can't be the candidate if-condition for a select that we want to form.
  if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
    if (IfCondPhiInst->getParent() == BB)
      return false;

  // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two entry phi nodes in this block.
  // At some point this becomes non-profitable (particularly if the target
  // doesn't support cmov's). Only do this transformation if there are two or
  // fewer PHI nodes in this block.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
    if (NumPhis > 2)
      return false;

  // Loop over the PHI's seeing if we can promote them all to select
  // instructions. While we are at it, keep track of the instructions
  // that need to be moved to the dominating block.
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
  SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
  // NOTE(review): missing here: the declaration of Cost and the first line
  // of Budget's initializer expression.
  InstructionCost Budget =
  // NOTE(review): missing here: the remainder of Budget's initializer.
  if (SpeculateUnpredictables && IsUnpredictable)
    Budget += TTI.getBranchMispredictPenalty();

  bool Changed = false;
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
    PHINode *PN = cast<PHINode>(II++);
    if (Value *V = simplifyInstruction(PN, {DL, PN})) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
      continue;
    }

    if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions) ||
        !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions))
      return Changed;
  }

  // If we folded the first phi, PN dangles at this point. Refresh it. If
  // we ran out of PHIs then we simplified them all.
  PN = dyn_cast<PHINode>(BB->begin());
  if (!PN)
    return true;

  // Return true if at least one of these is a 'not', and another is either
  // a 'not' too, or a constant.
  auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
    if (!match(V0, m_Not(m_Value())))
      std::swap(V0, V1);
    auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
    return match(V0, m_Not(m_Value())) && match(V1, Invertible);
  };

  // Don't fold i1 branches on PHIs which contain binary operators or
  // (possibly inverted) select form of or/ands, unless one of
  // the incoming values is an 'not' and another one is freely invertible.
  // These can often be turned into switches and other things.
  auto IsBinOpOrAnd = [](Value *V) {
    return match(
    // NOTE(review): missing here: the pattern operand and closing
    // parenthesis of this match call.
  };
  if (PN->getType()->isIntegerTy(1) &&
      (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
       IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
      !CanHoistNotFromBothValues(PN->getIncomingValue(0),
                                 PN->getIncomingValue(1)))
    return Changed;

  // If all PHI nodes are promotable, check to make sure that all instructions
  // in the predecessor blocks can be promoted as well. If not, we won't be able
  // to get rid of the control flow, so it's not worth promoting to select
  // instructions.
  for (BasicBlock *IfBlock : IfBlocks)
    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
      if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
        // This is not an aggressive instruction that we can promote.
        // Because of this, we won't be able to get rid of the control flow, so
        // the xform is not worth it.
        return Changed;
      }

  // If either of the blocks has it's address taken, we can't do this fold.
  if (any_of(IfBlocks,
             [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
    return Changed;

  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond;
             if (IsUnpredictable) dbgs() << " (unpredictable)";
             dbgs() << "  T: " << IfTrue->getName()
                    << "  F: " << IfFalse->getName() << "\n");

  // If we can still promote the PHI nodes after this gauntlet of tests,
  // do all of the PHI's now.

  // Move all 'aggressive' instructions, which are defined in the
  // conditional parts of the if's up to the dominating block.
  for (BasicBlock *IfBlock : IfBlocks)
    hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);

  IRBuilder<NoFolder> Builder(DomBI);
  // Propagate fast-math-flags from phi nodes to replacement selects.
  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
    // Change the PHI node into a select instruction.
    Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
    Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);

    Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
                                         isa<FPMathOperator>(PN) ? PN : nullptr,
                                         "", DomBI);
    PN->replaceAllUsesWith(Sel);
    Sel->takeName(PN);
    PN->eraseFromParent();
  }

  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened. Change DomBlock to jump directly to our new block to
  // avoid other simplifycfg's kicking in on the diamond.
  Builder.CreateBr(BB);

  // NOTE(review): missing here: the declaration of Updates (DominatorTree
  // update records).
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, DomBlock, BB});
    for (auto *Successor : successors(DomBlock))
      Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
  }

  DomBI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
3913
3916 Value *RHS, const Twine &Name = "") {
3917 // Try to relax logical op to binary op.
3918 if (impliesPoison(RHS, LHS))
3919 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3920 if (Opc == Instruction::And)
3921 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3922 if (Opc == Instruction::Or)
3923 return Builder.CreateLogicalOr(LHS, RHS, Name);
3924 llvm_unreachable("Invalid logical opcode");
3925}
3926
3927/// Return true if either PBI or BI has branch weight available, and store
3928/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3929/// not have branch weight, use 1:1 as its weight.
3931 uint64_t &PredTrueWeight,
3932 uint64_t &PredFalseWeight,
3933 uint64_t &SuccTrueWeight,
3934 uint64_t &SuccFalseWeight) {
3935 bool PredHasWeights =
3936 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3937 bool SuccHasWeights =
3938 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3939 if (PredHasWeights || SuccHasWeights) {
3940 if (!PredHasWeights)
3941 PredTrueWeight = PredFalseWeight = 1;
3942 if (!SuccHasWeights)
3943 SuccTrueWeight = SuccFalseWeight = 1;
3944 return true;
3945 } else {
3946 return false;
3947 }
3948}
3949
3950/// Determine if the two branches share a common destination and deduce a glue
3951/// that joins the branches' conditions to arrive at the common destination if
3952/// that would be profitable.
3953static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3955 const TargetTransformInfo *TTI) {
3956 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3957 "Both blocks must end with a conditional branches.");
3959 "PredBB must be a predecessor of BB.");
3960
3961 // We have the potential to fold the conditions together, but if the
3962 // predecessor branch is predictable, we may not want to merge them.
3963 uint64_t PTWeight, PFWeight;
3964 BranchProbability PBITrueProb, Likely;
3965 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3966 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3967 (PTWeight + PFWeight) != 0) {
3968 PBITrueProb =
3969 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3971 }
3972
3973 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3974 // Speculate the 2nd condition unless the 1st is probably true.
3975 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3976 return {{BI->getSuccessor(0), Instruction::Or, false}};
3977 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3978 // Speculate the 2nd condition unless the 1st is probably false.
3979 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3980 return {{BI->getSuccessor(1), Instruction::And, false}};
3981 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3982 // Speculate the 2nd condition unless the 1st is probably true.
3983 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3984 return {{BI->getSuccessor(1), Instruction::And, true}};
3985 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3986 // Speculate the 2nd condition unless the 1st is probably false.
3987 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3988 return {{BI->getSuccessor(0), Instruction::Or, true}};
3989 }
3990 return std::nullopt;
3991}
3992
// NOTE(review): missing from this extract: the first line of this definition
// (return type, name, and the BranchInst* parameters BI and PBI used below —
// TODO confirm), plus a few more lines marked inline.
                                            DomTreeUpdater *DTU,
                                            MemorySSAUpdater *MSSAU,
                                            const TargetTransformInfo *TTI) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *PredBlock = PBI->getParent();

  // Determine if the two branches share a common destination.
  BasicBlock *CommonSucc;
  // NOTE(review): missing here: the declaration of Opc (the opcode gluing
  // the two conditions).
  bool InvertPredCond;
  std::tie(CommonSucc, Opc, InvertPredCond) =
  // NOTE(review): missing here: the right-hand side of this assignment.

  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

  IRBuilder<> Builder(PBI);
  // The builder is used to create instructions to eliminate the branch in BB.
  // If BB's terminator has !annotation metadata, add it to the new
  // instructions.
  // NOTE(review): missing here: the Builder call that receives this
  // metadata-kind list argument.
                                 {LLVMContext::MD_annotation});

  // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond) {
    InvertBranch(PBI, Builder);
  }

  BasicBlock *UniqueSucc =
      PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);

  // Before cloning instructions, notify the successor basic block that it
  // is about to have a new predecessor. This will update PHI nodes,
  // which will allow us to update live-out uses of bonus instructions.
  addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);

  // Try to update branch weights.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight)) {
    SmallVector<uint64_t, 8> NewWeights;

    if (PBI->getSuccessor(0) == BB) {
      // PBI: br i1 %x, BB, FalseDest
      // BI:  br i1 %y, UniqueSucc, FalseDest
      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
      //              TrueWeight for PBI * FalseWeight for BI.
      // We assume that total weights of a BranchInst can fit into 32 bits.
      // Therefore, we will not have overflow using 64-bit arithmetic.
      NewWeights.push_back(PredFalseWeight *
                               (SuccFalseWeight + SuccTrueWeight) +
                           PredTrueWeight * SuccFalseWeight);
    } else {
      // PBI: br i1 %x, TrueDest, BB
      // BI:  br i1 %y, TrueDest, UniqueSucc
      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
      //             FalseWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
                           PredFalseWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
      NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
    }

    // Halve the weights if any of them cannot fit in an uint32_t
    fitWeights(NewWeights);

    SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
    setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);

    // TODO: If BB is reachable from all paths through PredBlock, then we
    // could replace PBI's branch probabilities with BI's.
  } else
    PBI->setMetadata(LLVMContext::MD_prof, nullptr);

  // Now, update the CFG.
  PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);

  if (DTU)
    DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
                       {DominatorTree::Delete, PredBlock, BB}});

  // If BI was a loop latch, it may have had associated loop metadata.
  // We need to copy it to the new latch, that is, PBI.
  if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
    PBI->setMetadata(LLVMContext::MD_loop, LoopMD);

  ValueToValueMapTy VMap; // maps original values to cloned values
  // NOTE(review): missing here: the statement that clones BB's bonus
  // instructions into PredBlock and populates VMap.

  Module *M = BB->getModule();

  PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
  for (DbgVariableRecord &DVR :
  // NOTE(review): missing here: the range expression of this loop.
    RemapDbgRecord(M, &DVR, VMap,
    // NOTE(review): missing here: the remap-flags argument of this call.
  }

  // Now that the Cond was cloned into the predecessor basic block,
  // or/and the two conditions together.
  Value *BICond = VMap[BI->getCondition()];
  PBI->setCondition(
      createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));

  ++NumFoldBranchToCommonDest;
  return true;
}
4102
4103/// Return if an instruction's type or any of its operands' types are a vector
4104/// type.
4105static bool isVectorOp(Instruction &I) {
4106 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4107 return U->getType()->isVectorTy();
4108 });
4109}
4110
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
///
/// NOTE(review): this extract appears to have dropped several source lines
/// (marked inline below), including the first line of the signature;
/// reconcile against the authoritative file before modifying the logic.
// NOTE(review): missing here: the signature's first line (return type, name,
// and the BranchInst*/DomTreeUpdater* parameters used below — TODO confirm).
                                  MemorySSAUpdater *MSSAU,
                                  const TargetTransformInfo *TTI,
                                  unsigned BonusInstThreshold) {
  // If this block ends with an unconditional branch,
  // let speculativelyExecuteBB() deal with it.
  if (!BI->isConditional())
    return false;

  BasicBlock *BB = BI->getParent();
  // NOTE(review): missing here: the initialization of CostKind, used with
  // TTI in the cost queries below.

  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());

  if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Cond) ||
      Cond->getParent() != BB || !Cond->hasOneUse())
    return false;

  // Finally, don't infinitely unroll conditional loops.
  if (is_contained(successors(BB), BB))
    return false;

  // With which predecessors will we want to deal with?
  // NOTE(review): missing here: the declaration of Preds (the vector of
  // candidate predecessor blocks filled below).
  for (BasicBlock *PredBlock : predecessors(BB)) {
    BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());

    // Check that we have two conditional branches. If there is a PHI node in
    // the common successor, verify that the same value flows in from both
    // blocks.
    if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
      continue;

    // Determine if the two branches share a common destination.
    BasicBlock *CommonSucc;
    // NOTE(review): missing here: the declaration of Opc.
    bool InvertPredCond;
    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
      std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
    else
      continue;

    // Check the cost of inserting the necessary logic before performing the
    // transformation.
    if (TTI) {
      Type *Ty = BI->getCondition()->getType();
      // NOTE(review): missing here: the declaration/initialization of Cost.
      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
                             !isa<CmpInst>(PBI->getCondition())))
        Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);

      // NOTE(review): missing here: the threshold comparison guarding this
      // 'continue'.
        continue;
    }

    // Ok, we do want to deal with this predecessor. Record it.
    Preds.emplace_back(PredBlock);
  }

  // If there aren't any predecessors into which we can fold,
  // don't bother checking the cost.
  if (Preds.empty())
    return false;

  // Only allow this transformation if computing the condition doesn't involve
  // too many instructions and these involved instructions can be executed
  // unconditionally. We denote all involved instructions except the condition
  // as "bonus instructions", and only allow this transformation when the
  // number of the bonus instructions we'll need to create when cloning into
  // each predecessor does not exceed a certain threshold.
  unsigned NumBonusInsts = 0;
  bool SawVectorOp = false;
  const unsigned PredCount = Preds.size();
  for (Instruction &I : *BB) {
    // Don't check the branch condition comparison itself.
    if (&I == Cond)
      continue;
    // Ignore the terminator.
    if (isa<BranchInst>(I))
      continue;
    // I must be safe to execute unconditionally.
    // NOTE(review): missing here: the speculation-safety check whose failure
    // triggers this 'return false'.
      return false;
    SawVectorOp |= isVectorOp(I);

    // Account for the cost of duplicating this instruction into each
    // predecessor. Ignore free instructions.
    if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
    // NOTE(review): missing here: the cost constant compared against
    // (presumably a "free" cost bucket — TODO confirm) and the opening brace.
      NumBonusInsts += PredCount;

      // Early exits once we reach the limit.
      if (NumBonusInsts >
          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
        return false;
    }

    auto IsBCSSAUse = [BB, &I](Use &U) {
      auto *UI = cast<Instruction>(U.getUser());
      if (auto *PN = dyn_cast<PHINode>(UI))
        return PN->getIncomingBlock(U) == BB;
      return UI->getParent() == BB && I.comesBefore(UI);
    };

    // Does this instruction require rewriting of uses?
    if (!all_of(I.uses(), IsBCSSAUse))
      return false;
  }
  if (NumBonusInsts >
      BonusInstThreshold *
          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
    return false;

  // Ok, we have the budget. Perform the transformation.
  for (BasicBlock *PredBlock : Preds) {
    auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
    // Only the first recorded predecessor is folded per call (note the
    // unconditional return inside this loop).
    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
  }
  return false;
}
4236
4237// If there is only one store in BB1 and BB2, return it, otherwise return
4238// nullptr.
4240 StoreInst *S = nullptr;
4241 for (auto *BB : {BB1, BB2}) {
4242 if (!BB)
4243 continue;
4244 for (auto &I : *BB)
4245 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4246 if (S)
4247 // Multiple stores seen.
4248 return nullptr;
4249 else
4250 S = SI;
4251 }
4252 }
4253 return S;
4254}
4255
4257 Value *AlternativeV = nullptr) {
4258 // PHI is going to be a PHI node that allows the value V that is defined in
4259 // BB to be referenced in BB's only successor.
4260 //
4261 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4262 // doesn't matter to us what the other operand is (it'll never get used). We
4263 // could just create a new PHI with an undef incoming value, but that could
4264 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4265 // other PHI. So here we directly look for some PHI in BB's successor with V
4266 // as an incoming operand. If we find one, we use it, else we create a new
4267 // one.
4268 //
4269 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4270 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4271 // where OtherBB is the single other predecessor of BB's only successor.
4272 PHINode *PHI = nullptr;
4273 BasicBlock *Succ = BB->getSingleSuccessor();
4274
4275 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4276 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4277 PHI = cast<PHINode>(I);
4278 if (!AlternativeV)
4279 break;
4280
4281 assert(Succ->hasNPredecessors(2));
4282 auto PredI = pred_begin(Succ);
4283 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4284 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4285 break;
4286 PHI = nullptr;
4287 }
4288 if (PHI)
4289 return PHI;
4290
4291 // If V is not an instruction defined in BB, just return it.
4292 if (!AlternativeV &&
4293 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4294 return V;
4295
4296 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4297 PHI->insertBefore(Succ->begin());
4298 PHI->addIncoming(V, BB);
4299 for (BasicBlock *PredBB : predecessors(Succ))
4300 if (PredBB != BB)
4301 PHI->addIncoming(
4302 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4303 return PHI;
4304}
4305
4307 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4308 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4309 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4310 // For every pointer, there must be exactly two stores, one coming from
4311 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4312 // store (to any address) in PTB,PFB or QTB,QFB.
4313 // FIXME: We could relax this restriction with a bit more work and performance
4314 // testing.
4315 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4316 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4317 if (!PStore || !QStore)
4318 return false;
4319
4320 // Now check the stores are compatible.
4321 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4322 PStore->getValueOperand()->getType() !=
4323 QStore->getValueOperand()->getType())
4324 return false;
4325
4326 // Check that sinking the store won't cause program behavior changes. Sinking
4327 // the store out of the Q blocks won't change any behavior as we're sinking
4328 // from a block to its unconditional successor. But we're moving a store from
4329 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4330 // So we need to check that there are no aliasing loads or stores in
4331 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4332 // operations between PStore and the end of its parent block.
4333 //
4334 // The ideal way to do this is to query AliasAnalysis, but we don't
4335 // preserve AA currently so that is dangerous. Be super safe and just
4336 // check there are no other memory operations at all.
4337 for (auto &I : *QFB->getSinglePredecessor())
4338 if (I.mayReadOrWriteMemory())
4339 return false;
4340 for (auto &I : *QFB)
4341 if (&I != QStore && I.mayReadOrWriteMemory())
4342 return false;
4343 if (QTB)
4344 for (auto &I : *QTB)
4345 if (&I != QStore && I.mayReadOrWriteMemory())
4346 return false;
4347 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4348 I != E; ++I)
4349 if (&*I != PStore && I->mayReadOrWriteMemory())
4350 return false;
4351
4352 // If we're not in aggressive mode, we only optimize if we have some
4353 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4354 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4355 if (!BB)
4356 return true;
4357 // Heuristic: if the block can be if-converted/phi-folded and the
4358 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4359 // thread this store.
4361 InstructionCost Budget =
4363 for (auto &I : BB->instructionsWithoutDebug(false)) {
4364 // Consider terminator instruction to be free.
4365 if (I.isTerminator())
4366 continue;
4367 // If this is one the stores that we want to speculate out of this BB,
4368 // then don't count it's cost, consider it to be free.
4369 if (auto *S = dyn_cast<StoreInst>(&I))
4370 if (llvm::find(FreeStores, S))
4371 continue;
4372 // Else, we have a white-list of instructions that we are ak speculating.
4373 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4374 return false; // Not in white-list - not worthwhile folding.
4375 // And finally, if this is a non-free instruction that we are okay
4376 // speculating, ensure that we consider the speculation budget.
4377 Cost +=
4379 if (Cost > Budget)
4380 return false; // Eagerly refuse to fold as soon as we're out of budget.
4381 }
4382 assert(Cost <= Budget &&
4383 "When we run out of budget we will eagerly return from within the "
4384 "per-instruction loop.");
4385 return true;
4386 };
4387
4388 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4390 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4391 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4392 return false;
4393
4394 // If PostBB has more than two predecessors, we need to split it so we can
4395 // sink the store.
4396 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4397 // We know that QFB's only successor is PostBB. And QFB has a single
4398 // predecessor. If QTB exists, then its only successor is also PostBB.
4399 // If QTB does not exist, then QFB's only predecessor has a conditional
4400 // branch to QFB and PostBB.
4401 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4402 BasicBlock *NewBB =
4403 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4404 if (!NewBB)
4405 return false;
4406 PostBB = NewBB;
4407 }
4408
4409 // OK, we're going to sink the stores to PostBB. The store has to be
4410 // conditional though, so first create the predicate.
4411 BranchInst *PBranch =
4412 cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator());
4413 BranchInst *QBranch =
4414 cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator());
4415 Value *PCond = PBranch->getCondition();
4416 Value *QCond = QBranch->getCondition();
4417
4419 PStore->getParent());
4421 QStore->getParent(), PPHI);
4422
4423 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4424 IRBuilder<> QB(PostBB, PostBBFirst);
4425 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4426
4427 InvertPCond ^= (PStore->getParent() != PTB);
4428 InvertQCond ^= (QStore->getParent() != QTB);
4429 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4430 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4431
4432 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4433
4434 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4435 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4436 /*Unreachable=*/false,
4437 /*BranchWeights=*/nullptr, DTU);
4438
4439 QB.SetInsertPoint(T);
4440 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4441 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4442 // Choose the minimum alignment. If we could prove both stores execute, we
4443 // could use biggest one. In this case, though, we only know that one of the
4444 // stores executes. And we don't know it's safe to take the alignment from a
4445 // store that doesn't execute.
4446 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4447
4448 QStore->eraseFromParent();
4449 PStore->eraseFromParent();
4450
4451 return true;
4452}
4453
4455 DomTreeUpdater *DTU, const DataLayout &DL,
4456 const TargetTransformInfo &TTI) {
4457 // The intention here is to find diamonds or triangles (see below) where each
4458 // conditional block contains a store to the same address. Both of these
4459 // stores are conditional, so they can't be unconditionally sunk. But it may
4460 // be profitable to speculatively sink the stores into one merged store at the
4461 // end, and predicate the merged store on the union of the two conditions of
4462 // PBI and QBI.
4463 //
4464 // This can reduce the number of stores executed if both of the conditions are
4465 // true, and can allow the blocks to become small enough to be if-converted.
4466 // This optimization will also chain, so that ladders of test-and-set
4467 // sequences can be if-converted away.
4468 //
4469 // We only deal with simple diamonds or triangles:
4470 //
4471 // PBI or PBI or a combination of the two
4472 // / \ | \
4473 // PTB PFB | PFB
4474 // \ / | /
4475 // QBI QBI
4476 // / \ | \
4477 // QTB QFB | QFB
4478 // \ / | /
4479 // PostBB PostBB
4480 //
4481 // We model triangles as a type of diamond with a nullptr "true" block.
4482 // Triangles are canonicalized so that the fallthrough edge is represented by
4483 // a true condition, as in the diagram above.
4484 BasicBlock *PTB = PBI->getSuccessor(0);
4485 BasicBlock *PFB = PBI->getSuccessor(1);
4486 BasicBlock *QTB = QBI->getSuccessor(0);
4487 BasicBlock *QFB = QBI->getSuccessor(1);
4488 BasicBlock *PostBB = QFB->getSingleSuccessor();
4489
4490 // Make sure we have a good guess for PostBB. If QTB's only successor is
4491 // QFB, then QFB is a better PostBB.
4492 if (QTB->getSingleSuccessor() == QFB)
4493 PostBB = QFB;
4494
4495 // If we couldn't find a good PostBB, stop.
4496 if (!PostBB)
4497 return false;
4498
4499 bool InvertPCond = false, InvertQCond = false;
4500 // Canonicalize fallthroughs to the true branches.
4501 if (PFB == QBI->getParent()) {
4502 std::swap(PFB, PTB);
4503 InvertPCond = true;
4504 }
4505 if (QFB == PostBB) {
4506 std::swap(QFB, QTB);
4507 InvertQCond = true;
4508 }
4509
4510 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4511 // and QFB may not. Model fallthroughs as a nullptr block.
4512 if (PTB == QBI->getParent())
4513 PTB = nullptr;
4514 if (QTB == PostBB)
4515 QTB = nullptr;
4516
4517 // Legality bailouts. We must have at least the non-fallthrough blocks and
4518 // the post-dominating block, and the non-fallthroughs must only have one
4519 // predecessor.
4520 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4521 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4522 };
4523 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4524 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4525 return false;
4526 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4527 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4528 return false;
4529 if (!QBI->getParent()->hasNUses(2))
4530 return false;
4531
4532 // OK, this is a sequence of two diamonds or triangles.
4533 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4534 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4535 for (auto *BB : {PTB, PFB}) {
4536 if (!BB)
4537 continue;
4538 for (auto &I : *BB)
4539 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4540 PStoreAddresses.insert(SI->getPointerOperand());
4541 }
4542 for (auto *BB : {QTB, QFB}) {
4543 if (!BB)
4544 continue;
4545 for (auto &I : *BB)
4546 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4547 QStoreAddresses.insert(SI->getPointerOperand());
4548 }
4549
4550 set_intersect(PStoreAddresses, QStoreAddresses);
4551 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4552 // clear what it contains.
4553 auto &CommonAddresses = PStoreAddresses;
4554
4555 bool Changed = false;
4556 for (auto *Address : CommonAddresses)
4557 Changed |=
4558 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4559 InvertPCond, InvertQCond, DTU, DL, TTI);
4560 return Changed;
4561}
4562
4563/// If the previous block ended with a widenable branch, determine if reusing
4564/// the target block is profitable and legal. This will have the effect of
4565/// "widening" PBI, but doesn't require us to reason about hosting safety.
4567 DomTreeUpdater *DTU) {
4568 // TODO: This can be generalized in two important ways:
4569 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4570 // values from the PBI edge.
4571 // 2) We can sink side effecting instructions into BI's fallthrough
4572 // successor provided they doesn't contribute to computation of
4573 // BI's condition.
4574 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4575 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4576 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4577 !BI->getParent()->getSinglePredecessor())
4578 return false;
4579 if (!IfFalseBB->phis().empty())
4580 return false; // TODO
4581 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4582 // may undo the transform done here.
4583 // TODO: There might be a more fine-grained solution to this.
4584 if (!llvm::succ_empty(IfFalseBB))
4585 return false;
4586 // Use lambda to lazily compute expensive condition after cheap ones.
4587 auto NoSideEffects = [](BasicBlock &BB) {
4588 return llvm::none_of(BB, [](const Instruction &I) {
4589 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4590 });
4591 };
4592 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4593 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4594 NoSideEffects(*BI->getParent())) {
4595 auto *OldSuccessor = BI->getSuccessor(1);
4596 OldSuccessor->removePredecessor(BI->getParent());
4597 BI->setSuccessor(1, IfFalseBB);
4598 if (DTU)
4599 DTU->applyUpdates(
4600 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4601 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4602 return true;
4603 }
4604 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4605 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4606 NoSideEffects(*BI->getParent())) {
4607 auto *OldSuccessor = BI->getSuccessor(0);
4608 OldSuccessor->removePredecessor(BI->getParent());
4609 BI->setSuccessor(0, IfFalseBB);
4610 if (DTU)
4611 DTU->applyUpdates(
4612 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4613 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4614 return true;
4615 }
4616 return false;
4617}
4618
4619/// If we have a conditional branch as a predecessor of another block,
4620/// this function tries to simplify it. We know
4621/// that PBI and BI are both conditional branches, and BI is in one of the
4622/// successor blocks of PBI - PBI branches to BI.
4624 DomTreeUpdater *DTU,
4625 const DataLayout &DL,
4626 const TargetTransformInfo &TTI) {
4627 assert(PBI->isConditional() && BI->isConditional());
4628 BasicBlock *BB = BI->getParent();
4629
4630 // If this block ends with a branch instruction, and if there is a
4631 // predecessor that ends on a branch of the same condition, make
4632 // this conditional branch redundant.
4633 if (PBI->getCondition() == BI->getCondition() &&
4634 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4635 // Okay, the outcome of this conditional branch is statically
4636 // knowable. If this block had a single pred, handle specially, otherwise
4637 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4638 if (BB->getSinglePredecessor()) {
4639 // Turn this into a branch on constant.
4640 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4641 BI->setCondition(
4642 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4643 return true; // Nuke the branch on constant.
4644 }
4645 }
4646
4647 // If the previous block ended with a widenable branch, determine if reusing
4648 // the target block is profitable and legal. This will have the effect of
4649 // "widening" PBI, but doesn't require us to reason about hosting safety.
4650 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4651 return true;
4652
4653 // If both branches are conditional and both contain stores to the same
4654 // address, remove the stores from the conditionals and create a conditional
4655 // merged store at the end.
4656 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4657 return true;
4658
4659 // If this is a conditional branch in an empty block, and if any
4660 // predecessors are a conditional branch to one of our destinations,
4661 // fold the conditions into logical ops and one cond br.
4662
4663 // Ignore dbg intrinsics.
4664 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4665 return false;
4666
4667 int PBIOp, BIOp;
4668 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4669 PBIOp = 0;
4670 BIOp = 0;
4671 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4672 PBIOp = 0;
4673 BIOp = 1;
4674 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4675 PBIOp = 1;
4676 BIOp = 0;
4677 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4678 PBIOp = 1;
4679 BIOp = 1;
4680 } else {
4681 return false;
4682 }
4683
4684 // Check to make sure that the other destination of this branch
4685 // isn't BB itself. If so, this is an infinite loop that will
4686 // keep getting unwound.
4687 if (PBI->getSuccessor(PBIOp) == BB)
4688 return false;
4689
4690 // If predecessor's branch probability to BB is too low don't merge branches.
4691 SmallVector<uint32_t, 2> PredWeights;
4692 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4693 extractBranchWeights(*PBI, PredWeights) &&
4694 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4695
4697 PredWeights[PBIOp],
4698 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4699
4701 if (CommonDestProb >= Likely)
4702 return false;
4703 }
4704
4705 // Do not perform this transformation if it would require
4706 // insertion of a large number of select instructions. For targets
4707 // without predication/cmovs, this is a big pessimization.
4708
4709 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4710 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4711 unsigned NumPhis = 0;
4712 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4713 ++II, ++NumPhis) {
4714 if (NumPhis > 2) // Disable this xform.
4715 return false;
4716 }
4717
4718 // Finally, if everything is ok, fold the branches to logical ops.
4719 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4720
4721 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4722 << "AND: " << *BI->getParent());
4723
4725
4726 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4727 // branch in it, where one edge (OtherDest) goes back to itself but the other
4728 // exits. We don't *know* that the program avoids the infinite loop
4729 // (even though that seems likely). If we do this xform naively, we'll end up
4730 // recursively unpeeling the loop. Since we know that (after the xform is
4731 // done) that the block *is* infinite if reached, we just make it an obviously
4732 // infinite loop with no cond branch.
4733 if (OtherDest == BB) {
4734 // Insert it at the end of the function, because it's either code,
4735 // or it won't matter if it's hot. :)
4736 BasicBlock *InfLoopBlock =
4737 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4738 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4739 if (DTU)
4740 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4741 OtherDest = InfLoopBlock;
4742 }
4743
4744 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4745
4746 // BI may have other predecessors. Because of this, we leave
4747 // it alone, but modify PBI.
4748
4749 // Make sure we get to CommonDest on True&True directions.
4750 Value *PBICond = PBI->getCondition();
4751 IRBuilder<NoFolder> Builder(PBI);
4752 if (PBIOp)
4753 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4754
4755 Value *BICond = BI->getCondition();
4756 if (BIOp)
4757 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4758
4759 // Merge the conditions.
4760 Value *Cond =
4761 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4762
4763 // Modify PBI to branch on the new condition to the new dests.
4764 PBI->setCondition(Cond);
4765 PBI->setSuccessor(0, CommonDest);
4766 PBI->setSuccessor(1, OtherDest);
4767
4768 if (DTU) {
4769 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4770 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4771
4772 DTU->applyUpdates(Updates);
4773 }
4774
4775 // Update branch weight for PBI.
4776 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4777 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4778 bool HasWeights =
4779 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4780 SuccTrueWeight, SuccFalseWeight);
4781 if (HasWeights) {
4782 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4783 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4784 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4785 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4786 // The weight to CommonDest should be PredCommon * SuccTotal +
4787 // PredOther * SuccCommon.
4788 // The weight to OtherDest should be PredOther * SuccOther.
4789 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4790 PredOther * SuccCommon,
4791 PredOther * SuccOther};
4792 // Halve the weights if any of them cannot fit in an uint32_t
4793 fitWeights(NewWeights);
4794
4795 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4796 }
4797
4798 // OtherDest may have phi nodes. If so, add an entry from PBI's
4799 // block that are identical to the entries for BI's block.
4800 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4801
4802 // We know that the CommonDest already had an edge from PBI to
4803 // it. If it has PHIs though, the PHIs may have different
4804 // entries for BB and PBI's BB. If so, insert a select to make
4805 // them agree.
4806 for (PHINode &PN : CommonDest->phis()) {
4807 Value *BIV = PN.getIncomingValueForBlock(BB);
4808 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4809 Value *PBIV = PN.getIncomingValue(PBBIdx);
4810 if (BIV != PBIV) {
4811 // Insert a select in PBI to pick the right value.
4812 SelectInst *NV = cast<SelectInst>(
4813 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4814 PN.setIncomingValue(PBBIdx, NV);
4815 // Although the select has the same condition as PBI, the original branch
4816 // weights for PBI do not apply to the new select because the select's
4817 // 'logical' edges are incoming edges of the phi that is eliminated, not
4818 // the outgoing edges of PBI.
4819 if (HasWeights) {
4820 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4821 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4822 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4823 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4824 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4825 // The weight to PredOtherDest should be PredOther * SuccCommon.
4826 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4827 PredOther * SuccCommon};
4828
4829 fitWeights(NewWeights);
4830
4831 setBranchWeights(NV, NewWeights[0], NewWeights[1],
4832 /*IsExpected=*/false);
4833 }
4834 }
4835 }
4836
4837 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4838 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4839
4840 // This basic block is probably dead. We know it has at least
4841 // one fewer predecessor.
4842 return true;
4843}
4844
4845// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4846// true or to FalseBB if Cond is false.
4847// Takes care of updating the successors and removing the old terminator.
4848// Also makes sure not to introduce new successors by assuming that edges to
4849// non-successor TrueBBs and FalseBBs aren't reachable.
4850bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4851 Value *Cond, BasicBlock *TrueBB,
4852 BasicBlock *FalseBB,
4853 uint32_t TrueWeight,
4854 uint32_t FalseWeight) {
4855 auto *BB = OldTerm->getParent();
4856 // Remove any superfluous successor edges from the CFG.
4857 // First, figure out which successors to preserve.
4858 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4859 // successor.
4860 BasicBlock *KeepEdge1 = TrueBB;
4861 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4862
4863 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4864
4865 // Then remove the rest.
4866 for (BasicBlock *Succ : successors(OldTerm)) {
4867 // Make sure only to keep exactly one copy of each edge.
4868 if (Succ == KeepEdge1)
4869 KeepEdge1 = nullptr;
4870 else if (Succ == KeepEdge2)
4871 KeepEdge2 = nullptr;
4872 else {
4873 Succ->removePredecessor(BB,
4874 /*KeepOneInputPHIs=*/true);
4875
4876 if (Succ != TrueBB && Succ != FalseBB)
4877 RemovedSuccessors.insert(Succ);
4878 }
4879 }
4880
4881 IRBuilder<> Builder(OldTerm);
4882 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4883
4884 // Insert an appropriate new terminator.
4885 if (!KeepEdge1 && !KeepEdge2) {
4886 if (TrueBB == FalseBB) {
4887 // We were only looking for one successor, and it was present.
4888 // Create an unconditional branch to it.
4889 Builder.CreateBr(TrueBB);
4890 } else {
4891 // We found both of the successors we were looking for.
4892 // Create a conditional branch sharing the condition of the select.
4893 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4894 if (TrueWeight != FalseWeight)
4895 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4896 }
4897 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4898 // Neither of the selected blocks were successors, so this
4899 // terminator must be unreachable.
4900 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4901 } else {
4902 // One of the selected values was a successor, but the other wasn't.
4903 // Insert an unconditional branch to the one that was found;
4904 // the edge to the one that wasn't must be unreachable.
4905 if (!KeepEdge1) {
4906 // Only TrueBB was found.
4907 Builder.CreateBr(TrueBB);
4908 } else {
4909 // Only FalseBB was found.
4910 Builder.CreateBr(FalseBB);
4911 }
4912 }
4913
4915
4916 if (DTU) {
4918 Updates.reserve(RemovedSuccessors.size());
4919 for (auto *RemovedSuccessor : RemovedSuccessors)
4920 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4921 DTU->applyUpdates(Updates);
4922 }
4923
4924 return true;
4925}
4926
4927// Replaces
4928// (switch (select cond, X, Y)) on constant X, Y
4929// with a branch - conditional if X and Y lead to distinct BBs,
4930// unconditional otherwise.
4931bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4932 SelectInst *Select) {
4933 // Check for constant integer values in the select.
4934 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4935 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4936 if (!TrueVal || !FalseVal)
4937 return false;
4938
4939 // Find the relevant condition and destinations.
4940 Value *Condition = Select->getCondition();
4941 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4942 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4943
4944 // Get weight for TrueBB and FalseBB.
4945 uint32_t TrueWeight = 0, FalseWeight = 0;
4947 bool HasWeights = hasBranchWeightMD(*SI);
4948 if (HasWeights) {
4949 getBranchWeights(SI, Weights);
4950 if (Weights.size() == 1 + SI->getNumCases()) {
4951 TrueWeight =
4952 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4953 FalseWeight =
4954 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4955 }
4956 }
4957
4958 // Perform the actual simplification.
4959 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4960 FalseWeight);
4961}
4962
4963// Replaces
4964// (indirectbr (select cond, blockaddress(@fn, BlockA),
4965// blockaddress(@fn, BlockB)))
4966// with
4967// (br cond, BlockA, BlockB).
4968bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4969 SelectInst *SI) {
4970 // Check that both operands of the select are block addresses.
4971 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4972 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4973 if (!TBA || !FBA)
4974 return false;
4975
4976 // Extract the actual blocks.
4977 BasicBlock *TrueBB = TBA->getBasicBlock();
4978 BasicBlock *FalseBB = FBA->getBasicBlock();
4979
4980 // Perform the actual simplification.
4981 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4982 0);
4983}
4984
4985/// This is called when we find an icmp instruction
4986/// (a seteq/setne with a constant) as the only instruction in a
4987/// block that ends with an uncond branch. We are looking for a very specific
4988/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4989/// this case, we merge the first two "or's of icmp" into a switch, but then the
4990/// default value goes to an uncond block with a seteq in it, we get something
4991/// like:
4992///
4993/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4994/// DEFAULT:
4995/// %tmp = icmp eq i8 %A, 92
4996/// br label %end
4997/// end:
4998/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4999///
5000/// We prefer to split the edge to 'end' so that there is a true/false entry to
5001/// the PHI, merging the third icmp into the switch.
5002bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5003 ICmpInst *ICI, IRBuilder<> &Builder) {
5004 BasicBlock *BB = ICI->getParent();
5005
5006 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5007 // complex.
5008 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5009 return false;
5010
5011 Value *V = ICI->getOperand(0);
5012 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5013
5014 // The pattern we're looking for is where our only predecessor is a switch on
5015 // 'V' and this block is the default case for the switch. In this case we can
5016 // fold the compared value into the switch to simplify things.
5017 BasicBlock *Pred = BB->getSinglePredecessor();
5018 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5019 return false;
5020
5021 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5022 if (SI->getCondition() != V)
5023 return false;
5024
5025 // If BB is reachable on a non-default case, then we simply know the value of
5026 // V in this block. Substitute it and constant fold the icmp instruction
5027 // away.
5028 if (SI->getDefaultDest() != BB) {
5029 ConstantInt *VVal = SI->findCaseDest(BB);
5030 assert(VVal && "Should have a unique destination value");
5031 ICI->setOperand(0, VVal);
5032
5033 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5034 ICI->replaceAllUsesWith(V);
5035 ICI->eraseFromParent();
5036 }
5037 // BB is now empty, so it is likely to simplify away.
5038 return requestResimplify();
5039 }
5040
5041 // Ok, the block is reachable from the default dest. If the constant we're
5042 // comparing exists in one of the other edges, then we can constant fold ICI
5043 // and zap it.
5044 if (SI->findCaseValue(Cst) != SI->case_default()) {
5045 Value *V;
5046 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5048 else
5050
5051 ICI->replaceAllUsesWith(V);
5052 ICI->eraseFromParent();
5053 // BB is now empty, so it is likely to simplify away.
5054 return requestResimplify();
5055 }
5056
5057 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5058 // the block.
5059 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5060 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5061 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5062 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
5063 return false;
5064
5065 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5066 // true in the PHI.
5067 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5068 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5069
5070 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5071 std::swap(DefaultCst, NewCst);
5072
5073 // Replace ICI (which is used by the PHI for the default value) with true or
5074 // false depending on if it is EQ or NE.
5075 ICI->replaceAllUsesWith(DefaultCst);
5076 ICI->eraseFromParent();
5077
5079
5080 // Okay, the switch goes to this block on a default value. Add an edge from
5081 // the switch to the merge point on the compared value.
5082 BasicBlock *NewBB =
5083 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5084 {
5086 auto W0 = SIW.getSuccessorWeight(0);
5088 if (W0) {
5089 NewW = ((uint64_t(*W0) + 1) >> 1);
5090 SIW.setSuccessorWeight(0, *NewW);
5091 }
5092 SIW.addCase(Cst, NewBB, NewW);
5093 if (DTU)
5094 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5095 }
5096
5097 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5098 Builder.SetInsertPoint(NewBB);
5099 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5100 Builder.CreateBr(SuccBlock);
5101 PHIUse->addIncoming(NewCst, NewBB);
5102 if (DTU) {
5103 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5104 DTU->applyUpdates(Updates);
5105 }
5106 return true;
5107}
5108
5109/// The specified branch is a conditional branch.
5110/// Check to see if it is branching on an or/and chain of icmp instructions, and
5111/// fold it into a switch instruction if so.
///
/// \param BI      The conditional branch being examined and possibly replaced.
/// \param Builder Used to create the replacement instructions.
/// \param DL      Used by the constant-compare gatherer and to pick an integer
///                type when the compared value is a pointer.
/// \returns true if the branch was replaced by a switch.
5112bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5113 IRBuilder<> &Builder,
5114 const DataLayout &DL) {
5115 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
5116 if (!Cond)
5117 return false;
5118
5119 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5120 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5121 // 'setne's and'ed together, collect them.
5122
5123 // Try to gather values from a chain of and/or to be turned into a switch
5124 ConstantComparesGatherer ConstantCompare(Cond, DL);
5125 // Unpack the result
5126 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5127 Value *CompVal = ConstantCompare.CompValue;
5128 unsigned UsedICmps = ConstantCompare.UsedICmps;
5129 Value *ExtraCase = ConstantCompare.Extra;
// IsEq is true for an or-of-eq chain (matched values take successor 0) and
// false for an and-of-ne chain.
5130 bool TrueWhenEqual = ConstantCompare.IsEq;
5131
5132 // If we didn't have a multiply compared value, fail.
5133 if (!CompVal)
5134 return false;
5135
5136 // Avoid turning single icmps into a switch.
5137 if (UsedICmps <= 1)
5138 return false;
5139
5140 // There might be duplicate constants in the list, which the switch
5141 // instruction can't handle, remove them now.
5142 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5143 Values.erase(llvm::unique(Values), Values.end());
5144
5145 // If Extra was used, we require at least two switch values to do the
5146 // transformation. A switch with one value is just a conditional branch.
5147 if (ExtraCase && Values.size() < 2)
5148 return false;
5149
5150 // TODO: Preserve branch weight metadata, similarly to how
5151 // foldValueComparisonIntoPredecessors preserves it.
5152
5153 // Figure out which block is which destination.
5154 BasicBlock *DefaultBB = BI->getSuccessor(1);
5155 BasicBlock *EdgeBB = BI->getSuccessor(0);
// For an and-of-ne chain the matched values branch to the false successor,
// so the roles of the two destinations are swapped.
5156 if (!TrueWhenEqual)
5157 std::swap(DefaultBB, EdgeBB);
5158
5159 BasicBlock *BB = BI->getParent();
5160
5161 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5162 << " cases into SWITCH. BB is:\n"
5163 << *BB);
5164
5166
5167 // If there are any extra values that couldn't be folded into the switch
5168 // then we evaluate them with an explicit branch first. Split the block
5169 // right before the condbr to handle it.
5170 if (ExtraCase) {
5171 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5172 /*MSSAU=*/nullptr, "switch.early.test");
5173
5174 // Remove the uncond branch added to the old block.
5175 Instruction *OldTI = BB->getTerminator();
5176 Builder.SetInsertPoint(OldTI);
5177
5178 // There can be an unintended UB if extra values are Poison. Before the
5179 // transformation, extra values may not be evaluated according to the
5180 // condition, and it will not raise UB. But after transformation, we are
5181 // evaluating extra values before checking the condition, and it will raise
5182 // UB. It can be solved by adding freeze instruction to extra values.
5183 AssumptionCache *AC = Options.AC;
5184
5185 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5186 ExtraCase = Builder.CreateFreeze(ExtraCase);
5187
5188 if (TrueWhenEqual)
5189 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)(
5190 else
5191 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5192
5193 OldTI->eraseFromParent();
5194
5195 if (DTU)
5196 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5197
5198 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5199 // for the edge we just added.
5200 addPredecessorToBlock(EdgeBB, BB, NewBB);
5201
5202 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5203 << "\nEXTRABB = " << *BB);
// The switch itself is built in the split-off block from here on.
5204 BB = NewBB;
5205 }
5206
5207 Builder.SetInsertPoint(BI);
5208 // Convert pointer to int before we switch.
5209 if (CompVal->getType()->isPointerTy()) {
5210 CompVal = Builder.CreatePtrToInt(
5211 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5212 }
5213
5214 // Create the new switch instruction now.
5215 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5216
5217 // Add all of the 'cases' to the switch instruction.
5218 for (ConstantInt *Val : Values)
5219 New->addCase(Val, EdgeBB);
5220
5221 // We added edges from PI to the EdgeBB. As such, if there were any
5222 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5223 // the number of edges added.
5224 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5225 PHINode *PN = cast<PHINode>(BBI);
5226 Value *InVal = PN->getIncomingValueForBlock(BB);
// One incoming entry for BB already exists (from the original edge), so only
// Values.size() - 1 additional duplicates are needed.
5227 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5228 PN->addIncoming(InVal, BB);
5229 }
5230
5231 // Erase the old branch instruction.
5233 if (DTU)
5234 DTU->applyUpdates(Updates);
5235
5236 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5237 return true;
5238}
5239
5240bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5241 if (isa<PHINode>(RI->getValue()))
5242 return simplifyCommonResume(RI);
5243 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5244 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5245 // The resume must unwind the exception that caused control to branch here.
5246 return simplifySingleResume(RI);
5247
5248 return false;
5249}
5250
5251// Check if cleanup block is empty
5253 for (Instruction &I : R) {
// Only intrinsic calls may appear in an "empty" cleanup range.
5254 auto *II = dyn_cast<IntrinsicInst>(&I);
5255 if (!II)
5256 return false;
5257
5258 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
// Debug-info intrinsics and lifetime.end markers are benign; any other
// intrinsic disqualifies the range.
5259 switch (IntrinsicID) {
5260 case Intrinsic::dbg_declare:
5261 case Intrinsic::dbg_value:
5262 case Intrinsic::dbg_label:
5263 case Intrinsic::lifetime_end:
5264 break;
5265 default:
5266 return false;
5267 }
5268 }
// Every instruction in the range was a benign intrinsic.
5269 return true;
5270}
5271
5272// Simplify resume that is shared by several landing pads (phi of landing pad).
// Returns true iff at least one trivial unwind block feeding the phi was
// rewritten (its invoking predecessors turned into calls and its branch to
// the shared resume block removed).
5273bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5274 BasicBlock *BB = RI->getParent();
5275
5276 // Check that there are no other instructions except for debug and lifetime
5277 // intrinsics between the phi's and resume instruction.
5278 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5279 BB->getTerminator()->getIterator())))
5280 return false;
5281
5282 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5283 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5284
5285 // Check incoming blocks to see if any of them are trivial.
// A trivial block starts with the landingpad the phi forwards, contains only
// benign intrinsics after it, and has this resume block as unique successor.
5286 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5287 Idx++) {
5288 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5289 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5290
5291 // If the block has other successors, we can not delete it because
5292 // it has other dependents.
5293 if (IncomingBB->getUniqueSuccessor() != BB)
5294 continue;
5295
5296 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5297 // Not the landing pad that caused the control to branch here.
5298 if (IncomingValue != LandingPad)
5299 continue;
5300
5302 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5303 TrivialUnwindBlocks.insert(IncomingBB);
5304 }
5305
5306 // If no trivial unwind blocks, don't do any simplifications.
5307 if (TrivialUnwindBlocks.empty())
5308 return false;
5309
5310 // Turn all invokes that unwind here into calls.
5311 for (auto *TrivialBB : TrivialUnwindBlocks) {
5312 // Blocks that will be simplified should be removed from the phi node.
5313 // Note there could be multiple edges to the resume block, and we need
5314 // to remove them all.
5315 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5316 BB->removePredecessor(TrivialBB, true);
5317
5318 for (BasicBlock *Pred :
5320 removeUnwindEdge(Pred, DTU);
5321 ++NumInvokes;
5322 }
5323
5324 // In each SimplifyCFG run, only the current processed block can be erased.
5325 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5326 // of erasing TrivialBB, we only remove the branch to the common resume
5327 // block so that we can later erase the resume block since it has no
5328 // predecessors.
5329 TrivialBB->getTerminator()->eraseFromParent();
5330 new UnreachableInst(RI->getContext(), TrivialBB);
5331 if (DTU)
5332 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5333 }
5334
5335 // Delete the resume block if all its predecessors have been removed.
5336 if (pred_empty(BB))
5337 DeleteDeadBlock(BB, DTU);
5338
// Non-empty set is guaranteed by the early exit above, so this is true.
5339 return !TrivialUnwindBlocks.empty();
5340}
5341
5342// Simplify resume that is only used by a single (non-phi) landing pad.
// Converts every invoke unwinding to this block into a call and deletes the
// now-unreachable landingpad block. Returns true on success.
5343bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5344 BasicBlock *BB = RI->getParent();
5345 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5346 assert(RI->getValue() == LPInst &&
5347 "Resume must unwind the exception that caused control to here");
5348
5349 // Check that there are no other instructions except for debug intrinsics.
// Only the instructions strictly between the landingpad and the resume are
// inspected; the landingpad and resume themselves are expected.
5351 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5352 return false;
5353
5354 // Turn all invokes that unwind here into calls and delete the basic block.
5356 removeUnwindEdge(Pred, DTU);
5357 ++NumInvokes;
5358 }
5359
5360 // The landingpad is now unreachable. Zap it.
5361 DeleteDeadBlock(BB, DTU);
5362 return true;
5363}
5364
5366 // If this is a trivial cleanup pad that executes no instructions, it can be
5367 // eliminated. If the cleanup pad continues to the caller, any predecessor
5368 // that is an EH pad will be updated to continue to the caller and any
5369 // predecessor that terminates with an invoke instruction will have its invoke
5370 // instruction converted to a call instruction. If the cleanup pad being
5371 // simplified does not continue to the caller, each predecessor will be
5372 // updated to continue to the unwind destination of the cleanup pad being
5373 // simplified.
5374 BasicBlock *BB = RI->getParent();
5375 CleanupPadInst *CPInst = RI->getCleanupPad();
5376 if (CPInst->getParent() != BB)
5377 // This isn't an empty cleanup.
5378 return false;
5379
5380 // We cannot kill the pad if it has multiple uses. This typically arises
5381 // from unreachable basic blocks.
5382 if (!CPInst->hasOneUse())
5383 return false;
5384
5385 // Check that there are no other instructions except for benign intrinsics.
5387 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5388 return false;
5389
5390 // If the cleanup return we are simplifying unwinds to the caller, this will
5391 // set UnwindDest to nullptr.
5392 BasicBlock *UnwindDest = RI->getUnwindDest();
5393
5394 // We're about to remove BB from the control flow. Before we do, sink any
5395 // PHINodes into the unwind destination. Doing this before changing the
5396 // control flow avoids some potentially slow checks, since we can currently
5397 // be certain that UnwindDest and BB have no common predecessors (since they
5398 // are both EH pads).
5399 if (UnwindDest) {
5400 // First, go through the PHI nodes in UnwindDest and update any nodes that
5401 // reference the block we are removing
5402 for (PHINode &DestPN : UnwindDest->phis()) {
5403 int Idx = DestPN.getBasicBlockIndex(BB);
5404 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5405 assert(Idx != -1);
5406 // This PHI node has an incoming value that corresponds to a control
5407 // path through the cleanup pad we are removing. If the incoming
5408 // value is in the cleanup pad, it must be a PHINode (because we
5409 // verified above that the block is otherwise empty). Otherwise, the
5410 // value is either a constant or a value that dominates the cleanup
5411 // pad being removed.
5412 //
5413 // Because BB and UnwindDest are both EH pads, all of their
5414 // predecessors must unwind to these blocks, and since no instruction
5415 // can have multiple unwind destinations, there will be no overlap in
5416 // incoming blocks between SrcPN and DestPN.
5417 Value *SrcVal = DestPN.getIncomingValue(Idx);
5418 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5419
// PHI translation: if the incoming value is itself a phi in BB, each of
// BB's predecessors contributes its own translated value instead.
5420 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5421 for (auto *Pred : predecessors(BB)) {
5422 Value *Incoming =
5423 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5424 DestPN.addIncoming(Incoming, Pred);
5425 }
5426 }
5427
5428 // Sink any remaining PHI nodes directly into UnwindDest.
5429 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5430 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5431 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5432 // If the PHI node has no uses or all of its uses are in this basic
5433 // block (meaning they are debug or lifetime intrinsics), just leave
5434 // it. It will be erased when we erase BB below.
5435 continue;
5436
5437 // Otherwise, sink this PHI node into UnwindDest.
5438 // Any predecessors to UnwindDest which are not already represented
5439 // must be back edges which inherit the value from the path through
5440 // BB. In this case, the PHI value must reference itself.
5441 for (auto *pred : predecessors(UnwindDest))
5442 if (pred != BB)
5443 PN.addIncoming(&PN, pred);
5444 PN.moveBefore(InsertPt);
5445 // Also, add a dummy incoming value for the original BB itself,
5446 // so that the PHI is well-formed until we drop said predecessor.
5447 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5448 }
5449 }
5450
5451 std::vector<DominatorTree::UpdateType> Updates;
5452
5453 // We use make_early_inc_range here because we will remove all predecessors.
5455 if (UnwindDest == nullptr) {
// Unwinding to the caller: pending DT updates must be flushed before
// removeUnwindEdge mutates the CFG itself.
5456 if (DTU) {
5457 DTU->applyUpdates(Updates);
5458 Updates.clear();
5459 }
5460 removeUnwindEdge(PredBB, DTU);
5461 ++NumInvokes;
5462 } else {
5463 BB->removePredecessor(PredBB);
5464 Instruction *TI = PredBB->getTerminator();
5465 TI->replaceUsesOfWith(BB, UnwindDest);
5466 if (DTU) {
5467 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5468 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5469 }
5470 }
5471 }
5472
5473 if (DTU)
5474 DTU->applyUpdates(Updates);
5475
5476 DeleteDeadBlock(BB, DTU);
5477
5478 return true;
5479}
5480
5481// Try to merge two cleanuppads together.
// Folds a cleanuppad whose cleanupret unwinds to another cleanuppad that has
// no other predecessors. Returns true if the pads were merged.
5483 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5484 // with.
5485 BasicBlock *UnwindDest = RI->getUnwindDest();
5486 if (!UnwindDest)
5487 return false;
5488
5489 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5490 // be safe to merge without code duplication.
5491 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5492 return false;
5493
5494 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5495 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5496 if (!SuccessorCleanupPad)
5497 return false;
5498
5499 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5500 // Replace any uses of the successor cleanupad with the predecessor pad
5501 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5502 // funclet bundle operands.
5503 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5504 // Remove the old cleanuppad.
5505 SuccessorCleanupPad->eraseFromParent();
5506 // Now, we simply replace the cleanupret with a branch to the unwind
5507 // destination.
5508 BranchInst::Create(UnwindDest, RI->getParent());
5509 RI->eraseFromParent();
5510
5511 return true;
5512}
5513
5514bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5515 // It is possible to transiantly have an undef cleanuppad operand because we
5516 // have deleted some, but not all, dead blocks.
5517 // Eventually, this block will be deleted.
5518 if (isa<UndefValue>(RI->getOperand(0)))
5519 return false;
5520
5521 if (mergeCleanupPad(RI))
5522 return true;
5523
5524 if (removeEmptyCleanup(RI, DTU))
5525 return true;
5526
5527 return false;
5528}
5529
5530// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Deletes droppable instructions preceding the 'unreachable', then — if the
// block is reduced to just the 'unreachable' — rewrites each predecessor's
// terminator so it no longer targets this block. Returns true if anything
// changed.
5531bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5532 BasicBlock *BB = UI->getParent();
5533
5534 bool Changed = false;
5535
5536 // Ensure that any debug-info records that used to occur after the Unreachable
5537 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5538 // the block.
5540
5541 // Debug-info records on the unreachable inst itself should be deleted, as
5542 // below we delete everything past the final executable instruction.
5543 UI->dropDbgRecords();
5544
5545 // If there are any instructions immediately before the unreachable that can
5546 // be removed, do so.
5547 while (UI->getIterator() != BB->begin()) {
5549 --BBI;
5550
5552 break; // Can not drop any more instructions. We're done here.
5553 // Otherwise, this instruction can be freely erased,
5554 // even if it is not side-effect free.
5555
5556 // Note that deleting EH's here is in fact okay, although it involves a bit
5557 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5558 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5559 // and we can therefore guarantee this block will be erased.
5560
5561 // If we're deleting this, we're deleting any subsequent debug info, so
5562 // delete DbgRecords.
5563 BBI->dropDbgRecords();
5564
5565 // Delete this instruction (any uses are guaranteed to be dead)
5566 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5567 BBI->eraseFromParent();
5568 Changed = true;
5569 }
5570
5571 // If the unreachable instruction is the first in the block, take a gander
5572 // at all of the predecessors of this instruction, and simplify them.
5573 if (&BB->front() != UI)
5574 return Changed;
5575
5576 std::vector<DominatorTree::UpdateType> Updates;
5577
5579 for (BasicBlock *Predecessor : Preds) {
5580 Instruction *TI = Predecessor->getTerminator();
5581 IRBuilder<> Builder(TI);
5582 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5583 // We could either have a proper unconditional branch,
5584 // or a degenerate conditional branch with matching destinations.
5585 if (all_of(BI->successors(),
5586 [BB](auto *Successor) { return Successor == BB; })) {
5587 new UnreachableInst(TI->getContext(), TI->getIterator());
5588 TI->eraseFromParent();
5589 Changed = true;
5590 } else {
5591 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5592 Value* Cond = BI->getCondition();
5593 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5594 "The destinations are guaranteed to be different here.");
5595 CallInst *Assumption;
// Record the branch condition as an assumption: control can only continue
// along the edge that avoids the unreachable block.
5596 if (BI->getSuccessor(0) == BB) {
5597 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5598 Builder.CreateBr(BI->getSuccessor(1));
5599 } else {
5600 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5601 Assumption = Builder.CreateAssumption(Cond);
5602 Builder.CreateBr(BI->getSuccessor(0));
5603 }
5604 if (Options.AC)
5605 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5606
5608 Changed = true;
5609 }
5610 if (DTU)
5611 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5612 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
// NOTE(review): SU appears to wrap SI so branch-weight metadata stays in
// sync while cases are removed — confirm against its declaration.
5614 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5615 if (i->getCaseSuccessor() != BB) {
5616 ++i;
5617 continue;
5618 }
// Drop BB's phi entry for this edge, then delete the case; removeCase
// returns the next case and the end iterator must be refreshed.
5619 BB->removePredecessor(SU->getParent());
5620 i = SU.removeCase(i);
5621 e = SU->case_end();
5622 Changed = true;
5623 }
5624 // Note that the default destination can't be removed!
5625 if (DTU && SI->getDefaultDest() != BB)
5626 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5627 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5628 if (II->getUnwindDest() == BB) {
5629 if (DTU) {
5630 DTU->applyUpdates(Updates);
5631 Updates.clear();
5632 }
5633 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
// The unwind destination is unreachable, so the call cannot throw.
5634 if (!CI->doesNotThrow())
5635 CI->setDoesNotThrow();
5636 Changed = true;
5637 }
5638 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5639 if (CSI->getUnwindDest() == BB) {
5640 if (DTU) {
5641 DTU->applyUpdates(Updates);
5642 Updates.clear();
5643 }
5644 removeUnwindEdge(TI->getParent(), DTU);
5645 Changed = true;
5646 continue;
5647 }
5648
5649 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5650 E = CSI->handler_end();
5651 I != E; ++I) {
5652 if (*I == BB) {
5653 CSI->removeHandler(I);
5654 --I;
5655 --E;
5656 Changed = true;
5657 }
5658 }
5659 if (DTU)
5660 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5661 if (CSI->getNumHandlers() == 0) {
5662 if (CSI->hasUnwindDest()) {
5663 // Redirect all predecessors of the block containing CatchSwitchInst
5664 // to instead branch to the CatchSwitchInst's unwind destination.
5665 if (DTU) {
5666 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5667 Updates.push_back({DominatorTree::Insert,
5668 PredecessorOfPredecessor,
5669 CSI->getUnwindDest()});
5670 Updates.push_back({DominatorTree::Delete,
5671 PredecessorOfPredecessor, Predecessor});
5672 }
5673 }
5674 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5675 } else {
5676 // Rewrite all preds to unwind to caller (or from invoke to call).
5677 if (DTU) {
5678 DTU->applyUpdates(Updates);
5679 Updates.clear();
5680 }
// Copy the predecessor list first; removeUnwindEdge mutates it.
5681 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5682 for (BasicBlock *EHPred : EHPreds)
5683 removeUnwindEdge(EHPred, DTU);
5684 }
5685 // The catchswitch is no longer reachable.
5686 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5687 CSI->eraseFromParent();
5688 Changed = true;
5689 }
5690 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5691 (void)CRI;
5692 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5693 "Expected to always have an unwind to BB.");
5694 if (DTU)
5695 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5696 new UnreachableInst(TI->getContext(), TI->getIterator());
5697 TI->eraseFromParent();
5698 Changed = true;
5699 }
5700 }
5701
5702 if (DTU)
5703 DTU->applyUpdates(Updates);
5704
5705 // If this block is now dead, remove it.
5706 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5707 DeleteDeadBlock(BB, DTU);
5708 return true;
5709 }
5710
5711 return Changed;
5712}
5713
5715 assert(Cases.size() >= 1);
5716
// Each value must be exactly one greater than its successor, i.e. Cases is
// expected to be sorted in descending order and cover a contiguous range.
5718 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5719 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5720 return false;
5721 }
5722 return true;
5723}
5724
5726 DomTreeUpdater *DTU,
5727 bool RemoveOrigDefaultBlock = true) {
5728 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5729 auto *BB = Switch->getParent();
5730 auto *OrigDefaultBlock = Switch->getDefaultDest();
5731 if (RemoveOrigDefaultBlock)
5732 OrigDefaultBlock->removePredecessor(BB);
// The new default is a fresh block holding only an 'unreachable', placed
// just before the original default block.
5733 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5734 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5735 OrigDefaultBlock);
5736 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5738 Switch->setDefaultDest(&*NewDefaultBlock);
5739 if (DTU) {
5741 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
// Only delete the old edge when no remaining case still targets the
// original default block.
5742 if (RemoveOrigDefaultBlock &&
5743 !is_contained(successors(BB), OrigDefaultBlock))
5744 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5745 DTU->applyUpdates(Updates);
5746 }
5747}
5748
5749/// Turn a switch into an integer range comparison and branch.
5750/// Switches with more than 2 destinations are ignored.
5751/// Switches with 1 destination are also ignored.
/// \returns true if the switch was replaced by an unsigned range compare
/// feeding a conditional branch.
5752bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5753 IRBuilder<> &Builder) {
5754 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5755
5756 bool HasDefault = !SI->defaultDestUnreachable();
5757
5758 auto *BB = SI->getParent();
5759
5760 // Partition the cases into two sets with different destinations.
5761 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5762 BasicBlock *DestB = nullptr;
5765
5766 for (auto Case : SI->cases()) {
5767 BasicBlock *Dest = Case.getCaseSuccessor();
5768 if (!DestA)
5769 DestA = Dest;
5770 if (Dest == DestA) {
5771 CasesA.push_back(Case.getCaseValue());
5772 continue;
5773 }
5774 if (!DestB)
5775 DestB = Dest;
5776 if (Dest == DestB) {
5777 CasesB.push_back(Case.getCaseValue());
5778 continue;
5779 }
5780 return false; // More than two destinations.
5781 }
5782 if (!DestB)
5783 return false; // All destinations are the same and the default is unreachable
5784
5785 assert(DestA && DestB &&
5786 "Single-destination switch should have been folded.");
5787 assert(DestA != DestB);
5788 assert(DestB != SI->getDefaultDest());
5789 assert(!CasesB.empty() && "There must be non-default cases.");
5790 assert(!CasesA.empty() || HasDefault);
5791
5792 // Figure out if one of the sets of cases form a contiguous range.
5793 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5794 BasicBlock *ContiguousDest = nullptr;
5795 BasicBlock *OtherDest = nullptr;
5796 if (!CasesA.empty() && casesAreContiguous(CasesA)) {
5797 ContiguousCases = &CasesA;
5798 ContiguousDest = DestA;
5799 OtherDest = DestB;
5800 } else if (casesAreContiguous(CasesB)) {
5801 ContiguousCases = &CasesB;
5802 ContiguousDest = DestB;
5803 OtherDest = DestA;
5804 } else
5805 return false;
5806
5807 // Start building the compare and branch.
5808
// The contiguous cases are in descending order (see casesAreContiguous), so
// back() is the smallest value; adding its negation maps the whole range
// onto [0, size), enabling a single unsigned compare.
5809 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5810 Constant *NumCases =
5811 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5812
5813 Value *Sub = SI->getCondition();
5814 if (!Offset->isNullValue())
5815 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5816
5817 Value *Cmp;
5818 // If NumCases overflowed, then all possible values jump to the successor.
5819 if (NumCases->isNullValue() && !ContiguousCases->empty())
5820 Cmp = ConstantInt::getTrue(SI->getContext());
5821 else
5822 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5823 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5824
5825 // Update weight for the newly-created conditional branch.
5826 if (hasBranchWeightMD(*SI)) {
5828 getBranchWeights(SI, Weights);
5829 if (Weights.size() == 1 + SI->getNumCases()) {
5830 uint64_t TrueWeight = 0;
5831 uint64_t FalseWeight = 0;
5832 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5833 if (SI->getSuccessor(I) == ContiguousDest)
5834 TrueWeight += Weights[I];
5835 else
5836 FalseWeight += Weights[I];
5837 }
// Scale both weights down together until they fit in 32 bits.
5838 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5839 TrueWeight /= 2;
5840 FalseWeight /= 2;
5841 }
5842 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5843 }
5844 }
5845
5846 // Prune obsolete incoming values off the successors' PHI nodes.
// Each destination keeps exactly one incoming edge from BB after the
// rewrite, so remove all but one of the entries the switch contributed
// (counting the default edge where it targets the same block).
5847 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5848 unsigned PreviousEdges = ContiguousCases->size();
5849 if (ContiguousDest == SI->getDefaultDest())
5850 ++PreviousEdges;
5851 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5852 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5853 }
5854 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5855 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5856 if (OtherDest == SI->getDefaultDest())
5857 ++PreviousEdges;
5858 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5859 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5860 }
5861
5862 // Clean up the default block - it may have phis or other instructions before
5863 // the unreachable terminator.
5864 if (!HasDefault)
5866
5867 auto *UnreachableDefault = SI->getDefaultDest();
5868
5869 // Drop the switch.
5870 SI->eraseFromParent();
5871
5872 if (!HasDefault && DTU)
5873 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5874
5875 return true;
5876}
5877
5878/// Compute masked bits for the condition of a switch
5879/// and use it to remove dead cases.
/// \returns true if any case was removed or the default was proven dead.
5881 AssumptionCache *AC,
5882 const DataLayout &DL) {
5883 Value *Cond = SI->getCondition();
5884 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
5885
5886 // We can also eliminate cases by determining that their values are outside of
5887 // the limited range of the condition based on how many significant (non-sign)
5888 // bits are in the condition value.
5889 unsigned MaxSignificantBitsInCond =
5891
5892 // Gather dead cases.
5894 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5895 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5896 for (const auto &Case : SI->cases()) {
5897 auto *Successor = Case.getCaseSuccessor();
// Track per-successor live-case counts so dominator-tree edges can be
// deleted once a successor loses all of its cases.
5898 if (DTU) {
5899 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
5900 if (Inserted)
5901 UniqueSuccessors.push_back(Successor);
5902 ++It->second;
5903 }
5904 const APInt &CaseVal = Case.getCaseValue()->getValue();
// A case is dead if it sets a known-zero bit, misses a known-one bit, or
// needs more significant bits than the condition can carry.
5905 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5906 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5907 DeadCases.push_back(Case.getCaseValue());
5908 if (DTU)
5909 --NumPerSuccessorCases[Successor];
5910 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5911 << " is dead.\n");
5912 }
5913 }
5914
5915 // If we can prove that the cases must cover all possible values, the
5916 // default destination becomes dead and we can remove it. If we know some
5917 // of the bits in the value, we can use that to more precisely compute the
5918 // number of possible unique case values.
5919 bool HasDefault = !SI->defaultDestUnreachable();
5920 const unsigned NumUnknownBits =
5921 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5922 assert(NumUnknownBits <= Known.getBitWidth());
5923 if (HasDefault && DeadCases.empty() &&
5924 NumUnknownBits < 64 /* avoid overflow */) {
5925 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5926 if (SI->getNumCases() == AllNumCases) {
5928 return true;
5929 }
5930 // When only one case value is missing, replace default with that case.
5931 // Eliminating the default branch will provide more opportunities for
5932 // optimization, such as lookup tables.
5933 if (SI->getNumCases() == AllNumCases - 1) {
5934 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5935 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
5936 if (CondTy->getIntegerBitWidth() > 64 ||
5937 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5938 return false;
5939
// With 2^k possible values (k >= 2 by the assert above), each bit position
// is set in an even number of them, so the XOR over the complete value set
// is zero; XOR-ing the present case values therefore yields the single
// missing value.
5940 uint64_t MissingCaseVal = 0;
5941 for (const auto &Case : SI->cases())
5942 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5943 auto *MissingCase =
5944 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
// NOTE(review): SIW appears to wrap SI to keep branch-weight metadata in
// sync while cases are added/removed — confirm against its declaration.
5946 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
5947 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5948 SIW.setSuccessorWeight(0, 0);
5949 return true;
5950 }
5951 }
5952
5953 if (DeadCases.empty())
5954 return false;
5955
5957 for (ConstantInt *DeadCase : DeadCases) {
5958 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5959 assert(CaseI != SI->case_default() &&
5960 "Case was not found. Probably mistake in DeadCases forming.");
5961 // Prune unused values from PHI nodes.
5962 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5963 SIW.removeCase(CaseI);
5964 }
5965
5966 if (DTU) {
5967 std::vector<DominatorTree::UpdateType> Updates;
5968 for (auto *Successor : UniqueSuccessors)
5969 if (NumPerSuccessorCases[Successor] == 0)
5970 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5971 DTU->applyUpdates(Updates);
5972 }
5973
5974 return true;
5975}
5976
5977/// If BB would be eligible for simplification by
5978/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5979/// by an unconditional branch), look at the phi node for BB in the successor
5980/// block and see if the incoming value is equal to CaseValue. If so, return
5981/// the phi node, and set PhiIndex to BB's index in the phi node.
5983 BasicBlock *BB, int *PhiIndex) {
5984 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
5985 return nullptr; // BB must be empty to be a candidate for simplification.
5986 if (!BB->getSinglePredecessor())
5987 return nullptr; // BB must be dominated by the switch.
5988
5989 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
5990 if (!Branch || !Branch->isUnconditional())
5991 return nullptr; // Terminator must be unconditional branch.
5992
5993 BasicBlock *Succ = Branch->getSuccessor(0);
5994
// Return the first phi in the successor whose incoming value along the BB
// edge is exactly CaseValue.
5995 for (PHINode &PHI : Succ->phis()) {
5996 int Idx = PHI.getBasicBlockIndex(BB);
5997 assert(Idx >= 0 && "PHI has no entry for predecessor?");
5998
5999 Value *InValue = PHI.getIncomingValue(Idx);
6000 if (InValue != CaseValue)
6001 continue;
6002
6003 *PhiIndex = Idx;
6004 return &PHI;
6005 }
6006
6007 return nullptr;
6008}
6009
6010/// Try to forward the condition of a switch instruction to a phi node
6011/// dominated by the switch, if that would mean that some of the destination
6012/// blocks of the switch can be folded away. Return true if a change is made.
6014 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6015
6016 ForwardingNodesMap ForwardingNodes;
6017 BasicBlock *SwitchBlock = SI->getParent();
6018 bool Changed = false;
6019 for (const auto &Case : SI->cases()) {
6020 ConstantInt *CaseValue = Case.getCaseValue();
6021 BasicBlock *CaseDest = Case.getCaseSuccessor();
6022
6023 // Replace phi operands in successor blocks that are using the constant case
6024 // value rather than the switch condition variable:
6025 // switchbb:
6026 // switch i32 %x, label %default [
6027 // i32 17, label %succ
6028 // ...
6029 // succ:
6030 // %r = phi i32 ... [ 17, %switchbb ] ...
6031 // -->
6032 // %r = phi i32 ... [ %x, %switchbb ] ...
6033
6034 for (PHINode &Phi : CaseDest->phis()) {
6035 // This only works if there is exactly 1 incoming edge from the switch to
6036 // a phi. If there is >1, that means multiple cases of the switch map to 1
6037 // value in the phi, and that phi value is not the switch condition. Thus,
6038 // this transform would not make sense (the phi would be invalid because
6039 // a phi can't have different incoming values from the same block).
6040 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6041 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6042 count(Phi.blocks(), SwitchBlock) == 1) {
6043 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6044 Changed = true;
6045 }
6046 }
6047
6048 // Collect phi nodes that are indirectly using this switch's case constants.
6049 int PhiIdx;
6050 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6051 ForwardingNodes[Phi].push_back(PhiIdx);
6052 }
6053
6054 for (auto &ForwardingNode : ForwardingNodes) {
6055 PHINode *Phi = ForwardingNode.first;
6056 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6057 // Check if it helps to fold PHI.
// Forwarding pays off when at least two entries can be rewritten, or when
// the phi already carries the switch condition among its incoming values.
6058 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6059 continue;
6060
6061 for (int Index : Indexes)
6062 Phi->setIncomingValue(Index, SI->getCondition());
6063 Changed = true;
6064 }
6065
6066 return Changed;
6067}
6068
6069/// Return true if the backend will be able to handle
6070/// initializing an array of constants like C.
6072 if (C->isThreadDependent())
6073 return false;
6074 if (C->isDLLImportDependent())
6075 return false;
6076
6077 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6078 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
6079 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
6080 return false;
6081
6082 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
6083 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6084 // materializing the array of constants.
6085 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6086 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6087 return false;
6088 }
6089
6091 return false;
6092
6093 return true;
6094}
6095
6096/// If V is a Constant, return it. Otherwise, try to look up
6097/// its constant value in ConstantPool, returning 0 if it's not there.
6098static Constant *
6101 if (Constant *C = dyn_cast<Constant>(V))
6102 return C;
6103 return ConstantPool.lookup(V);
6104}
6105
6106/// Try to fold instruction I into a constant. This works for
6107/// simple instructions such as binary operations where both operands are
6108/// constant or can be replaced by constants from the ConstantPool. Returns the
6109/// resulting constant on success, 0 otherwise.
6110static Constant *
6113 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
6114 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6115 if (!A)
6116 return nullptr;
6117 if (A->isAllOnesValue())
6118 return lookupConstant(Select->getTrueValue(), ConstantPool);
6119 if (A->isNullValue())
6120 return lookupConstant(Select->getFalseValue(), ConstantPool);
6121 return nullptr;
6122 }
6123
6125 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6126 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6127 COps.push_back(A);
6128 else
6129 return nullptr;
6130 }
6131
6132 return ConstantFoldInstOperands(I, COps, DL);
6133}
6134
6135/// Try to determine the resulting constant values in phi nodes
6136/// at the common destination basic block, *CommonDest, for one of the case
6137/// destionations CaseDest corresponding to value CaseVal (0 for the default
6138/// case), of a switch instruction SI.
6139static bool
6141 BasicBlock **CommonDest,
6142 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6143 const DataLayout &DL, const TargetTransformInfo &TTI) {
6144 // The block from which we enter the common destination.
6145 BasicBlock *Pred = SI->getParent();
6146
6147 // If CaseDest is empty except for some side-effect free instructions through
6148 // which we can constant-propagate the CaseVal, continue to its successor.
6150 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6151 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6152 if (I.isTerminator()) {
6153 // If the terminator is a simple branch, continue to the next block.
6154 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6155 return false;
6156 Pred = CaseDest;
6157 CaseDest = I.getSuccessor(0);
6158 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6159 // Instruction is side-effect free and constant.
6160
6161 // If the instruction has uses outside this block or a phi node slot for
6162 // the block, it is not safe to bypass the instruction since it would then
6163 // no longer dominate all its uses.
6164 for (auto &Use : I.uses()) {
6165 User *User = Use.getUser();
6166 if (Instruction *I = dyn_cast<Instruction>(User))
6167 if (I->getParent() == CaseDest)
6168 continue;
6169 if (PHINode *Phi = dyn_cast<PHINode>(User))
6170 if (Phi->getIncomingBlock(Use) == CaseDest)
6171 continue;
6172 return false;
6173 }
6174
6175 ConstantPool.insert(std::make_pair(&I, C));
6176 } else {
6177 break;
6178 }
6179 }
6180
6181 // If we did not have a CommonDest before, use the current one.
6182 if (!*CommonDest)
6183 *CommonDest = CaseDest;
6184 // If the destination isn't the common one, abort.
6185 if (CaseDest != *CommonDest)
6186 return false;
6187
6188 // Get the values for this case from phi nodes in the destination block.
6189 for (PHINode &PHI : (*CommonDest)->phis()) {
6190 int Idx = PHI.getBasicBlockIndex(Pred);
6191 if (Idx == -1)
6192 continue;
6193
6194 Constant *ConstVal =
6195 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6196 if (!ConstVal)
6197 return false;
6198
6199 // Be conservative about which kinds of constants we support.
6200 if (!validLookupTableConstant(ConstVal, TTI))
6201 return false;
6202
6203 Res.push_back(std::make_pair(&PHI, ConstVal));
6204 }
6205
6206 return Res.size() > 0;
6207}
6208
6209// Helper function used to add CaseVal to the list of cases that generate
6210// Result. Returns the updated number of cases that generate this result.
6211static size_t mapCaseToResult(ConstantInt *CaseVal,
6212 SwitchCaseResultVectorTy &UniqueResults,
6213 Constant *Result) {
6214 for (auto &I : UniqueResults) {
6215 if (I.first == Result) {
6216 I.second.push_back(CaseVal);
6217 return I.second.size();
6218 }
6219 }
6220 UniqueResults.push_back(
6221 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6222 return 1;
6223}
6224
6225// Helper function that initializes a map containing
6226// results for the PHI node of the common destination block for a switch
6227// instruction. Returns false if multiple PHI nodes have been found or if
6228// there is not a common destination block for the switch.
6230 BasicBlock *&CommonDest,
6231 SwitchCaseResultVectorTy &UniqueResults,
6232 Constant *&DefaultResult,
6233 const DataLayout &DL,
6234 const TargetTransformInfo &TTI,
6235 uintptr_t MaxUniqueResults) {
6236 for (const auto &I : SI->cases()) {
6237 ConstantInt *CaseVal = I.getCaseValue();
6238
6239 // Resulting value at phi nodes for this case value.
6240 SwitchCaseResultsTy Results;
6241 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6242 DL, TTI))
6243 return false;
6244
6245 // Only one value per case is permitted.
6246 if (Results.size() > 1)
6247 return false;
6248
6249 // Add the case->result mapping to UniqueResults.
6250 const size_t NumCasesForResult =
6251 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6252
6253 // Early out if there are too many cases for this result.
6254 if (NumCasesForResult > MaxSwitchCasesPerResult)
6255 return false;
6256
6257 // Early out if there are too many unique results.
6258 if (UniqueResults.size() > MaxUniqueResults)
6259 return false;
6260
6261 // Check the PHI consistency.
6262 if (!PHI)
6263 PHI = Results[0].first;
6264 else if (PHI != Results[0].first)
6265 return false;
6266 }
6267 // Find the default result value.
6269 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6270 DL, TTI);
6271 // If the default value is not found abort unless the default destination
6272 // is unreachable.
6273 DefaultResult =
6274 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6275
6276 return DefaultResult || SI->defaultDestUnreachable();
6277}
6278
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// Returns the select value on success, nullptr if no fold applies. New
// instructions are emitted through Builder at its current insertion point.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;    ---->   %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary. These are the
        // condition bits not known constant and not pinned by AndMask.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          return Builder.CreateSelect(Cmp, ResultVector[0].first,
                                      DefaultResult);
        }
      }

      // Mark the bits case number touched. BitMask accumulates every bit in
      // which some case value differs from the minimum case value.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits: 2^popcount(BitMask) combinations == CaseCount.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where two cases have the same value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  return nullptr;
}
6378
6379// Helper function to cleanup a switch instruction that has been converted into
6380// a select, fixing up PHI nodes and basic blocks.
6382 Value *SelectValue,
6383 IRBuilder<> &Builder,
6384 DomTreeUpdater *DTU) {
6385 std::vector<DominatorTree::UpdateType> Updates;
6386
6387 BasicBlock *SelectBB = SI->getParent();
6388 BasicBlock *DestBB = PHI->getParent();
6389
6390 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6391 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6392 Builder.CreateBr(DestBB);
6393
6394 // Remove the switch.
6395
6396 PHI->removeIncomingValueIf(
6397 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6398 PHI->addIncoming(SelectValue, SelectBB);
6399
6400 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6401 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6402 BasicBlock *Succ = SI->getSuccessor(i);
6403
6404 if (Succ == DestBB)
6405 continue;
6406 Succ->removePredecessor(SelectBB);
6407 if (DTU && RemovedSuccessors.insert(Succ).second)
6408 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6409 }
6410 SI->eraseFromParent();
6411 if (DTU)
6412 DTU->applyUpdates(Updates);
6413}
6414
6415/// If a switch is only used to initialize one or more phi nodes in a common
6416/// successor block with only two different constant values, try to replace the
6417/// switch with a select. Returns true if the fold was made.
6418static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6419 DomTreeUpdater *DTU, const DataLayout &DL,
6420 const TargetTransformInfo &TTI) {
6421 Value *const Cond = SI->getCondition();
6422 PHINode *PHI = nullptr;
6423 BasicBlock *CommonDest = nullptr;
6424 Constant *DefaultResult;
6425 SwitchCaseResultVectorTy UniqueResults;
6426 // Collect all the cases that will deliver the same value from the switch.
6427 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6428 DL, TTI, /*MaxUniqueResults*/ 2))
6429 return false;
6430
6431 assert(PHI != nullptr && "PHI for value select not found");
6432 Builder.SetInsertPoint(SI);
6433 Value *SelectValue =
6434 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder, DL);
6435 if (!SelectValue)
6436 return false;
6437
6438 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6439 return true;
6440}
6441
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch. The constructor analyzes the case values and picks
/// the cheapest representation (see the Kind enum below); replaceSwitch()
/// then emits the corresponding IR.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

private:
  // Depending on the switch, there are different alternatives.
  // Chosen once by the constructor, cheapest-first.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True if the linear map may wrap; suppresses the nsw flags on the emitted
  // mul/add.
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6514
// Analyze the case values and choose the cheapest Kind of replacement:
// single value, linear map, register-sized bitmap, or array lookup table.
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents, indexed by (case value - Offset).
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Poison entries don't invalidate single-value detection.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  if (isa<IntegerType>(ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      // NOTE(review): this local `M` shadows the `Module &M` parameter; the
      // parameter is no longer needed at this point, but renaming would help.
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      // Probe multiplier * (TableSize - 1) for signed overflow.
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      ++NumLinearMaps;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    IntegerType *IT = cast<IntegerType>(ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Iterate backwards so the first element ends up in the low bits.
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    ++NumBitMaps;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ValueType, TableSize);
  Initializer = ConstantArray::get(TableTy, TableContents);

  Kind = LookupTableKind;
}
6646
// Emit the IR that computes the replacement value for the given Index,
// according to the Kind selected by the constructor.
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // Every case yields the same constant; no instructions needed.
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case LookupTableKind: {
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array element. We will be only loading
    // one value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(ValueType));
    Type *IndexTy = DL.getIndexType(Table->getType());
    auto *ArrayTy = cast<ArrayType>(Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // The zext is non-negative when every valid table index fits in the
      // old type's non-negative range.
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
6717
6718bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6719 uint64_t TableSize,
6720 Type *ElementType) {
6721 auto *IT = dyn_cast<IntegerType>(ElementType);
6722 if (!IT)
6723 return false;
6724 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6725 // are <= 15, we could try to narrow the type.
6726
6727 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6728 if (TableSize >= UINT_MAX / IT->getBitWidth())
6729 return false;
6730 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6731}
6732
6734 const DataLayout &DL) {
6735 // Allow any legal type.
6736 if (TTI.isTypeLegal(Ty))
6737 return true;
6738
6739 auto *IT = dyn_cast<IntegerType>(Ty);
6740 if (!IT)
6741 return false;
6742
6743 // Also allow power of 2 integer types that have at least 8 bits and fit in
6744 // a register. These types are common in frontend languages and targets
6745 // usually support loads of these types.
6746 // TODO: We could relax this to any integer that fits in a register and rely
6747 // on ABI alignment and padding in the table to allow the load to be widened.
6748 // Or we could widen the constants and truncate the load.
6749 unsigned BitWidth = IT->getBitWidth();
6750 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6751 DL.fitsInLegalInteger(IT->getBitWidth());
6752}
6753
6754Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
6755
6756static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
6757 // 40% is the default density for building a jump table in optsize/minsize
6758 // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
6759 // function was based on.
6760 const uint64_t MinDensity = 40;
6761
6762 if (CaseRange >= UINT64_MAX / 100)
6763 return false; // Avoid multiplication overflows below.
6764
6765 return NumCases * 100 >= CaseRange * MinDensity;
6766}
6767
6769 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6770 uint64_t Range = Diff + 1;
6771 if (Range < Diff)
6772 return false; // Overflow.
6773
6774 return isSwitchDense(Values.size(), Range);
6775}
6776
6777/// Determine whether a lookup table should be built for this switch, based on
6778/// the number of cases, size of the table, and the types of the results.
6779// TODO: We could support larger than legal types by limiting based on the
6780// number of loads required and/or table size. If the constants are small we
6781// could use smaller table entries and extend after the load.
6782static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
6783 const TargetTransformInfo &TTI,
6784 const DataLayout &DL,
6785 const SmallVector<Type *> &ResultTypes) {
6786 if (SI->getNumCases() > TableSize)
6787 return false; // TableSize overflowed.
6788
6789 bool AllTablesFitInRegister = true;
6790 bool HasIllegalType = false;
6791 for (const auto &Ty : ResultTypes) {
6792 // Saturate this flag to true.
6793 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6794
6795 // Saturate this flag to false.
6796 AllTablesFitInRegister =
6797 AllTablesFitInRegister &&
6798 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
6799
6800 // If both flags saturate, we're done. NOTE: This *only* works with
6801 // saturating flags, and all flags have to saturate first due to the
6802 // non-deterministic behavior of iterating over a dense map.
6803 if (HasIllegalType && !AllTablesFitInRegister)
6804 break;
6805 }
6806
6807 // If each table would fit in a register, we should build it anyway.
6808 if (AllTablesFitInRegister)
6809 return true;
6810
6811 // Don't build a table that doesn't fit in-register if it has illegal types.
6812 if (HasIllegalType)
6813 return false;
6814
6815 return isSwitchDense(SI->getNumCases(), TableSize);
6816}
6817
6819 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6820 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
6821 const DataLayout &DL, const TargetTransformInfo &TTI) {
6822 if (MinCaseVal.isNullValue())
6823 return true;
6824 if (MinCaseVal.isNegative() ||
6825 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6826 !HasDefaultResults)
6827 return false;
6828 return all_of(ResultTypes, [&](const auto &ResultType) {
6829 return SwitchReplacement::wouldFitInRegister(
6830 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
6831 });
6832}
6833
6834/// Try to reuse the switch table index compare. Following pattern:
6835/// \code
6836/// if (idx < tablesize)
6837/// r = table[idx]; // table does not contain default_value
6838/// else
6839/// r = default_value;
6840/// if (r != default_value)
6841/// ...
6842/// \endcode
6843/// Is optimized to:
6844/// \code
6845/// cond = idx < tablesize;
6846/// if (cond)
6847/// r = table[idx];
6848/// else
6849/// r = default_value;
6850/// if (cond)
6851/// ...
6852/// \endcode
6853/// Jump threading will then eliminate the second if(cond).
6855 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6856 Constant *DefaultValue,
6857 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6858 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6859 if (!CmpInst)
6860 return;
6861
6862 // We require that the compare is in the same block as the phi so that jump
6863 // threading can do its work afterwards.
6864 if (CmpInst->getParent() != PhiBlock)
6865 return;
6866
6867 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6868 if (!CmpOp1)
6869 return;
6870
6871 Value *RangeCmp = RangeCheckBranch->getCondition();
6872 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6873 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6874
6875 // Check if the compare with the default value is constant true or false.
6876 const DataLayout &DL = PhiBlock->getDataLayout();
6878 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6879 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6880 return;
6881
6882 // Check if the compare with the case values is distinct from the default
6883 // compare result.
6884 for (auto ValuePair : Values) {
6886 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6887 if (!CaseConst || CaseConst == DefaultConst ||
6888 (CaseConst != TrueConst && CaseConst != FalseConst))
6889 return;
6890 }
6891
6892 // Check if the branch instruction dominates the phi node. It's a simple
6893 // dominance check, but sufficient for our needs.
6894 // Although this check is invariant in the calling loops, it's better to do it
6895 // at this late stage. Practically we do it at most once for a switch.
6896 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6897 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6898 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6899 return;
6900 }
6901
6902 if (DefaultConst == FalseConst) {
6903 // The compare yields the same result. We can replace it.
6904 CmpInst->replaceAllUsesWith(RangeCmp);
6905 ++NumTableCmpReuses;
6906 } else {
6907 // The compare yields the same result, just inverted. We can replace it.
6908 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6909 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6910 RangeCheckBranch->getIterator());
6911 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6912 ++NumTableCmpReuses;
6913 }
6914}
6915
6916/// If the switch is only used to initialize one or more phi nodes in a common
6917/// successor block with different constant values, replace the switch with
6918/// lookup tables.
6920 DomTreeUpdater *DTU, const DataLayout &DL,
6921 const TargetTransformInfo &TTI) {
6922 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6923
6924 BasicBlock *BB = SI->getParent();
6925 Function *Fn = BB->getParent();
6926 // Only build lookup table when we have a target that supports it or the
6927 // attribute is not set.
6929 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6930 return false;
6931
6932 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6933 // split off a dense part and build a lookup table for that.
6934
6935 // FIXME: This creates arrays of GEPs to constant strings, which means each
6936 // GEP needs a runtime relocation in PIC code. We should just build one big
6937 // string and lookup indices into that.
6938
6939 // Ignore switches with less than three cases. Lookup tables will not make
6940 // them faster, so we don't analyze them.
6941 if (SI->getNumCases() < 3)
6942 return false;
6943
6944 // Figure out the corresponding result for each case value and phi node in the
6945 // common destination, as well as the min and max case values.
6946 assert(!SI->cases().empty());
6947 SwitchInst::CaseIt CI = SI->case_begin();
6948 ConstantInt *MinCaseVal = CI->getCaseValue();
6949 ConstantInt *MaxCaseVal = CI->getCaseValue();
6950
6951 BasicBlock *CommonDest = nullptr;
6952
6953 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6955
6957 SmallVector<Type *> ResultTypes;
6959
6960 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6961 ConstantInt *CaseVal = CI->getCaseValue();
6962 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6963 MinCaseVal = CaseVal;
6964 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6965 MaxCaseVal = CaseVal;
6966
6967 // Resulting value at phi nodes for this case value.
6969 ResultsTy Results;
6970 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6971 Results, DL, TTI))
6972 return false;
6973
6974 // Append the result and result types from this case to the list for each
6975 // phi.
6976 for (const auto &I : Results) {
6977 PHINode *PHI = I.first;
6978 Constant *Value = I.second;
6979 auto [It, Inserted] = ResultLists.try_emplace(PHI);
6980 if (Inserted)
6981 PHIs.push_back(PHI);
6982 It->second.push_back(std::make_pair(CaseVal, Value));
6983 ResultTypes.push_back(PHI->getType());
6984 }
6985 }
6986
6987 // If the table has holes, we need a constant result for the default case
6988 // or a bitmask that fits in a register.
6989 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6990 bool HasDefaultResults =
6991 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6992 DefaultResultsList, DL, TTI);
6993 for (const auto &I : DefaultResultsList) {
6994 PHINode *PHI = I.first;
6995 Constant *Result = I.second;
6996 DefaultResults[PHI] = Result;
6997 }
6998
6999 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7000 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7001 uint64_t TableSize;
7002 ConstantInt *TableIndexOffset;
7003 if (UseSwitchConditionAsTableIndex) {
7004 TableSize = MaxCaseVal->getLimitedValue() + 1;
7005 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7006 } else {
7007 TableSize =
7008 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7009
7010 TableIndexOffset = MinCaseVal;
7011 }
7012
7013 // If the default destination is unreachable, or if the lookup table covers
7014 // all values of the conditional variable, branch directly to the lookup table
7015 // BB. Otherwise, check that the condition is within the case range.
7016 uint64_t NumResults = ResultLists[PHIs[0]].size();
7017 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7018
7019 bool TableHasHoles = (NumResults < TableSize);
7020
7021 // If the table has holes but the default destination doesn't produce any
7022 // constant results, the lookup table entries corresponding to the holes will
7023 // contain poison.
7024 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7025
7026 // If the default destination doesn't produce a constant result but is still
7027 // reachable, and the lookup table has holes, we need to use a mask to
7028 // determine if the current index should load from the lookup table or jump
7029 // to the default case.
7030 // The mask is unnecessary if the table has holes but the default destination
7031 // is unreachable, as in that case the holes must also be unreachable.
7032 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7033 if (NeedMask) {
7034 // As an extra penalty for the validity test we require more cases.
7035 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7036 return false;
7037 if (!DL.fitsInLegalInteger(TableSize))
7038 return false;
7039 }
7040
7041 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7042 return false;
7043
7044 // Compute the table index value.
7045 Value *TableIndex;
7046 if (UseSwitchConditionAsTableIndex) {
7047 TableIndex = SI->getCondition();
7048 if (HasDefaultResults) {
7049 // Grow the table to cover all possible index values to avoid the range
7050 // check. It will use the default result to fill in the table hole later,
7051 // so make sure it exist.
7052 ConstantRange CR =
7053 computeConstantRange(TableIndex, /* ForSigned */ false);
7054 // Grow the table shouldn't have any size impact by checking
7055 // wouldFitInRegister.
7056 // TODO: Consider growing the table also when it doesn't fit in a register
7057 // if no optsize is specified.
7058 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7059 if (!CR.isUpperWrapped() &&
7060 all_of(ResultTypes, [&](const auto &ResultType) {
7061 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7062 ResultType);
7063 })) {
7064 // There may be some case index larger than the UpperBound (unreachable
7065 // case), so make sure the table size does not get smaller.
7066 TableSize = std::max(UpperBound, TableSize);
7067 // The default branch is unreachable after we enlarge the lookup table.
7068 // Adjust DefaultIsReachable to reuse code path.
7069 DefaultIsReachable = false;
7070 }
7071 }
7072 }
7073
7074 // Keep track of the switch replacement for each phi
7076 for (PHINode *PHI : PHIs) {
7077 const auto &ResultList = ResultLists[PHI];
7078
7079 Type *ResultType = ResultList.begin()->second->getType();
7080 // Use any value to fill the lookup table holes.
7082 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7083 StringRef FuncName = Fn->getName();
7084 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7085 ResultList, DefaultVal, DL, FuncName);
7086 PhiToReplacementMap.insert({PHI, Replacement});
7087 }
7088
7089 Builder.SetInsertPoint(SI);
7090 // TableIndex is the switch condition - TableIndexOffset if we don't
7091 // use the condition directly
7092 if (!UseSwitchConditionAsTableIndex) {
7093 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7094 // we can try to attach nsw.
7095 bool MayWrap = true;
7096 if (!DefaultIsReachable) {
7097 APInt Res =
7098 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7099 (void)Res;
7100 }
7101 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7102 "switch.tableidx", /*HasNUW =*/false,
7103 /*HasNSW =*/!MayWrap);
7104 }
7105
7106 std::vector<DominatorTree::UpdateType> Updates;
7107
7108 // Compute the maximum table size representable by the integer type we are
7109 // switching upon.
7110 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7111 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7112 assert(MaxTableSize >= TableSize &&
7113 "It is impossible for a switch to have more entries than the max "
7114 "representable value of its input integer type's size.");
7115
7116 // Create the BB that does the lookups.
7117 Module &Mod = *CommonDest->getParent()->getParent();
7118 BasicBlock *LookupBB = BasicBlock::Create(
7119 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7120
7121 BranchInst *RangeCheckBranch = nullptr;
7122
7123 Builder.SetInsertPoint(SI);
7124 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7125 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7126 Builder.CreateBr(LookupBB);
7127 if (DTU)
7128 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7129 // Note: We call removeProdecessor later since we need to be able to get the
7130 // PHI value for the default case in case we're using a bit mask.
7131 } else {
7132 Value *Cmp = Builder.CreateICmpULT(
7133 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7134 RangeCheckBranch =
7135 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7136 if (DTU)
7137 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7138 }
7139
7140 // Populate the BB that does the lookups.
7141 Builder.SetInsertPoint(LookupBB);
7142
7143 if (NeedMask) {
7144 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7145 // re-purposed to do the hole check, and we create a new LookupBB.
7146 BasicBlock *MaskBB = LookupBB;
7147 MaskBB->setName("switch.hole_check");
7148 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7149 CommonDest->getParent(), CommonDest);
7150
7151 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7152 // unnecessary illegal types.
7153 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7154 APInt MaskInt(TableSizePowOf2, 0);
7155 APInt One(TableSizePowOf2, 1);
7156 // Build bitmask; fill in a 1 bit for every case.
7157 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7158 for (const auto &Result : ResultList) {
7159 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7160 .getLimitedValue();
7161 MaskInt |= One << Idx;
7162 }
7163 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7164
7165 // Get the TableIndex'th bit of the bitmask.
7166 // If this bit is 0 (meaning hole) jump to the default destination,
7167 // else continue with table lookup.
7168 IntegerType *MapTy = TableMask->getIntegerType();
7169 Value *MaskIndex =
7170 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7171 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7172 Value *LoBit = Builder.CreateTrunc(
7173 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7174 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7175 if (DTU) {
7176 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7177 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7178 }
7179 Builder.SetInsertPoint(LookupBB);
7180 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7181 }
7182
7183 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7184 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7185 // do not delete PHINodes here.
7186 SI->getDefaultDest()->removePredecessor(BB,
7187 /*KeepOneInputPHIs=*/true);
7188 if (DTU)
7189 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7190 }
7191
7192 for (PHINode *PHI : PHIs) {
7193 const ResultListTy &ResultList = ResultLists[PHI];
7194 auto Replacement = PhiToReplacementMap.at(PHI);
7195 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7196 // Do a small peephole optimization: re-use the switch table compare if
7197 // possible.
7198 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7199 BasicBlock *PhiBlock = PHI->getParent();
7200 // Search for compare instructions which use the phi.
7201 for (auto *User : PHI->users()) {
7202 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7203 Replacement.getDefaultValue(), ResultList);
7204 }
7205 }
7206
7207 PHI->addIncoming(Result, LookupBB);
7208 }
7209
7210 Builder.CreateBr(CommonDest);
7211 if (DTU)
7212 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7213
7214 // Remove the switch.
7215 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7216 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7217 BasicBlock *Succ = SI->getSuccessor(i);
7218
7219 if (Succ == SI->getDefaultDest())
7220 continue;
7221 Succ->removePredecessor(BB);
7222 if (DTU && RemovedSuccessors.insert(Succ).second)
7223 Updates.push_back({DominatorTree::Delete, BB, Succ});
7224 }
7225 SI->eraseFromParent();
7226
7227 if (DTU)
7228 DTU->applyUpdates(Updates);
7229
7230 ++NumLookupTables;
7231 if (NeedMask)
7232 ++NumLookupTablesHoles;
7233 return true;
7234}
7235
7236/// Try to transform a switch that has "holes" in it to a contiguous sequence
7237/// of cases.
7238///
7239/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7240/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7241///
7242/// This converts a sparse switch into a dense switch which allows better
7243/// lowering and could also allow transforming into a lookup table.
7244static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7245 const DataLayout &DL,
7246 const TargetTransformInfo &TTI) {
7247 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7248 if (CondTy->getIntegerBitWidth() > 64 ||
7249 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7250 return false;
7251 // Only bother with this optimization if there are more than 3 switch cases;
7252 // SDAG will only bother creating jump tables for 4 or more cases.
7253 if (SI->getNumCases() < 4)
7254 return false;
7255
7256 // This transform is agnostic to the signedness of the input or case values. We
7257 // can treat the case values as signed or unsigned. We can optimize more common
7258 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7259 // as signed.
7261 for (const auto &C : SI->cases())
7262 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7263 llvm::sort(Values);
7264
7265 // If the switch is already dense, there's nothing useful to do here.
7266 if (isSwitchDense(Values))
7267 return false;
7268
7269 // First, transform the values such that they start at zero and ascend.
7270 int64_t Base = Values[0];
7271 for (auto &V : Values)
7272 V -= (uint64_t)(Base);
7273
7274 // Now we have signed numbers that have been shifted so that, given enough
7275 // precision, there are no negative values. Since the rest of the transform
7276 // is bitwise only, we switch now to an unsigned representation.
7277
7278 // This transform can be done speculatively because it is so cheap - it
7279 // results in a single rotate operation being inserted.
7280
7281 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7282 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7283 // less than 64.
7284 unsigned Shift = 64;
7285 for (auto &V : Values)
7286 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7287 assert(Shift < 64);
7288 if (Shift > 0)
7289 for (auto &V : Values)
7290 V = (int64_t)((uint64_t)V >> Shift);
7291
7292 if (!isSwitchDense(Values))
7293 // Transform didn't create a dense switch.
7294 return false;
7295
7296 // The obvious transform is to shift the switch condition right and emit a
7297 // check that the condition actually cleanly divided by GCD, i.e.
7298 // C & (1 << Shift - 1) == 0
7299 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7300 //
7301 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7302 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7303 // are nonzero then the switch condition will be very large and will hit the
7304 // default case.
7305
7306 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7307 Builder.SetInsertPoint(SI);
7308 Value *Sub =
7309 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7310 Value *Rot = Builder.CreateIntrinsic(
7311 Ty, Intrinsic::fshl,
7312 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7313 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7314
7315 for (auto Case : SI->cases()) {
7316 auto *Orig = Case.getCaseValue();
7317 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7318 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7319 }
7320 return true;
7321}
7322
7323/// Tries to transform switch of powers of two to reduce switch range.
7324/// For example, switch like:
7325/// switch (C) { case 1: case 2: case 64: case 128: }
7326/// will be transformed to:
7327/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7328///
7329/// This transformation allows better lowering and may transform the switch
7330/// instruction into a sequence of bit manipulation and a smaller
7331/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7332/// address of the jump target, and indirectly jump to it).
7334 const DataLayout &DL,
7335 const TargetTransformInfo &TTI) {
7336 Value *Condition = SI->getCondition();
7337 LLVMContext &Context = SI->getContext();
7338 auto *CondTy = cast<IntegerType>(Condition->getType());
7339
7340 if (CondTy->getIntegerBitWidth() > 64 ||
7341 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7342 return false;
7343
7344 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7345 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7346 {Condition, ConstantInt::getTrue(Context)});
7348 TTI::TCC_Basic * 2)
7349 return false;
7350
7351 // Only bother with this optimization if there are more than 3 switch cases.
7352 // SDAG will start emitting jump tables for 4 or more cases.
7353 if (SI->getNumCases() < 4)
7354 return false;
7355
7356 // We perform this optimization only for switches with
7357 // unreachable default case.
7358 // This assumtion will save us from checking if `Condition` is a power of two.
7359 if (!SI->defaultDestUnreachable())
7360 return false;
7361
7362 // Check that switch cases are powers of two.
7364 for (const auto &Case : SI->cases()) {
7365 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7366 if (llvm::has_single_bit(CaseValue))
7367 Values.push_back(CaseValue);
7368 else
7369 return false;
7370 }
7371
7372 // isSwichDense requires case values to be sorted.
7373 llvm::sort(Values);
7374 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7375 llvm::countr_zero(Values.front()) + 1))
7376 // Transform is unable to generate dense switch.
7377 return false;
7378
7379 Builder.SetInsertPoint(SI);
7380
7381 // Replace each case with its trailing zeros number.
7382 for (auto &Case : SI->cases()) {
7383 auto *OrigValue = Case.getCaseValue();
7384 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7385 OrigValue->getValue().countr_zero()));
7386 }
7387
7388 // Replace condition with its trailing zeros number.
7389 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7390 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7391
7392 SI->setCondition(ConditionTrailingZeros);
7393
7394 return true;
7395}
7396
7397/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7398/// the same destination.
7400 DomTreeUpdater *DTU) {
7401 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7402 if (!Cmp || !Cmp->hasOneUse())
7403 return false;
7404
7406 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7407 if (!HasWeights)
7408 Weights.resize(4); // Avoid checking HasWeights everywhere.
7409
7410 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7411 int64_t Res;
7412 BasicBlock *Succ, *OtherSucc;
7413 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7414 BasicBlock *Unreachable = nullptr;
7415
7416 if (SI->getNumCases() == 2) {
7417 // Find which of 1, 0 or -1 is missing (handled by default dest).
7418 SmallSet<int64_t, 3> Missing;
7419 Missing.insert(1);
7420 Missing.insert(0);
7421 Missing.insert(-1);
7422
7423 Succ = SI->getDefaultDest();
7424 SuccWeight = Weights[0];
7425 OtherSucc = nullptr;
7426 for (auto &Case : SI->cases()) {
7427 std::optional<int64_t> Val =
7428 Case.getCaseValue()->getValue().trySExtValue();
7429 if (!Val)
7430 return false;
7431 if (!Missing.erase(*Val))
7432 return false;
7433 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7434 return false;
7435 OtherSucc = Case.getCaseSuccessor();
7436 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7437 }
7438
7439 assert(Missing.size() == 1 && "Should have one case left");
7440 Res = *Missing.begin();
7441 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7442 // Normalize so that Succ is taken once and OtherSucc twice.
7443 Unreachable = SI->getDefaultDest();
7444 Succ = OtherSucc = nullptr;
7445 for (auto &Case : SI->cases()) {
7446 BasicBlock *NewSucc = Case.getCaseSuccessor();
7447 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7448 if (!OtherSucc || OtherSucc == NewSucc) {
7449 OtherSucc = NewSucc;
7450 OtherSuccWeight += Weight;
7451 } else if (!Succ) {
7452 Succ = NewSucc;
7453 SuccWeight = Weight;
7454 } else if (Succ == NewSucc) {
7455 std::swap(Succ, OtherSucc);
7456 std::swap(SuccWeight, OtherSuccWeight);
7457 } else
7458 return false;
7459 }
7460 for (auto &Case : SI->cases()) {
7461 std::optional<int64_t> Val =
7462 Case.getCaseValue()->getValue().trySExtValue();
7463 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7464 return false;
7465 if (Case.getCaseSuccessor() == Succ) {
7466 Res = *Val;
7467 break;
7468 }
7469 }
7470 } else {
7471 return false;
7472 }
7473
7474 // Determine predicate for the missing case.
7476 switch (Res) {
7477 case 1:
7478 Pred = ICmpInst::ICMP_UGT;
7479 break;
7480 case 0:
7481 Pred = ICmpInst::ICMP_EQ;
7482 break;
7483 case -1:
7484 Pred = ICmpInst::ICMP_ULT;
7485 break;
7486 }
7487 if (Cmp->isSigned())
7488 Pred = ICmpInst::getSignedPredicate(Pred);
7489
7490 MDNode *NewWeights = nullptr;
7491 if (HasWeights)
7492 NewWeights = MDBuilder(SI->getContext())
7493 .createBranchWeights(SuccWeight, OtherSuccWeight);
7494
7495 BasicBlock *BB = SI->getParent();
7496 Builder.SetInsertPoint(SI->getIterator());
7497 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7498 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7499 SI->getMetadata(LLVMContext::MD_unpredictable));
7500 OtherSucc->removePredecessor(BB);
7501 if (Unreachable)
7502 Unreachable->removePredecessor(BB);
7503 SI->eraseFromParent();
7504 Cmp->eraseFromParent();
7505 if (DTU && Unreachable)
7506 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7507 return true;
7508}
7509
7510/// Checking whether two cases of SI are equal depends on the contents of the
7511/// BasicBlock and the incoming values of their successor PHINodes.
7512/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7513/// calling this function on each BasicBlock every time isEqual is called,
7514/// especially since the same BasicBlock may be passed as an argument multiple
7515/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7516/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7517/// of the incoming values.
7521};
7522
7523namespace llvm {
7524template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
7526 return static_cast<SwitchSuccWrapper *>(
7528 }
7530 return static_cast<SwitchSuccWrapper *>(
7532 }
7533 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7534 BasicBlock *Succ = SSW->Dest;
7535 BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
7536 assert(BI->isUnconditional() &&
7537 "Only supporting unconditional branches for now");
7538 assert(BI->getNumSuccessors() == 1 &&
7539 "Expected unconditional branches to have one successor");
7540 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7541
7542 // Since we assume the BB is just a single BranchInst with a single
7543 // successor, we hash as the BB and the incoming Values of its successor
7544 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7545 // including the incoming PHI values leads to better performance.
7546 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7547 // time and passing it in SwitchSuccWrapper, but this slowed down the
7548 // average compile time without having any impact on the worst case compile
7549 // time.
7550 BasicBlock *BB = BI->getSuccessor(0);
7551 SmallVector<Value *> PhiValsForBB;
7552 for (PHINode &Phi : BB->phis())
7553 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7554
7555 return hash_combine(BB, hash_combine_range(PhiValsForBB));
7556 }
7557 static bool isEqual(const SwitchSuccWrapper *LHS,
7558 const SwitchSuccWrapper *RHS) {
7561 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7562 return LHS == RHS;
7563
7564 BasicBlock *A = LHS->Dest;
7565 BasicBlock *B = RHS->Dest;
7566
7567 // FIXME: we checked that the size of A and B are both 1 in
7568 // simplifyDuplicateSwitchArms to make the Case list smaller to
7569 // improve performance. If we decide to support BasicBlocks with more
7570 // than just a single instruction, we need to check that A.size() ==
7571 // B.size() here, and we need to check more than just the BranchInsts
7572 // for equality.
7573
7574 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7575 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7576 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7577 "Only supporting unconditional branches for now");
7578 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7579 return false;
7580
7581 // Need to check that PHIs in successor have matching values
7582 BasicBlock *Succ = ABI->getSuccessor(0);
7583 for (PHINode &Phi : Succ->phis()) {
7584 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7585 if (PredIVs[A] != PredIVs[B])
7586 return false;
7587 }
7588
7589 return true;
7590 }
7591};
7592} // namespace llvm
7593
7594bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7595 DomTreeUpdater *DTU) {
7596 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7597 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7598 // an entire PHI at once after the loop, opposed to calling
7599 // getIncomingValueForBlock inside this loop, since each call to
7600 // getIncomingValueForBlock is O(|Preds|).
7606 Cases.reserve(SI->getNumSuccessors());
7607
7608 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7609 BasicBlock *BB = SI->getSuccessor(I);
7610
7611 // FIXME: Support more than just a single BranchInst. One way we could do
7612 // this is by taking a hashing approach of all insts in BB.
7613 if (BB->size() != 1)
7614 continue;
7615
7616 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7617 // on other kinds of terminators. We decide to only support unconditional
7618 // branches for now for compile time reasons.
7619 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7620 if (!BI || BI->isConditional())
7621 continue;
7622
7623 if (!Seen.insert(BB).second) {
7624 auto It = BBToSuccessorIndexes.find(BB);
7625 if (It != BBToSuccessorIndexes.end())
7626 It->second.emplace_back(I);
7627 continue;
7628 }
7629
7630 // FIXME: This case needs some extra care because the terminators other than
7631 // SI need to be updated. For now, consider only backedges to the SI.
7632 if (BB->getUniquePredecessor() != SI->getParent())
7633 continue;
7634
7635 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7636 for (BasicBlock *Succ : BI->successors())
7638
7639 // Add the successor only if not previously visited.
7640 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7641 BBToSuccessorIndexes[BB].emplace_back(I);
7642 }
7643
7644 // Precompute a data structure to improve performance of isEqual for
7645 // SwitchSuccWrapper.
7646 PhiPredIVs.reserve(Phis.size());
7647 for (PHINode *Phi : Phis) {
7648 auto &IVs =
7649 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
7650 for (auto &IV : Phi->incoming_values())
7651 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
7652 }
7653
7654 // Build a set such that if the SwitchSuccWrapper exists in the set and
7655 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7656 // which is not in the set should be replaced with the one in the set. If the
7657 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7658 // other SwitchSuccWrappers can check against it in the same manner. We use
7659 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7660 // around information to isEquality, getHashValue, and when doing the
7661 // replacement with better performance.
7663 ReplaceWith.reserve(Cases.size());
7664
7666 Updates.reserve(ReplaceWith.size());
7667 bool MadeChange = false;
7668 for (auto &SSW : Cases) {
7669 // SSW is a candidate for simplification. If we find a duplicate BB,
7670 // replace it.
7671 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7672 if (!Inserted) {
7673 // We know that SI's parent BB no longer dominates the old case successor
7674 // since we are making it dead.
7675 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7676 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7677 for (unsigned Idx : Successors)
7678 SI->setSuccessor(Idx, (*It)->Dest);
7679 MadeChange = true;
7680 }
7681 }
7682
7683 if (DTU)
7684 DTU->applyUpdates(Updates);
7685
7686 return MadeChange;
7687}
7688
/// Top-level driver for switch simplification. Tries a fixed sequence of
/// independent transforms on \p SI; as soon as one of them changes the IR it
/// returns via requestResimplify() so the whole block is re-simplified from
/// scratch on the next iteration.
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
        return requestResimplify();

    // A switch on a select of two constants can be rewritten as a
    // conditional branch on the select's condition.
    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
      if (simplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(false).begin())
      if (foldValueComparisonIntoPredecessors(SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
    return requestResimplify();

  if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
    return requestResimplify();

  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
    return requestResimplify();

  if (reduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  // Hoist instructions common to all case destinations up into this block.
  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
    return requestResimplify();

  if (simplifyDuplicateSwitchArms(SI, DTU))
    return requestResimplify();

  return false;
}
7754
7755bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7756 BasicBlock *BB = IBI->getParent();
7757 bool Changed = false;
7758
7759 // Eliminate redundant destinations.
7762 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7763 BasicBlock *Dest = IBI->getDestination(i);
7764 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7765 if (!Dest->hasAddressTaken())
7766 RemovedSuccs.insert(Dest);
7767 Dest->removePredecessor(BB);
7768 IBI->removeDestination(i);
7769 --i;
7770 --e;
7771 Changed = true;
7772 }
7773 }
7774
7775 if (DTU) {
7776 std::vector<DominatorTree::UpdateType> Updates;
7777 Updates.reserve(RemovedSuccs.size());
7778 for (auto *RemovedSucc : RemovedSuccs)
7779 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7780 DTU->applyUpdates(Updates);
7781 }
7782
7783 if (IBI->getNumDestinations() == 0) {
7784 // If the indirectbr has no successors, change it to unreachable.
7785 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7787 return true;
7788 }
7789
7790 if (IBI->getNumDestinations() == 1) {
7791 // If the indirectbr has one successor, change it to a direct branch.
7794 return true;
7795 }
7796
7797 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7798 if (simplifyIndirectBrOnSelect(IBI, SI))
7799 return requestResimplify();
7800 }
7801 return Changed;
7802}
7803
7804/// Given an block with only a single landing pad and a unconditional branch
7805/// try to find another basic block which this one can be merged with. This
7806/// handles cases where we have multiple invokes with unique landing pads, but
7807/// a shared handler.
7808///
7809/// We specifically choose to not worry about merging non-empty blocks
7810/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7811/// practice, the optimizer produces empty landing pad blocks quite frequently
7812/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7813/// sinking in this file)
7814///
7815/// This is primarily a code size optimization. We need to avoid performing
7816/// any transform which might inhibit optimization (such as our ability to
7817/// specialize a particular handler via tail commoning). We do this by not
7818/// merging any blocks which require us to introduce a phi. Since the same
7819/// values are flowing through both blocks, we don't lose any ability to
7820/// specialize. If anything, we make such specialization more likely.
7821///
7822/// TODO - This transformation could remove entries from a phi in the target
7823/// block when the inputs in the phi are the same for the two blocks being
7824/// merged. In some cases, this could result in removal of the PHI entirely.
7826 BasicBlock *BB, DomTreeUpdater *DTU) {
7827 auto Succ = BB->getUniqueSuccessor();
7828 assert(Succ);
7829 // If there's a phi in the successor block, we'd likely have to introduce
7830 // a phi into the merged landing pad block.
7831 if (isa<PHINode>(*Succ->begin()))
7832 return false;
7833
7834 for (BasicBlock *OtherPred : predecessors(Succ)) {
7835 if (BB == OtherPred)
7836 continue;
7837 BasicBlock::iterator I = OtherPred->begin();
7838 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7839 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7840 continue;
7841 ++I;
7842 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7843 if (!BI2 || !BI2->isIdenticalTo(BI))
7844 continue;
7845
7846 std::vector<DominatorTree::UpdateType> Updates;
7847
7848 // We've found an identical block. Update our predecessors to take that
7849 // path instead and make ourselves dead.
7851 for (BasicBlock *Pred : UniquePreds) {
7852 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7853 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7854 "unexpected successor");
7855 II->setUnwindDest(OtherPred);
7856 if (DTU) {
7857 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7858 Updates.push_back({DominatorTree::Delete, Pred, BB});
7859 }
7860 }
7861
7863 for (BasicBlock *Succ : UniqueSuccs) {
7864 Succ->removePredecessor(BB);
7865 if (DTU)
7866 Updates.push_back({DominatorTree::Delete, BB, Succ});
7867 }
7868
7869 IRBuilder<> Builder(BI);
7870 Builder.CreateUnreachable();
7871 BI->eraseFromParent();
7872 if (DTU)
7873 DTU->applyUpdates(Updates);
7874 return true;
7875 }
7876 return false;
7877}
7878
7879bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7880 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7881 : simplifyCondBranch(Branch, Builder);
7882}
7883
7884bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7885 IRBuilder<> &Builder) {
7886 BasicBlock *BB = BI->getParent();
7887 BasicBlock *Succ = BI->getSuccessor(0);
7888
7889 // If the Terminator is the only non-phi instruction, simplify the block.
7890 // If LoopHeader is provided, check if the block or its successor is a loop
7891 // header. (This is for early invocations before loop simplify and
7892 // vectorization to keep canonical loop forms for nested loops. These blocks
7893 // can be eliminated when the pass is invoked later in the back-end.)
7894 // Note that if BB has only one predecessor then we do not introduce new
7895 // backedge, so we can eliminate BB.
7896 bool NeedCanonicalLoop =
7897 Options.NeedCanonicalLoop &&
7898 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7899 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7901 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7902 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7903 return true;
7904
7905 // If the only instruction in the block is a seteq/setne comparison against a
7906 // constant, try to simplify the block.
7907 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7908 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7909 ++I;
7910 if (I->isTerminator() &&
7911 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7912 return true;
7913 }
7914
7915 // See if we can merge an empty landing pad block with another which is
7916 // equivalent.
7917 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7918 ++I;
7919 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7920 return true;
7921 }
7922
7923 // If this basic block is ONLY a compare and a branch, and if a predecessor
7924 // branches to us and our successor, fold the comparison into the
7925 // predecessor and use logical operations to update the incoming value
7926 // for PHI nodes in common successor.
7927 if (Options.SpeculateBlocks &&
7928 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7929 Options.BonusInstThreshold))
7930 return requestResimplify();
7931 return false;
7932}
7933
7935 BasicBlock *PredPred = nullptr;
7936 for (auto *P : predecessors(BB)) {
7937 BasicBlock *PPred = P->getSinglePredecessor();
7938 if (!PPred || (PredPred && PredPred != PPred))
7939 return nullptr;
7940 PredPred = PPred;
7941 }
7942 return PredPred;
7943}
7944
7945/// Fold the following pattern:
7946/// bb0:
7947/// br i1 %cond1, label %bb1, label %bb2
7948/// bb1:
7949/// br i1 %cond2, label %bb3, label %bb4
7950/// bb2:
7951/// br i1 %cond2, label %bb4, label %bb3
7952/// bb3:
7953/// ...
7954/// bb4:
7955/// ...
7956/// into
7957/// bb0:
7958/// %cond = xor i1 %cond1, %cond2
7959/// br i1 %cond, label %bb4, label %bb3
7960/// bb3:
7961/// ...
7962/// bb4:
7963/// ...
7964/// NOTE: %cond2 always dominates the terminator of bb0.
7966 BasicBlock *BB = BI->getParent();
7967 BasicBlock *BB1 = BI->getSuccessor(0);
7968 BasicBlock *BB2 = BI->getSuccessor(1);
7969 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7970 if (Succ == BB)
7971 return false;
7972 if (&Succ->front() != Succ->getTerminator())
7973 return false;
7974 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7975 if (!SuccBI || !SuccBI->isConditional())
7976 return false;
7977 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7978 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7979 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7980 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7981 };
7982 BranchInst *BB1BI, *BB2BI;
7983 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7984 return false;
7985
7986 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7987 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7988 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7989 return false;
7990
7991 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7992 BasicBlock *BB4 = BB1BI->getSuccessor(1);
7993 IRBuilder<> Builder(BI);
7994 BI->setCondition(
7995 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
7996 BB1->removePredecessor(BB);
7997 BI->setSuccessor(0, BB4);
7998 BB2->removePredecessor(BB);
7999 BI->setSuccessor(1, BB3);
8000 if (DTU) {
8002 Updates.push_back({DominatorTree::Delete, BB, BB1});
8003 Updates.push_back({DominatorTree::Insert, BB, BB4});
8004 Updates.push_back({DominatorTree::Delete, BB, BB2});
8005 Updates.push_back({DominatorTree::Insert, BB, BB3});
8006
8007 DTU->applyUpdates(Updates);
8008 }
8009 bool HasWeight = false;
8010 uint64_t BBTWeight, BBFWeight;
8011 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8012 HasWeight = true;
8013 else
8014 BBTWeight = BBFWeight = 1;
8015 uint64_t BB1TWeight, BB1FWeight;
8016 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8017 HasWeight = true;
8018 else
8019 BB1TWeight = BB1FWeight = 1;
8020 uint64_t BB2TWeight, BB2FWeight;
8021 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8022 HasWeight = true;
8023 else
8024 BB2TWeight = BB2FWeight = 1;
8025 if (HasWeight) {
8026 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8027 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8028 fitWeights(Weights);
8029 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8030 }
8031 return true;
8032}
8033
8034bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8035 assert(
8036 !isa<ConstantInt>(BI->getCondition()) &&
8037 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8038 "Tautological conditional branch should have been eliminated already.");
8039
8040 BasicBlock *BB = BI->getParent();
8041 if (!Options.SimplifyCondBranch ||
8042 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8043 return false;
8044
8045 // Conditional branch
8046 if (isValueEqualityComparison(BI)) {
8047 // If we only have one predecessor, and if it is a branch on this value,
8048 // see if that predecessor totally determines the outcome of this
8049 // switch.
8050 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8051 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8052 return requestResimplify();
8053
8054 // This block must be empty, except for the setcond inst, if it exists.
8055 // Ignore dbg and pseudo intrinsics.
8056 auto I = BB->instructionsWithoutDebug(true).begin();
8057 if (&*I == BI) {
8058 if (foldValueComparisonIntoPredecessors(BI, Builder))
8059 return requestResimplify();
8060 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8061 ++I;
8062 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8063 return requestResimplify();
8064 }
8065 }
8066
8067 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8068 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8069 return true;
8070
8071 // If this basic block has dominating predecessor blocks and the dominating
8072 // blocks' conditions imply BI's condition, we know the direction of BI.
8073 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8074 if (Imp) {
8075 // Turn this into a branch on constant.
8076 auto *OldCond = BI->getCondition();
8077 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8078 : ConstantInt::getFalse(BB->getContext());
8079 BI->setCondition(TorF);
8081 return requestResimplify();
8082 }
8083
8084 // If this basic block is ONLY a compare and a branch, and if a predecessor
8085 // branches to us and one of our successors, fold the comparison into the
8086 // predecessor and use logical operations to pick the right destination.
8087 if (Options.SpeculateBlocks &&
8088 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8089 Options.BonusInstThreshold))
8090 return requestResimplify();
8091
8092 // We have a conditional branch to two blocks that are only reachable
8093 // from BI. We know that the condbr dominates the two blocks, so see if
8094 // there is any identical code in the "then" and "else" blocks. If so, we
8095 // can hoist it up to the branching block.
8096 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8097 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8098 if (HoistCommon &&
8099 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8100 return requestResimplify();
8101
8102 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8103 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8104 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8105 auto CanSpeculateConditionalLoadsStores = [&]() {
8106 for (auto *Succ : successors(BB)) {
8107 for (Instruction &I : *Succ) {
8108 if (I.isTerminator()) {
8109 if (I.getNumSuccessors() > 1)
8110 return false;
8111 continue;
8112 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8113 SpeculatedConditionalLoadsStores.size() ==
8115 return false;
8116 }
8117 SpeculatedConditionalLoadsStores.push_back(&I);
8118 }
8119 }
8120 return !SpeculatedConditionalLoadsStores.empty();
8121 };
8122
8123 if (CanSpeculateConditionalLoadsStores()) {
8124 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8125 std::nullopt, nullptr);
8126 return requestResimplify();
8127 }
8128 }
8129 } else {
8130 // If Successor #1 has multiple preds, we may be able to conditionally
8131 // execute Successor #0 if it branches to Successor #1.
8132 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8133 if (Succ0TI->getNumSuccessors() == 1 &&
8134 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8135 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8136 return requestResimplify();
8137 }
8138 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8139 // If Successor #0 has multiple preds, we may be able to conditionally
8140 // execute Successor #1 if it branches to Successor #0.
8141 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8142 if (Succ1TI->getNumSuccessors() == 1 &&
8143 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8144 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8145 return requestResimplify();
8146 }
8147
8148 // If this is a branch on something for which we know the constant value in
8149 // predecessors (e.g. a phi node in the current block), thread control
8150 // through this block.
8151 if (foldCondBranchOnValueKnownInPredecessor(BI))
8152 return requestResimplify();
8153
8154 // Scan predecessor blocks for conditional branches.
8155 for (BasicBlock *Pred : predecessors(BB))
8156 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8157 if (PBI != BI && PBI->isConditional())
8158 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8159 return requestResimplify();
8160
8161 // Look for diamond patterns.
8162 if (MergeCondStores)
8164 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8165 if (PBI != BI && PBI->isConditional())
8166 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8167 return requestResimplify();
8168
8169 // Look for nested conditional branches.
8170 if (mergeNestedCondBranch(BI, DTU))
8171 return requestResimplify();
8172
8173 return false;
8174}
8175
8176/// Check if passing a value to an instruction will cause undefined behavior.
8177static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8178 assert(V->getType() == I->getType() && "Mismatched types");
8179 Constant *C = dyn_cast<Constant>(V);
8180 if (!C)
8181 return false;
8182
8183 if (I->use_empty())
8184 return false;
8185
8186 if (C->isNullValue() || isa<UndefValue>(C)) {
8187 // Only look at the first use we can handle, avoid hurting compile time with
8188 // long uselists
8189 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8190 auto *Use = cast<Instruction>(U.getUser());
8191 // Change this list when we want to add new instructions.
8192 switch (Use->getOpcode()) {
8193 default:
8194 return false;
8195 case Instruction::GetElementPtr:
8196 case Instruction::Ret:
8197 case Instruction::BitCast:
8198 case Instruction::Load:
8199 case Instruction::Store:
8200 case Instruction::Call:
8201 case Instruction::CallBr:
8202 case Instruction::Invoke:
8203 case Instruction::UDiv:
8204 case Instruction::URem:
8205 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8206 // implemented to avoid code complexity as it is unclear how useful such
8207 // logic is.
8208 case Instruction::SDiv:
8209 case Instruction::SRem:
8210 return true;
8211 }
8212 });
8213 if (FindUse == I->use_end())
8214 return false;
8215 auto &Use = *FindUse;
8216 auto *User = cast<Instruction>(Use.getUser());
8217 // Bail out if User is not in the same BB as I or User == I or User comes
8218 // before I in the block. The latter two can be the case if User is a
8219 // PHI node.
8220 if (User->getParent() != I->getParent() || User == I ||
8221 User->comesBefore(I))
8222 return false;
8223
8224 // Now make sure that there are no instructions in between that can alter
8225 // control flow (eg. calls)
8226 auto InstrRange =
8227 make_range(std::next(I->getIterator()), User->getIterator());
8228 if (any_of(InstrRange, [](Instruction &I) {
8230 }))
8231 return false;
8232
8233 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8234 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User))
8235 if (GEP->getPointerOperand() == I) {
8236 // The type of GEP may differ from the type of base pointer.
8237 // Bail out on vector GEPs, as they are not handled by other checks.
8238 if (GEP->getType()->isVectorTy())
8239 return false;
8240 // The current base address is null, there are four cases to consider:
8241 // getelementptr (TY, null, 0) -> null
8242 // getelementptr (TY, null, not zero) -> may be modified
8243 // getelementptr inbounds (TY, null, 0) -> null
8244 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8245 // undefined?
8246 if (!GEP->hasAllZeroIndices() &&
8247 (!GEP->isInBounds() ||
8248 NullPointerIsDefined(GEP->getFunction(),
8249 GEP->getPointerAddressSpace())))
8250 PtrValueMayBeModified = true;
8251 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8252 }
8253
8254 // Look through return.
8255 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8256 bool HasNoUndefAttr =
8257 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8258 // Return undefined to a noundef return value is undefined.
8259 if (isa<UndefValue>(C) && HasNoUndefAttr)
8260 return true;
8261 // Return null to a nonnull+noundef return value is undefined.
8262 if (C->isNullValue() && HasNoUndefAttr &&
8263 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8264 return !PtrValueMayBeModified;
8265 }
8266 }
8267
8268 // Load from null is undefined.
8269 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8270 if (!LI->isVolatile())
8271 return !NullPointerIsDefined(LI->getFunction(),
8272 LI->getPointerAddressSpace());
8273
8274 // Store to null is undefined.
8275 if (StoreInst *SI = dyn_cast<StoreInst>(User))
8276 if (!SI->isVolatile())
8277 return (!NullPointerIsDefined(SI->getFunction(),
8278 SI->getPointerAddressSpace())) &&
8279 SI->getPointerOperand() == I;
8280
8281 // llvm.assume(false/undef) always triggers immediate UB.
8282 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8283 // Ignore assume operand bundles.
8284 if (I == Assume->getArgOperand(0))
8285 return true;
8286 }
8287
8288 if (auto *CB = dyn_cast<CallBase>(User)) {
8289 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8290 return false;
8291 // A call to null is undefined.
8292 if (CB->getCalledOperand() == I)
8293 return true;
8294
8295 if (CB->isArgOperand(&Use)) {
8296 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8297 // Passing null to a nonnnull+noundef argument is undefined.
8298 if (isa<ConstantPointerNull>(C) &&
8299 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8300 return !PtrValueMayBeModified;
8301 // Passing undef to a noundef argument is undefined.
8302 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8303 return true;
8304 }
8305 }
8306 // Div/Rem by zero is immediate UB
8307 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8308 return true;
8309 }
8310 return false;
8311}
8312
8313/// If BB has an incoming value that will always trigger undefined behavior
8314/// (eg. null pointer dereference), remove the branch leading here.
8316 DomTreeUpdater *DTU,
8317 AssumptionCache *AC) {
8318 for (PHINode &PHI : BB->phis())
8319 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8320 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8321 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8322 Instruction *T = Predecessor->getTerminator();
8323 IRBuilder<> Builder(T);
8324 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8325 BB->removePredecessor(Predecessor);
8326 // Turn unconditional branches into unreachables and remove the dead
8327 // destination from conditional branches.
8328 if (BI->isUnconditional())
8329 Builder.CreateUnreachable();
8330 else {
8331 // Preserve guarding condition in assume, because it might not be
8332 // inferrable from any dominating condition.
8333 Value *Cond = BI->getCondition();
8334 CallInst *Assumption;
8335 if (BI->getSuccessor(0) == BB)
8336 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8337 else
8338 Assumption = Builder.CreateAssumption(Cond);
8339 if (AC)
8340 AC->registerAssumption(cast<AssumeInst>(Assumption));
8341 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8342 : BI->getSuccessor(0));
8343 }
8344 BI->eraseFromParent();
8345 if (DTU)
8346 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8347 return true;
8348 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8349 // Redirect all branches leading to UB into
8350 // a newly created unreachable block.
8351 BasicBlock *Unreachable = BasicBlock::Create(
8352 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8353 Builder.SetInsertPoint(Unreachable);
8354 // The new block contains only one instruction: Unreachable
8355 Builder.CreateUnreachable();
8356 for (const auto &Case : SI->cases())
8357 if (Case.getCaseSuccessor() == BB) {
8358 BB->removePredecessor(Predecessor);
8359 Case.setSuccessor(Unreachable);
8360 }
8361 if (SI->getDefaultDest() == BB) {
8362 BB->removePredecessor(Predecessor);
8363 SI->setDefaultDest(Unreachable);
8364 }
8365
8366 if (DTU)
8367 DTU->applyUpdates(
8368 { { DominatorTree::Insert, Predecessor, Unreachable },
8369 { DominatorTree::Delete, Predecessor, BB } });
8370 return true;
8371 }
8372 }
8373
8374 return false;
8375}
8376
8377bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8378 bool Changed = false;
8379
8380 assert(BB && BB->getParent() && "Block not embedded in function!");
8381 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8382
8383 // Remove basic blocks that have no predecessors (except the entry block)...
8384 // or that just have themself as a predecessor. These are unreachable.
8385 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8386 BB->getSinglePredecessor() == BB) {
8387 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8388 DeleteDeadBlock(BB, DTU);
8389 return true;
8390 }
8391
8392 // Check to see if we can constant propagate this terminator instruction
8393 // away...
8394 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8395 /*TLI=*/nullptr, DTU);
8396
8397 // Check for and eliminate duplicate PHI nodes in this block.
8398 Changed |= EliminateDuplicatePHINodes(BB);
8399
8400 // Check for and remove branches that will always cause undefined behavior.
8402 return requestResimplify();
8403
8404 // Merge basic blocks into their predecessor if there is only one distinct
8405 // pred, and if there is only one distinct successor of the predecessor, and
8406 // if there are no PHI nodes.
8407 if (MergeBlockIntoPredecessor(BB, DTU))
8408 return true;
8409
8410 if (SinkCommon && Options.SinkCommonInsts)
8411 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8412 mergeCompatibleInvokes(BB, DTU)) {
8413 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8414 // so we may now how duplicate PHI's.
8415 // Let's rerun EliminateDuplicatePHINodes() first,
8416 // before foldTwoEntryPHINode() potentially converts them into select's,
8417 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8418 return true;
8419 }
8420
8421 IRBuilder<> Builder(BB);
8422
8423 if (Options.SpeculateBlocks &&
8424 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8425 // If there is a trivial two-entry PHI node in this basic block, and we can
8426 // eliminate it, do so now.
8427 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8428 if (PN->getNumIncomingValues() == 2)
8429 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8430 Options.SpeculateUnpredictables))
8431 return true;
8432 }
8433
8435 Builder.SetInsertPoint(Terminator);
8436 switch (Terminator->getOpcode()) {
8437 case Instruction::Br:
8438 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8439 break;
8440 case Instruction::Resume:
8441 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8442 break;
8443 case Instruction::CleanupRet:
8444 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8445 break;
8446 case Instruction::Switch:
8447 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8448 break;
8449 case Instruction::Unreachable:
8450 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8451 break;
8452 case Instruction::IndirectBr:
8453 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8454 break;
8455 }
8456
8457 return Changed;
8458}
8459
8460bool SimplifyCFGOpt::run(BasicBlock *BB) {
8461 bool Changed = false;
8462
8463 // Repeated simplify BB as long as resimplification is requested.
8464 do {
8465 Resimplify = false;
8466
8467 // Perform one round of simplifcation. Resimplify flag will be set if
8468 // another iteration is requested.
8469 Changed |= simplifyOnce(BB);
8470 } while (Resimplify);
8471
8472 return Changed;
8473}
8474
8477 ArrayRef<WeakVH> LoopHeaders) {
8478 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8479 Options)
8480 .run(BB);
8481}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL)
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:83
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition: APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:156
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:386
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:206
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:337
const Instruction & front() const
Definition: BasicBlock.h:482
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:354
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:459
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:475
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:437
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:287
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:445
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:467
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:699
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:252
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:131
size_t size() const
Definition: BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:661
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:463
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:248
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:494
The address of a basic block.
Definition: Constants.h:899
BasicBlock * getBasicBlock() const
Definition: Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
Definition: InstrTypes.h:1586
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:767
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1314
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2635
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isNegative() const
Definition: Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:875
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154
This class represents a range of values.
Definition: ConstantRange.h:47
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:403
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition: DebugLoc.h:256
static DebugLoc getTemporary()
Definition: DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition: DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition: DebugLoc.cpp:170
static DebugLoc getDropped()
Definition: DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:165
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:229
unsigned size() const
Definition: DenseMap.h:108
iterator end()
Definition: DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:205
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:214
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition: DenseMap.h:112
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
iterator begin()
Definition: Function.h:851
size_t size() const
Definition: Function.h:856
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2345
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2100
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1339
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1010
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:463
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:488
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1005
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1931
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:262
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1805
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1220
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2329
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1420
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2204
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1847
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1551
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1860
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:508
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1708
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1191
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1725
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2277
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1599
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2439
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1731
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:105
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:513
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:78
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:171
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:428
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:315
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
Definition: Instruction.h:930
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1789
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:897
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:42
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:180
static unsigned getPointerOperandIndex()
Definition: Instructions.h:261
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:38
Metadata node.
Definition: Metadata.h:1077
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:115
size_type size() const
Definition: MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
size_type size() const
Definition: SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
void insert_range(Range &&R)
Definition: SmallPtrSet.h:490
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:705
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
Align getAlign() const
Definition: Instructions.h:338
bool isSimple() const
Definition: Instructions.h:375
Value * getValueOperand()
Definition: Instructions.h:383
bool isUnordered() const
Definition: Instructions.h:377
static unsigned getPointerOperandIndex()
Definition: Instructions.h:388
Value * getPointerOperand()
Definition: Instructions.h:386
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
LLVM_ABI BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
LLVM_ABI bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const
LLVM_ABI InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
LLVM_ABI bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI unsigned getIntegerBitWidth() const
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
LLVM_ABI void set(Value *Val)
Definition: Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:35
op_range operands()
Definition: User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:245
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
static constexpr uint64_t MaximumAlignment
Definition: Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
iterator_range< user_iterator > users()
Definition: Value.h:426
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1101
iterator_range< use_iterator > uses()
Definition: Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition: DenseSet.h:96
size_type size() const
Definition: DenseSet.h:87
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:359
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:962
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
Definition: PatternMatch.h:876
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:931
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition: DebugInfo.h:201
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1899
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:860
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1770
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:307
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
bool succ_empty(const Instruction *I)
Definition: CFG.h:256
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:134
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:252
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2095
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1796
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:157
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2147
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:203
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:317
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1172
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1444
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:2845
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1300
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3081
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3339
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:289
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:3839
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2049
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:306
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:252
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:257
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1629
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:595
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2107
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition: ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:469
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:378
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
BasicBlock * Dest
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:54
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:289
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:138
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:249