LLVM 21.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
78#include <algorithm>
79#include <cassert>
80#include <climits>
81#include <cstddef>
82#include <cstdint>
83#include <iterator>
84#include <map>
85#include <optional>
86#include <set>
87#include <tuple>
88#include <utility>
89#include <vector>
90
91using namespace llvm;
92using namespace PatternMatch;
93
94#define DEBUG_TYPE "simplifycfg"
95
97 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
98
100 "Temporary development switch used to gradually uplift SimplifyCFG "
101 "into preserving DomTree,"));
102
103// Chosen as 2 so as to be cheap, but still to have enough power to fold
104// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
105// To catch this, we need to fold a compare and a select, hence '2' being the
106// minimum reasonable default.
108 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
109 cl::desc(
110 "Control the amount of phi node folding to perform (default = 2)"));
111
113 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
114 cl::desc("Control the maximal total instruction cost that we are willing "
115 "to speculatively execute to fold a 2-entry PHI node into a "
116 "select (default = 4)"));
117
118static cl::opt<bool>
119 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
120 cl::desc("Hoist common instructions up to the parent block"));
121
123 "simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
124 cl::init(true),
125 cl::desc("Hoist loads/stores if the target supports "
126 "conditional faulting"));
127
129 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
130 cl::desc("Control the maximal conditional load/store that we are willing "
131 "to speculatively execute to eliminate conditional branch "
132 "(default = 6)"));
133
135 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
136 cl::init(20),
137 cl::desc("Allow reordering across at most this many "
138 "instructions when hoisting"));
139
140static cl::opt<bool>
141 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
142 cl::desc("Sink common instructions down to the end block"));
143
145 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
146 cl::desc("Hoist conditional stores if an unconditional store precedes"));
147
149 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
150 cl::desc("Hoist conditional stores even if an unconditional store does not "
151 "precede - hoist multiple conditional stores into a single "
152 "predicated store"));
153
155 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
156 cl::desc("When merging conditional stores, do so even if the resultant "
157 "basic blocks are unlikely to be if-converted as a result"));
158
160 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
161 cl::desc("Allow exactly one expensive instruction to be speculatively "
162 "executed"));
163
165 "max-speculation-depth", cl::Hidden, cl::init(10),
166 cl::desc("Limit maximum recursion depth when calculating costs of "
167 "speculatively executed instructions"));
168
169static cl::opt<int>
170 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
171 cl::init(10),
172 cl::desc("Max size of a block which is still considered "
173 "small enough to thread through"));
174
175// Two is chosen to allow one negation and a logical combine.
177 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
178 cl::init(2),
179 cl::desc("Maximum cost of combining conditions when "
180 "folding branches"));
181
183 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
184 cl::init(2),
185 cl::desc("Multiplier to apply to threshold when determining whether or not "
186 "to fold branch to common destination when vector operations are "
187 "present"));
188
190 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
191 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
192
194 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
195 cl::desc("Limit cases to analyze when converting a switch to select"));
196
197STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
198STATISTIC(NumLinearMaps,
199 "Number of switch instructions turned into linear mapping");
200STATISTIC(NumLookupTables,
201 "Number of switch instructions turned into lookup tables");
203 NumLookupTablesHoles,
204 "Number of switch instructions turned into lookup tables (holes checked)");
205STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
206STATISTIC(NumFoldValueComparisonIntoPredecessors,
207 "Number of value comparisons folded into predecessor basic blocks");
208STATISTIC(NumFoldBranchToCommonDest,
209 "Number of branches folded into predecessor basic block");
211 NumHoistCommonCode,
212 "Number of common instruction 'blocks' hoisted up to the begin block");
213STATISTIC(NumHoistCommonInstrs,
214 "Number of common instructions hoisted up to the begin block");
215STATISTIC(NumSinkCommonCode,
216 "Number of common instruction 'blocks' sunk down to the end block");
217STATISTIC(NumSinkCommonInstrs,
218 "Number of common instructions sunk down to the end block");
219STATISTIC(NumSpeculations, "Number of speculative executed instructions");
220STATISTIC(NumInvokes,
221 "Number of invokes with empty resume blocks simplified into calls");
222STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
223STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
224
225namespace {
226
227// The first field contains the value that the switch produces when a certain
228// case group is selected, and the second field is a vector containing the
229// cases composing the case group.
230using SwitchCaseResultVectorTy =
232
233// The first field contains the phi node that generates a result of the switch
234// and the second field contains the value generated for a certain case in the
235// switch for that PHI.
236using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
237
238/// ValueEqualityComparisonCase - Represents a case of a switch.
239struct ValueEqualityComparisonCase {
241 BasicBlock *Dest;
242
243 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
244 : Value(Value), Dest(Dest) {}
245
246 bool operator<(ValueEqualityComparisonCase RHS) const {
247 // Comparing pointers is ok as we only rely on the order for uniquing.
248 return Value < RHS.Value;
249 }
250
251 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
252};
253
254class SimplifyCFGOpt {
256 DomTreeUpdater *DTU;
257 const DataLayout &DL;
258 ArrayRef<WeakVH> LoopHeaders;
259 const SimplifyCFGOptions &Options;
260 bool Resimplify;
261
262 Value *isValueEqualityComparison(Instruction *TI);
263 BasicBlock *getValueEqualityComparisonCases(
264 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
265 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
266 BasicBlock *Pred,
267 IRBuilder<> &Builder);
268 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
269 Instruction *PTI,
270 IRBuilder<> &Builder);
271 bool foldValueComparisonIntoPredecessors(Instruction *TI,
272 IRBuilder<> &Builder);
273
274 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
275 bool simplifySingleResume(ResumeInst *RI);
276 bool simplifyCommonResume(ResumeInst *RI);
277 bool simplifyCleanupReturn(CleanupReturnInst *RI);
278 bool simplifyUnreachable(UnreachableInst *UI);
279 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
280 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
281 bool simplifyIndirectBr(IndirectBrInst *IBI);
282 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
283 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
284 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
285
286 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
287 IRBuilder<> &Builder);
288
289 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
290 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
291 Instruction *TI, Instruction *I1,
292 SmallVectorImpl<Instruction *> &OtherSuccTIs);
293 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
294 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
295 BasicBlock *TrueBB, BasicBlock *FalseBB,
296 uint32_t TrueWeight, uint32_t FalseWeight);
297 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
298 const DataLayout &DL);
299 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
300 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
301 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
302
303public:
304 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
305 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
306 const SimplifyCFGOptions &Opts)
307 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
308 assert((!DTU || !DTU->hasPostDomTree()) &&
309 "SimplifyCFG is not yet capable of maintaining validity of a "
310 "PostDomTree, so don't ask for it.");
311 }
312
313 bool simplifyOnce(BasicBlock *BB);
314 bool run(BasicBlock *BB);
315
316 // Helper to set Resimplify and return change indication.
317 bool requestResimplify() {
318 Resimplify = true;
319 return true;
320 }
321};
322
323} // end anonymous namespace
324
325/// Return true if all the PHI nodes in the basic block \p BB
326/// receive compatible (identical) incoming values when coming from
327/// all of the predecessor blocks that are specified in \p IncomingBlocks.
328///
329/// Note that if the values aren't exactly identical, but \p EquivalenceSet
330/// is provided, and *both* of the values are present in the set,
331/// then they are considered equal.
333 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
334 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
335 assert(IncomingBlocks.size() == 2 &&
336 "Only for a pair of incoming blocks at the time!");
337
338 // FIXME: it is okay if one of the incoming values is an `undef` value,
339 // iff the other incoming value is guaranteed to be a non-poison value.
340 // FIXME: it is okay if one of the incoming values is a `poison` value.
341 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
342 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
343 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
344 if (IV0 == IV1)
345 return true;
346 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
347 EquivalenceSet->contains(IV1))
348 return true;
349 return false;
350 });
351}
352
353/// Return true if it is safe to merge these two
354/// terminator instructions together.
355static bool
357 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
358 if (SI1 == SI2)
359 return false; // Can't merge with self!
360
361 // It is not safe to merge these two switch instructions if they have a common
362 // successor, and if that successor has a PHI node, and if *that* PHI node has
363 // conflicting incoming values from the two switch blocks.
364 BasicBlock *SI1BB = SI1->getParent();
365 BasicBlock *SI2BB = SI2->getParent();
366
367 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
368 bool Fail = false;
369 for (BasicBlock *Succ : successors(SI2BB)) {
370 if (!SI1Succs.count(Succ))
371 continue;
372 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
373 continue;
374 Fail = true;
375 if (FailBlocks)
376 FailBlocks->insert(Succ);
377 else
378 break;
379 }
380
381 return !Fail;
382}
383
384/// Update PHI nodes in Succ to indicate that there will now be entries in it
385/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
386/// will be the same as those coming in from ExistPred, an existing predecessor
387/// of Succ.
388static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
389 BasicBlock *ExistPred,
390 MemorySSAUpdater *MSSAU = nullptr) {
391 for (PHINode &PN : Succ->phis())
392 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
393 if (MSSAU)
394 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
395 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
396}
397
398/// Compute an abstract "cost" of speculating the given instruction,
399/// which is assumed to be safe to speculate. TCC_Free means cheap,
400/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
401/// expensive.
403 const TargetTransformInfo &TTI) {
405}
406
407/// If we have a merge point of an "if condition" as accepted above,
408/// return true if the specified value dominates the block. We don't handle
409/// the true generality of domination here, just a special case which works
410/// well enough for us.
411///
412/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
413/// see if V (which must be an instruction) and its recursive operands
414/// that do not dominate BB have a combined cost lower than Budget and
415/// are non-trapping. If both are true, the instruction is inserted into the
416/// set and true is returned.
417///
418/// The cost for most non-trapping instructions is defined as 1 except for
419/// Select whose cost is 2.
420///
421/// After this function returns, Cost is increased by the cost of
422/// V plus its non-dominating operands. If that cost is greater than
423/// Budget, false is returned and Cost is undefined.
424static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
425 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
428 AssumptionCache *AC, unsigned Depth = 0) {
429 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
430 // so limit the recursion depth.
431 // TODO: While this recursion limit does prevent pathological behavior, it
432 // would be better to track visited instructions to avoid cycles.
434 return false;
435
436 Instruction *I = dyn_cast<Instruction>(V);
437 if (!I) {
438 // Non-instructions dominate all instructions and can be executed
439 // unconditionally.
440 return true;
441 }
442 BasicBlock *PBB = I->getParent();
443
444 // We don't want to allow weird loops that might have the "if condition" in
445 // the bottom of this block.
446 if (PBB == BB)
447 return false;
448
449 // If this instruction is defined in a block that contains an unconditional
450 // branch to BB, then it must be in the 'conditional' part of the "if
451 // statement". If not, it definitely dominates the region.
452 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
453 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
454 return true;
455
456 // If we have seen this instruction before, don't count it again.
457 if (AggressiveInsts.count(I))
458 return true;
459
460 // Okay, it looks like the instruction IS in the "condition". Check to
461 // see if it's a cheap instruction to unconditionally compute, and if it
462 // only uses stuff defined outside of the condition. If so, hoist it out.
463 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
464 return false;
465
467
468 // Allow exactly one instruction to be speculated regardless of its cost
469 // (as long as it is safe to do so).
470 // This is intended to flatten the CFG even if the instruction is a division
471 // or other expensive operation. The speculation of an expensive instruction
472 // is expected to be undone in CodeGenPrepare if the speculation has not
473 // enabled further IR optimizations.
474 if (Cost > Budget &&
475 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
476 !Cost.isValid()))
477 return false;
478
479 // Okay, we can only really hoist these out if their operands do
480 // not take us over the cost threshold.
481 for (Use &Op : I->operands())
482 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
483 TTI, AC, Depth + 1))
484 return false;
485 // Okay, it's safe to do this! Remember this instruction.
486 AggressiveInsts.insert(I);
487 return true;
488}
489
490/// Extract ConstantInt from value, looking through IntToPtr
491/// and PointerNullValue. Return NULL if value is not a constant int.
493 // Normal constant int.
494 ConstantInt *CI = dyn_cast<ConstantInt>(V);
495 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
496 DL.isNonIntegralPointerType(V->getType()))
497 return CI;
498
499 // This is some kind of pointer constant. Turn it into a pointer-sized
500 // ConstantInt if possible.
501 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
502
503 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
504 if (isa<ConstantPointerNull>(V))
505 return ConstantInt::get(PtrTy, 0);
506
507 // IntToPtr const int.
508 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
509 if (CE->getOpcode() == Instruction::IntToPtr)
510 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
511 // The constant is very likely to have the right type already.
512 if (CI->getType() == PtrTy)
513 return CI;
514 else
515 return cast<ConstantInt>(
516 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
517 }
518 return nullptr;
519}
520
521namespace {
522
523/// Given a chain of or (||) or and (&&) comparison of a value against a
524/// constant, this will try to recover the information required for a switch
525/// structure.
526/// It will depth-first traverse the chain of comparison, seeking for patterns
527/// like %a == 12 or %a < 4 and combine them to produce a set of integer
528/// representing the different cases for the switch.
529/// Note that if the chain is composed of '||' it will build the set of elements
530/// that matches the comparisons (i.e. any of this value validate the chain)
531/// while for a chain of '&&' it will build the set elements that make the test
532/// fail.
533struct ConstantComparesGatherer {
534 const DataLayout &DL;
535
536 /// Value found for the switch comparison
537 Value *CompValue = nullptr;
538
539 /// Extra clause to be checked before the switch
540 Value *Extra = nullptr;
541
542 /// Set of integers to match in switch
544
545 /// Number of comparisons matched in the and/or chain
546 unsigned UsedICmps = 0;
547
548 /// Construct and compute the result for the comparison instruction Cond
549 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
550 gather(Cond);
551 }
552
553 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
554 ConstantComparesGatherer &
555 operator=(const ConstantComparesGatherer &) = delete;
556
557private:
558 /// Try to set the current value used for the comparison, it succeeds only if
559 /// it wasn't set before or if the new value is the same as the old one
560 bool setValueOnce(Value *NewVal) {
561 if (CompValue && CompValue != NewVal)
562 return false;
563 CompValue = NewVal;
564 return (CompValue != nullptr);
565 }
566
567 /// Try to match Instruction "I" as a comparison against a constant and
568 /// populates the array Vals with the set of values that match (or do not
569 /// match depending on isEQ).
570 /// Return false on failure. On success, the Value the comparison matched
571 /// against is placed in CompValue.
572 /// If CompValue is already set, the function is expected to fail if a match
573 /// is found but the value compared to is different.
574 bool matchInstruction(Instruction *I, bool isEQ) {
575 // If this is an icmp against a constant, handle this as one of the cases.
576 ICmpInst *ICI;
577 ConstantInt *C;
578 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
579 (C = getConstantInt(I->getOperand(1), DL)))) {
580 return false;
581 }
582
583 Value *RHSVal;
584 const APInt *RHSC;
585
586 // Pattern match a special case
587 // (x & ~2^z) == y --> x == y || x == y|2^z
588 // This undoes a transformation done by instcombine to fuse 2 compares.
589 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
590 // It's a little bit hard to see why the following transformations are
591 // correct. Here is a CVC3 program to verify them for 64-bit values:
592
593 /*
594 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
595 x : BITVECTOR(64);
596 y : BITVECTOR(64);
597 z : BITVECTOR(64);
598 mask : BITVECTOR(64) = BVSHL(ONE, z);
599 QUERY( (y & ~mask = y) =>
600 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
601 );
602 QUERY( (y | mask = y) =>
603 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
604 );
605 */
606
607 // Please note that each pattern must be a dual implication (<--> or
608 // iff). One directional implication can create spurious matches. If the
609 // implication is only one-way, an unsatisfiable condition on the left
610 // side can imply a satisfiable condition on the right side. Dual
611 // implication ensures that satisfiable conditions are transformed to
612 // other satisfiable conditions and unsatisfiable conditions are
613 // transformed to other unsatisfiable conditions.
614
615 // Here is a concrete example of a unsatisfiable condition on the left
616 // implying a satisfiable condition on the right:
617 //
618 // mask = (1 << z)
619 // (x & ~mask) == y --> (x == y || x == (y | mask))
620 //
621 // Substituting y = 3, z = 0 yields:
622 // (x & -2) == 3 --> (x == 3 || x == 2)
623
624 // Pattern match a special case:
625 /*
626 QUERY( (y & ~mask = y) =>
627 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
628 );
629 */
630 if (match(ICI->getOperand(0),
631 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
632 APInt Mask = ~*RHSC;
633 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
634 // If we already have a value for the switch, it has to match!
635 if (!setValueOnce(RHSVal))
636 return false;
637
638 Vals.push_back(C);
639 Vals.push_back(
640 ConstantInt::get(C->getContext(),
641 C->getValue() | Mask));
642 UsedICmps++;
643 return true;
644 }
645 }
646
647 // Pattern match a special case:
648 /*
649 QUERY( (y | mask = y) =>
650 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
651 );
652 */
653 if (match(ICI->getOperand(0),
654 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
655 APInt Mask = *RHSC;
656 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
657 // If we already have a value for the switch, it has to match!
658 if (!setValueOnce(RHSVal))
659 return false;
660
661 Vals.push_back(C);
662 Vals.push_back(ConstantInt::get(C->getContext(),
663 C->getValue() & ~Mask));
664 UsedICmps++;
665 return true;
666 }
667 }
668
669 // If we already have a value for the switch, it has to match!
670 if (!setValueOnce(ICI->getOperand(0)))
671 return false;
672
673 UsedICmps++;
674 Vals.push_back(C);
675 return ICI->getOperand(0);
676 }
677
678 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
679 ConstantRange Span =
681
682 // Shift the range if the compare is fed by an add. This is the range
683 // compare idiom as emitted by instcombine.
684 Value *CandidateVal = I->getOperand(0);
685 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
686 Span = Span.subtract(*RHSC);
687 CandidateVal = RHSVal;
688 }
689
690 // If this is an and/!= check, then we are looking to build the set of
691 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
692 // x != 0 && x != 1.
693 if (!isEQ)
694 Span = Span.inverse();
695
696 // If there are a ton of values, we don't want to make a ginormous switch.
697 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
698 return false;
699 }
700
701 // If we already have a value for the switch, it has to match!
702 if (!setValueOnce(CandidateVal))
703 return false;
704
705 // Add all values from the range to the set
706 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
707 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
708
709 UsedICmps++;
710 return true;
711 }
712
713 /// Given a potentially 'or'd or 'and'd together collection of icmp
714 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
715 /// the value being compared, and stick the list constants into the Vals
716 /// vector.
717 /// One "Extra" case is allowed to differ from the other.
718 void gather(Value *V) {
719 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
720
721 // Keep a stack (SmallVector for efficiency) for depth-first traversal
724
725 // Initialize
726 Visited.insert(V);
727 DFT.push_back(V);
728
729 while (!DFT.empty()) {
730 V = DFT.pop_back_val();
731
732 if (Instruction *I = dyn_cast<Instruction>(V)) {
733 // If it is a || (or && depending on isEQ), process the operands.
734 Value *Op0, *Op1;
735 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
736 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
737 if (Visited.insert(Op1).second)
738 DFT.push_back(Op1);
739 if (Visited.insert(Op0).second)
740 DFT.push_back(Op0);
741
742 continue;
743 }
744
745 // Try to match the current instruction
746 if (matchInstruction(I, isEQ))
747 // Match succeed, continue the loop
748 continue;
749 }
750
751 // One element of the sequence of || (or &&) could not be match as a
752 // comparison against the same value as the others.
753 // We allow only one "Extra" case to be checked before the switch
754 if (!Extra) {
755 Extra = V;
756 continue;
757 }
758 // Failed to parse a proper sequence, abort now
759 CompValue = nullptr;
760 break;
761 }
762 }
763};
764
765} // end anonymous namespace
766
768 MemorySSAUpdater *MSSAU = nullptr) {
769 Instruction *Cond = nullptr;
770 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
771 Cond = dyn_cast<Instruction>(SI->getCondition());
772 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
773 if (BI->isConditional())
774 Cond = dyn_cast<Instruction>(BI->getCondition());
775 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
776 Cond = dyn_cast<Instruction>(IBI->getAddress());
777 }
778
779 TI->eraseFromParent();
780 if (Cond)
782}
783
784/// Return true if the specified terminator checks
785/// to see if a value is equal to constant integer value.
786Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
787 Value *CV = nullptr;
788 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
789 // Do not permit merging of large switch instructions into their
790 // predecessors unless there is only one predecessor.
791 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
792 CV = SI->getCondition();
793 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
794 if (BI->isConditional() && BI->getCondition()->hasOneUse())
795 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
796 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
797 CV = ICI->getOperand(0);
798 }
799
800 // Unwrap any lossless ptrtoint cast.
801 if (CV) {
802 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
803 Value *Ptr = PTII->getPointerOperand();
804 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
805 CV = Ptr;
806 }
807 }
808 return CV;
809}
810
811/// Given a value comparison instruction,
812/// decode all of the 'cases' that it represents and return the 'default' block.
813BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
814 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
815 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
816 Cases.reserve(SI->getNumCases());
817 for (auto Case : SI->cases())
818 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
819 Case.getCaseSuccessor()));
820 return SI->getDefaultDest();
821 }
822
823 BranchInst *BI = cast<BranchInst>(TI);
824 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
825 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
826 Cases.push_back(ValueEqualityComparisonCase(
827 getConstantInt(ICI->getOperand(1), DL), Succ));
828 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
829}
830
831/// Given a vector of bb/value pairs, remove any entries
832/// in the list that match the specified block.
833static void
835 std::vector<ValueEqualityComparisonCase> &Cases) {
836 llvm::erase(Cases, BB);
837}
838
839/// Return true if there are any keys in C1 that exist in C2 as well.
840static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
841 std::vector<ValueEqualityComparisonCase> &C2) {
842 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
843
844 // Make V1 be smaller than V2.
845 if (V1->size() > V2->size())
846 std::swap(V1, V2);
847
848 if (V1->empty())
849 return false;
850 if (V1->size() == 1) {
851 // Just scan V2.
852 ConstantInt *TheVal = (*V1)[0].Value;
853 for (const ValueEqualityComparisonCase &VECC : *V2)
854 if (TheVal == VECC.Value)
855 return true;
856 }
857
858 // Otherwise, just sort both lists and compare element by element.
859 array_pod_sort(V1->begin(), V1->end());
860 array_pod_sort(V2->begin(), V2->end());
861 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
862 while (i1 != e1 && i2 != e2) {
863 if ((*V1)[i1].Value == (*V2)[i2].Value)
864 return true;
865 if ((*V1)[i1].Value < (*V2)[i2].Value)
866 ++i1;
867 else
868 ++i2;
869 }
870 return false;
871}
872
873// Set branch weights on SwitchInst. This sets the metadata if there is at
874// least one non-zero weight.
876 bool IsExpected) {
877 // Check that there is at least one non-zero weight. Otherwise, pass
878 // nullptr to setMetadata which will erase the existing metadata.
879 MDNode *N = nullptr;
880 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
881 N = MDBuilder(SI->getParent()->getContext())
882 .createBranchWeights(Weights, IsExpected);
883 SI->setMetadata(LLVMContext::MD_prof, N);
884}
885
886// Similar to the above, but for branch and select instructions that take
887// exactly 2 weights.
888static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
889 uint32_t FalseWeight, bool IsExpected) {
890 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
891 // Check that there is at least one non-zero weight. Otherwise, pass
892 // nullptr to setMetadata which will erase the existing metadata.
893 MDNode *N = nullptr;
894 if (TrueWeight || FalseWeight)
895 N = MDBuilder(I->getParent()->getContext())
896 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
897 I->setMetadata(LLVMContext::MD_prof, N);
898}
899
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  eliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
  eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      // NOTE(review): the line erasing the old terminator appears to be
      // elided from this excerpt (the new branch above replaces it) —
      // confirm against upstream.

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    // NOTE(review): the declaration of DeadCases (a small set of the pruned
    // case values) appears to be elided from this excerpt.
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      DeadCases.insert(PredCases[i].Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Walk the cases backwards so removeCase() does not invalidate the
    // iterators we have not yet visited.
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      // Only successors that lost their last remaining case edge become
      // non-successors; report those deletions to the DomTree updater.
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges.
  // CheckEdge is cleared after the first occurrence of TheRealDest so that
  // duplicate edges to TheRealDest still get their extra PHI entries removed.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  // NOTE(review): a line erasing the old terminator and the declaration of
  // the Updates vector appear to be elided from this excerpt.
  if (DTU) {
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1053
1054namespace {
1055
1056/// This class implements a stable ordering of constant
1057/// integers that does not depend on their address. This is important for
1058/// applications that sort ConstantInt's to ensure uniqueness.
1059struct ConstantIntOrdering {
1060 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1061 return LHS->getValue().ult(RHS->getValue());
1062 }
1063};
1064
1065} // end anonymous namespace
1066
1068 ConstantInt *const *P2) {
1069 const ConstantInt *LHS = *P1;
1070 const ConstantInt *RHS = *P2;
1071 if (LHS == RHS)
1072 return 0;
1073 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1074}
1075
/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
// NOTE(review): the first line of this signature (taking Instruction *TI)
// appears to be elided from this excerpt.
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD && "Invalid branch-weight metadata");
  extractFromBranchWeightMD64(MD, Weights);

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    assert(Weights.size() == 2);
    ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}
1095
/// Keep halving the weights until all can fit in uint32_t.
// NOTE(review): the signature line (taking a mutable range of uint64_t
// weights) appears to be elided from this excerpt.
  uint64_t Max = *llvm::max_element(Weights);
  if (Max > UINT_MAX) {
    // Shift all weights right just far enough that the largest one fits in
    // 32 bits; the uniform shift preserves the relative ratios (up to
    // rounding).
    unsigned Offset = 32 - llvm::countl_zero(Max);
    for (uint64_t &I : Weights)
      I >>= Offset;
  }
}
1105
// Clone the non-terminator ("bonus") instructions of BB into PredBlock,
// immediately before its terminator, and rewrite block-closed-SSA uses to
// refer to the clones where appropriate.
// NOTE(review): the signature line of this function appears to be elided
// from this excerpt.
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!isa<DbgInfoIntrinsic>(BonusInst) &&
        PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc());
    }

    // NOTE(review): the remap-flags argument of this call appears to be
    // elided from this excerpt.
    RemapInstruction(NewBonusInst, VMap,

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    // NOTE(review): the remap-flags argument of this call appears to be
    // elided from this excerpt.
    RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,

    if (isa<DbgInfoIntrinsic>(BonusInst))
      continue;

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }
}
1173
// Merge the value-equality comparison terminator TI (switch, or branch on
// "X == c") into its predecessor's comparison PTI on the same value CV,
// building one combined switch in the predecessor and preserving branch
// weights, PHI entries, and (optionally) DomTree updates along the way.
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  // NOTE(review): the declaration of the Updates vector (accumulated DomTree
  // updates applied at the end) appears to be elided from this excerpt.

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  // NOTE(review): the declaration of NewSuccessors (mapping each new
  // successor block to the number of edges it gains) appears to be elided
  // from this excerpt.

  // Update the branch weight metadata along the way
  // NOTE(review): the declaration of Weights appears to be elided from this
  // excerpt.
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    getBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // Swap-with-back + pop_back erases case i in O(1); re-examine slot i.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (PTIHandled.count(BBCases[i].Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[BBCases[i].Value]);
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights) {
    // Halve the weights if any of them cannot fit in an uint32_t
    fitWeights(Weights);

    SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());

    setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
  }

  // NOTE(review): the line erasing the old predecessor terminator appears to
  // be elided from this excerpt.

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1390
/// The specified terminator is a value equality comparison instruction
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
/// on the same value. If so, and if safe to do so, fold them together.
bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
                                                         IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  Value *CV = isValueEqualityComparison(TI); // CondVal
  assert(CV && "Not a comparison?");

  bool Changed = false;

  // NOTE(review): the declaration of the Preds worklist (seeded from BB's
  // predecessors) appears to be elided from this excerpt.
  while (!Preds.empty()) {
    BasicBlock *Pred = Preds.pop_back_val();
    Instruction *PTI = Pred->getTerminator();

    // Don't try to fold into itself.
    if (Pred == BB)
      continue;

    // See if the predecessor is a comparison with the same value.
    Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
    if (PCV != CV)
      continue;

    // NOTE(review): the declaration of FailBlocks appears to be elided from
    // this excerpt. Blocks where merging is unsafe get their predecessors
    // split first; if splitting fails, give up.
    if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
      for (auto *Succ : FailBlocks) {
        if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
          return false;
      }
    }

    performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
    Changed = true;
  }
  return Changed;
}
1430
// If we would need to insert a select that uses the value of this invoke
// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
// need to do this), we can't hoist the invoke, as there is nowhere to put the
// select in this case.
// NOTE(review): the first line of this signature (taking BB1 and BB2)
// appears to be elided from this excerpt.
                                Instruction *I1, Instruction *I2) {
  // Scan every PHI of every successor of BB1: if a PHI receives different
  // values from the two blocks and one of them is the invoke result itself,
  // hoisting would require a select over the invoke value — reject.
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}
1448
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.
// NOTE(review): the SkipFlags enum definition and the signature of this
// function (taking Instruction *I) appear to be elided from this excerpt.

  unsigned Flags = 0;
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
  // NOTE(review): the guard condition for the next line (an implicit
  // control-flow check on I) appears to be elided from this excerpt.
    Flags |= SkipImplicitControlFlow;
  return Flags;
}
1470
// Returns true if it is safe to reorder an instruction across preceding
// instructions in a basic block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder an
  // instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
  // NOTE(review): the condition line for this early return (checking
  // SkipImplicitControlFlow against I) appears to be elided from this
  // excerpt.
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }

  return true;
}
1506
1507static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1508
/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
// NOTE(review): the first line of this signature (taking I1 and I2) appears
// to be elided from this excerpt.
                                         const TargetTransformInfo &TTI) {
  // If we're going to hoist a call, make sure that the two instructions
  // we're commoning/hoisting are both marked with musttail, or neither of
  // them is marked as such. Otherwise, we might end up in a situation where
  // we hoist from a block where the terminator is a `ret` to a block where
  // the terminator is a `br`, and `musttail` calls expect to be followed by
  // a return.
  auto *C1 = dyn_cast<CallInst>(I1);
  auto *C2 = dyn_cast<CallInst>(I2);
  if (C1 && C2)
    if (C1->isMustTailCall() != C2->isMustTailCall())
      return false;

  // NOTE(review): the condition line for this early return (presumably a TTI
  // profitability check, given the unused TTI parameter above) appears to be
  // elided from this excerpt — confirm against upstream.
    return false;

  // If any of the two call sites has nomerge or convergent attribute, stop
  // hoisting.
  if (const auto *CB1 = dyn_cast<CallBase>(I1))
    if (CB1->cannotMerge() || CB1->isConvergent())
      return false;
  if (const auto *CB2 = dyn_cast<CallBase>(I2))
    if (CB2->cannotMerge() || CB2->isConvergent())
      return false;

  return true;
}
1539
/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
/// hoistCommonCodeFromSuccessors. e.g. The input:
/// I1 DVRs: { x, z },
/// OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
// NOTE(review): the first line of this signature appears to be elided from
// this excerpt.
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  // NOTE(review): the declaration of Itrs appears to be elided from this
  // excerpt.
  Itrs.reserve(OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    // NOTE(review): the lambda-parameter line of this inner predicate
    // appears to be elided from this excerpt.
    return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
                    return Itrs[0].first->isIdenticalToWhenDefined(*I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exausted. If
  // the lock-step DbgRecord are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Itrs, atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
      }
    }
  }
}
1596
// Return true if I1 and I2 compute the same value: either strictly identical,
// or equal after commuting operands (including the swapped-predicate form of
// a compare).
// NOTE(review): the first line of this signature (taking I1) appears to be
// elided from this excerpt.
                                         const Instruction *I2) {
  if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
    return true;

  // A compare matches the swapped-predicate form of another with its
  // operands exchanged (e.g. "icmp ult a, b" vs "icmp ugt b, a").
  if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
    if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
      return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
             Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
             Cmp1->getOperand(1) == Cmp2->getOperand(0);

  // A commutative operation matches with its first two operands swapped,
  // provided any remaining operands are pairwise equal.
  if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
    return I1->getOperand(0) == I2->getOperand(1) &&
           I1->getOperand(1) == I2->getOperand(0) &&
           equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
  }

  return false;
}
1616
1617/// If the target supports conditional faulting,
1618/// we look for the following pattern:
1619/// \code
1620/// BB:
1621/// ...
1622/// %cond = icmp ult %x, %y
1623/// br i1 %cond, label %TrueBB, label %FalseBB
1624/// FalseBB:
1625/// store i32 1, ptr %q, align 4
1626/// ...
1627/// TrueBB:
1628/// %maskedloadstore = load i32, ptr %b, align 4
1629/// store i32 %maskedloadstore, ptr %p, align 4
1630/// ...
1631/// \endcode
1632///
1633/// and transform it into:
1634///
1635/// \code
1636/// BB:
1637/// ...
1638/// %cond = icmp ult %x, %y
1639/// %maskedloadstore = cload i32, ptr %b, %cond
1640/// cstore i32 %maskedloadstore, ptr %p, %cond
1641/// cstore i32 1, ptr %q, ~%cond
1642/// br i1 %cond, label %TrueBB, label %FalseBB
1643/// FalseBB:
1644/// ...
1645/// TrueBB:
1646/// ...
1647/// \endcode
1648///
1649/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1650/// e.g.
1651///
1652/// \code
1653/// %vcond = bitcast i1 %cond to <1 x i1>
1654/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1655/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1656/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1657/// call void @llvm.masked.store.v1i32.p0
1658/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1659/// %cond.not = xor i1 %cond, true
1660/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1661/// call void @llvm.masked.store.v1i32.p0
1662/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1663/// \endcode
1664///
1665/// So we need to turn hoisted load/store into cload/cstore.
1666///
1667/// \param BI The branch instruction.
1668/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1669/// will be speculated.
1670/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1672 BranchInst *BI,
1673 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1674 std::optional<bool> Invert) {
1675 auto &Context = BI->getParent()->getContext();
1676 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1677 auto *Cond = BI->getOperand(0);
1678 // Construct the condition if needed.
1679 BasicBlock *BB = BI->getParent();
1680 IRBuilder<> Builder(
1681 Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
1682 Value *Mask = nullptr;
1683 Value *MaskFalse = nullptr;
1684 Value *MaskTrue = nullptr;
1685 if (Invert.has_value()) {
1686 Mask = Builder.CreateBitCast(
1687 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1688 VCondTy);
1689 } else {
1690 MaskFalse = Builder.CreateBitCast(
1691 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1692 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1693 }
1694 auto PeekThroughBitcasts = [](Value *V) {
1695 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1696 V = BitCast->getOperand(0);
1697 return V;
1698 };
1699 for (auto *I : SpeculatedConditionalLoadsStores) {
1700 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1701 if (!Invert.has_value())
1702 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1703 // We currently assume conditional faulting load/store is supported for
1704 // scalar types only when creating new instructions. This can be easily
1705 // extended for vector types in the future.
1706 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1707 auto *Op0 = I->getOperand(0);
1708 CallInst *MaskedLoadStore = nullptr;
1709 if (auto *LI = dyn_cast<LoadInst>(I)) {
1710 // Handle Load.
1711 auto *Ty = I->getType();
1712 PHINode *PN = nullptr;
1713 Value *PassThru = nullptr;
1714 if (Invert.has_value())
1715 for (User *U : I->users())
1716 if ((PN = dyn_cast<PHINode>(U))) {
1717 PassThru = Builder.CreateBitCast(
1718 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1719 FixedVectorType::get(Ty, 1));
1720 break;
1721 }
1722 MaskedLoadStore = Builder.CreateMaskedLoad(
1723 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1724 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1725 if (PN)
1726 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1727 I->replaceAllUsesWith(NewLoadStore);
1728 } else {
1729 // Handle Store.
1730 auto *StoredVal = Builder.CreateBitCast(
1731 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1732 MaskedLoadStore = Builder.CreateMaskedStore(
1733 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1734 }
1735 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1736 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1737 //
1738 // !nonnull, !align : Not support pointer type, no need to keep.
1739 // !range: Load type is changed from scalar to vector, but the metadata on
1740 // vector specifies a per-element range, so the semantics stay the
1741 // same. Keep it.
1742 // !annotation: Not impact semantics. Keep it.
1743 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1744 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1745 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1746 // FIXME: DIAssignID is not supported for masked store yet.
1747 // (Verifier::visitDIAssignIDMetadata)
1749 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1750 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1751 });
1752 MaskedLoadStore->copyMetadata(*I);
1753 I->eraseFromParent();
1754 }
1755}
1756
1758 const TargetTransformInfo &TTI) {
1759 // Not handle volatile or atomic.
1760 if (auto *L = dyn_cast<LoadInst>(I)) {
1761 if (!L->isSimple())
1762 return false;
1763 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1764 if (!S->isSimple())
1765 return false;
1766 } else
1767 return false;
1768
1769 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1770 // That's why we have the alignment limitation.
1771 // FIXME: Update the prototype of the intrinsics?
1774}
1775
namespace {

// LockstepReverseIterator - Iterates through instructions
// in a set of blocks in reverse order from the first non-terminator.
// Debug intrinsics (DbgInfoIntrinsic) are transparently skipped in both
// directions of iteration.
// For example (assume all blocks have size n):
// LockstepReverseIterator I([B1, B2, B3]);
// *I-- = [B1[n], B2[n], B3[n]];
// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
// ...
class LockstepReverseIterator {
  // NOTE(review): the member declarations for `Blocks` (the input blocks)
  // and `Insts` (one cursor instruction per block) are missing in this copy
  // of the file — restore from upstream SimplifyCFG.cpp.
  // Set as soon as any block runs out of non-debug instructions; once set,
  // the iterator stays invalid until reset().
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  // Position every cursor at the last non-debug, non-terminator instruction
  // of its block; fails if any block has none.
  void reset() {
    Fail = false;
    Insts.clear();
    for (auto *BB : Blocks) {
      Instruction *Inst = BB->getTerminator();
      for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
        Inst = Inst->getPrevNode();
      if (!Inst) {
        // Block wasn't big enough.
        Fail = true;
        return;
      }
      Insts.push_back(Inst);
    }
  }

  // True while every block still has an instruction at the current position.
  bool isValid() const { return !Fail; }

  // Step every cursor one non-debug instruction backwards; invalidates the
  // iterator when any block's beginning is reached.
  void operator--() {
    if (Fail)
      return;
    for (auto *&Inst : Insts) {
      for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
        Inst = Inst->getPrevNode();
      // Already at beginning of block.
      if (!Inst) {
        Fail = true;
        return;
      }
    }
  }

  // Step every cursor one non-debug instruction forwards; invalidates the
  // iterator when any block's end is reached.
  void operator++() {
    if (Fail)
      return;
    for (auto *&Inst : Insts) {
      for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
        Inst = Inst->getNextNode();
      // Already at end of block.
      if (!Inst) {
        Fail = true;
        return;
      }
    }
  }

  // The current lockstep frontier: one instruction per input block, in the
  // same order as the blocks were given.
  ArrayRef<Instruction *> operator*() const { return Insts; }
};

} // end anonymous namespace
1846
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
/// \returns true if any instruction was hoisted (i.e. the CFG changed).
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  for (auto *Succ : successors(BB))
    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
      return false;

  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // Hoisting across PHIs is handled elsewhere; bail out here.
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.
    // NOTE(review): the declaration of `Succs` (the materialized successor
    // list of BB) is missing in this copy of the file — restore from
    // upstream SimplifyCFG.cpp.
    // Check if sizes and terminators of all successors match.
    bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
      Instruction *Term0 = Succs[0]->getTerminator();
      Instruction *Term = Succ->getTerminator();
      return !Term->isSameOperationAs(Term0) ||
             !equal(Term->operands(), Term0->operands()) ||
             Succs[0]->size() != Succ->size();
    });
    if (!AllSame)
      return false;
    if (AllSame) {
      // Walk all successors backwards in lockstep and require every row of
      // instructions to be pairwise identical (up to commutativity).
      LockstepReverseIterator LRI(Succs);
      while (LRI.isValid()) {
        Instruction *I0 = (*LRI)[0];
        if (any_of(*LRI, [I0](Instruction *I) {
              return !areIdenticalUpToCommutativity(I0, I);
            })) {
          return false;
        }
        --LRI;
      }
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  // Main lockstep scan: repeatedly look at the current instruction of every
  // successor and either hoist the whole row, skip it, or stop.
  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    // Skip debug info if it is not identical.
    bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
      Instruction *I2 = &*Iter;
      return I1->isIdenticalToWhenDefined(I2);
    });
    if (!AllDbgInstsAreIdentical) {
      while (isa<DbgInfoIntrinsic>(I1))
        I1 = &*++BB1ItrPair.first;
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter;
        while (isa<DbgInfoIntrinsic>(I2))
          I2 = &*++SuccIter;
      }
    }

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    // NOTE(review): the declaration of `OtherInsts` (the list of the other
    // successors' current instructions) is missing in this copy of the file
    // — restore from upstream SimplifyCFG.cpp.
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
            // NOTE(review): the second conjunct of this && expression is
            // missing in this copy of the file — restore from upstream.
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      if (isa<DbgInfoIntrinsic>(I1)) {
        // The debug location is an integral part of a debug info intrinsic
        // and can't be separated from it or replaced. Instead of attempting
        // to merge locations, simply hoist both copies of the intrinsic.
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
        // and leave any that were not hoisted behind (by calling moveBefore
        // rather than moveBeforePreserving).
        I1->moveBefore(TI->getIterator());
        for (auto &SuccIter : OtherSuccIterRange) {
          auto *I2 = &*SuccIter++;
          assert(isa<DbgInfoIntrinsic>(I2));
          I2->moveBefore(TI->getIterator());
        }
      } else {
        // For a normal instruction, we just move one to right before the
        // branch, then replace all uses of the other with the first. Finally,
        // we remove the now redundant second instruction.
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
        // and leave any that were not hoisted behind (by calling moveBefore
        // rather than moveBeforePreserving).
        I1->moveBefore(TI->getIterator());
        for (auto &SuccIter : OtherSuccIterRange) {
          Instruction *I2 = &*SuccIter++;
          assert(I2 != I1);
          if (!I2->use_empty())
            I2->replaceAllUsesWith(I1);
          I1->andIRFlags(I2);
          if (auto *CB = dyn_cast<CallBase>(I1)) {
            bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
            assert(Success && "We should not be trying to hoist callbases "
                              "with non-intersectable attributes");
            // For NDEBUG Compile.
            (void)Success;
          }

          combineMetadataForCSE(I1, I2, true);
          // I1 and I2 are being combined into a single instruction. Its debug
          // location is the merged locations of the original instructions.
          I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
          I2->eraseFromParent();
        }
      }
      // NumHoistCommonCode counts sites; only bump it the first time we
      // hoist anything from this terminator.
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2063
/// Hoist the identical terminators of TI's successor blocks (I1 plus the
/// terminators in OtherSuccTIs) up into TI's block: the terminator is cloned
/// before TI, all uses of the originals are redirected to the clone, PHIs in
/// the downstream successors get select instructions where the if/else arms
/// disagreed, and the dominator tree updater (if present) is notified.
/// \returns true if the CFG was changed.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs) {

  // Non-null only when TI is a conditional branch (the "if statement" case).
  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow than converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
          // NOTE(review): the second half of this || condition is missing in
          // this copy of the file — restore from upstream SimplifyCFG.cpp.
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  // NOTE(review): the declaration of `Locs` (a small vector of debug
  // locations) is missing in this copy of the file — restore from upstream.
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DILocation::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache one select per distinct (then-value, else-value) pair so the same
    // disagreement is only materialized once.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          SI = cast<SelectInst>(Builder.CreateSelectFMF(
              BI->getCondition(), BB1V, BB2V,
              isa<FPMathOperator>(PN) ? &PN : nullptr,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }

  // NOTE(review): the declaration of `Updates` (a small vector of
  // DominatorTree::UpdateType) is missing in this copy of the file —
  // restore from upstream SimplifyCFG.cpp.

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    addPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(TI))
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});

  // NOTE(review): a statement is missing here in this copy (upstream erases
  // the now-redundant original terminator TI) — confirm against upstream.
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2189
// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
// into variables.
// Returns true if replacing operand OpIdx of I with a non-constant value
// (e.g. a PHI) is considered cheap for this instruction.
// NOTE(review): the first line of this function's signature is missing in
// this copy of the file — restore from upstream SimplifyCFG.cpp.
                                               int OpIdx) {
  // Divide/Remainder by constant is typically much cheaper than by variable.
  if (I->isIntDivRem())
    return OpIdx != 1;
  // Intrinsic operands (e.g. constant memcpy sizes) are assumed expensive to
  // variabilize; everything else is cheap.
  return !isa<IntrinsicInst>(I);
}
2199
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): the first line(s) of this function's signature (including
// the `Insts` parameter) are missing in this copy of the file — restore
// from upstream SimplifyCFG.cpp.
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  // All instructions must be the same operation as the first one, and agree
  // on memory-model-relaxation annotations.
  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
      return false;

    // swifterror pointers can only be used by a load or store; sinking a load
    // or store would require introducing a select for the pointer operand,
    // which isn't allowed for swifterror pointers.
    if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
      return false;
    if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(&U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(Insts, It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // Check each operand position: either all instructions agree on the
  // operand, or the disagreement can be resolved with a new PHI.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    if (Op->getType()->isTokenTy())
      // Don't touch any operand of token type.
      return false;

    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
      // SROA can't speculate lifetime markers of selects/phis, and the
      // backend may handle such lifetimes incorrectly as well (#104776).
      // Don't sink lifetimes if it would introduce a phi on the pointer
      // argument.
      if (isa<LifetimeIntrinsic>(I0) && OI == 1 &&
          any_of(Insts, [](const Instruction *I) {
            return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
          }))
        return false;

      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
        // NOTE(review): the right-hand side of this || condition is missing
        // in this copy of the file — restore from upstream SimplifyCFG.cpp.
        // We can't create a PHI from this GEP.
        return false;
      // Record the per-block operand values that a future PHI would need.
      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
      for (auto *I : Insts)
        Ops.push_back(I->getOperand(OI));
    }
  }
  return true;
}
2327
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
// NOTE(review): this function's signature line is missing in this copy of
// the file — restore from upstream SimplifyCFG.cpp.
  // All blocks end in an unconditional branch to the same successor; use the
  // first block's target as the sink destination.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  // NOTE(review): the declaration of `Insts` (the per-block instructions to
  // common) is missing in this copy of the file — restore from upstream.
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    do {
      I = I->getPrevNode();
    } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
    if (!isa<DbgInfoIntrinsic>(I))
      Insts.push_back(I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
      if (auto *CB = dyn_cast<CallBase>(I0)) {
        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(U);
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }
}
2425
2426/// Check whether BB's predecessors end with unconditional branches. If it is
2427/// true, sink any common code from the predecessors to BB.
2429 DomTreeUpdater *DTU) {
2430 // We support two situations:
2431 // (1) all incoming arcs are unconditional
2432 // (2) there are non-unconditional incoming arcs
2433 //
2434 // (2) is very common in switch defaults and
2435 // else-if patterns;
2436 //
2437 // if (a) f(1);
2438 // else if (b) f(2);
2439 //
2440 // produces:
2441 //
2442 // [if]
2443 // / \
2444 // [f(1)] [if]
2445 // | | \
2446 // | | |
2447 // | [f(2)]|
2448 // \ | /
2449 // [ end ]
2450 //
2451 // [end] has two unconditional predecessor arcs and one conditional. The
2452 // conditional refers to the implicit empty 'else' arc. This conditional
2453 // arc can also be caused by an empty default block in a switch.
2454 //
2455 // In this case, we attempt to sink code from all *unconditional* arcs.
2456 // If we can sink instructions from these arcs (determined during the scan
2457 // phase below) we insert a common successor for all unconditional arcs and
2458 // connect that to [end], to enable sinking:
2459 //
2460 // [if]
2461 // / \
2462 // [x(1)] [if]
2463 // | | \
2464 // | | \
2465 // | [x(2)] |
2466 // \ / |
2467 // [sink.split] |
2468 // \ /
2469 // [ end ]
2470 //
2471 SmallVector<BasicBlock*,4> UnconditionalPreds;
2472 bool HaveNonUnconditionalPredecessors = false;
2473 for (auto *PredBB : predecessors(BB)) {
2474 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2475 if (PredBr && PredBr->isUnconditional())
2476 UnconditionalPreds.push_back(PredBB);
2477 else
2478 HaveNonUnconditionalPredecessors = true;
2479 }
2480 if (UnconditionalPreds.size() < 2)
2481 return false;
2482
2483 // We take a two-step approach to tail sinking. First we scan from the end of
2484 // each block upwards in lockstep. If the n'th instruction from the end of each
2485 // block can be sunk, those instructions are added to ValuesToSink and we
2486 // carry on. If we can sink an instruction but need to PHI-merge some operands
2487 // (because they're not identical in each instruction) we add these to
2488 // PHIOperands.
2489 // We prepopulate PHIOperands with the phis that already exist in BB.
2491 for (PHINode &PN : BB->phis()) {
2493 for (const Use &U : PN.incoming_values())
2494 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2495 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2496 for (BasicBlock *Pred : UnconditionalPreds)
2497 Ops.push_back(*IncomingVals[Pred]);
2498 }
2499
2500 int ScanIdx = 0;
2501 SmallPtrSet<Value*,4> InstructionsToSink;
2502 LockstepReverseIterator LRI(UnconditionalPreds);
2503 while (LRI.isValid() &&
2504 canSinkInstructions(*LRI, PHIOperands)) {
2505 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2506 << "\n");
2507 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2508 ++ScanIdx;
2509 --LRI;
2510 }
2511
2512 // If no instructions can be sunk, early-return.
2513 if (ScanIdx == 0)
2514 return false;
2515
2516 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2517
2518 if (!followedByDeoptOrUnreachable) {
2519 // Check whether this is the pointer operand of a load/store.
2520 auto IsMemOperand = [](Use &U) {
2521 auto *I = cast<Instruction>(U.getUser());
2522 if (isa<LoadInst>(I))
2523 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2524 if (isa<StoreInst>(I))
2525 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2526 return false;
2527 };
2528
2529 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2530 // actually sink before encountering instruction that is unprofitable to
2531 // sink?
2532 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2533 unsigned NumPHIInsts = 0;
2534 for (Use &U : (*LRI)[0]->operands()) {
2535 auto It = PHIOperands.find(&U);
2536 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2537 return InstructionsToSink.contains(V);
2538 })) {
2539 ++NumPHIInsts;
2540 // Do not separate a load/store from the gep producing the address.
2541 // The gep can likely be folded into the load/store as an addressing
2542 // mode. Additionally, a load of a gep is easier to analyze than a
2543 // load of a phi.
2544 if (IsMemOperand(U) &&
2545 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2546 return false;
2547 // FIXME: this check is overly optimistic. We may end up not sinking
2548 // said instruction, due to the very same profitability check.
2549 // See @creating_too_many_phis in sink-common-code.ll.
2550 }
2551 }
2552 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2553 return NumPHIInsts <= 1;
2554 };
2555
2556 // We've determined that we are going to sink last ScanIdx instructions,
2557 // and recorded them in InstructionsToSink. Now, some instructions may be
2558 // unprofitable to sink. But that determination depends on the instructions
2559 // that we are going to sink.
2560
2561 // First, forward scan: find the first instruction unprofitable to sink,
2562 // recording all the ones that are profitable to sink.
2563 // FIXME: would it be better, after we detect that not all are profitable.
2564 // to either record the profitable ones, or erase the unprofitable ones?
2565 // Maybe we need to choose (at runtime) the one that will touch least
2566 // instrs?
2567 LRI.reset();
2568 int Idx = 0;
2569 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2570 while (Idx < ScanIdx) {
2571 if (!ProfitableToSinkInstruction(LRI)) {
2572 // Too many PHIs would be created.
2573 LLVM_DEBUG(
2574 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2575 break;
2576 }
2577 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2578 --LRI;
2579 ++Idx;
2580 }
2581
2582 // If no instructions can be sunk, early-return.
2583 if (Idx == 0)
2584 return false;
2585
2586 // Did we determine that (only) some instructions are unprofitable to sink?
2587 if (Idx < ScanIdx) {
2588 // Okay, some instructions are unprofitable.
2589 ScanIdx = Idx;
2590 InstructionsToSink = InstructionsProfitableToSink;
2591
2592 // But, that may make other instructions unprofitable, too.
2593 // So, do a backward scan, do any earlier instructions become
2594 // unprofitable?
2595 assert(
2596 !ProfitableToSinkInstruction(LRI) &&
2597 "We already know that the last instruction is unprofitable to sink");
2598 ++LRI;
2599 --Idx;
2600 while (Idx >= 0) {
2601 // If we detect that an instruction becomes unprofitable to sink,
2602 // all earlier instructions won't be sunk either,
2603 // so preemptively keep InstructionsProfitableToSink in sync.
2604 // FIXME: is this the most performant approach?
2605 for (auto *I : *LRI)
2606 InstructionsProfitableToSink.erase(I);
2607 if (!ProfitableToSinkInstruction(LRI)) {
2608 // Everything starting with this instruction won't be sunk.
2609 ScanIdx = Idx;
2610 InstructionsToSink = InstructionsProfitableToSink;
2611 }
2612 ++LRI;
2613 --Idx;
2614 }
2615 }
2616
2617 // If no instructions can be sunk, early-return.
2618 if (ScanIdx == 0)
2619 return false;
2620 }
2621
2622 bool Changed = false;
2623
2624 if (HaveNonUnconditionalPredecessors) {
2625 if (!followedByDeoptOrUnreachable) {
2626 // It is always legal to sink common instructions from unconditional
2627 // predecessors. However, if not all predecessors are unconditional,
2628 // this transformation might be pessimizing. So as a rule of thumb,
2629 // don't do it unless we'd sink at least one non-speculatable instruction.
2630 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2631 LRI.reset();
2632 int Idx = 0;
2633 bool Profitable = false;
2634 while (Idx < ScanIdx) {
2635 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2636 Profitable = true;
2637 break;
2638 }
2639 --LRI;
2640 ++Idx;
2641 }
2642 if (!Profitable)
2643 return false;
2644 }
2645
2646 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2647 // We have a conditional edge and we're going to sink some instructions.
2648 // Insert a new block postdominating all blocks we're going to sink from.
2649 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2650 // Edges couldn't be split.
2651 return false;
2652 Changed = true;
2653 }
2654
2655 // Now that we've analyzed all potential sinking candidates, perform the
2656 // actual sink. We iteratively sink the last non-terminator of the source
2657 // blocks into their common successor unless doing so would require too
2658 // many PHI instructions to be generated (currently only one PHI is allowed
2659 // per sunk instruction).
2660 //
2661 // We can use InstructionsToSink to discount values needing PHI-merging that will
2662 // actually be sunk in a later iteration. This allows us to be more
2663 // aggressive in what we sink. This does allow a false positive where we
2664 // sink presuming a later value will also be sunk, but stop half way through
2665 // and never actually sink it which means we produce more PHIs than intended.
2666 // This is unlikely in practice though.
2667 int SinkIdx = 0;
2668 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2669 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2670 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2671 << "\n");
2672
2673 // Because we've sunk every instruction in turn, the current instruction to
2674 // sink is always at index 0.
2675 LRI.reset();
2676
2677 sinkLastInstruction(UnconditionalPreds);
2678 NumSinkCommonInstrs++;
2679 Changed = true;
2680 }
2681 if (SinkIdx != 0)
2682 ++NumSinkCommonCode;
2683 return Changed;
2684}
2685
2686namespace {
2687
// Helper that partitions the `invoke` predecessors of a common landingpad
// into sets of mutually-mergeable invokes. Membership is decided pairwise
// by shouldBelongToSameSet().
2688struct CompatibleSets {
 2689 using SetTy = SmallVector<InvokeInst *, 2>;
 2690
 2692
 // NOTE(review): a public container member holding the sets (used below as
 // `Sets`, e.g. in getCompatibleSet() and mergeCompatibleInvokes()) appears
 // to have been dropped from this rendering — confirm against upstream.
 // Returns true iff the two candidate invokes could legally and profitably
 // be merged into a single invoke (always called with exactly two).
 2693 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
 2694
 // Find the existing set this invoke is compatible with, or create a new
 // empty set for it.
 2695 SetTy &getCompatibleSet(InvokeInst *II);
 2696
 // Append the invoke to its compatible set.
 2697 void insert(InvokeInst *II);
 2698};
2699
2700CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2701 // Perform a linear scan over all the existing sets, see if the new `invoke`
2702 // is compatible with any particular set. Since we know that all the `invokes`
2703 // within a set are compatible, only check the first `invoke` in each set.
2704 // WARNING: at worst, this has quadratic complexity.
2705 for (CompatibleSets::SetTy &Set : Sets) {
2706 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2707 return Set;
2708 }
2709
2710 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2711 return Sets.emplace_back();
2712}
2713
2714void CompatibleSets::insert(InvokeInst *II) {
2715 getCompatibleSet(II).emplace_back(II);
2716}
2717
// Decide whether two `invoke`s that unwind to the same landingpad are
// mergeable: same (or both-indirect) callee, matching normal destinations,
// compatible PHI incoming values, identical operation apart from arguments,
// and argument slots that can legally be PHI'd together.
2718bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
 2719 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
 2720
 2721 // Can we theoretically merge these `invoke`s?
 2722 auto IsIllegalToMerge = [](InvokeInst *II) {
 2723 return II->cannotMerge() || II->isInlineAsm();
 2724 };
 2725 if (any_of(Invokes, IsIllegalToMerge))
 2726 return false;
 2727
 2728 // Either both `invoke`s must be direct,
 2729 // or both `invoke`s must be indirect.
 2730 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
 2731 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
 2732 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
 2733 if (HaveIndirectCalls) {
 2734 if (!AllCallsAreIndirect)
 2735 return false;
 2736 } else {
 2737 // All callees must be identical.
 2738 Value *Callee = nullptr;
 2739 for (InvokeInst *II : Invokes) {
 2740 Value *CurrCallee = II->getCalledOperand();
 2741 assert(CurrCallee && "There is always a called operand.");
 2742 if (!Callee)
 2743 Callee = CurrCallee;
 2744 else if (Callee != CurrCallee)
 2745 return false;
 2746 }
 2747 }
 2748
 2749 // Either both `invoke`s must not have a normal destination,
 2750 // or both `invoke`s must have a normal destination,
 2751 auto HasNormalDest = [](InvokeInst *II) {
 2752 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
 2753 };
 2754 if (any_of(Invokes, HasNormalDest)) {
 2755 // Do not merge `invoke` that does not have a normal destination with one
 2756 // that does have a normal destination, even though doing so would be legal.
 2757 if (!all_of(Invokes, HasNormalDest))
 2758 return false;
 2759
 2760 // All normal destinations must be identical.
 2761 BasicBlock *NormalBB = nullptr;
 2762 for (InvokeInst *II : Invokes) {
 2763 BasicBlock *CurrNormalBB = II->getNormalDest();
 2764 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
 2765 if (!NormalBB)
 2766 NormalBB = CurrNormalBB;
 2767 else if (NormalBB != CurrNormalBB)
 2768 return false;
 2769 }
 2770
 2771 // In the normal destination, the incoming values for these two `invoke`s
 2772 // must be compatible.
 2773 SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
 // NOTE(review): the call consuming these arguments (upstream:
 // `if (!incomingValuesAreCompatible(`) is missing from this rendering.
 2775 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
 2776 &EquivalenceSet))
 2777 return false;
 2778 }
 2779
 2780#ifndef NDEBUG
 2781 // All unwind destinations must be identical.
 2782 // We know that because we have started from said unwind destination.
 2783 BasicBlock *UnwindBB = nullptr;
 2784 for (InvokeInst *II : Invokes) {
 2785 BasicBlock *CurrUnwindBB = II->getUnwindDest();
 2786 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
 2787 if (!UnwindBB)
 2788 UnwindBB = CurrUnwindBB;
 2789 else
 2790 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
 2791 }
 2792#endif
 2793
 2794 // In the unwind destination, the incoming values for these two `invoke`s
 2795 // must be compatible.
 // NOTE(review): the call line itself (upstream:
 // `if (!incomingValuesAreCompatible(`) is missing from this rendering.
 2797 Invokes.front()->getUnwindDest(),
 2798 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
 2799 return false;
 2800
 2801 // Ignoring arguments, these `invoke`s must be identical,
 2802 // including operand bundles.
 2803 const InvokeInst *II0 = Invokes.front();
 2804 for (auto *II : Invokes.drop_front())
 2805 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
 2806 return false;
 2807
 2808 // Can we theoretically form the data operands for the merged `invoke`?
 2809 auto IsIllegalToMergeArguments = [](auto Ops) {
 2810 Use &U0 = std::get<0>(Ops);
 2811 Use &U1 = std::get<1>(Ops);
 // Identical operands need no PHI; differing ones must not be tokens and
 // must be replaceable with a variable in this operand position.
 2812 if (U0 == U1)
 2813 return false;
 2814 return U0->getType()->isTokenTy() ||
 2815 !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
 2816 U0.getOperandNo());
 2817 };
 2818 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
 2819 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
 2820 IsIllegalToMergeArguments))
 2821 return false;
 2822
 2823 return true;
 2824}
2825
2826} // namespace
2827
2828// Merge all invokes in the provided set, all of which are compatible
2829// as per the `CompatibleSets::shouldBelongToSameSet()`.
// NOTE(review): the first line of this function's signature (upstream:
// `static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,`)
// is missing from this rendering.
 2831 DomTreeUpdater *DTU) {
 2832 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
 2833
 // NOTE(review): the declaration of `Updates` (a DomTree update vector) is
 // missing from this rendering; it is used throughout the DTU paths below.
 2835 if (DTU)
 2836 Updates.reserve(2 + 3 * Invokes.size());
 2837
 2838 bool HasNormalDest =
 2839 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
 2840
 2841 // Clone one of the invokes into a new basic block.
 2842 // Since they are all compatible, it doesn't matter which invoke is cloned.
 2843 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
 2844 InvokeInst *II0 = Invokes.front();
 2845 BasicBlock *II0BB = II0->getParent();
 2846 BasicBlock *InsertBeforeBlock =
 2847 II0->getParent()->getIterator()->getNextNode();
 2848 Function *Func = II0BB->getParent();
 2849 LLVMContext &Ctx = II0->getContext();
 2850
 2851 BasicBlock *MergedInvokeBB = BasicBlock::Create(
 2852 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
 2853
 2854 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
 2855 // NOTE: all invokes have the same attributes, so no handling needed.
 2856 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
 2857
 2858 if (!HasNormalDest) {
 2859 // This set does not have a normal destination,
 2860 // so just form a new block with unreachable terminator.
 2861 BasicBlock *MergedNormalDest = BasicBlock::Create(
 2862 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
 2863 new UnreachableInst(Ctx, MergedNormalDest);
 2864 MergedInvoke->setNormalDest(MergedNormalDest);
 2865 }
 2866
 2867 // The unwind destination, however, remains identical for all invokes here.
 2868
 2869 return MergedInvoke;
 2870 }();
 2871
 2872 if (DTU) {
 2873 // Predecessor blocks that contained these invokes will now branch to
 2874 // the new block that contains the merged invoke, ...
 2875 for (InvokeInst *II : Invokes)
 2876 Updates.push_back(
 2877 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
 2878
 2879 // ... which has the new `unreachable` block as normal destination,
 2880 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
 2881 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
 2882 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
 2883 SuccBBOfMergedInvoke});
 2884
 2885 // Since predecessor blocks now unconditionally branch to a new block,
 2886 // they no longer branch to their original successors.
 2887 for (InvokeInst *II : Invokes)
 2888 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
 2889 Updates.push_back(
 2890 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
 2891 }
 2892
 2893 bool IsIndirectCall = Invokes[0]->isIndirectCall();
 2894
 2895 // Form the merged operands for the merged invoke.
 2896 for (Use &U : MergedInvoke->operands()) {
 2897 // Only PHI together the indirect callees and data operands.
 2898 if (MergedInvoke->isCallee(&U)) {
 2899 if (!IsIndirectCall)
 2900 continue;
 2901 } else if (!MergedInvoke->isDataOperand(&U))
 2902 continue;
 2903
 2904 // Don't create trivial PHI's with all-identical incoming values.
 2905 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
 2906 return II->getOperand(U.getOperandNo()) != U.get();
 2907 });
 2908 if (!NeedPHI)
 2909 continue;
 2910
 2911 // Form a PHI out of all the data ops under this index.
 // NOTE(review): the line creating the PHI node (upstream:
 // `PHINode *PN = PHINode::Create(`) is missing from this rendering.
 2913 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
 2914 for (InvokeInst *II : Invokes)
 2915 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
 2916
 2917 U.set(PN);
 2918 }
 2919
 2920 // We've ensured that each PHI node has compatible (identical) incoming values
 2921 // when coming from each of the `invoke`s in the current merge set,
 2922 // so update the PHI nodes accordingly.
 2923 for (BasicBlock *Succ : successors(MergedInvoke))
 2924 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
 2925 /*ExistPred=*/Invokes.front()->getParent());
 2926
 2927 // And finally, replace the original `invoke`s with an unconditional branch
 2928 // to the block with the merged `invoke`. Also, give that merged `invoke`
 2929 // the merged debugloc of all the original `invoke`s.
 2930 DILocation *MergedDebugLoc = nullptr;
 2931 for (InvokeInst *II : Invokes) {
 2932 // Compute the debug location common to all the original `invoke`s.
 2933 if (!MergedDebugLoc)
 2934 MergedDebugLoc = II->getDebugLoc();
 2935 else
 2936 MergedDebugLoc =
 2937 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
 2938
 2939 // And replace the old `invoke` with an unconditional branch
 2940 // to the block with the merged `invoke`.
 2941 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
 2942 OrigSuccBB->removePredecessor(II->getParent());
 2943 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
 2944 // The unconditional branch is part of the replacement for the original
 2945 // invoke, so should use its DebugLoc.
 2946 BI->setDebugLoc(II->getDebugLoc());
 2947 bool Success = MergedInvoke->tryIntersectAttributes(II);
 2948 assert(Success && "Merged invokes with incompatible attributes");
 2949 // For NDEBUG Compile
 2950 (void)Success;
 2951 II->replaceAllUsesWith(MergedInvoke);
 2952 II->eraseFromParent();
 2953 ++NumInvokesMerged;
 2954 }
 2955 MergedInvoke->setDebugLoc(MergedDebugLoc);
 2956 ++NumInvokeSetsFormed;
 2957
 2958 if (DTU)
 2959 DTU->applyUpdates(Updates);
 2960}
2961
2962/// If this block is a `landingpad` exception handling block, categorize all
2963/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2964/// being "mergeable" together, and then merge invokes in each set together.
2965///
2966/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2967/// [...] [...]
2968/// | |
2969/// [invoke0] [invoke1]
2970/// / \ / \
2971/// [cont0] [landingpad] [cont1]
2972/// to:
2973/// [...] [...]
2974/// \ /
2975/// [invoke]
2976/// / \
2977/// [cont] [landingpad]
2978///
2979/// But of course we can only do that if the invokes share the `landingpad`,
2980/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2981/// and the invoked functions are "compatible".
// NOTE(review): the function signature and the guard condition preceding
// this `return false;` (presumably an early bail-out when the transform is
// disabled) are missing from this rendering — confirm against upstream.
 2984 return false;
 2985
 2986 bool Changed = false;
 2987
 2988 // FIXME: generalize to all exception handling blocks?
 2989 if (!BB->isLandingPad())
 2990 return Changed;
 2991
 2992 CompatibleSets Grouper;
 2993
 2994 // Record all the predecessors of this `landingpad`. As per verifier,
 2995 // the only allowed predecessor is the unwind edge of an `invoke`.
 2996 // We want to group "compatible" `invokes` into the same set to be merged.
 2997 for (BasicBlock *PredBB : predecessors(BB))
 2998 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
 2999
 3000 // And now, merge `invoke`s that were grouped together.
 3001 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
 3002 if (Invokes.size() < 2)
 3003 continue;
 3004 Changed = true;
 3005 mergeCompatibleInvokesImpl(Invokes, DTU);
 3006 }
 3007
 3008 return Changed;
 3009}
3010
3011namespace {
3012/// Track ephemeral values, which should be ignored for cost-modelling
3013/// purposes. Requires walking instructions in reverse order.
3014class EphemeralValueTracker {
 // NOTE(review): the declaration of the `EphValues` set member (used by the
 // methods below) is missing from this rendering — confirm against upstream.
 3016
 // An instruction is ephemeral if it is an assume, or if it is side-effect
 // free, not a terminator, and feeds only already-tracked ephemeral users
 // (hence the reverse-order walk requirement above).
 3017 bool isEphemeral(const Instruction *I) {
 3018 if (isa<AssumeInst>(I))
 3019 return true;
 3020 return !I->mayHaveSideEffects() && !I->isTerminator() &&
 3021 all_of(I->users(), [&](const User *U) {
 3022 return EphValues.count(cast<Instruction>(U));
 3023 });
 3024 }
 3025
 3026public:
 // Record the instruction if it is ephemeral; returns true iff it was.
 3027 bool track(const Instruction *I) {
 3028 if (isEphemeral(I)) {
 3029 EphValues.insert(I);
 3030 return true;
 3031 }
 3032 return false;
 3033 }
 3034
 3035 bool contains(const Instruction *I) const { return EphValues.contains(I); }
 3036};
3037} // namespace
3038
3039/// Determine if we can hoist sink a sole store instruction out of a
3040/// conditional block.
3041///
3042/// We are looking for code like the following:
3043/// BrBB:
3044/// store i32 %add, i32* %arrayidx2
3045/// ... // No other stores or function calls (we could be calling a memory
3046/// ... // function).
3047/// %cmp = icmp ult %x, %y
3048/// br i1 %cmp, label %EndBB, label %ThenBB
3049/// ThenBB:
3050/// store i32 %add5, i32* %arrayidx2
3051/// br label EndBB
3052/// EndBB:
3053/// ...
3054/// We are going to transform this into:
3055/// BrBB:
3056/// store i32 %add, i32* %arrayidx2
3057/// ... //
3058/// %cmp = icmp ult %x, %y
3059/// %add.add5 = select i1 %cmp, i32 %add, %add5
3060/// store i32 %add.add5, i32* %arrayidx2
3061/// ...
3062///
3063/// \return The pointer to the value of the previous store if the store can be
3064/// hoisted into the predecessor block. 0 otherwise.
// NOTE(review): the first line of this function's signature (upstream:
// `static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,`)
// is missing from this rendering.
 3066 BasicBlock *StoreBB, BasicBlock *EndBB) {
 3067 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
 3068 if (!StoreToHoist)
 3069 return nullptr;
 3070
 3071 // Volatile or atomic.
 3072 if (!StoreToHoist->isSimple())
 3073 return nullptr;
 3074
 3075 Value *StorePtr = StoreToHoist->getPointerOperand();
 3076 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
 3077
 3078 // Look for a store to the same pointer in BrBB.
 3079 unsigned MaxNumInstToLookAt = 9;
 3080 // Skip pseudo probe intrinsic calls which are not really killing any memory
 3081 // accesses.
 3082 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
 // Bounded backwards scan: give up after a fixed number of instructions.
 3083 if (!MaxNumInstToLookAt)
 3084 break;
 3085 --MaxNumInstToLookAt;
 3086
 3087 // Could be calling an instruction that affects memory like free().
 3088 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
 3089 return nullptr;
 3090
 3091 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
 3092 // Found the previous store to same location and type. Make sure it is
 3093 // simple, to avoid introducing a spurious non-atomic write after an
 3094 // atomic write.
 3095 if (SI->getPointerOperand() == StorePtr &&
 3096 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
 3097 SI->getAlign() >= StoreToHoist->getAlign())
 3098 // Found the previous store, return its value operand.
 3099 return SI->getValueOperand();
 3100 return nullptr; // Unknown store.
 3101 }
 3102
 3103 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
 // A prior simple load of the same pointer/type also proves the location
 // is dereferenceable; additionally require the underlying object to be
 // writable and un-captured so the unconditional store cannot introduce
 // a data race visible to other code.
 3104 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
 3105 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
 3106 Value *Obj = getUnderlyingObject(StorePtr);
 3107 bool ExplicitlyDereferenceableOnly;
 3108 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
 3109 !PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
 3110 /*StoreCaptures=*/true) &&
 3111 (!ExplicitlyDereferenceableOnly ||
 3112 isDereferenceablePointer(StorePtr, StoreTy,
 3113 LI->getDataLayout()))) {
 3114 // Found a previous load, return it.
 3115 return LI;
 3116 }
 3117 }
 3118 // The load didn't work out, but we may still find a store.
 3119 }
 3120 }
 3121
 3122 return nullptr;
 3123}
3124
3125/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3126/// converted to selects.
// NOTE(review): the first signature line (upstream:
// `static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,`)
// and the `InstructionCost &Cost` parameter line are missing from this
// rendering; `Cost` is accumulated below as an out-parameter.
 3128 BasicBlock *EndBB,
 3129 unsigned &SpeculatedInstructions,
 3131 const TargetTransformInfo &TTI) {
 // Pick a cost kind based on minsize; the enclosing ?: expression's
 // alternatives are missing from this rendering.
 3133 BB->getParent()->hasMinSize()
 3136
 3137 bool HaveRewritablePHIs = false;
 3138 for (PHINode &PN : EndBB->phis()) {
 3139 Value *OrigV = PN.getIncomingValueForBlock(BB);
 3140 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
 3141
 3142 // FIXME: Try to remove some of the duplication with
 3143 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
 3144 if (ThenV == OrigV)
 3145 continue;
 3146
 // Each non-trivial PHI will be rewritten as one select.
 3147 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
 3149
 3150 // Don't convert to selects if we could remove undefined behavior instead.
 3151 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
 3153 return false;
 3154
 3155 HaveRewritablePHIs = true;
 3156 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
 3157 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
 3158 if (!OrigCE && !ThenCE)
 3159 continue; // Known cheap (FIXME: Maybe not true for aggregates).
 3160
 3161 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
 3162 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
 3163 InstructionCost MaxCost =
 3165 if (OrigCost + ThenCost > MaxCost)
 3166 return false;
 3167
 3168 // Account for the cost of an unfolded ConstantExpr which could end up
 3169 // getting expanded into Instructions.
 3170 // FIXME: This doesn't account for how many operations are combined in the
 3171 // constant expression.
 3172 ++SpeculatedInstructions;
 3173 if (SpeculatedInstructions > 1)
 3174 return false;
 3175 }
 3176
 // True only if at least one PHI actually needs (and permits) a select.
 3177 return HaveRewritablePHIs;
 3178}
3179
// NOTE(review): the first signature line (upstream:
// `static bool isProfitableToSpeculate(const BranchInst *BI,`) is missing
// from this rendering. Decides, from branch-weight metadata, whether
// speculating the `then` side of BI is likely to pay off.
 3181 std::optional<bool> Invert,
 3182 const TargetTransformInfo &TTI) {
 3183 // If the branch is non-unpredictable, and is predicted to *not* branch to
 3184 // the `then` block, then avoid speculating it.
 3185 if (BI->getMetadata(LLVMContext::MD_unpredictable))
 3186 return true;
 3187
 // Without usable branch weights we have no evidence against speculating.
 3188 uint64_t TWeight, FWeight;
 3189 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
 3190 return true;
 3191
 3192 if (!Invert.has_value())
 3193 return false;
 3194
 // Probability of taking the edge to the end block (the non-`then` edge).
 3195 uint64_t EndWeight = *Invert ? TWeight : FWeight;
 3196 BranchProbability BIEndProb =
 3197 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
 // NOTE(review): the definition of `Likely` (a probability threshold) is
 // missing from this rendering — confirm against upstream.
 3199 return BIEndProb < Likely;
 3200}
3201
3202/// Speculate a conditional basic block flattening the CFG.
3203///
3204/// Note that this is a very risky transform currently. Speculating
3205/// instructions like this is most often not desirable. Instead, there is an MI
3206/// pass which can do it with full awareness of the resource constraints.
3207/// However, some cases are "obvious" and we should do directly. An example of
3208/// this is speculating a single, reasonably cheap instruction.
3209///
3210/// There is only one distinct advantage to flattening the CFG at the IR level:
3211/// it makes very common but simplistic optimizations such as are common in
3212/// instcombine and the DAG combiner more powerful by removing CFG edges and
3213/// modeling their effects with easier to reason about SSA value graphs.
3214///
3215///
3216/// An illustration of this transform is turning this IR:
3217/// \code
3218/// BB:
3219/// %cmp = icmp ult %x, %y
3220/// br i1 %cmp, label %EndBB, label %ThenBB
3221/// ThenBB:
3222/// %sub = sub %x, %y
3223/// br label BB2
3224/// EndBB:
3225/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3226/// ...
3227/// \endcode
3228///
3229/// Into this IR:
3230/// \code
3231/// BB:
3232/// %cmp = icmp ult %x, %y
3233/// %sub = sub %x, %y
3234/// %cond = select i1 %cmp, 0, %sub
3235/// ...
3236/// \endcode
3237///
3238/// \returns true if the conditional block is removed.
3239bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
 3240 BasicBlock *ThenBB) {
 3241 if (!Options.SpeculateBlocks)
 3242 return false;
 3243
 3244 // Be conservative for now. FP select instruction can often be expensive.
 3245 Value *BrCond = BI->getCondition();
 3246 if (isa<FCmpInst>(BrCond))
 3247 return false;
 3248
 3249 BasicBlock *BB = BI->getParent();
 3250 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
 3251 InstructionCost Budget =
 // NOTE(review): the budget initializer expression is missing from this
 // rendering — confirm against upstream.
 3253
 3254 // If ThenBB is actually on the false edge of the conditional branch, remember
 3255 // to swap the select operands later.
 3256 bool Invert = false;
 3257 if (ThenBB != BI->getSuccessor(0)) {
 3258 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
 3259 Invert = true;
 3260 }
 3261 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
 3262
 3263 if (!isProfitableToSpeculate(BI, Invert, TTI))
 3264 return false;
 3265
 3266 // Keep a count of how many times instructions are used within ThenBB when
 3267 // they are candidates for sinking into ThenBB. Specifically:
 3268 // - They are defined in BB, and
 3269 // - They have no side effects, and
 3270 // - All of their uses are in ThenBB.
 3271 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
 3272
 3273 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
 3274
 3275 unsigned SpeculatedInstructions = 0;
 3276 bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
 3277 Options.HoistLoadsStoresWithCondFaulting;
 3278 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
 3279 Value *SpeculatedStoreValue = nullptr;
 3280 StoreInst *SpeculatedStore = nullptr;
 3281 EphemeralValueTracker EphTracker;
 // Walk ThenBB backwards (terminator excluded) so ephemeral values can be
 // recognized before their defs are visited.
 3282 for (Instruction &I : reverse(drop_end(*ThenBB))) {
 3283 // Skip debug info.
 3284 if (isa<DbgInfoIntrinsic>(I)) {
 3285 SpeculatedDbgIntrinsics.push_back(&I);
 3286 continue;
 3287 }
 3288
 3289 // Skip pseudo probes. The consequence is we lose track of the branch
 3290 // probability for ThenBB, which is fine since the optimization here takes
 3291 // place regardless of the branch probability.
 3292 if (isa<PseudoProbeInst>(I)) {
 3293 // The probe should be deleted so that it will not be over-counted when
 3294 // the samples collected on the non-conditional path are counted towards
 3295 // the conditional path. We leave it for the counts inference algorithm to
 3296 // figure out a proper count for an unknown probe.
 3297 SpeculatedDbgIntrinsics.push_back(&I);
 3298 continue;
 3299 }
 3300
 3301 // Ignore ephemeral values, they will be dropped by the transform.
 3302 if (EphTracker.track(&I))
 3303 continue;
 3304
 3305 // Only speculatively execute a single instruction (not counting the
 3306 // terminator) for now.
 3306 // NOTE(review): part of this condition (the per-instruction safety check
 3306 // and the load/store count threshold) is missing from this rendering.
 3307 bool IsSafeCheapLoadStore = HoistLoadsStores &&
 3309 SpeculatedConditionalLoadsStores.size() <
 3311 // Not count load/store into cost if target supports conditional faulting
 3312 // b/c it's cheap to speculate it.
 3313 if (IsSafeCheapLoadStore)
 3314 SpeculatedConditionalLoadsStores.push_back(&I);
 3315 else
 3316 ++SpeculatedInstructions;
 3317
 3318 if (SpeculatedInstructions > 1)
 3319 return false;
 3320
 3321 // Don't hoist the instruction if it's unsafe or expensive.
 // NOTE(review): one conjunct of this condition (presumably the
 // speculative-execution safety check) is missing from this rendering.
 3322 if (!IsSafeCheapLoadStore &&
 3324 !(HoistCondStores && !SpeculatedStoreValue &&
 3325 (SpeculatedStoreValue =
 3326 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
 3327 return false;
 // NOTE(review): the cost-comparison lines of this condition are missing
 // from this rendering.
 3328 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
 3331 return false;
 3332
 3333 // Store the store speculation candidate.
 3334 if (!SpeculatedStore && SpeculatedStoreValue)
 3335 SpeculatedStore = cast<StoreInst>(&I);
 3336
 3337 // Do not hoist the instruction if any of its operands are defined but not
 3338 // used in BB. The transformation will prevent the operand from
 3339 // being sunk into the use block.
 3340 for (Use &Op : I.operands()) {
 3341 Instruction *OpI = dyn_cast<Instruction>(Op);
 3342 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
 3343 continue; // Not a candidate for sinking.
 3344
 3345 ++SinkCandidateUseCounts[OpI];
 3346 }
 3347 }
 3348
 3349 // Consider any sink candidates which are only used in ThenBB as costs for
 3350 // speculation. Note, while we iterate over a DenseMap here, we are summing
 3351 // and so iteration order isn't significant.
 3352 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
 3353 if (Inst->hasNUses(Count)) {
 3354 ++SpeculatedInstructions;
 3355 if (SpeculatedInstructions > 1)
 3356 return false;
 3357 }
 3358
 3359 // Check that we can insert the selects and that it's not too expensive to do
 3360 // so.
 3361 bool Convert =
 3362 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
 // NOTE(review): the declaration/initialization of `Cost` is missing from
 // this rendering; it is filled in by validateAndCostRequiredSelects.
 3364 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
 3365 SpeculatedInstructions, Cost, TTI);
 3366 if (!Convert || Cost > Budget)
 3367 return false;
 3368
 3369 // If we get here, we can hoist the instruction and if-convert.
 3370 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
 3371
 3372 // Insert a select of the value of the speculated store.
 3373 if (SpeculatedStoreValue) {
 3374 IRBuilder<NoFolder> Builder(BI);
 3375 Value *OrigV = SpeculatedStore->getValueOperand();
 3376 Value *TrueV = SpeculatedStore->getValueOperand();
 3377 Value *FalseV = SpeculatedStoreValue;
 3378 if (Invert)
 3379 std::swap(TrueV, FalseV);
 3380 Value *S = Builder.CreateSelect(
 3381 BrCond, TrueV, FalseV, "spec.store.select", BI);
 3382 SpeculatedStore->setOperand(0, S);
 3383 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
 3384 SpeculatedStore->getDebugLoc());
 3385 // The value stored is still conditional, but the store itself is now
 3386 // unconditionally executed, so we must be sure that any linked dbg.assign
 3387 // intrinsics are tracking the new stored value (the result of the
 3388 // select). If we don't, and the store were to be removed by another pass
 3389 // (e.g. DSE), then we'd eventually end up emitting a location describing
 3390 // the conditional value, unconditionally.
 3391 //
 3392 // === Before this transformation ===
 3393 // pred:
 3394 // store %one, %x.dest, !DIAssignID !1
 3395 // dbg.assign %one, "x", ..., !1, ...
 3396 // br %cond if.then
 3397 //
 3398 // if.then:
 3399 // store %two, %x.dest, !DIAssignID !2
 3400 // dbg.assign %two, "x", ..., !2, ...
 3401 //
 3402 // === After this transformation ===
 3403 // pred:
 3404 // store %one, %x.dest, !DIAssignID !1
 3405 // dbg.assign %one, "x", ..., !1
 3406 // ...
 3407 // %merge = select %cond, %two, %one
 3408 // store %merge, %x.dest, !DIAssignID !2
 3409 // dbg.assign %merge, "x", ..., !2
 3410 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
 3411 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
 3412 DbgAssign->replaceVariableLocationOp(OrigV, S);
 3413 };
 3414 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
 3415 for_each(at::getDVRAssignmentMarkers(SpeculatedStore), replaceVariable);
 3416 }
 3417
 3418 // Metadata can be dependent on the condition we are hoisting above.
 3419 // Strip all UB-implying metadata on the instruction. Drop the debug loc
 3420 // to avoid making it appear as if the condition is a constant, which would
 3421 // be misleading while debugging.
 3422 // Similarly strip attributes that may be dependent on condition we are
 3423 // hoisting above.
 3424 for (auto &I : make_early_inc_range(*ThenBB)) {
 3425 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
 3426 // Don't update the DILocation of dbg.assign intrinsics.
 3427 if (!isa<DbgAssignIntrinsic>(&I))
 3428 I.setDebugLoc(DebugLoc());
 3429 }
 3430 I.dropUBImplyingAttrsAndMetadata();
 3431
 3432 // Drop ephemeral values.
 3433 if (EphTracker.contains(&I)) {
 3434 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
 3435 I.eraseFromParent();
 3436 }
 3437 }
 3438
 3439 // Hoist the instructions.
 3440 // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
 3441 // to these instructions, in the same way that dbg.value intrinsics are
 3442 // dropped at the end of this block.
 3443 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
 3444 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
 3445 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
 3446 // equivalent).
 3447 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
 3448 !DVR || !DVR->isDbgAssign())
 3449 It.dropOneDbgRecord(&DR);
 3450 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
 3451 std::prev(ThenBB->end()));
 3452
 3453 if (!SpeculatedConditionalLoadsStores.empty())
 3454 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
 3455
 3456 // Insert selects and rewrite the PHI operands.
 3457 IRBuilder<NoFolder> Builder(BI);
 3458 for (PHINode &PN : EndBB->phis()) {
 3459 unsigned OrigI = PN.getBasicBlockIndex(BB);
 3460 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
 3461 Value *OrigV = PN.getIncomingValue(OrigI);
 3462 Value *ThenV = PN.getIncomingValue(ThenI);
 3463
 3464 // Skip PHIs which are trivial.
 3465 if (OrigV == ThenV)
 3466 continue;
 3467
 3468 // Create a select whose true value is the speculatively executed value and
 3469 // false value is the pre-existing value. Swap them if the branch
 3470 // destinations were inverted.
 3471 Value *TrueV = ThenV, *FalseV = OrigV;
 3472 if (Invert)
 3473 std::swap(TrueV, FalseV);
 3474 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
 3475 PN.setIncomingValue(OrigI, V);
 3476 PN.setIncomingValue(ThenI, V);
 3477 }
 3478
 3479 // Remove speculated dbg intrinsics.
 3480 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
 3481 // dbg value for the different flows and inserting it after the select.
 3482 for (Instruction *I : SpeculatedDbgIntrinsics) {
 3483 // We still want to know that an assignment took place so don't remove
 3484 // dbg.assign intrinsics.
 3485 if (!isa<DbgAssignIntrinsic>(I))
 3486 I->eraseFromParent();
 3487 }
 3488
 3489 ++NumSpeculations;
 3490 return true;
 3491}
3492
3493/// Return true if we can thread a branch across this block.
// NOTE(review): the function signature line (upstream:
// `static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {`)
// is missing from this rendering.
 3495 int Size = 0;
 3496 EphemeralValueTracker EphTracker;
 3497
 3498 // Walk the loop in reverse so that we can identify ephemeral values properly
 3499 // (values only feeding assumes).
 3500 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
 3501 // Can't fold blocks that contain noduplicate or convergent calls.
 3502 if (CallInst *CI = dyn_cast<CallInst>(&I))
 3503 if (CI->cannotDuplicate() || CI->isConvergent())
 3504 return false;
 3505
 3506 // Ignore ephemeral values which are deleted during codegen.
 3507 // We will delete Phis while threading, so Phis should not be accounted in
 3508 // block's size.
 3509 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
 3510 if (Size++ > MaxSmallBlockSize)
 3511 return false; // Don't clone large BB's.
 3512 }
 3513
 3514 // We can only support instructions that do not define values that are
 3515 // live outside of the current basic block.
 3516 for (User *U : I.users()) {
 3517 Instruction *UI = cast<Instruction>(U);
 3518 if (UI->getParent() != BB || isa<PHINode>(UI))
 3519 return false;
 3520 }
 3521
 3522 // Looks ok, continue checking.
 3523 }
 3524
 3525 return true;
 3526}
3527
3529 BasicBlock *To) {
3530 // Don't look past the block defining the value, we might get the value from
3531 // a previous loop iteration.
3532 auto *I = dyn_cast<Instruction>(V);
3533 if (I && I->getParent() == To)
3534 return nullptr;
3535
3536 // We know the value if the From block branches on it.
3537 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3538 if (BI && BI->isConditional() && BI->getCondition() == V &&
3539 BI->getSuccessor(0) != BI->getSuccessor(1))
3540 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3542
3543 return nullptr;
3544}
3545
3546/// If we have a conditional branch on something for which we know the constant
3547/// value in predecessors (e.g. a phi node in the current block), thread edges
3548/// from the predecessor to their ultimate destination.
3549static std::optional<bool>
3551 const DataLayout &DL,
3552 AssumptionCache *AC) {
3554 BasicBlock *BB = BI->getParent();
3555 Value *Cond = BI->getCondition();
3556 PHINode *PN = dyn_cast<PHINode>(Cond);
3557 if (PN && PN->getParent() == BB) {
3558 // Degenerate case of a single entry PHI.
3559 if (PN->getNumIncomingValues() == 1) {
3561 return true;
3562 }
3563
3564 for (Use &U : PN->incoming_values())
3565 if (auto *CB = dyn_cast<ConstantInt>(U))
3566 KnownValues[CB].insert(PN->getIncomingBlock(U));
3567 } else {
3568 for (BasicBlock *Pred : predecessors(BB)) {
3569 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3570 KnownValues[CB].insert(Pred);
3571 }
3572 }
3573
3574 if (KnownValues.empty())
3575 return false;
3576
3577 // Now we know that this block has multiple preds and two succs.
3578 // Check that the block is small enough and values defined in the block are
3579 // not used outside of it.
3581 return false;
3582
3583 for (const auto &Pair : KnownValues) {
3584 // Okay, we now know that all edges from PredBB should be revectored to
3585 // branch to RealDest.
3586 ConstantInt *CB = Pair.first;
3587 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3588 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3589
3590 if (RealDest == BB)
3591 continue; // Skip self loops.
3592
3593 // Skip if the predecessor's terminator is an indirect branch.
3594 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3595 return isa<IndirectBrInst>(PredBB->getTerminator());
3596 }))
3597 continue;
3598
3599 LLVM_DEBUG({
3600 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3601 << " has value " << *Pair.first << " in predecessors:\n";
3602 for (const BasicBlock *PredBB : Pair.second)
3603 dbgs() << " " << PredBB->getName() << "\n";
3604 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3605 });
3606
3607 // Split the predecessors we are threading into a new edge block. We'll
3608 // clone the instructions into this block, and then redirect it to RealDest.
3609 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3610
3611 // TODO: These just exist to reduce test diff, we can drop them if we like.
3612 EdgeBB->setName(RealDest->getName() + ".critedge");
3613 EdgeBB->moveBefore(RealDest);
3614
3615 // Update PHI nodes.
3616 addPredecessorToBlock(RealDest, EdgeBB, BB);
3617
3618 // BB may have instructions that are being threaded over. Clone these
3619 // instructions into EdgeBB. We know that there will be no uses of the
3620 // cloned instructions outside of EdgeBB.
3621 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3622 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
3623 TranslateMap[Cond] = CB;
3624
3625 // RemoveDIs: track instructions that we optimise away while folding, so
3626 // that we can copy DbgVariableRecords from them later.
3627 BasicBlock::iterator SrcDbgCursor = BB->begin();
3628 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3629 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3630 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3631 continue;
3632 }
3633 // Clone the instruction.
3634 Instruction *N = BBI->clone();
3635 // Insert the new instruction into its new home.
3636 N->insertInto(EdgeBB, InsertPt);
3637
3638 if (BBI->hasName())
3639 N->setName(BBI->getName() + ".c");
3640
3641 // Update operands due to translation.
3642 for (Use &Op : N->operands()) {
3643 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3644 if (PI != TranslateMap.end())
3645 Op = PI->second;
3646 }
3647
3648 // Check for trivial simplification.
3649 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3650 if (!BBI->use_empty())
3651 TranslateMap[&*BBI] = V;
3652 if (!N->mayHaveSideEffects()) {
3653 N->eraseFromParent(); // Instruction folded away, don't need actual
3654 // inst
3655 N = nullptr;
3656 }
3657 } else {
3658 if (!BBI->use_empty())
3659 TranslateMap[&*BBI] = N;
3660 }
3661 if (N) {
3662 // Copy all debug-info attached to instructions from the last we
3663 // successfully clone, up to this instruction (they might have been
3664 // folded away).
3665 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3666 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3667 SrcDbgCursor = std::next(BBI);
3668 // Clone debug-info on this instruction too.
3669 N->cloneDebugInfoFrom(&*BBI);
3670
3671 // Register the new instruction with the assumption cache if necessary.
3672 if (auto *Assume = dyn_cast<AssumeInst>(N))
3673 if (AC)
3674 AC->registerAssumption(Assume);
3675 }
3676 }
3677
3678 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3679 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3680 InsertPt->cloneDebugInfoFrom(BI);
3681
3682 BB->removePredecessor(EdgeBB);
3683 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3684 EdgeBI->setSuccessor(0, RealDest);
3685 EdgeBI->setDebugLoc(BI->getDebugLoc());
3686
3687 if (DTU) {
3689 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3690 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3691 DTU->applyUpdates(Updates);
3692 }
3693
3694 // For simplicity, we created a separate basic block for the edge. Merge
3695 // it back into the predecessor if possible. This not only avoids
3696 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3697 // bypass the check for trivial cycles above.
3698 MergeBlockIntoPredecessor(EdgeBB, DTU);
3699
3700 // Signal repeat, simplifying any other constants.
3701 return std::nullopt;
3702 }
3703
3704 return false;
3705}
3706
3708 DomTreeUpdater *DTU,
3709 const DataLayout &DL,
3710 AssumptionCache *AC) {
3711 std::optional<bool> Result;
3712 bool EverChanged = false;
3713 do {
3714 // Note that None means "we changed things, but recurse further."
3715 Result = foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3716 EverChanged |= Result == std::nullopt || *Result;
3717 } while (Result == std::nullopt);
3718 return EverChanged;
3719}
3720
3721/// Given a BB that starts with the specified two-entry PHI node,
3722/// see if we can eliminate it.
3725 const DataLayout &DL,
3726 bool SpeculateUnpredictables) {
3727 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3728 // statement", which has a very simple dominance structure. Basically, we
3729 // are trying to find the condition that is being branched on, which
3730 // subsequently causes this merge to happen. We really want control
3731 // dependence information for this check, but simplifycfg can't keep it up
3732 // to date, and this catches most of the cases we care about anyway.
3733 BasicBlock *BB = PN->getParent();
3734
3735 BasicBlock *IfTrue, *IfFalse;
3736 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3737 if (!DomBI)
3738 return false;
3739 Value *IfCond = DomBI->getCondition();
3740 // Don't bother if the branch will be constant folded trivially.
3741 if (isa<ConstantInt>(IfCond))
3742 return false;
3743
3744 BasicBlock *DomBlock = DomBI->getParent();
3747 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3748 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3749 });
3750 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3751 "Will have either one or two blocks to speculate.");
3752
3753 // If the branch is non-unpredictable, see if we either predictably jump to
3754 // the merge bb (if we have only a single 'then' block), or if we predictably
3755 // jump to one specific 'then' block (if we have two of them).
3756 // It isn't beneficial to speculatively execute the code
3757 // from the block that we know is predictably not entered.
3758 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3759 if (!IsUnpredictable) {
3760 uint64_t TWeight, FWeight;
3761 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3762 (TWeight + FWeight) != 0) {
3763 BranchProbability BITrueProb =
3764 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3766 BranchProbability BIFalseProb = BITrueProb.getCompl();
3767 if (IfBlocks.size() == 1) {
3768 BranchProbability BIBBProb =
3769 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3770 if (BIBBProb >= Likely)
3771 return false;
3772 } else {
3773 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3774 return false;
3775 }
3776 }
3777 }
3778
3779 // Don't try to fold an unreachable block. For example, the phi node itself
3780 // can't be the candidate if-condition for a select that we want to form.
3781 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3782 if (IfCondPhiInst->getParent() == BB)
3783 return false;
3784
3785 // Okay, we found that we can merge this two-entry phi node into a select.
3786 // Doing so would require us to fold *all* two entry phi nodes in this block.
3787 // At some point this becomes non-profitable (particularly if the target
3788 // doesn't support cmov's). Only do this transformation if there are two or
3789 // fewer PHI nodes in this block.
3790 unsigned NumPhis = 0;
3791 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3792 if (NumPhis > 2)
3793 return false;
3794
3795 // Loop over the PHI's seeing if we can promote them all to select
3796 // instructions. While we are at it, keep track of the instructions
3797 // that need to be moved to the dominating block.
3798 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3800 InstructionCost Budget =
3802 if (SpeculateUnpredictables && IsUnpredictable)
3803 Budget += TTI.getBranchMispredictPenalty();
3804
3805 bool Changed = false;
3806 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3807 PHINode *PN = cast<PHINode>(II++);
3808 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3809 PN->replaceAllUsesWith(V);
3810 PN->eraseFromParent();
3811 Changed = true;
3812 continue;
3813 }
3814
3815 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3816 AggressiveInsts, Cost, Budget, TTI, AC) ||
3817 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3818 AggressiveInsts, Cost, Budget, TTI, AC))
3819 return Changed;
3820 }
3821
3822 // If we folded the first phi, PN dangles at this point. Refresh it. If
3823 // we ran out of PHIs then we simplified them all.
3824 PN = dyn_cast<PHINode>(BB->begin());
3825 if (!PN)
3826 return true;
3827
3828 // Return true if at least one of these is a 'not', and another is either
3829 // a 'not' too, or a constant.
3830 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3831 if (!match(V0, m_Not(m_Value())))
3832 std::swap(V0, V1);
3833 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3834 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3835 };
3836
3837 // Don't fold i1 branches on PHIs which contain binary operators or
3838 // (possibly inverted) select form of or/ands, unless one of
3839 // the incoming values is an 'not' and another one is freely invertible.
3840 // These can often be turned into switches and other things.
3841 auto IsBinOpOrAnd = [](Value *V) {
3842 return match(
3844 };
3845 if (PN->getType()->isIntegerTy(1) &&
3846 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3847 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3848 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3849 PN->getIncomingValue(1)))
3850 return Changed;
3851
3852 // If all PHI nodes are promotable, check to make sure that all instructions
3853 // in the predecessor blocks can be promoted as well. If not, we won't be able
3854 // to get rid of the control flow, so it's not worth promoting to select
3855 // instructions.
3856 for (BasicBlock *IfBlock : IfBlocks)
3857 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3858 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3859 // This is not an aggressive instruction that we can promote.
3860 // Because of this, we won't be able to get rid of the control flow, so
3861 // the xform is not worth it.
3862 return Changed;
3863 }
3864
3865 // If either of the blocks has it's address taken, we can't do this fold.
3866 if (any_of(IfBlocks,
3867 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3868 return Changed;
3869
3870 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3871 if (IsUnpredictable) dbgs() << " (unpredictable)";
3872 dbgs() << " T: " << IfTrue->getName()
3873 << " F: " << IfFalse->getName() << "\n");
3874
3875 // If we can still promote the PHI nodes after this gauntlet of tests,
3876 // do all of the PHI's now.
3877
3878 // Move all 'aggressive' instructions, which are defined in the
3879 // conditional parts of the if's up to the dominating block.
3880 for (BasicBlock *IfBlock : IfBlocks)
3881 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3882
3883 IRBuilder<NoFolder> Builder(DomBI);
3884 // Propagate fast-math-flags from phi nodes to replacement selects.
3885 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3886 // Change the PHI node into a select instruction.
3887 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3888 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3889
3890 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3891 isa<FPMathOperator>(PN) ? PN : nullptr,
3892 "", DomBI);
3893 PN->replaceAllUsesWith(Sel);
3894 Sel->takeName(PN);
3895 PN->eraseFromParent();
3896 }
3897
3898 // At this point, all IfBlocks are empty, so our if statement
3899 // has been flattened. Change DomBlock to jump directly to our new block to
3900 // avoid other simplifycfg's kicking in on the diamond.
3901 Builder.CreateBr(BB);
3902
3904 if (DTU) {
3905 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3906 for (auto *Successor : successors(DomBlock))
3907 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3908 }
3909
3910 DomBI->eraseFromParent();
3911 if (DTU)
3912 DTU->applyUpdates(Updates);
3913
3914 return true;
3915}
3916
3918 Instruction::BinaryOps Opc, Value *LHS,
3919 Value *RHS, const Twine &Name = "") {
3920 // Try to relax logical op to binary op.
3921 if (impliesPoison(RHS, LHS))
3922 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3923 if (Opc == Instruction::And)
3924 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3925 if (Opc == Instruction::Or)
3926 return Builder.CreateLogicalOr(LHS, RHS, Name);
3927 llvm_unreachable("Invalid logical opcode");
3928}
3929
3930/// Return true if either PBI or BI has branch weight available, and store
3931/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3932/// not have branch weight, use 1:1 as its weight.
3934 uint64_t &PredTrueWeight,
3935 uint64_t &PredFalseWeight,
3936 uint64_t &SuccTrueWeight,
3937 uint64_t &SuccFalseWeight) {
3938 bool PredHasWeights =
3939 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3940 bool SuccHasWeights =
3941 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3942 if (PredHasWeights || SuccHasWeights) {
3943 if (!PredHasWeights)
3944 PredTrueWeight = PredFalseWeight = 1;
3945 if (!SuccHasWeights)
3946 SuccTrueWeight = SuccFalseWeight = 1;
3947 return true;
3948 } else {
3949 return false;
3950 }
3951}
3952
3953/// Determine if the two branches share a common destination and deduce a glue
3954/// that joins the branches' conditions to arrive at the common destination if
3955/// that would be profitable.
3956static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3958 const TargetTransformInfo *TTI) {
3959 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3960 "Both blocks must end with a conditional branches.");
3962 "PredBB must be a predecessor of BB.");
3963
3964 // We have the potential to fold the conditions together, but if the
3965 // predecessor branch is predictable, we may not want to merge them.
3966 uint64_t PTWeight, PFWeight;
3967 BranchProbability PBITrueProb, Likely;
3968 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3969 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3970 (PTWeight + PFWeight) != 0) {
3971 PBITrueProb =
3972 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3974 }
3975
3976 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3977 // Speculate the 2nd condition unless the 1st is probably true.
3978 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3979 return {{BI->getSuccessor(0), Instruction::Or, false}};
3980 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3981 // Speculate the 2nd condition unless the 1st is probably false.
3982 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3983 return {{BI->getSuccessor(1), Instruction::And, false}};
3984 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3985 // Speculate the 2nd condition unless the 1st is probably true.
3986 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3987 return {{BI->getSuccessor(1), Instruction::And, true}};
3988 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3989 // Speculate the 2nd condition unless the 1st is probably false.
3990 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3991 return {{BI->getSuccessor(0), Instruction::Or, true}};
3992 }
3993 return std::nullopt;
3994}
3995
3997 DomTreeUpdater *DTU,
3998 MemorySSAUpdater *MSSAU,
3999 const TargetTransformInfo *TTI) {
4000 BasicBlock *BB = BI->getParent();
4001 BasicBlock *PredBlock = PBI->getParent();
4002
4003 // Determine if the two branches share a common destination.
4004 BasicBlock *CommonSucc;
4006 bool InvertPredCond;
4007 std::tie(CommonSucc, Opc, InvertPredCond) =
4009
4010 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4011
4012 IRBuilder<> Builder(PBI);
4013 // The builder is used to create instructions to eliminate the branch in BB.
4014 // If BB's terminator has !annotation metadata, add it to the new
4015 // instructions.
4017 {LLVMContext::MD_annotation});
4018
4019 // If we need to invert the condition in the pred block to match, do so now.
4020 if (InvertPredCond) {
4021 InvertBranch(PBI, Builder);
4022 }
4023
4024 BasicBlock *UniqueSucc =
4025 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4026
4027 // Before cloning instructions, notify the successor basic block that it
4028 // is about to have a new predecessor. This will update PHI nodes,
4029 // which will allow us to update live-out uses of bonus instructions.
4030 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4031
4032 // Try to update branch weights.
4033 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4034 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4035 SuccTrueWeight, SuccFalseWeight)) {
4036 SmallVector<uint64_t, 8> NewWeights;
4037
4038 if (PBI->getSuccessor(0) == BB) {
4039 // PBI: br i1 %x, BB, FalseDest
4040 // BI: br i1 %y, UniqueSucc, FalseDest
4041 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4042 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4043 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4044 // TrueWeight for PBI * FalseWeight for BI.
4045 // We assume that total weights of a BranchInst can fit into 32 bits.
4046 // Therefore, we will not have overflow using 64-bit arithmetic.
4047 NewWeights.push_back(PredFalseWeight *
4048 (SuccFalseWeight + SuccTrueWeight) +
4049 PredTrueWeight * SuccFalseWeight);
4050 } else {
4051 // PBI: br i1 %x, TrueDest, BB
4052 // BI: br i1 %y, TrueDest, UniqueSucc
4053 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4054 // FalseWeight for PBI * TrueWeight for BI.
4055 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4056 PredFalseWeight * SuccTrueWeight);
4057 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4058 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4059 }
4060
4061 // Halve the weights if any of them cannot fit in an uint32_t
4062 fitWeights(NewWeights);
4063
4064 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
4065 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4066
4067 // TODO: If BB is reachable from all paths through PredBlock, then we
4068 // could replace PBI's branch probabilities with BI's.
4069 } else
4070 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4071
4072 // Now, update the CFG.
4073 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4074
4075 if (DTU)
4076 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4077 {DominatorTree::Delete, PredBlock, BB}});
4078
4079 // If BI was a loop latch, it may have had associated loop metadata.
4080 // We need to copy it to the new latch, that is, PBI.
4081 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4082 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4083
4084 ValueToValueMapTy VMap; // maps original values to cloned values
4086
4087 Module *M = BB->getModule();
4088
4089 if (PredBlock->IsNewDbgInfoFormat) {
4090 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4091 for (DbgVariableRecord &DVR :
4093 RemapDbgRecord(M, &DVR, VMap,
4095 }
4096 }
4097
4098 // Now that the Cond was cloned into the predecessor basic block,
4099 // or/and the two conditions together.
4100 Value *BICond = VMap[BI->getCondition()];
4101 PBI->setCondition(
4102 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4103
4104 ++NumFoldBranchToCommonDest;
4105 return true;
4106}
4107
4108/// Return if an instruction's type or any of its operands' types are a vector
4109/// type.
4110static bool isVectorOp(Instruction &I) {
4111 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4112 return U->getType()->isVectorTy();
4113 });
4114}
4115
4116/// If this basic block is simple enough, and if a predecessor branches to us
4117/// and one of our successors, fold the block into the predecessor and use
4118/// logical operations to pick the right destination.
4120 MemorySSAUpdater *MSSAU,
4121 const TargetTransformInfo *TTI,
4122 unsigned BonusInstThreshold) {
4123 // If this block ends with an unconditional branch,
4124 // let speculativelyExecuteBB() deal with it.
4125 if (!BI->isConditional())
4126 return false;
4127
4128 BasicBlock *BB = BI->getParent();
4132
4133 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4134
4135 if (!Cond ||
4136 (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
4137 !isa<SelectInst>(Cond)) ||
4138 Cond->getParent() != BB || !Cond->hasOneUse())
4139 return false;
4140
4141 // Finally, don't infinitely unroll conditional loops.
4142 if (is_contained(successors(BB), BB))
4143 return false;
4144
4145 // With which predecessors will we want to deal with?
4147 for (BasicBlock *PredBlock : predecessors(BB)) {
4148 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4149
4150 // Check that we have two conditional branches. If there is a PHI node in
4151 // the common successor, verify that the same value flows in from both
4152 // blocks.
4153 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4154 continue;
4155
4156 // Determine if the two branches share a common destination.
4157 BasicBlock *CommonSucc;
4159 bool InvertPredCond;
4160 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4161 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4162 else
4163 continue;
4164
4165 // Check the cost of inserting the necessary logic before performing the
4166 // transformation.
4167 if (TTI) {
4168 Type *Ty = BI->getCondition()->getType();
4170 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4171 !isa<CmpInst>(PBI->getCondition())))
4172 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4173
4175 continue;
4176 }
4177
4178 // Ok, we do want to deal with this predecessor. Record it.
4179 Preds.emplace_back(PredBlock);
4180 }
4181
4182 // If there aren't any predecessors into which we can fold,
4183 // don't bother checking the cost.
4184 if (Preds.empty())
4185 return false;
4186
4187 // Only allow this transformation if computing the condition doesn't involve
4188 // too many instructions and these involved instructions can be executed
4189 // unconditionally. We denote all involved instructions except the condition
4190 // as "bonus instructions", and only allow this transformation when the
4191 // number of the bonus instructions we'll need to create when cloning into
4192 // each predecessor does not exceed a certain threshold.
4193 unsigned NumBonusInsts = 0;
4194 bool SawVectorOp = false;
4195 const unsigned PredCount = Preds.size();
4196 for (Instruction &I : *BB) {
4197 // Don't check the branch condition comparison itself.
4198 if (&I == Cond)
4199 continue;
4200 // Ignore dbg intrinsics, and the terminator.
4201 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
4202 continue;
4203 // I must be safe to execute unconditionally.
4205 return false;
4206 SawVectorOp |= isVectorOp(I);
4207
4208 // Account for the cost of duplicating this instruction into each
4209 // predecessor. Ignore free instructions.
4210 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4212 NumBonusInsts += PredCount;
4213
4214 // Early exits once we reach the limit.
4215 if (NumBonusInsts >
4216 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4217 return false;
4218 }
4219
4220 auto IsBCSSAUse = [BB, &I](Use &U) {
4221 auto *UI = cast<Instruction>(U.getUser());
4222 if (auto *PN = dyn_cast<PHINode>(UI))
4223 return PN->getIncomingBlock(U) == BB;
4224 return UI->getParent() == BB && I.comesBefore(UI);
4225 };
4226
4227 // Does this instruction require rewriting of uses?
4228 if (!all_of(I.uses(), IsBCSSAUse))
4229 return false;
4230 }
4231 if (NumBonusInsts >
4232 BonusInstThreshold *
4233 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4234 return false;
4235
4236 // Ok, we have the budget. Perform the transformation.
4237 for (BasicBlock *PredBlock : Preds) {
4238 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4239 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4240 }
4241 return false;
4242}
4243
4244// If there is only one store in BB1 and BB2, return it, otherwise return
4245// nullptr.
4247 StoreInst *S = nullptr;
4248 for (auto *BB : {BB1, BB2}) {
4249 if (!BB)
4250 continue;
4251 for (auto &I : *BB)
4252 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4253 if (S)
4254 // Multiple stores seen.
4255 return nullptr;
4256 else
4257 S = SI;
4258 }
4259 }
4260 return S;
4261}
4262
4264 Value *AlternativeV = nullptr) {
4265 // PHI is going to be a PHI node that allows the value V that is defined in
4266 // BB to be referenced in BB's only successor.
4267 //
4268 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4269 // doesn't matter to us what the other operand is (it'll never get used). We
4270 // could just create a new PHI with an undef incoming value, but that could
4271 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4272 // other PHI. So here we directly look for some PHI in BB's successor with V
4273 // as an incoming operand. If we find one, we use it, else we create a new
4274 // one.
4275 //
4276 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4277 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4278 // where OtherBB is the single other predecessor of BB's only successor.
4279 PHINode *PHI = nullptr;
4280 BasicBlock *Succ = BB->getSingleSuccessor();
4281
4282 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4283 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4284 PHI = cast<PHINode>(I);
4285 if (!AlternativeV)
4286 break;
4287
4288 assert(Succ->hasNPredecessors(2));
4289 auto PredI = pred_begin(Succ);
4290 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4291 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4292 break;
4293 PHI = nullptr;
4294 }
4295 if (PHI)
4296 return PHI;
4297
4298 // If V is not an instruction defined in BB, just return it.
4299 if (!AlternativeV &&
4300 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4301 return V;
4302
4303 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4304 PHI->insertBefore(Succ->begin());
4305 PHI->addIncoming(V, BB);
4306 for (BasicBlock *PredBB : predecessors(Succ))
4307 if (PredBB != BB)
4308 PHI->addIncoming(
4309 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4310 return PHI;
4311}
4312
// mergeConditionalStoreToAddress: given the P diamond/triangle (PTB/PFB) and
// the Q diamond/triangle (QTB/QFB) feeding PostBB, sink the one store to
// `Address` found in each diamond down into PostBB as a single store guarded
// by the disjunction of the two branch conditions. Returns true on success.
// NOTE(review): the opening signature line ("static bool
// mergeConditionalStoreToAddress(") appears to have been dropped by this
// rendering -- confirm against the upstream file.
4314 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4315 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4316 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4317 // For every pointer, there must be exactly two stores, one coming from
4318 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4319 // store (to any address) in PTB,PFB or QTB,QFB.
4320 // FIXME: We could relax this restriction with a bit more work and performance
4321 // testing.
4322 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4323 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4324 if (!PStore || !QStore)
4325 return false;
4326
// Only simple (unordered, non-atomic/non-volatile per isUnordered) stores of
// the same value type can be merged into one speculated store.
4327 // Now check the stores are compatible.
4328 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4329 PStore->getValueOperand()->getType() !=
4330 QStore->getValueOperand()->getType())
4331 return false;
4332
4333 // Check that sinking the store won't cause program behavior changes. Sinking
4334 // the store out of the Q blocks won't change any behavior as we're sinking
4335 // from a block to its unconditional successor. But we're moving a store from
4336 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4337 // So we need to check that there are no aliasing loads or stores in
4338 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4339 // operations between PStore and the end of its parent block.
4340 //
4341 // The ideal way to do this is to query AliasAnalysis, but we don't
4342 // preserve AA currently so that is dangerous. Be super safe and just
4343 // check there are no other memory operations at all.
4344 for (auto &I : *QFB->getSinglePredecessor())
4345 if (I.mayReadOrWriteMemory())
4346 return false;
4347 for (auto &I : *QFB)
4348 if (&I != QStore && I.mayReadOrWriteMemory())
4349 return false;
4350 if (QTB)
4351 for (auto &I : *QTB)
4352 if (&I != QStore && I.mayReadOrWriteMemory())
4353 return false;
4354 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4355 I != E; ++I)
4356 if (&*I != PStore && I->mayReadOrWriteMemory())
4357 return false;
4358
4359 // If we're not in aggressive mode, we only optimize if we have some
4360 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4361 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
// A null block (modelled fallthrough edge) costs nothing to if-convert.
4362 if (!BB)
4363 return true;
4364 // Heuristic: if the block can be if-converted/phi-folded and the
4365 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4366 // thread this store.
// NOTE(review): lines appear elided here by this rendering. Upstream
// declares the running cost accumulator ("InstructionCost Cost = 0;")
// and completes the Budget initializer
// ("PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;") -- confirm.
4368 InstructionCost Budget =
4370 for (auto &I : BB->instructionsWithoutDebug(false)) {
4371 // Consider terminator instruction to be free.
4372 if (I.isTerminator())
4373 continue;
4374 // If this is one the stores that we want to speculate out of this BB,
4375 // then don't count it's cost, consider it to be free.
4376 if (auto *S = dyn_cast<StoreInst>(&I))
4377 if (llvm::find(FreeStores, S))
4378 continue;
4379 // Else, we have a white-list of instructions that we are ak speculating.
4380 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4381 return false; // Not in white-list - not worthwhile folding.
4382 // And finally, if this is a non-free instruction that we are okay
4383 // speculating, ensure that we consider the speculation budget.
// NOTE(review): the right-hand side of this accumulation (upstream: a
// TTI.getInstructionCost(...) query) appears elided by this rendering.
4384 Cost +=
4386 if (Cost > Budget)
4387 return false; // Eagerly refuse to fold as soon as we're out of budget.
4388 }
4389 assert(Cost <= Budget &&
4390 "When we run out of budget we will eagerly return from within the "
4391 "per-instruction loop.");
4392 return true;
4393 };
4394
// The two stores being sunk are not counted against the speculation budget.
4395 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
// NOTE(review): the condition's first line appears elided here (upstream
// guards with "if (!MergeCondStoresAggressively && ...") -- confirm.
4397 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4398 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4399 return false;
4400
4401 // If PostBB has more than two predecessors, we need to split it so we can
4402 // sink the store.
4403 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4404 // We know that QFB's only successor is PostBB. And QFB has a single
4405 // predecessor. If QTB exists, then its only successor is also PostBB.
4406 // If QTB does not exist, then QFB's only predecessor has a conditional
4407 // branch to QFB and PostBB.
4408 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4409 BasicBlock *NewBB =
4410 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4411 if (!NewBB)
4412 return false;
4413 PostBB = NewBB;
4414 }
4415
4416 // OK, we're going to sink the stores to PostBB. The store has to be
4417 // conditional though, so first create the predicate.
4418 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4419 ->getCondition();
4420 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4421 ->getCondition();
4422
// NOTE(review): lines appear elided here. Upstream creates PHI-threaded
// copies of the two stored values via ensureValueAvailableInSuccessor(),
// producing PPHI and QPHI used below -- confirm against the upstream file.
4424 PStore->getParent());
4426 QStore->getParent(), PPHI);
4427
4428 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4429 IRBuilder<> QB(PostBB, PostBBFirst);
4430 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4431
// Normalize each condition so that "true" means "the store executed":
// if the store lived in the false block, the condition must be negated.
4432 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4433 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4434
4435 if (InvertPCond)
4436 PPred = QB.CreateNot(PPred);
4437 if (InvertQCond)
4438 QPred = QB.CreateNot(QPred);
// The merged store must run if either original store would have run.
4439 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4440
4441 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4442 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4443 /*Unreachable=*/false,
4444 /*BranchWeights=*/nullptr, DTU);
4445
4446 QB.SetInsertPoint(T);
4447 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4448 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4449 // Choose the minimum alignment. If we could prove both stores execute, we
4450 // could use biggest one. In this case, though, we only know that one of the
4451 // stores executes. And we don't know it's safe to take the alignment from a
4452 // store that doesn't execute.
4453 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4454
4455 QStore->eraseFromParent();
4456 PStore->eraseFromParent();
4457
4458 return true;
4459}
4460
// mergeConditionalStores: driver for the conditional-store-merging transform.
// Pattern-matches two stacked diamonds/triangles (headed by PBI then QBI),
// collects store addresses common to both, and calls
// mergeConditionalStoreToAddress for each. Returns true if anything changed.
// NOTE(review): the opening signature line ("static bool
// mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,") appears to have
// been dropped by this rendering -- confirm against the upstream file.
4462 DomTreeUpdater *DTU, const DataLayout &DL,
4463 const TargetTransformInfo &TTI) {
4464 // The intention here is to find diamonds or triangles (see below) where each
4465 // conditional block contains a store to the same address. Both of these
4466 // stores are conditional, so they can't be unconditionally sunk. But it may
4467 // be profitable to speculatively sink the stores into one merged store at the
4468 // end, and predicate the merged store on the union of the two conditions of
4469 // PBI and QBI.
4470 //
4471 // This can reduce the number of stores executed if both of the conditions are
4472 // true, and can allow the blocks to become small enough to be if-converted.
4473 // This optimization will also chain, so that ladders of test-and-set
4474 // sequences can be if-converted away.
4475 //
4476 // We only deal with simple diamonds or triangles:
4477 //
4478 // PBI or PBI or a combination of the two
4479 // / \ | \
4480 // PTB PFB | PFB
4481 // \ / | /
4482 // QBI QBI
4483 // / \ | \
4484 // QTB QFB | QFB
4485 // \ / | /
4486 // PostBB PostBB
4487 //
4488 // We model triangles as a type of diamond with a nullptr "true" block.
4489 // Triangles are canonicalized so that the fallthrough edge is represented by
4490 // a true condition, as in the diagram above.
4491 BasicBlock *PTB = PBI->getSuccessor(0);
4492 BasicBlock *PFB = PBI->getSuccessor(1);
4493 BasicBlock *QTB = QBI->getSuccessor(0);
4494 BasicBlock *QFB = QBI->getSuccessor(1);
4495 BasicBlock *PostBB = QFB->getSingleSuccessor();
4496
4497 // Make sure we have a good guess for PostBB. If QTB's only successor is
4498 // QFB, then QFB is a better PostBB.
4499 if (QTB->getSingleSuccessor() == QFB)
4500 PostBB = QFB;
4501
4502 // If we couldn't find a good PostBB, stop.
4503 if (!PostBB)
4504 return false;
4505
4506 bool InvertPCond = false, InvertQCond = false;
4507 // Canonicalize fallthroughs to the true branches.
4508 if (PFB == QBI->getParent()) {
4509 std::swap(PFB, PTB);
4510 InvertPCond = true;
4511 }
4512 if (QFB == PostBB) {
4513 std::swap(QFB, QTB);
4514 InvertQCond = true;
4515 }
4516
4517 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4518 // and QFB may not. Model fallthroughs as a nullptr block.
4519 if (PTB == QBI->getParent())
4520 PTB = nullptr;
4521 if (QTB == PostBB)
4522 QTB = nullptr;
4523
4524 // Legality bailouts. We must have at least the non-fallthrough blocks and
4525 // the post-dominating block, and the non-fallthroughs must only have one
4526 // predecessor.
4527 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4528 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4529 };
4530 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4531 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4532 return false;
4533 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4534 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4535 return false;
// QBI's block must be reachable only through the P diamond (two uses:
// the two incoming edges).
4536 if (!QBI->getParent()->hasNUses(2))
4537 return false;
4538
4539 // OK, this is a sequence of two diamonds or triangles.
4540 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4541 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4542 for (auto *BB : {PTB, PFB}) {
4543 if (!BB)
4544 continue;
4545 for (auto &I : *BB)
4546 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4547 PStoreAddresses.insert(SI->getPointerOperand());
4548 }
4549 for (auto *BB : {QTB, QFB}) {
4550 if (!BB)
4551 continue;
4552 for (auto &I : *BB)
4553 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4554 QStoreAddresses.insert(SI->getPointerOperand());
4555 }
4556
4557 set_intersect(PStoreAddresses, QStoreAddresses);
4558 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4559 // clear what it contains.
4560 auto &CommonAddresses = PStoreAddresses;
4561
4562 bool Changed = false;
4563 for (auto *Address : CommonAddresses)
4564 Changed |=
4565 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4566 InvertPCond, InvertQCond, DTU, DL, TTI);
4567 return Changed;
4568}
4569
4570/// If the previous block ended with a widenable branch, determine if reusing
4571/// the target block is profitable and legal. This will have the effect of
4572/// "widening" PBI, but doesn't require us to reason about hosting safety.
// NOTE(review): the opening signature line ("static bool
// tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,") appears
// to have been dropped by this rendering -- confirm against the upstream file.
4574 DomTreeUpdater *DTU) {
4575 // TODO: This can be generalized in two important ways:
4576 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4577 // values from the PBI edge.
4578 // 2) We can sink side effecting instructions into BI's fallthrough
4579 // successor provided they doesn't contribute to computation of
4580 // BI's condition.
4581 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4582 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
// Only applies when PBI is a widenable branch whose taken edge leads
// directly (and exclusively) into BI's block.
4583 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4584 !BI->getParent()->getSinglePredecessor())
4585 return false;
4586 if (!IfFalseBB->phis().empty())
4587 return false; // TODO
4588 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4589 // may undo the transform done here.
4590 // TODO: There might be a more fine-grained solution to this.
4591 if (!llvm::succ_empty(IfFalseBB))
4592 return false;
4593 // Use lambda to lazily compute expensive condition after cheap ones.
4594 auto NoSideEffects = [](BasicBlock &BB) {
4595 return llvm::none_of(BB, [](const Instruction &I) {
4596 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4597 });
4598 };
// Case 1: retarget BI's false edge at the widenable branch's false block
// when that edge currently leads to a deoptimize exit.
4599 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4600 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4601 NoSideEffects(*BI->getParent())) {
4602 auto *OldSuccessor = BI->getSuccessor(1);
4603 OldSuccessor->removePredecessor(BI->getParent());
4604 BI->setSuccessor(1, IfFalseBB);
4605 if (DTU)
4606 DTU->applyUpdates(
4607 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4608 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4609 return true;
4610 }
// Case 2: mirror image of case 1 for BI's true edge.
4611 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4612 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4613 NoSideEffects(*BI->getParent())) {
4614 auto *OldSuccessor = BI->getSuccessor(0);
4615 OldSuccessor->removePredecessor(BI->getParent());
4616 BI->setSuccessor(0, IfFalseBB);
4617 if (DTU)
4618 DTU->applyUpdates(
4619 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4620 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4621 return true;
4622 }
4623 return false;
4624}
4625
4626/// If we have a conditional branch as a predecessor of another block,
4627/// this function tries to simplify it. We know
4628/// that PBI and BI are both conditional branches, and BI is in one of the
4629/// successor blocks of PBI - PBI branches to BI.
// NOTE(review): the opening signature line ("static bool
// SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,") appears
// to have been dropped by this rendering -- confirm against the upstream file.
4631 DomTreeUpdater *DTU,
4632 const DataLayout &DL,
4633 const TargetTransformInfo &TTI) {
4634 assert(PBI->isConditional() && BI->isConditional());
4635 BasicBlock *BB = BI->getParent();
4636
4637 // If this block ends with a branch instruction, and if there is a
4638 // predecessor that ends on a branch of the same condition, make
4639 // this conditional branch redundant.
4640 if (PBI->getCondition() == BI->getCondition() &&
4641 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4642 // Okay, the outcome of this conditional branch is statically
4643 // knowable. If this block had a single pred, handle specially, otherwise
4644 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4645 if (BB->getSinglePredecessor()) {
4646 // Turn this into a branch on constant.
4647 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4648 BI->setCondition(
4649 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4650 return true; // Nuke the branch on constant.
4651 }
4652 }
4653
4654 // If the previous block ended with a widenable branch, determine if reusing
4655 // the target block is profitable and legal. This will have the effect of
4656 // "widening" PBI, but doesn't require us to reason about hosting safety.
4657 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4658 return true;
4659
4660 // If both branches are conditional and both contain stores to the same
4661 // address, remove the stores from the conditionals and create a conditional
4662 // merged store at the end.
4663 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4664 return true;
4665
4666 // If this is a conditional branch in an empty block, and if any
4667 // predecessors are a conditional branch to one of our destinations,
4668 // fold the conditions into logical ops and one cond br.
4669
4670 // Ignore dbg intrinsics.
4671 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4672 return false;
4673
// Work out which successor PBI and BI have in common. PBIOp/BIOp index the
// shared destination in each branch's successor list; bail if none match.
4674 int PBIOp, BIOp;
4675 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4676 PBIOp = 0;
4677 BIOp = 0;
4678 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4679 PBIOp = 0;
4680 BIOp = 1;
4681 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4682 PBIOp = 1;
4683 BIOp = 0;
4684 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4685 PBIOp = 1;
4686 BIOp = 1;
4687 } else {
4688 return false;
4689 }
4690
4691 // Check to make sure that the other destination of this branch
4692 // isn't BB itself. If so, this is an infinite loop that will
4693 // keep getting unwound.
4694 if (PBI->getSuccessor(PBIOp) == BB)
4695 return false;
4696
4697 // If predecessor's branch probability to BB is too low don't merge branches.
4698 SmallVector<uint32_t, 2> PredWeights;
4699 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4700 extractBranchWeights(*PBI, PredWeights) &&
4701 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4702
// NOTE(review): a line appears elided here. Upstream computes
// "BranchProbability CommonDestProb = BranchProbability::getBranchProbability("
// from the two weights below -- confirm against the upstream file.
4704 PredWeights[PBIOp],
4705 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4706
// NOTE(review): a line appears elided here. Upstream fetches the
// "Likely" threshold via TTI.getPredictableBranchThreshold() -- confirm.
4708 if (CommonDestProb >= Likely)
4709 return false;
4710 }
4711
4712 // Do not perform this transformation if it would require
4713 // insertion of a large number of select instructions. For targets
4714 // without predication/cmovs, this is a big pessimization.
4715
4716 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4717 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4718 unsigned NumPhis = 0;
4719 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4720 ++II, ++NumPhis) {
4721 if (NumPhis > 2) // Disable this xform.
4722 return false;
4723 }
4724
4725 // Finally, if everything is ok, fold the branches to logical ops.
4726 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4727
4728 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4729 << "AND: " << *BI->getParent());
4730
// NOTE(review): a line appears elided here. Upstream declares the DTU
// update list ("SmallVector<DominatorTree::UpdateType, 5> Updates;") that
// is populated below -- confirm against the upstream file.
4732
4733 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4734 // branch in it, where one edge (OtherDest) goes back to itself but the other
4735 // exits. We don't *know* that the program avoids the infinite loop
4736 // (even though that seems likely). If we do this xform naively, we'll end up
4737 // recursively unpeeling the loop. Since we know that (after the xform is
4738 // done) that the block *is* infinite if reached, we just make it an obviously
4739 // infinite loop with no cond branch.
4740 if (OtherDest == BB) {
4741 // Insert it at the end of the function, because it's either code,
4742 // or it won't matter if it's hot. :)
4743 BasicBlock *InfLoopBlock =
4744 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4745 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4746 if (DTU)
4747 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4748 OtherDest = InfLoopBlock;
4749 }
4750
4751 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4752
4753 // BI may have other predecessors. Because of this, we leave
4754 // it alone, but modify PBI.
4755
4756 // Make sure we get to CommonDest on True&True directions.
4757 Value *PBICond = PBI->getCondition();
4758 IRBuilder<NoFolder> Builder(PBI);
4759 if (PBIOp)
4760 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4761
4762 Value *BICond = BI->getCondition();
4763 if (BIOp)
4764 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4765
4766 // Merge the conditions.
4767 Value *Cond =
4768 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4769
4770 // Modify PBI to branch on the new condition to the new dests.
4771 PBI->setCondition(Cond);
4772 PBI->setSuccessor(0, CommonDest);
4773 PBI->setSuccessor(1, OtherDest);
4774
4775 if (DTU) {
4776 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4777 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4778
4779 DTU->applyUpdates(Updates);
4780 }
4781
4782 // Update branch weight for PBI.
4783 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4784 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4785 bool HasWeights =
4786 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4787 SuccTrueWeight, SuccFalseWeight);
4788 if (HasWeights) {
4789 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4790 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4791 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4792 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4793 // The weight to CommonDest should be PredCommon * SuccTotal +
4794 // PredOther * SuccCommon.
4795 // The weight to OtherDest should be PredOther * SuccOther.
4796 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4797 PredOther * SuccCommon,
4798 PredOther * SuccOther};
4799 // Halve the weights if any of them cannot fit in an uint32_t
4800 fitWeights(NewWeights);
4801
4802 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4803 }
4804
4805 // OtherDest may have phi nodes. If so, add an entry from PBI's
4806 // block that are identical to the entries for BI's block.
4807 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4808
4809 // We know that the CommonDest already had an edge from PBI to
4810 // it. If it has PHIs though, the PHIs may have different
4811 // entries for BB and PBI's BB. If so, insert a select to make
4812 // them agree.
4813 for (PHINode &PN : CommonDest->phis()) {
4814 Value *BIV = PN.getIncomingValueForBlock(BB);
4815 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4816 Value *PBIV = PN.getIncomingValue(PBBIdx);
4817 if (BIV != PBIV) {
4818 // Insert a select in PBI to pick the right value.
4819 SelectInst *NV = cast<SelectInst>(
4820 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4821 PN.setIncomingValue(PBBIdx, NV);
4822 // Although the select has the same condition as PBI, the original branch
4823 // weights for PBI do not apply to the new select because the select's
4824 // 'logical' edges are incoming edges of the phi that is eliminated, not
4825 // the outgoing edges of PBI.
4826 if (HasWeights) {
4827 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4828 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4829 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4830 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4831 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4832 // The weight to PredOtherDest should be PredOther * SuccCommon.
4833 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4834 PredOther * SuccCommon};
4835
4836 fitWeights(NewWeights);
4837
4838 setBranchWeights(NV, NewWeights[0], NewWeights[1],
4839 /*IsExpected=*/false);
4840 }
4841 }
4842 }
4843
4844 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4845 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4846
4847 // This basic block is probably dead. We know it has at least
4848 // one fewer predecessor.
4849 return true;
4850}
4851
4852// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4853// true or to FalseBB if Cond is false.
4854// Takes care of updating the successors and removing the old terminator.
4855// Also makes sure not to introduce new successors by assuming that edges to
4856// non-successor TrueBBs and FalseBBs aren't reachable.
4857bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4858 Value *Cond, BasicBlock *TrueBB,
4859 BasicBlock *FalseBB,
4860 uint32_t TrueWeight,
4861 uint32_t FalseWeight) {
4862 auto *BB = OldTerm->getParent();
4863 // Remove any superfluous successor edges from the CFG.
4864 // First, figure out which successors to preserve.
4865 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4866 // successor.
4867 BasicBlock *KeepEdge1 = TrueBB;
4868 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4869
4870 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4871
4872 // Then remove the rest.
4873 for (BasicBlock *Succ : successors(OldTerm)) {
4874 // Make sure only to keep exactly one copy of each edge.
4875 if (Succ == KeepEdge1)
4876 KeepEdge1 = nullptr;
4877 else if (Succ == KeepEdge2)
4878 KeepEdge2 = nullptr;
4879 else {
4880 Succ->removePredecessor(BB,
4881 /*KeepOneInputPHIs=*/true);
4882
4883 if (Succ != TrueBB && Succ != FalseBB)
4884 RemovedSuccessors.insert(Succ);
4885 }
4886 }
4887
4888 IRBuilder<> Builder(OldTerm);
4889 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4890
// After the loop, KeepEdge1/KeepEdge2 are null iff the corresponding target
// was already a successor of OldTerm; branch shape is chosen accordingly.
4891 // Insert an appropriate new terminator.
4892 if (!KeepEdge1 && !KeepEdge2) {
4893 if (TrueBB == FalseBB) {
4894 // We were only looking for one successor, and it was present.
4895 // Create an unconditional branch to it.
4896 Builder.CreateBr(TrueBB);
4897 } else {
4898 // We found both of the successors we were looking for.
4899 // Create a conditional branch sharing the condition of the select.
4900 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4901 if (TrueWeight != FalseWeight)
4902 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4903 }
4904 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4905 // Neither of the selected blocks were successors, so this
4906 // terminator must be unreachable.
4907 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4908 } else {
4909 // One of the selected values was a successor, but the other wasn't.
4910 // Insert an unconditional branch to the one that was found;
4911 // the edge to the one that wasn't must be unreachable.
4912 if (!KeepEdge1) {
4913 // Only TrueBB was found.
4914 Builder.CreateBr(TrueBB);
4915 } else {
4916 // Only FalseBB was found.
4917 Builder.CreateBr(FalseBB);
4918 }
4919 }
4920
// NOTE(review): a line appears elided here. Upstream removes the replaced
// terminator ("OldTerm->eraseFromParent();") at this point -- confirm
// against the upstream file.
4922
4923 if (DTU) {
// NOTE(review): the declaration of `Updates` (upstream:
// "SmallVector<DominatorTree::UpdateType, 2> Updates;") appears elided here.
4925 Updates.reserve(RemovedSuccessors.size());
4926 for (auto *RemovedSuccessor : RemovedSuccessors)
4927 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4928 DTU->applyUpdates(Updates);
4929 }
4930
4931 return true;
4932}
4933
4934// Replaces
4935// (switch (select cond, X, Y)) on constant X, Y
4936// with a branch - conditional if X and Y lead to distinct BBs,
4937// unconditional otherwise.
4938bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4939 SelectInst *Select) {
4940 // Check for constant integer values in the select.
4941 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4942 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4943 if (!TrueVal || !FalseVal)
4944 return false;
4945
4946 // Find the relevant condition and destinations.
// findCaseValue falls back to the default destination when the constant is
// not an explicit case, so both lookups always yield a valid successor.
4947 Value *Condition = Select->getCondition();
4948 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4949 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4950
4951 // Get weight for TrueBB and FalseBB.
4952 uint32_t TrueWeight = 0, FalseWeight = 0;
// NOTE(review): the declaration of `Weights` (upstream:
// "SmallVector<uint64_t, 8> Weights;") appears elided by this rendering.
4954 bool HasWeights = hasBranchWeightMD(*SI);
4955 if (HasWeights) {
4956 getBranchWeights(SI, Weights);
// Weights are valid only if there is one entry per successor (default + cases).
4957 if (Weights.size() == 1 + SI->getNumCases()) {
4958 TrueWeight =
4959 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4960 FalseWeight =
4961 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4962 }
4963 }
4964
4965 // Perform the actual simplification.
4966 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4967 FalseWeight);
4968}
4969
4970// Replaces
4971// (indirectbr (select cond, blockaddress(@fn, BlockA),
4972// blockaddress(@fn, BlockB)))
4973// with
4974// (br cond, BlockA, BlockB).
4975bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4976 SelectInst *SI) {
4977 // Check that both operands of the select are block addresses.
4978 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4979 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4980 if (!TBA || !FBA)
4981 return false;
4982
4983 // Extract the actual blocks.
4984 BasicBlock *TrueBB = TBA->getBasicBlock();
4985 BasicBlock *FalseBB = FBA->getBasicBlock();
4986
4987 // Perform the actual simplification.
4988 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4989 0);
4990}
4991
4992/// This is called when we find an icmp instruction
4993/// (a seteq/setne with a constant) as the only instruction in a
4994/// block that ends with an uncond branch. We are looking for a very specific
4995/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4996/// this case, we merge the first two "or's of icmp" into a switch, but then the
4997/// default value goes to an uncond block with a seteq in it, we get something
4998/// like:
4999///
5000/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5001/// DEFAULT:
5002/// %tmp = icmp eq i8 %A, 92
5003/// br label %end
5004/// end:
5005/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5006///
5007/// We prefer to split the edge to 'end' so that there is a true/false entry to
5008/// the PHI, merging the third icmp into the switch.
5009bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5010 ICmpInst *ICI, IRBuilder<> &Builder) {
5011 BasicBlock *BB = ICI->getParent();
5012
5013 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5014 // complex.
5015 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5016 return false;
5017
5018 Value *V = ICI->getOperand(0);
5019 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5020
5021 // The pattern we're looking for is where our only predecessor is a switch on
5022 // 'V' and this block is the default case for the switch. In this case we can
5023 // fold the compared value into the switch to simplify things.
5024 BasicBlock *Pred = BB->getSinglePredecessor();
5025 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5026 return false;
5027
5028 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5029 if (SI->getCondition() != V)
5030 return false;
5031
5032 // If BB is reachable on a non-default case, then we simply know the value of
5033 // V in this block. Substitute it and constant fold the icmp instruction
5034 // away.
5035 if (SI->getDefaultDest() != BB) {
5036 ConstantInt *VVal = SI->findCaseDest(BB);
5037 assert(VVal && "Should have a unique destination value");
5038 ICI->setOperand(0, VVal);
5039
5040 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5041 ICI->replaceAllUsesWith(V);
5042 ICI->eraseFromParent();
5043 }
5044 // BB is now empty, so it is likely to simplify away.
5045 return requestResimplify();
5046 }
5047
5048 // Ok, the block is reachable from the default dest. If the constant we're
5049 // comparing exists in one of the other edges, then we can constant fold ICI
5050 // and zap it.
5051 if (SI->findCaseValue(Cst) != SI->case_default()) {
5052 Value *V;
5053 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
// NOTE(review): two assignment lines appear elided here by this rendering.
// On the default edge, Cst can never equal V, so upstream sets V to
// ConstantInt::getFalse(...) for EQ and ConstantInt::getTrue(...) for NE
// -- confirm against the upstream file.
5055 else
5057
5058 ICI->replaceAllUsesWith(V);
5059 ICI->eraseFromParent();
5060 // BB is now empty, so it is likely to simplify away.
5061 return requestResimplify();
5062 }
5063
5064 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5065 // the block.
5066 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5067 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5068 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5069 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
5070 return false;
5071
5072 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5073 // true in the PHI.
5074 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5075 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5076
5077 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5078 std::swap(DefaultCst, NewCst);
5079
5080 // Replace ICI (which is used by the PHI for the default value) with true or
5081 // false depending on if it is EQ or NE.
5082 ICI->replaceAllUsesWith(DefaultCst);
5083 ICI->eraseFromParent();
5084
// NOTE(review): the declaration of `Updates` (upstream:
// "SmallVector<DominatorTree::UpdateType, 2> Updates;") appears elided here.
5086
5087 // Okay, the switch goes to this block on a default value. Add an edge from
5088 // the switch to the merge point on the compared value.
5089 BasicBlock *NewBB =
5090 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5091 {
// NOTE(review): two declarations appear elided in this scope. Upstream
// wraps the switch in a profile-updating helper
// ("SwitchInstProfUpdateWrapper SIW(*SI);") and declares the new case's
// weight ("std::optional<uint32_t> NewW;") -- confirm against upstream.
5093 auto W0 = SIW.getSuccessorWeight(0);
5095 if (W0) {
// Split the default edge's weight in half (rounded up) between the
// remaining default edge and the newly added case.
5096 NewW = ((uint64_t(*W0) + 1) >> 1);
5097 SIW.setSuccessorWeight(0, *NewW);
5098 }
5099 SIW.addCase(Cst, NewBB, NewW);
5100 if (DTU)
5101 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5102 }
5103
5104 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5105 Builder.SetInsertPoint(NewBB);
5106 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5107 Builder.CreateBr(SuccBlock);
5108 PHIUse->addIncoming(NewCst, NewBB);
5109 if (DTU) {
5110 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5111 DTU->applyUpdates(Updates);
5112 }
5113 return true;
5114}
5115
5116/// The specified branch is a conditional branch.
5117/// Check to see if it is branching on an or/and chain of icmp instructions, and
5118/// fold it into a switch instruction if so.
5119bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5120 IRBuilder<> &Builder,
5121 const DataLayout &DL) {
5122 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
5123 if (!Cond)
5124 return false;
5125
5126 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5127 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5128 // 'setne's and'ed together, collect them.
5129
5130 // Try to gather values from a chain of and/or to be turned into a switch
5131 ConstantComparesGatherer ConstantCompare(Cond, DL);
5132 // Unpack the result
5133 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5134 Value *CompVal = ConstantCompare.CompValue;
5135 unsigned UsedICmps = ConstantCompare.UsedICmps;
5136 Value *ExtraCase = ConstantCompare.Extra;
5137
5138 // If we didn't have a multiply compared value, fail.
5139 if (!CompVal)
5140 return false;
5141
5142 // Avoid turning single icmps into a switch.
5143 if (UsedICmps <= 1)
5144 return false;
5145
5146 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
5147
5148 // There might be duplicate constants in the list, which the switch
5149 // instruction can't handle, remove them now.
5150 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5151 Values.erase(llvm::unique(Values), Values.end());
5152
5153 // If Extra was used, we require at least two switch values to do the
5154 // transformation. A switch with one value is just a conditional branch.
5155 if (ExtraCase && Values.size() < 2)
5156 return false;
5157
5158 // TODO: Preserve branch weight metadata, similarly to how
5159 // foldValueComparisonIntoPredecessors preserves it.
5160
5161 // Figure out which block is which destination.
5162 BasicBlock *DefaultBB = BI->getSuccessor(1);
5163 BasicBlock *EdgeBB = BI->getSuccessor(0);
5164 if (!TrueWhenEqual)
5165 std::swap(DefaultBB, EdgeBB);
5166
5167 BasicBlock *BB = BI->getParent();
5168
5169 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5170 << " cases into SWITCH. BB is:\n"
5171 << *BB);
5172
5174
5175 // If there are any extra values that couldn't be folded into the switch
5176 // then we evaluate them with an explicit branch first. Split the block
5177 // right before the condbr to handle it.
5178 if (ExtraCase) {
5179 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5180 /*MSSAU=*/nullptr, "switch.early.test");
5181
5182 // Remove the uncond branch added to the old block.
5183 Instruction *OldTI = BB->getTerminator();
5184 Builder.SetInsertPoint(OldTI);
5185
5186 // There can be an unintended UB if extra values are Poison. Before the
5187 // transformation, extra values may not be evaluated according to the
5188 // condition, and it will not raise UB. But after transformation, we are
5189 // evaluating extra values before checking the condition, and it will raise
5190 // UB. It can be solved by adding freeze instruction to extra values.
5191 AssumptionCache *AC = Options.AC;
5192
5193 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5194 ExtraCase = Builder.CreateFreeze(ExtraCase);
5195
5196 if (TrueWhenEqual)
5197 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
5198 else
5199 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5200
5201 OldTI->eraseFromParent();
5202
5203 if (DTU)
5204 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5205
5206 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5207 // for the edge we just added.
5208 addPredecessorToBlock(EdgeBB, BB, NewBB);
5209
5210 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5211 << "\nEXTRABB = " << *BB);
5212 BB = NewBB;
5213 }
5214
5215 Builder.SetInsertPoint(BI);
5216 // Convert pointer to int before we switch.
5217 if (CompVal->getType()->isPointerTy()) {
5218 CompVal = Builder.CreatePtrToInt(
5219 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5220 }
5221
5222 // Create the new switch instruction now.
5223 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5224
5225 // Add all of the 'cases' to the switch instruction.
5226 for (unsigned i = 0, e = Values.size(); i != e; ++i)
5227 New->addCase(Values[i], EdgeBB);
5228
5229 // We added edges from PI to the EdgeBB. As such, if there were any
5230 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5231 // the number of edges added.
5232 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5233 PHINode *PN = cast<PHINode>(BBI);
5234 Value *InVal = PN->getIncomingValueForBlock(BB);
5235 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5236 PN->addIncoming(InVal, BB);
5237 }
5238
5239 // Erase the old branch instruction.
5241 if (DTU)
5242 DTU->applyUpdates(Updates);
5243
5244 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5245 return true;
5246}
5247
5248bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5249 if (isa<PHINode>(RI->getValue()))
5250 return simplifyCommonResume(RI);
5251 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5252 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5253 // The resume must unwind the exception that caused control to branch here.
5254 return simplifySingleResume(RI);
5255
5256 return false;
5257}
5258
5259// Check if cleanup block is empty
5261 for (Instruction &I : R) {
5262 auto *II = dyn_cast<IntrinsicInst>(&I);
5263 if (!II)
5264 return false;
5265
5266 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5267 switch (IntrinsicID) {
5268 case Intrinsic::dbg_declare:
5269 case Intrinsic::dbg_value:
5270 case Intrinsic::dbg_label:
5271 case Intrinsic::lifetime_end:
5272 break;
5273 default:
5274 return false;
5275 }
5276 }
5277 return true;
5278}
5279
5280// Simplify resume that is shared by several landing pads (phi of landing pad).
5281bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5282 BasicBlock *BB = RI->getParent();
5283
5284 // Check that there are no other instructions except for debug and lifetime
5285 // intrinsics between the phi's and resume instruction.
5286 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5287 BB->getTerminator()->getIterator())))
5288 return false;
5289
5290 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5291 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5292
5293 // Check incoming blocks to see if any of them are trivial.
5294 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5295 Idx++) {
5296 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5297 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5298
5299 // If the block has other successors, we can not delete it because
5300 // it has other dependents.
5301 if (IncomingBB->getUniqueSuccessor() != BB)
5302 continue;
5303
5304 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5305 // Not the landing pad that caused the control to branch here.
5306 if (IncomingValue != LandingPad)
5307 continue;
5308
5310 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5311 TrivialUnwindBlocks.insert(IncomingBB);
5312 }
5313
5314 // If no trivial unwind blocks, don't do any simplifications.
5315 if (TrivialUnwindBlocks.empty())
5316 return false;
5317
5318 // Turn all invokes that unwind here into calls.
5319 for (auto *TrivialBB : TrivialUnwindBlocks) {
5320 // Blocks that will be simplified should be removed from the phi node.
5321 // Note there could be multiple edges to the resume block, and we need
5322 // to remove them all.
5323 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5324 BB->removePredecessor(TrivialBB, true);
5325
5326 for (BasicBlock *Pred :
5328 removeUnwindEdge(Pred, DTU);
5329 ++NumInvokes;
5330 }
5331
5332 // In each SimplifyCFG run, only the current processed block can be erased.
5333 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5334 // of erasing TrivialBB, we only remove the branch to the common resume
5335 // block so that we can later erase the resume block since it has no
5336 // predecessors.
5337 TrivialBB->getTerminator()->eraseFromParent();
5338 new UnreachableInst(RI->getContext(), TrivialBB);
5339 if (DTU)
5340 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5341 }
5342
5343 // Delete the resume block if all its predecessors have been removed.
5344 if (pred_empty(BB))
5345 DeleteDeadBlock(BB, DTU);
5346
5347 return !TrivialUnwindBlocks.empty();
5348}
5349
5350// Simplify resume that is only used by a single (non-phi) landing pad.
5351bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5352 BasicBlock *BB = RI->getParent();
5353 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5354 assert(RI->getValue() == LPInst &&
5355 "Resume must unwind the exception that caused control to here");
5356
5357 // Check that there are no other instructions except for debug intrinsics.
5359 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5360 return false;
5361
5362 // Turn all invokes that unwind here into calls and delete the basic block.
5364 removeUnwindEdge(Pred, DTU);
5365 ++NumInvokes;
5366 }
5367
5368 // The landingpad is now unreachable. Zap it.
5369 DeleteDeadBlock(BB, DTU);
5370 return true;
5371}
5372
5374 // If this is a trivial cleanup pad that executes no instructions, it can be
5375 // eliminated. If the cleanup pad continues to the caller, any predecessor
5376 // that is an EH pad will be updated to continue to the caller and any
5377 // predecessor that terminates with an invoke instruction will have its invoke
5378 // instruction converted to a call instruction. If the cleanup pad being
5379 // simplified does not continue to the caller, each predecessor will be
5380 // updated to continue to the unwind destination of the cleanup pad being
5381 // simplified.
5382 BasicBlock *BB = RI->getParent();
5383 CleanupPadInst *CPInst = RI->getCleanupPad();
5384 if (CPInst->getParent() != BB)
5385 // This isn't an empty cleanup.
5386 return false;
5387
5388 // We cannot kill the pad if it has multiple uses. This typically arises
5389 // from unreachable basic blocks.
5390 if (!CPInst->hasOneUse())
5391 return false;
5392
5393 // Check that there are no other instructions except for benign intrinsics.
5395 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5396 return false;
5397
5398 // If the cleanup return we are simplifying unwinds to the caller, this will
5399 // set UnwindDest to nullptr.
5400 BasicBlock *UnwindDest = RI->getUnwindDest();
5401
5402 // We're about to remove BB from the control flow. Before we do, sink any
5403 // PHINodes into the unwind destination. Doing this before changing the
5404 // control flow avoids some potentially slow checks, since we can currently
5405 // be certain that UnwindDest and BB have no common predecessors (since they
5406 // are both EH pads).
5407 if (UnwindDest) {
5408 // First, go through the PHI nodes in UnwindDest and update any nodes that
5409 // reference the block we are removing
5410 for (PHINode &DestPN : UnwindDest->phis()) {
5411 int Idx = DestPN.getBasicBlockIndex(BB);
5412 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5413 assert(Idx != -1);
5414 // This PHI node has an incoming value that corresponds to a control
5415 // path through the cleanup pad we are removing. If the incoming
5416 // value is in the cleanup pad, it must be a PHINode (because we
5417 // verified above that the block is otherwise empty). Otherwise, the
5418 // value is either a constant or a value that dominates the cleanup
5419 // pad being removed.
5420 //
5421 // Because BB and UnwindDest are both EH pads, all of their
5422 // predecessors must unwind to these blocks, and since no instruction
5423 // can have multiple unwind destinations, there will be no overlap in
5424 // incoming blocks between SrcPN and DestPN.
5425 Value *SrcVal = DestPN.getIncomingValue(Idx);
5426 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5427
5428 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5429 for (auto *Pred : predecessors(BB)) {
5430 Value *Incoming =
5431 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5432 DestPN.addIncoming(Incoming, Pred);
5433 }
5434 }
5435
5436 // Sink any remaining PHI nodes directly into UnwindDest.
5437 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5438 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5439 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5440 // If the PHI node has no uses or all of its uses are in this basic
5441 // block (meaning they are debug or lifetime intrinsics), just leave
5442 // it. It will be erased when we erase BB below.
5443 continue;
5444
5445 // Otherwise, sink this PHI node into UnwindDest.
5446 // Any predecessors to UnwindDest which are not already represented
5447 // must be back edges which inherit the value from the path through
5448 // BB. In this case, the PHI value must reference itself.
5449 for (auto *pred : predecessors(UnwindDest))
5450 if (pred != BB)
5451 PN.addIncoming(&PN, pred);
5452 PN.moveBefore(InsertPt);
5453 // Also, add a dummy incoming value for the original BB itself,
5454 // so that the PHI is well-formed until we drop said predecessor.
5455 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5456 }
5457 }
5458
5459 std::vector<DominatorTree::UpdateType> Updates;
5460
5461 // We use make_early_inc_range here because we will remove all predecessors.
5463 if (UnwindDest == nullptr) {
5464 if (DTU) {
5465 DTU->applyUpdates(Updates);
5466 Updates.clear();
5467 }
5468 removeUnwindEdge(PredBB, DTU);
5469 ++NumInvokes;
5470 } else {
5471 BB->removePredecessor(PredBB);
5472 Instruction *TI = PredBB->getTerminator();
5473 TI->replaceUsesOfWith(BB, UnwindDest);
5474 if (DTU) {
5475 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5476 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5477 }
5478 }
5479 }
5480
5481 if (DTU)
5482 DTU->applyUpdates(Updates);
5483
5484 DeleteDeadBlock(BB, DTU);
5485
5486 return true;
5487}
5488
5489// Try to merge two cleanuppads together.
5491 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5492 // with.
5493 BasicBlock *UnwindDest = RI->getUnwindDest();
5494 if (!UnwindDest)
5495 return false;
5496
5497 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5498 // be safe to merge without code duplication.
5499 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5500 return false;
5501
5502 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5503 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5504 if (!SuccessorCleanupPad)
5505 return false;
5506
5507 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5508 // Replace any uses of the successor cleanupad with the predecessor pad
5509 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5510 // funclet bundle operands.
5511 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5512 // Remove the old cleanuppad.
5513 SuccessorCleanupPad->eraseFromParent();
5514 // Now, we simply replace the cleanupret with a branch to the unwind
5515 // destination.
5516 BranchInst::Create(UnwindDest, RI->getParent());
5517 RI->eraseFromParent();
5518
5519 return true;
5520}
5521
5522bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5523 // It is possible to transiantly have an undef cleanuppad operand because we
5524 // have deleted some, but not all, dead blocks.
5525 // Eventually, this block will be deleted.
5526 if (isa<UndefValue>(RI->getOperand(0)))
5527 return false;
5528
5529 if (mergeCleanupPad(RI))
5530 return true;
5531
5532 if (removeEmptyCleanup(RI, DTU))
5533 return true;
5534
5535 return false;
5536}
5537
5538// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5539bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5540 BasicBlock *BB = UI->getParent();
5541
5542 bool Changed = false;
5543
5544 // Ensure that any debug-info records that used to occur after the Unreachable
5545 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5546 // the block.
5548
5549 // Debug-info records on the unreachable inst itself should be deleted, as
5550 // below we delete everything past the final executable instruction.
5551 UI->dropDbgRecords();
5552
5553 // If there are any instructions immediately before the unreachable that can
5554 // be removed, do so.
5555 while (UI->getIterator() != BB->begin()) {
5557 --BBI;
5558
5560 break; // Can not drop any more instructions. We're done here.
5561 // Otherwise, this instruction can be freely erased,
5562 // even if it is not side-effect free.
5563
5564 // Note that deleting EH's here is in fact okay, although it involves a bit
5565 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5566 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5567 // and we can therefore guarantee this block will be erased.
5568
5569 // If we're deleting this, we're deleting any subsequent debug info, so
5570 // delete DbgRecords.
5571 BBI->dropDbgRecords();
5572
5573 // Delete this instruction (any uses are guaranteed to be dead)
5574 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5575 BBI->eraseFromParent();
5576 Changed = true;
5577 }
5578
5579 // If the unreachable instruction is the first in the block, take a gander
5580 // at all of the predecessors of this instruction, and simplify them.
5581 if (&BB->front() != UI)
5582 return Changed;
5583
5584 std::vector<DominatorTree::UpdateType> Updates;
5585
5587 for (BasicBlock *Predecessor : Preds) {
5588 Instruction *TI = Predecessor->getTerminator();
5589 IRBuilder<> Builder(TI);
5590 if (auto *BI = dyn_cast<BranchInst>(TI)) {
5591 // We could either have a proper unconditional branch,
5592 // or a degenerate conditional branch with matching destinations.
5593 if (all_of(BI->successors(),
5594 [BB](auto *Successor) { return Successor == BB; })) {
5595 new UnreachableInst(TI->getContext(), TI->getIterator());
5596 TI->eraseFromParent();
5597 Changed = true;
5598 } else {
5599 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5600 Value* Cond = BI->getCondition();
5601 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5602 "The destinations are guaranteed to be different here.");
5603 CallInst *Assumption;
5604 if (BI->getSuccessor(0) == BB) {
5605 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5606 Builder.CreateBr(BI->getSuccessor(1));
5607 } else {
5608 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5609 Assumption = Builder.CreateAssumption(Cond);
5610 Builder.CreateBr(BI->getSuccessor(0));
5611 }
5612 if (Options.AC)
5613 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5614
5616 Changed = true;
5617 }
5618 if (DTU)
5619 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5620 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5622 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5623 if (i->getCaseSuccessor() != BB) {
5624 ++i;
5625 continue;
5626 }
5627 BB->removePredecessor(SU->getParent());
5628 i = SU.removeCase(i);
5629 e = SU->case_end();
5630 Changed = true;
5631 }
5632 // Note that the default destination can't be removed!
5633 if (DTU && SI->getDefaultDest() != BB)
5634 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5635 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5636 if (II->getUnwindDest() == BB) {
5637 if (DTU) {
5638 DTU->applyUpdates(Updates);
5639 Updates.clear();
5640 }
5641 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5642 if (!CI->doesNotThrow())
5643 CI->setDoesNotThrow();
5644 Changed = true;
5645 }
5646 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5647 if (CSI->getUnwindDest() == BB) {
5648 if (DTU) {
5649 DTU->applyUpdates(Updates);
5650 Updates.clear();
5651 }
5652 removeUnwindEdge(TI->getParent(), DTU);
5653 Changed = true;
5654 continue;
5655 }
5656
5657 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5658 E = CSI->handler_end();
5659 I != E; ++I) {
5660 if (*I == BB) {
5661 CSI->removeHandler(I);
5662 --I;
5663 --E;
5664 Changed = true;
5665 }
5666 }
5667 if (DTU)
5668 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5669 if (CSI->getNumHandlers() == 0) {
5670 if (CSI->hasUnwindDest()) {
5671 // Redirect all predecessors of the block containing CatchSwitchInst
5672 // to instead branch to the CatchSwitchInst's unwind destination.
5673 if (DTU) {
5674 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5675 Updates.push_back({DominatorTree::Insert,
5676 PredecessorOfPredecessor,
5677 CSI->getUnwindDest()});
5678 Updates.push_back({DominatorTree::Delete,
5679 PredecessorOfPredecessor, Predecessor});
5680 }
5681 }
5682 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5683 } else {
5684 // Rewrite all preds to unwind to caller (or from invoke to call).
5685 if (DTU) {
5686 DTU->applyUpdates(Updates);
5687 Updates.clear();
5688 }
5689 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5690 for (BasicBlock *EHPred : EHPreds)
5691 removeUnwindEdge(EHPred, DTU);
5692 }
5693 // The catchswitch is no longer reachable.
5694 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5695 CSI->eraseFromParent();
5696 Changed = true;
5697 }
5698 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5699 (void)CRI;
5700 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5701 "Expected to always have an unwind to BB.");
5702 if (DTU)
5703 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5704 new UnreachableInst(TI->getContext(), TI->getIterator());
5705 TI->eraseFromParent();
5706 Changed = true;
5707 }
5708 }
5709
5710 if (DTU)
5711 DTU->applyUpdates(Updates);
5712
5713 // If this block is now dead, remove it.
5714 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5715 DeleteDeadBlock(BB, DTU);
5716 return true;
5717 }
5718
5719 return Changed;
5720}
5721
5723 assert(Cases.size() >= 1);
5724
5726 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5727 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5728 return false;
5729 }
5730 return true;
5731}
5732
5734 DomTreeUpdater *DTU,
5735 bool RemoveOrigDefaultBlock = true) {
5736 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5737 auto *BB = Switch->getParent();
5738 auto *OrigDefaultBlock = Switch->getDefaultDest();
5739 if (RemoveOrigDefaultBlock)
5740 OrigDefaultBlock->removePredecessor(BB);
5741 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5742 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5743 OrigDefaultBlock);
5744 new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5745 Switch->setDefaultDest(&*NewDefaultBlock);
5746 if (DTU) {
5748 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
5749 if (RemoveOrigDefaultBlock &&
5750 !is_contained(successors(BB), OrigDefaultBlock))
5751 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5752 DTU->applyUpdates(Updates);
5753 }
5754}
5755
5756/// Turn a switch into an integer range comparison and branch.
5757/// Switches with more than 2 destinations are ignored.
5758/// Switches with 1 destination are also ignored.
5759bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5760 IRBuilder<> &Builder) {
5761 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5762
5763 bool HasDefault =
5764 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5765
5766 auto *BB = SI->getParent();
5767
5768 // Partition the cases into two sets with different destinations.
5769 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5770 BasicBlock *DestB = nullptr;
5773
5774 for (auto Case : SI->cases()) {
5775 BasicBlock *Dest = Case.getCaseSuccessor();
5776 if (!DestA)
5777 DestA = Dest;
5778 if (Dest == DestA) {
5779 CasesA.push_back(Case.getCaseValue());
5780 continue;
5781 }
5782 if (!DestB)
5783 DestB = Dest;
5784 if (Dest == DestB) {
5785 CasesB.push_back(Case.getCaseValue());
5786 continue;
5787 }
5788 return false; // More than two destinations.
5789 }
5790 if (!DestB)
5791 return false; // All destinations are the same and the default is unreachable
5792
5793 assert(DestA && DestB &&
5794 "Single-destination switch should have been folded.");
5795 assert(DestA != DestB);
5796 assert(DestB != SI->getDefaultDest());
5797 assert(!CasesB.empty() && "There must be non-default cases.");
5798 assert(!CasesA.empty() || HasDefault);
5799
5800 // Figure out if one of the sets of cases form a contiguous range.
5801 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5802 BasicBlock *ContiguousDest = nullptr;
5803 BasicBlock *OtherDest = nullptr;
5804 if (!CasesA.empty() && casesAreContiguous(CasesA)) {
5805 ContiguousCases = &CasesA;
5806 ContiguousDest = DestA;
5807 OtherDest = DestB;
5808 } else if (casesAreContiguous(CasesB)) {
5809 ContiguousCases = &CasesB;
5810 ContiguousDest = DestB;
5811 OtherDest = DestA;
5812 } else
5813 return false;
5814
5815 // Start building the compare and branch.
5816
5817 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5818 Constant *NumCases =
5819 ConstantInt::get(Offset->getType(), ContiguousCases->size());
5820
5821 Value *Sub = SI->getCondition();
5822 if (!Offset->isNullValue())
5823 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5824
5825 Value *Cmp;
5826 // If NumCases overflowed, then all possible values jump to the successor.
5827 if (NumCases->isNullValue() && !ContiguousCases->empty())
5828 Cmp = ConstantInt::getTrue(SI->getContext());
5829 else
5830 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5831 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5832
5833 // Update weight for the newly-created conditional branch.
5834 if (hasBranchWeightMD(*SI)) {
5836 getBranchWeights(SI, Weights);
5837 if (Weights.size() == 1 + SI->getNumCases()) {
5838 uint64_t TrueWeight = 0;
5839 uint64_t FalseWeight = 0;
5840 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5841 if (SI->getSuccessor(I) == ContiguousDest)
5842 TrueWeight += Weights[I];
5843 else
5844 FalseWeight += Weights[I];
5845 }
5846 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5847 TrueWeight /= 2;
5848 FalseWeight /= 2;
5849 }
5850 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5851 }
5852 }
5853
5854 // Prune obsolete incoming values off the successors' PHI nodes.
5855 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5856 unsigned PreviousEdges = ContiguousCases->size();
5857 if (ContiguousDest == SI->getDefaultDest())
5858 ++PreviousEdges;
5859 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5860 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5861 }
5862 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5863 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5864 if (OtherDest == SI->getDefaultDest())
5865 ++PreviousEdges;
5866 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5867 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5868 }
5869
5870 // Clean up the default block - it may have phis or other instructions before
5871 // the unreachable terminator.
5872 if (!HasDefault)
5874
5875 auto *UnreachableDefault = SI->getDefaultDest();
5876
5877 // Drop the switch.
5878 SI->eraseFromParent();
5879
5880 if (!HasDefault && DTU)
5881 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5882
5883 return true;
5884}
5885
5886/// Compute masked bits for the condition of a switch
5887/// and use it to remove dead cases.
5889 AssumptionCache *AC,
5890 const DataLayout &DL) {
5891 Value *Cond = SI->getCondition();
5892 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
5893
5894 // We can also eliminate cases by determining that their values are outside of
5895 // the limited range of the condition based on how many significant (non-sign)
5896 // bits are in the condition value.
5897 unsigned MaxSignificantBitsInCond =
5898 ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
5899
5900 // Gather dead cases.
5902 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5903 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5904 for (const auto &Case : SI->cases()) {
5905 auto *Successor = Case.getCaseSuccessor();
5906 if (DTU) {
5907 if (!NumPerSuccessorCases.count(Successor))
5908 UniqueSuccessors.push_back(Successor);
5909 ++NumPerSuccessorCases[Successor];
5910 }
5911 const APInt &CaseVal = Case.getCaseValue()->getValue();
5912 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5913 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5914 DeadCases.push_back(Case.getCaseValue());
5915 if (DTU)
5916 --NumPerSuccessorCases[Successor];
5917 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5918 << " is dead.\n");
5919 }
5920 }
5921
5922 // If we can prove that the cases must cover all possible values, the
5923 // default destination becomes dead and we can remove it. If we know some
5924 // of the bits in the value, we can use that to more precisely compute the
5925 // number of possible unique case values.
5926 bool HasDefault =
5927 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5928 const unsigned NumUnknownBits =
5929 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5930 assert(NumUnknownBits <= Known.getBitWidth());
5931 if (HasDefault && DeadCases.empty() &&
5932 NumUnknownBits < 64 /* avoid overflow */) {
5933 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5934 if (SI->getNumCases() == AllNumCases) {
5936 return true;
5937 }
5938 // When only one case value is missing, replace default with that case.
5939 // Eliminating the default branch will provide more opportunities for
5940 // optimization, such as lookup tables.
5941 if (SI->getNumCases() == AllNumCases - 1) {
5942 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5943 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
5944 if (CondTy->getIntegerBitWidth() > 64 ||
5945 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5946 return false;
5947
5948 uint64_t MissingCaseVal = 0;
5949 for (const auto &Case : SI->cases())
5950 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5951 auto *MissingCase =
5952 cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
5954 SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
5955 createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5956 SIW.setSuccessorWeight(0, 0);
5957 return true;
5958 }
5959 }
5960
5961 if (DeadCases.empty())
5962 return false;
5963
5965 for (ConstantInt *DeadCase : DeadCases) {
5966 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5967 assert(CaseI != SI->case_default() &&
5968 "Case was not found. Probably mistake in DeadCases forming.");
5969 // Prune unused values from PHI nodes.
5970 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5971 SIW.removeCase(CaseI);
5972 }
5973
5974 if (DTU) {
5975 std::vector<DominatorTree::UpdateType> Updates;
5976 for (auto *Successor : UniqueSuccessors)
5977 if (NumPerSuccessorCases[Successor] == 0)
5978 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5979 DTU->applyUpdates(Updates);
5980 }
5981
5982 return true;
5983}
5984
5985/// If BB would be eligible for simplification by
5986/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5987/// by an unconditional branch), look at the phi node for BB in the successor
5988/// block and see if the incoming value is equal to CaseValue. If so, return
5989/// the phi node, and set PhiIndex to BB's index in the phi node.
5991 BasicBlock *BB, int *PhiIndex) {
5992 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
5993 return nullptr; // BB must be empty to be a candidate for simplification.
5994 if (!BB->getSinglePredecessor())
5995 return nullptr; // BB must be dominated by the switch.
5996
5997 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
5998 if (!Branch || !Branch->isUnconditional())
5999 return nullptr; // Terminator must be unconditional branch.
6000
6001 BasicBlock *Succ = Branch->getSuccessor(0);
6002
6003 for (PHINode &PHI : Succ->phis()) {
6004 int Idx = PHI.getBasicBlockIndex(BB);
6005 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6006
6007 Value *InValue = PHI.getIncomingValue(Idx);
6008 if (InValue != CaseValue)
6009 continue;
6010
6011 *PhiIndex = Idx;
6012 return &PHI;
6013 }
6014
6015 return nullptr;
6016}
6017
6018/// Try to forward the condition of a switch instruction to a phi node
6019/// dominated by the switch, if that would mean that some of the destination
6020/// blocks of the switch can be folded away. Return true if a change is made.
6022 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6023
6024 ForwardingNodesMap ForwardingNodes;
6025 BasicBlock *SwitchBlock = SI->getParent();
6026 bool Changed = false;
6027 for (const auto &Case : SI->cases()) {
6028 ConstantInt *CaseValue = Case.getCaseValue();
6029 BasicBlock *CaseDest = Case.getCaseSuccessor();
6030
6031 // Replace phi operands in successor blocks that are using the constant case
6032 // value rather than the switch condition variable:
6033 // switchbb:
6034 // switch i32 %x, label %default [
6035 // i32 17, label %succ
6036 // ...
6037 // succ:
6038 // %r = phi i32 ... [ 17, %switchbb ] ...
6039 // -->
6040 // %r = phi i32 ... [ %x, %switchbb ] ...
6041
6042 for (PHINode &Phi : CaseDest->phis()) {
6043 // This only works if there is exactly 1 incoming edge from the switch to
6044 // a phi. If there is >1, that means multiple cases of the switch map to 1
6045 // value in the phi, and that phi value is not the switch condition. Thus,
6046 // this transform would not make sense (the phi would be invalid because
6047 // a phi can't have different incoming values from the same block).
6048 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6049 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6050 count(Phi.blocks(), SwitchBlock) == 1) {
6051 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6052 Changed = true;
6053 }
6054 }
6055
6056 // Collect phi nodes that are indirectly using this switch's case constants.
6057 int PhiIdx;
6058 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6059 ForwardingNodes[Phi].push_back(PhiIdx);
6060 }
6061
6062 for (auto &ForwardingNode : ForwardingNodes) {
6063 PHINode *Phi = ForwardingNode.first;
6064 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6065 // Check if it helps to fold PHI.
6066 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6067 continue;
6068
6069 for (int Index : Indexes)
6070 Phi->setIncomingValue(Index, SI->getCondition());
6071 Changed = true;
6072 }
6073
6074 return Changed;
6075}
6076
6077/// Return true if the backend will be able to handle
6078/// initializing an array of constants like C.
6080 if (C->isThreadDependent())
6081 return false;
6082 if (C->isDLLImportDependent())
6083 return false;
6084
6085 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6086 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
6087 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
6088 return false;
6089
6090 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
6091 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6092 // materializing the array of constants.
6093 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6094 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6095 return false;
6096 }
6097
6099 return false;
6100
6101 return true;
6102}
6103
6104/// If V is a Constant, return it. Otherwise, try to look up
6105/// its constant value in ConstantPool, returning 0 if it's not there.
6106static Constant *
6109 if (Constant *C = dyn_cast<Constant>(V))
6110 return C;
6111 return ConstantPool.lookup(V);
6112}
6113
6114/// Try to fold instruction I into a constant. This works for
6115/// simple instructions such as binary operations where both operands are
6116/// constant or can be replaced by constants from the ConstantPool. Returns the
6117/// resulting constant on success, 0 otherwise.
6118static Constant *
6121 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
6122 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6123 if (!A)
6124 return nullptr;
6125 if (A->isAllOnesValue())
6126 return lookupConstant(Select->getTrueValue(), ConstantPool);
6127 if (A->isNullValue())
6128 return lookupConstant(Select->getFalseValue(), ConstantPool);
6129 return nullptr;
6130 }
6131
6133 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6134 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6135 COps.push_back(A);
6136 else
6137 return nullptr;
6138 }
6139
6140 return ConstantFoldInstOperands(I, COps, DL);
6141}
6142
6143/// Try to determine the resulting constant values in phi nodes
6144/// at the common destination basic block, *CommonDest, for one of the case
6145/// destionations CaseDest corresponding to value CaseVal (0 for the default
6146/// case), of a switch instruction SI.
6147static bool
6149 BasicBlock **CommonDest,
6150 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6151 const DataLayout &DL, const TargetTransformInfo &TTI) {
6152 // The block from which we enter the common destination.
6153 BasicBlock *Pred = SI->getParent();
6154
6155 // If CaseDest is empty except for some side-effect free instructions through
6156 // which we can constant-propagate the CaseVal, continue to its successor.
6158 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6159 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6160 if (I.isTerminator()) {
6161 // If the terminator is a simple branch, continue to the next block.
6162 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6163 return false;
6164 Pred = CaseDest;
6165 CaseDest = I.getSuccessor(0);
6166 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6167 // Instruction is side-effect free and constant.
6168
6169 // If the instruction has uses outside this block or a phi node slot for
6170 // the block, it is not safe to bypass the instruction since it would then
6171 // no longer dominate all its uses.
6172 for (auto &Use : I.uses()) {
6173 User *User = Use.getUser();
6174 if (Instruction *I = dyn_cast<Instruction>(User))
6175 if (I->getParent() == CaseDest)
6176 continue;
6177 if (PHINode *Phi = dyn_cast<PHINode>(User))
6178 if (Phi->getIncomingBlock(Use) == CaseDest)
6179 continue;
6180 return false;
6181 }
6182
6183 ConstantPool.insert(std::make_pair(&I, C));
6184 } else {
6185 break;
6186 }
6187 }
6188
6189 // If we did not have a CommonDest before, use the current one.
6190 if (!*CommonDest)
6191 *CommonDest = CaseDest;
6192 // If the destination isn't the common one, abort.
6193 if (CaseDest != *CommonDest)
6194 return false;
6195
6196 // Get the values for this case from phi nodes in the destination block.
6197 for (PHINode &PHI : (*CommonDest)->phis()) {
6198 int Idx = PHI.getBasicBlockIndex(Pred);
6199 if (Idx == -1)
6200 continue;
6201
6202 Constant *ConstVal =
6203 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6204 if (!ConstVal)
6205 return false;
6206
6207 // Be conservative about which kinds of constants we support.
6208 if (!validLookupTableConstant(ConstVal, TTI))
6209 return false;
6210
6211 Res.push_back(std::make_pair(&PHI, ConstVal));
6212 }
6213
6214 return Res.size() > 0;
6215}
6216
6217// Helper function used to add CaseVal to the list of cases that generate
6218// Result. Returns the updated number of cases that generate this result.
6219static size_t mapCaseToResult(ConstantInt *CaseVal,
6220 SwitchCaseResultVectorTy &UniqueResults,
6221 Constant *Result) {
6222 for (auto &I : UniqueResults) {
6223 if (I.first == Result) {
6224 I.second.push_back(CaseVal);
6225 return I.second.size();
6226 }
6227 }
6228 UniqueResults.push_back(
6229 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6230 return 1;
6231}
6232
6233// Helper function that initializes a map containing
6234// results for the PHI node of the common destination block for a switch
6235// instruction. Returns false if multiple PHI nodes have been found or if
6236// there is not a common destination block for the switch.
6238 BasicBlock *&CommonDest,
6239 SwitchCaseResultVectorTy &UniqueResults,
6240 Constant *&DefaultResult,
6241 const DataLayout &DL,
6242 const TargetTransformInfo &TTI,
6243 uintptr_t MaxUniqueResults) {
6244 for (const auto &I : SI->cases()) {
6245 ConstantInt *CaseVal = I.getCaseValue();
6246
6247 // Resulting value at phi nodes for this case value.
6248 SwitchCaseResultsTy Results;
6249 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6250 DL, TTI))
6251 return false;
6252
6253 // Only one value per case is permitted.
6254 if (Results.size() > 1)
6255 return false;
6256
6257 // Add the case->result mapping to UniqueResults.
6258 const size_t NumCasesForResult =
6259 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6260
6261 // Early out if there are too many cases for this result.
6262 if (NumCasesForResult > MaxSwitchCasesPerResult)
6263 return false;
6264
6265 // Early out if there are too many unique results.
6266 if (UniqueResults.size() > MaxUniqueResults)
6267 return false;
6268
6269 // Check the PHI consistency.
6270 if (!PHI)
6271 PHI = Results[0].first;
6272 else if (PHI != Results[0].first)
6273 return false;
6274 }
6275 // Find the default result value.
6277 BasicBlock *DefaultDest = SI->getDefaultDest();
6278 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6279 DL, TTI);
6280 // If the default value is not found abort unless the default destination
6281 // is unreachable.
6282 DefaultResult =
6283 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6284 if ((!DefaultResult &&
6285 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
6286 return false;
6287
6288 return true;
6289}
6290
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// Returns the select on success, nullptr when no profitable form was found.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  //   case 10: return 42;         %1 = select i1 %0, i32 42, i32 4
  //   case 20: return 2;  ---->   %2 = icmp eq i32 %a, 20
  //   default: return 4;          %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the case where all cases share one result value and a default
  // result exists.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // Find the minimal case value.
      for (auto *Case : CaseValues)
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where two cases have the same result value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  return nullptr;
}
6367
6368// Helper function to cleanup a switch instruction that has been converted into
6369// a select, fixing up PHI nodes and basic blocks.
6371 Value *SelectValue,
6372 IRBuilder<> &Builder,
6373 DomTreeUpdater *DTU) {
6374 std::vector<DominatorTree::UpdateType> Updates;
6375
6376 BasicBlock *SelectBB = SI->getParent();
6377 BasicBlock *DestBB = PHI->getParent();
6378
6379 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6380 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6381 Builder.CreateBr(DestBB);
6382
6383 // Remove the switch.
6384
6385 PHI->removeIncomingValueIf(
6386 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6387 PHI->addIncoming(SelectValue, SelectBB);
6388
6389 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6390 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6391 BasicBlock *Succ = SI->getSuccessor(i);
6392
6393 if (Succ == DestBB)
6394 continue;
6395 Succ->removePredecessor(SelectBB);
6396 if (DTU && RemovedSuccessors.insert(Succ).second)
6397 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6398 }
6399 SI->eraseFromParent();
6400 if (DTU)
6401 DTU->applyUpdates(Updates);
6402}
6403
6404/// If a switch is only used to initialize one or more phi nodes in a common
6405/// successor block with only two different constant values, try to replace the
6406/// switch with a select. Returns true if the fold was made.
6407static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6408 DomTreeUpdater *DTU, const DataLayout &DL,
6409 const TargetTransformInfo &TTI) {
6410 Value *const Cond = SI->getCondition();
6411 PHINode *PHI = nullptr;
6412 BasicBlock *CommonDest = nullptr;
6413 Constant *DefaultResult;
6414 SwitchCaseResultVectorTy UniqueResults;
6415 // Collect all the cases that will deliver the same value from the switch.
6416 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6417 DL, TTI, /*MaxUniqueResults*/ 2))
6418 return false;
6419
6420 assert(PHI != nullptr && "PHI for value select not found");
6421 Builder.SetInsertPoint(SI);
6422 Value *SelectValue =
6423 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6424 if (!SelectValue)
6425 return false;
6426
6427 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6428 return true;
6429}
6430
namespace {

/// This class represents a lookup table that can be used to replace a switch.
/// The constructor classifies the contents into one of four representations
/// (see Kind below); buildLookup then emits the matching access code.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *buildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6492
SwitchLookupTable::SwitchLookupTable(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  Type *ValueType = Values.begin()->second->getType();

  // Build up the table contents. Each case value is placed at its offset
  // relative to the minimum case value (Offset).
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (size_t I = 0, E = Values.size(); I != E; ++I) {
    ConstantInt *CaseVal = Values[I].first;
    Constant *CaseRes = Values[I].second;
    assert(CaseRes->getType() == ValueType);

    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Track whether all (non-poison) results are identical; poison entries
    // are compatible with any single value.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  if (isa<IntegerType>(ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      // NOTE: this APInt M intentionally shadows the Module &M parameter,
      // which is not used below this point in this branch.
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      ++NumLinearMaps;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    IntegerType *IT = cast<IntegerType>(ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Walk backwards so that element 0 ends up in the low bits.
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    ++NumBitMaps;
    return;
  }

  // Store the table in an array.
  ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);

  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
                             GlobalVariable::PrivateLinkage, Initializer,
                             "switch.table." + FuncName);
  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  // Set the alignment to that of an array items. We will be only loading one
  // value out of it.
  Array->setAlignment(DL.getPrefTypeAlign(ValueType));
  Kind = ArrayKind;
}
6632
// Emit the IR that retrieves the table entry for Index, according to the
// representation Kind chosen by the constructor.
Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder) {
  switch (Kind) {
  case SingleValueKind:
    // Every entry is the same constant; no code needed.
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case ArrayKind: {
    // Make sure the table index will not overflow when treated as signed.
    IntegerType *IT = cast<IntegerType>(Index->getType());
    uint64_t TableSize =
        Array->getInitializer()->getType()->getArrayNumElements();
    if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
      Index = Builder.CreateZExt(
          Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
          "switch.tableidx.zext");

    Value *GEPIndices[] = {Builder.getInt32(0), Index};
    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
                                           GEPIndices, "switch.gep");
    return Builder.CreateLoad(
        cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
        "switch.load");
  }
  }
  llvm_unreachable("Unknown lookup table kind!");
}
6694
6695bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL,
6696 uint64_t TableSize,
6697 Type *ElementType) {
6698 auto *IT = dyn_cast<IntegerType>(ElementType);
6699 if (!IT)
6700 return false;
6701 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6702 // are <= 15, we could try to narrow the type.
6703
6704 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6705 if (TableSize >= UINT_MAX / IT->getBitWidth())
6706 return false;
6707 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6708}
6709
6711 const DataLayout &DL) {
6712 // Allow any legal type.
6713 if (TTI.isTypeLegal(Ty))
6714 return true;
6715
6716 auto *IT = dyn_cast<IntegerType>(Ty);
6717 if (!IT)
6718 return false;
6719
6720 // Also allow power of 2 integer types that have at least 8 bits and fit in
6721 // a register. These types are common in frontend languages and targets
6722 // usually support loads of these types.
6723 // TODO: We could relax this to any integer that fits in a register and rely
6724 // on ABI alignment and padding in the table to allow the load to be widened.
6725 // Or we could widen the constants and truncate the load.
6726 unsigned BitWidth = IT->getBitWidth();
6727 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6728 DL.fitsInLegalInteger(IT->getBitWidth());
6729}
6730
// Return true when NumCases cases spread over a range of CaseRange values are
// dense enough (>= 40%) to justify a table-based lowering.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Bail out when the products below could overflow a uint64_t.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Equivalent to NumCases / CaseRange >= MinDensity / 100, rearranged to
  // stay in integer arithmetic.
  return NumCases * 100 >= CaseRange * MinDensity;
}
6742
6744 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6745 uint64_t Range = Diff + 1;
6746 if (Range < Diff)
6747 return false; // Overflow.
6748
6749 return isSwitchDense(Values.size(), Range);
6750}
6751
6752/// Determine whether a lookup table should be built for this switch, based on
6753/// the number of cases, size of the table, and the types of the results.
6754// TODO: We could support larger than legal types by limiting based on the
6755// number of loads required and/or table size. If the constants are small we
6756// could use smaller table entries and extend after the load.
6757static bool
6759 const TargetTransformInfo &TTI, const DataLayout &DL,
6760 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6761 if (SI->getNumCases() > TableSize)
6762 return false; // TableSize overflowed.
6763
6764 bool AllTablesFitInRegister = true;
6765 bool HasIllegalType = false;
6766 for (const auto &I : ResultTypes) {
6767 Type *Ty = I.second;
6768
6769 // Saturate this flag to true.
6770 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6771
6772 // Saturate this flag to false.
6773 AllTablesFitInRegister =
6774 AllTablesFitInRegister &&
6775 SwitchLookupTable::wouldFitInRegister(DL, TableSize, Ty);
6776
6777 // If both flags saturate, we're done. NOTE: This *only* works with
6778 // saturating flags, and all flags have to saturate first due to the
6779 // non-deterministic behavior of iterating over a dense map.
6780 if (HasIllegalType && !AllTablesFitInRegister)
6781 break;
6782 }
6783
6784 // If each table would fit in a register, we should build it anyway.
6785 if (AllTablesFitInRegister)
6786 return true;
6787
6788 // Don't build a table that doesn't fit in-register if it has illegal types.
6789 if (HasIllegalType)
6790 return false;
6791
6792 return isSwitchDense(SI->getNumCases(), TableSize);
6793}
6794
6796 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6797 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6798 const DataLayout &DL, const TargetTransformInfo &TTI) {
6799 if (MinCaseVal.isNullValue())
6800 return true;
6801 if (MinCaseVal.isNegative() ||
6802 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6803 !HasDefaultResults)
6804 return false;
6805 return all_of(ResultTypes, [&](const auto &KV) {
6806 return SwitchLookupTable::wouldFitInRegister(
6807 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6808 KV.second /* ResultType */);
6809 });
6810}
6811
6812/// Try to reuse the switch table index compare. Following pattern:
6813/// \code
6814/// if (idx < tablesize)
6815/// r = table[idx]; // table does not contain default_value
6816/// else
6817/// r = default_value;
6818/// if (r != default_value)
6819/// ...
6820/// \endcode
6821/// Is optimized to:
6822/// \code
6823/// cond = idx < tablesize;
6824/// if (cond)
6825/// r = table[idx];
6826/// else
6827/// r = default_value;
6828/// if (cond)
6829/// ...
6830/// \endcode
6831/// Jump threading will then eliminate the second if(cond).
// NOTE(review): the opening line of this definition (its `reuseTableCompare`
// function header, per the doc comment above) was dropped by the source
// extraction; only the trailing parameter lines of the signature survive.
// The helper rewrites a compare of a lookup-table PHI against a constant so
// that it reuses the table's range-check condition, enabling jump threading.
6833 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6834 Constant *DefaultValue,
6835 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
// Only integer compares of the PHI are candidates for reuse.
6836 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6837 if (!CmpInst)
6838 return;
6839
6840 // We require that the compare is in the same block as the phi so that jump
6841 // threading can do its work afterwards.
6842 if (CmpInst->getParent() != PhiBlock)
6843 return;
6844
// The compare's RHS must be a constant so each case result can be folded.
6845 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6846 if (!CmpOp1)
6847 return;
6848
6849 Value *RangeCmp = RangeCheckBranch->getCondition();
6850 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6851 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6852
6853 // Check if the compare with the default value is constant true or false.
6854 const DataLayout &DL = PhiBlock->getDataLayout();
// NOTE(review): a line is missing here (extraction artifact); presumably the
// declaration of `DefaultConst` via a constant-fold of the compare — confirm
// against upstream SimplifyCFG.cpp.
6856 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6857 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6858 return;
6859
6860 // Check if the compare with the case values is distinct from the default
6861 // compare result.
6862 for (auto ValuePair : Values) {
// NOTE(review): a line is missing here as well; presumably the declaration of
// `CaseConst` via the same constant-fold helper — confirm against upstream.
6864 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6865 if (!CaseConst || CaseConst == DefaultConst ||
6866 (CaseConst != TrueConst && CaseConst != FalseConst))
6867 return;
6868 }
6869
6870 // Check if the branch instruction dominates the phi node. It's a simple
6871 // dominance check, but sufficient for our needs.
6872 // Although this check is invariant in the calling loops, it's better to do it
6873 // at this late stage. Practically we do it at most once for a switch.
6874 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6875 for (BasicBlock *Pred : predecessors(PhiBlock)) {
// Every predecessor of the phi block must be the branch block itself or be
// solely reachable from it; otherwise the reuse would not be dominated.
6876 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6877 return;
6878 }
6879
6880 if (DefaultConst == FalseConst) {
6881 // The compare yields the same result. We can replace it.
6882 CmpInst->replaceAllUsesWith(RangeCmp);
6883 ++NumTableCmpReuses;
6884 } else {
6885 // The compare yields the same result, just inverted. We can replace it.
6886 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6887 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6888 RangeCheckBranch->getIterator());
6889 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6890 ++NumTableCmpReuses;
6891 }
6892}
6893
6894/// If the switch is only used to initialize one or more phi nodes in a common
6895/// successor block with different constant values, replace the switch with
6896/// lookup tables.
// NOTE(review): the function header line (presumably
// `static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,`)
// was dropped by the source extraction; only the trailing parameter lines of
// the signature survive below. Returns true if the switch was replaced.
6898 DomTreeUpdater *DTU, const DataLayout &DL,
6899 const TargetTransformInfo &TTI) {
6900 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6901
6902 BasicBlock *BB = SI->getParent();
6903 Function *Fn = BB->getParent();
6904 // Only build lookup table when we have a target that supports it or the
6905 // attribute is not set.
// NOTE(review): a condition line is missing here (extraction artifact);
// presumably the TTI lookup-table capability query combined with the
// attribute test on the next line — confirm against upstream.
6907 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6908 return false;
6909
6910 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6911 // split off a dense part and build a lookup table for that.
6912
6913 // FIXME: This creates arrays of GEPs to constant strings, which means each
6914 // GEP needs a runtime relocation in PIC code. We should just build one big
6915 // string and lookup indices into that.
6916
6917 // Ignore switches with less than three cases. Lookup tables will not make
6918 // them faster, so we don't analyze them.
6919 if (SI->getNumCases() < 3)
6920 return false;
6921
6922 // Figure out the corresponding result for each case value and phi node in the
6923 // common destination, as well as the min and max case values.
6924 assert(!SI->cases().empty());
6925 SwitchInst::CaseIt CI = SI->case_begin();
6926 ConstantInt *MinCaseVal = CI->getCaseValue();
6927 ConstantInt *MaxCaseVal = CI->getCaseValue();
6928
6929 BasicBlock *CommonDest = nullptr;
6930
6931 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6933
// NOTE(review): several declaration lines (6932, 6934-6936) are missing here;
// they presumably declared the ResultLists / DefaultResults / ResultTypes
// maps and the PHIs vector used below — confirm against upstream.
6937
6938 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6939 ConstantInt *CaseVal = CI->getCaseValue();
// Track the (signed) min and max case values for table sizing.
6940 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6941 MinCaseVal = CaseVal;
6942 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6943 MaxCaseVal = CaseVal;
6944
6945 // Resulting value at phi nodes for this case value.
// NOTE(review): line 6946 is missing; presumably the `ResultsTy` alias used
// on the next line — confirm against upstream.
6947 ResultsTy Results;
6948 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6949 Results, DL, TTI))
6950 return false;
6951
6952 // Append the result from this case to the list for each phi.
6953 for (const auto &I : Results) {
6954 PHINode *PHI = I.first;
6955 Constant *Value = I.second;
6956 if (!ResultLists.count(PHI))
6957 PHIs.push_back(PHI);
6958 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6959 }
6960 }
6961
6962 // Keep track of the result types.
6963 for (PHINode *PHI : PHIs) {
6964 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6965 }
6966
// Every PHI has one entry per case, so any list gives the case count.
6967 uint64_t NumResults = ResultLists[PHIs[0]].size();
6968
6969 // If the table has holes, we need a constant result for the default case
6970 // or a bitmask that fits in a register.
6971 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6972 bool HasDefaultResults =
6973 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6974 DefaultResultsList, DL, TTI);
6975
6976 for (const auto &I : DefaultResultsList) {
6977 PHINode *PHI = I.first;
6978 Constant *Result = I.second;
6979 DefaultResults[PHI] = Result;
6980 }
6981
6982 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
6983 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6984 uint64_t TableSize;
6985 if (UseSwitchConditionAsTableIndex)
6986 TableSize = MaxCaseVal->getLimitedValue() + 1;
6987 else
6988 TableSize =
6989 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6990
6991 // If the default destination is unreachable, or if the lookup table covers
6992 // all values of the conditional variable, branch directly to the lookup table
6993 // BB. Otherwise, check that the condition is within the case range.
6994 bool DefaultIsReachable = !SI->defaultDestUndefined();
6995
6996 bool TableHasHoles = (NumResults < TableSize);
6997
6998 // If the table has holes but the default destination doesn't produce any
6999 // constant results, the lookup table entries corresponding to the holes will
7000 // contain poison.
7001 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7002
7003 // If the default destination doesn't produce a constant result but is still
7004 // reachable, and the lookup table has holes, we need to use a mask to
7005 // determine if the current index should load from the lookup table or jump
7006 // to the default case.
7007 // The mask is unnecessary if the table has holes but the default destination
7008 // is unreachable, as in that case the holes must also be unreachable.
7009 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7010 if (NeedMask) {
7011 // As an extra penalty for the validity test we require more cases.
7012 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7013 return false;
7014 if (!DL.fitsInLegalInteger(TableSize))
7015 return false;
7016 }
7017
7018 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7019 return false;
7020
7021 std::vector<DominatorTree::UpdateType> Updates;
7022
7023 // Compute the maximum table size representable by the integer type we are
7024 // switching upon.
7025 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7026 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7027 assert(MaxTableSize >= TableSize &&
7028 "It is impossible for a switch to have more entries than the max "
7029 "representable value of its input integer type's size.");
7030
7031 // Create the BB that does the lookups.
7032 Module &Mod = *CommonDest->getParent()->getParent();
7033 BasicBlock *LookupBB = BasicBlock::Create(
7034 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7035
7036 // Compute the table index value.
7037 Builder.SetInsertPoint(SI);
7038 Value *TableIndex;
7039 ConstantInt *TableIndexOffset;
7040 if (UseSwitchConditionAsTableIndex) {
7041 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7042 TableIndex = SI->getCondition();
7043 } else {
7044 TableIndexOffset = MinCaseVal;
7045 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7046 // we can try to attach nsw.
7047 bool MayWrap = true;
7048 if (!DefaultIsReachable) {
7049 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7050 (void)Res;
7051 }
7052
7053 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7054 "switch.tableidx", /*HasNUW =*/false,
7055 /*HasNSW =*/!MayWrap);
7056 }
7057
7058 BranchInst *RangeCheckBranch = nullptr;
7059
7060 // Grow the table to cover all possible index values to avoid the range check.
7061 // It will use the default result to fill in the table hole later, so make
7062 // sure it exists.
7063 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
7064 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
7065 // Growing the table shouldn't have any size impact by checking
7066 // wouldFitInRegister.
7067 // TODO: Consider growing the table also when it doesn't fit in a register
7068 // if no optsize is specified.
7069 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7070 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
7071 return SwitchLookupTable::wouldFitInRegister(
7072 DL, UpperBound, KV.second /* ResultType */);
7073 })) {
7074 // There may be some case index larger than the UpperBound (unreachable
7075 // case), so make sure the table size does not get smaller.
7076 TableSize = std::max(UpperBound, TableSize);
7077 // The default branch is unreachable after we enlarge the lookup table.
7078 // Adjust DefaultIsReachable to reuse code path.
7079 DefaultIsReachable = false;
7080 }
7081 }
7082
7083 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7084 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7085 Builder.CreateBr(LookupBB);
7086 if (DTU)
7087 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7088 // Note: We call removePredecessor later since we need to be able to get the
7089 // PHI value for the default case in case we're using a bit mask.
7090 } else {
// Default may be taken: guard the table access with an unsigned range check.
7091 Value *Cmp = Builder.CreateICmpULT(
7092 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7093 RangeCheckBranch =
7094 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7095 if (DTU)
7096 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7097 }
7098
7099 // Populate the BB that does the lookups.
7100 Builder.SetInsertPoint(LookupBB);
7101
7102 if (NeedMask) {
7103 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7104 // re-purposed to do the hole check, and we create a new LookupBB.
7105 BasicBlock *MaskBB = LookupBB;
7106 MaskBB->setName("switch.hole_check");
7107 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7108 CommonDest->getParent(), CommonDest);
7109
7110 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7111 // unnecessary illegal types.
7112 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7113 APInt MaskInt(TableSizePowOf2, 0);
7114 APInt One(TableSizePowOf2, 1);
7115 // Build bitmask; fill in a 1 bit for every case.
7116 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7117 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
7118 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
7119 .getLimitedValue();
7120 MaskInt |= One << Idx;
7121 }
7122 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7123
7124 // Get the TableIndex'th bit of the bitmask.
7125 // If this bit is 0 (meaning hole) jump to the default destination,
7126 // else continue with table lookup.
7127 IntegerType *MapTy = TableMask->getIntegerType();
7128 Value *MaskIndex =
7129 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7130 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7131 Value *LoBit = Builder.CreateTrunc(
7132 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7133 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7134 if (DTU) {
7135 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7136 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7137 }
7138 Builder.SetInsertPoint(LookupBB);
7139 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7140 }
7141
7142 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7143 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7144 // do not delete PHINodes here.
7145 SI->getDefaultDest()->removePredecessor(BB,
7146 /*KeepOneInputPHIs=*/true);
7147 if (DTU)
7148 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7149 }
7150
7151 for (PHINode *PHI : PHIs) {
7152 const ResultListTy &ResultList = ResultLists[PHI];
7153
7154 Type *ResultType = ResultList.begin()->second->getType();
7155
7156 // Use any value to fill the lookup table holes.
7157 Constant *DV =
7158 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7159 StringRef FuncName = Fn->getName();
7160 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
7161 DL, FuncName);
7162
7163 Value *Result = Table.buildLookup(TableIndex, Builder);
7164
7165 // Do a small peephole optimization: re-use the switch table compare if
7166 // possible.
7167 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7168 BasicBlock *PhiBlock = PHI->getParent();
7169 // Search for compare instructions which use the phi.
7170 for (auto *User : PHI->users()) {
7171 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
7172 }
7173 }
7174
7175 PHI->addIncoming(Result, LookupBB);
7176 }
7177
7178 Builder.CreateBr(CommonDest);
7179 if (DTU)
7180 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7181
7182 // Remove the switch.
7183 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7184 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7185 BasicBlock *Succ = SI->getSuccessor(i);
7186
7187 if (Succ == SI->getDefaultDest())
7188 continue;
7189 Succ->removePredecessor(BB);
7190 if (DTU && RemovedSuccessors.insert(Succ).second)
7191 Updates.push_back({DominatorTree::Delete, BB, Succ});
7192 }
7193 SI->eraseFromParent();
7194
7195 if (DTU)
7196 DTU->applyUpdates(Updates);
7197
7198 ++NumLookupTables;
7199 if (NeedMask)
7200 ++NumLookupTablesHoles;
7201 return true;
7202}
7203
7204/// Try to transform a switch that has "holes" in it to a contiguous sequence
7205/// of cases.
7206///
7207/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7208/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7209///
7210/// This converts a sparse switch into a dense switch which allows better
7211/// lowering and could also allow transforming into a lookup table.
7212 static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7213 const DataLayout &DL,
7214 const TargetTransformInfo &TTI) {
// Bail out on condition types wider than 64 bits or illegal for the target.
7215 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7216 if (CondTy->getIntegerBitWidth() > 64 ||
7217 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7218 return false;
7219 // Only bother with this optimization if there are more than 3 switch cases;
7220 // SDAG will only bother creating jump tables for 4 or more cases.
7221 if (SI->getNumCases() < 4)
7222 return false;
7223
7224 // This transform is agnostic to the signedness of the input or case values. We
7225 // can treat the case values as signed or unsigned. We can optimize more common
7226 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7227 // as signed.
// NOTE(review): line 7228 is missing (extraction artifact); presumably the
// declaration of the `Values` vector of int64_t — confirm against upstream.
7229 for (const auto &C : SI->cases())
7230 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7231 llvm::sort(Values);
7232
7233 // If the switch is already dense, there's nothing useful to do here.
7234 if (isSwitchDense(Values))
7235 return false;
7236
7237 // First, transform the values such that they start at zero and ascend.
7238 int64_t Base = Values[0];
7239 for (auto &V : Values)
7240 V -= (uint64_t)(Base);
7241
7242 // Now we have signed numbers that have been shifted so that, given enough
7243 // precision, there are no negative values. Since the rest of the transform
7244 // is bitwise only, we switch now to an unsigned representation.
7245
7246 // This transform can be done speculatively because it is so cheap - it
7247 // results in a single rotate operation being inserted.
7248
7249 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7250 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7251 // less than 64.
7252 unsigned Shift = 64;
7253 for (auto &V : Values)
7254 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7255 assert(Shift < 64);
7256 if (Shift > 0)
// Divide every rebased case value by the common power-of-two stride.
7257 for (auto &V : Values)
7258 V = (int64_t)((uint64_t)V >> Shift);
7259
7260 if (!isSwitchDense(Values))
7261 // Transform didn't create a dense switch.
7262 return false;
7263
7264 // The obvious transform is to shift the switch condition right and emit a
7265 // check that the condition actually cleanly divided by GCD, i.e.
7266 // C & (1 << Shift - 1) == 0
7267 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7268 //
7269 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7270 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7271 // are nonzero then the switch condition will be very large and will hit the
7272 // default case.
7273
7274 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7275 Builder.SetInsertPoint(SI);
7276 Value *Sub =
7277 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
// fshl(x, x, BitWidth - Shift) is a rotate-right by Shift.
7278 Value *Rot = Builder.CreateIntrinsic(
7279 Ty, Intrinsic::fshl,
7280 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7281 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7282
// Rewrite each case value to match the rebased, rotated condition.
7283 for (auto Case : SI->cases()) {
7284 auto *Orig = Case.getCaseValue();
7285 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7286 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7287 }
7288 return true;
7289}
7290
7291/// Tries to transform switch of powers of two to reduce switch range.
7292/// For example, switch like:
7293/// switch (C) { case 1: case 2: case 64: case 128: }
7294/// will be transformed to:
7295/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7296///
7297/// This transformation allows better lowering and could allow transforming into
7298/// a lookup table.
// NOTE(review): the function header line (presumably
// `static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,`)
// was dropped by the source extraction; only the trailing parameter lines
// survive. Returns true if the switch was rewritten over cttz(condition).
7300 const DataLayout &DL,
7301 const TargetTransformInfo &TTI) {
7302 Value *Condition = SI->getCondition();
7303 LLVMContext &Context = SI->getContext();
7304 auto *CondTy = cast<IntegerType>(Condition->getType());
7305
7306 if (CondTy->getIntegerBitWidth() > 64 ||
7307 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7308 return false;
7309
// Query the cost of a cttz with the "is zero poison" flag set to true.
7310 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7311 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7312 {Condition, ConstantInt::getTrue(Context)}),
// NOTE(review): line 7313 is missing (extraction artifact); presumably the
// cost-kind argument closing this call — confirm against upstream.
7314
7315 if (CttzIntrinsicCost > TTI::TCC_Basic)
7316 // Inserting intrinsic is too expensive.
7317 return false;
7318
7319 // Only bother with this optimization if there are more than 3 switch cases.
7320 // SDAG will only bother creating jump tables for 4 or more cases.
7321 if (SI->getNumCases() < 4)
7322 return false;
7323
7324 // We perform this optimization only for switches with
7325 // unreachable default case.
7326 // This assumption will save us from checking if `Condition` is a power of two.
7327 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7328 return false;
7329
7330 // Check that switch cases are powers of two.
// NOTE(review): line 7331 is missing (extraction artifact); presumably the
// declaration of the `Values` vector of uint64_t — confirm against upstream.
7332 for (const auto &Case : SI->cases()) {
7333 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7334 if (llvm::has_single_bit(CaseValue))
7335 Values.push_back(CaseValue);
7336 else
7337 return false;
7338 }
7339
7340 // isSwitchDense requires case values to be sorted.
7341 llvm::sort(Values);
7342 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7343 llvm::countr_zero(Values.front()) + 1))
7344 // Transform is unable to generate dense switch.
7345 return false;
7346
7347 Builder.SetInsertPoint(SI);
7348
7349 // Replace each case with its trailing zeros number.
7350 for (auto &Case : SI->cases()) {
7351 auto *OrigValue = Case.getCaseValue();
7352 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7353 OrigValue->getValue().countr_zero()));
7354 }
7355
7356 // Replace condition with its trailing zeros number.
7357 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7358 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7359
7360 SI->setCondition(ConditionTrailingZeros);
7361
7362 return true;
7363}
7364
7365/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7366/// the same destination.
// NOTE(review): the function header line (presumably
// `static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilder<> &Builder,`)
// was dropped by the source extraction; only the trailing parameter line
// survives. Returns true if the switch was folded to an icmp + branch.
7368 DomTreeUpdater *DTU) {
7369 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7370 if (!Cmp || !Cmp->hasOneUse())
7371 return false;
7372
// NOTE(review): line 7373 is missing (extraction artifact); presumably the
// declaration of the `Weights` vector — confirm against upstream.
7374 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7375 if (!HasWeights)
7376 Weights.resize(4); // Avoid checking HasWeights everywhere.
7377
7378 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7379 int64_t Res;
7380 BasicBlock *Succ, *OtherSucc;
7381 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7382 BasicBlock *Unreachable = nullptr;
7383
7384 if (SI->getNumCases() == 2) {
7385 // Find which of 1, 0 or -1 is missing (handled by default dest).
7386 SmallSet<int64_t, 3> Missing;
7387 Missing.insert(1);
7388 Missing.insert(0);
7389 Missing.insert(-1);
7390
7391 Succ = SI->getDefaultDest();
7392 SuccWeight = Weights[0];
7393 OtherSucc = nullptr;
7394 for (auto &Case : SI->cases()) {
7395 std::optional<int64_t> Val =
7396 Case.getCaseValue()->getValue().trySExtValue();
7397 if (!Val)
7398 return false;
7399 if (!Missing.erase(*Val))
7400 return false;
// Both explicit cases must share one successor for the fold to apply.
7401 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7402 return false;
7403 OtherSucc = Case.getCaseSuccessor();
7404 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7405 }
7406
7407 assert(Missing.size() == 1 && "Should have one case left");
7408 Res = *Missing.begin();
7409 } else if (SI->getNumCases() == 3 && SI->defaultDestUndefined()) {
7410 // Normalize so that Succ is taken once and OtherSucc twice.
7411 Unreachable = SI->getDefaultDest();
7412 Succ = OtherSucc = nullptr;
7413 for (auto &Case : SI->cases()) {
7414 BasicBlock *NewSucc = Case.getCaseSuccessor();
7415 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7416 if (!OtherSucc || OtherSucc == NewSucc) {
7417 OtherSucc = NewSucc;
7418 OtherSuccWeight += Weight;
7419 } else if (!Succ) {
7420 Succ = NewSucc;
7421 SuccWeight = Weight;
7422 } else if (Succ == NewSucc) {
// A second sighting of Succ means it is really the twice-taken successor.
7423 std::swap(Succ, OtherSucc);
7424 std::swap(SuccWeight, OtherSuccWeight);
7425 } else
7426 return false;
7427 }
// Identify which of {-1, 0, 1} maps to the once-taken successor.
7428 for (auto &Case : SI->cases()) {
7429 std::optional<int64_t> Val =
7430 Case.getCaseValue()->getValue().trySExtValue();
7431 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7432 return false;
7433 if (Case.getCaseSuccessor() == Succ) {
7434 Res = *Val;
7435 break;
7436 }
7437 }
7438 } else {
7439 return false;
7440 }
7441
7442 // Determine predicate for the missing case.
// NOTE(review): line 7443 is missing (extraction artifact); presumably the
// declaration of the `Pred` predicate variable — confirm against upstream.
7444 switch (Res) {
7445 case 1:
7446 Pred = ICmpInst::ICMP_UGT;
7447 break;
7448 case 0:
7449 Pred = ICmpInst::ICMP_EQ;
7450 break;
7451 case -1:
7452 Pred = ICmpInst::ICMP_ULT;
7453 break;
7454 }
7455 if (Cmp->isSigned())
7456 Pred = ICmpInst::getSignedPredicate(Pred);
7457
7458 MDNode *NewWeights = nullptr;
7459 if (HasWeights)
7460 NewWeights = MDBuilder(SI->getContext())
7461 .createBranchWeights(SuccWeight, OtherSuccWeight);
7462
7463 BasicBlock *BB = SI->getParent();
7464 Builder.SetInsertPoint(SI->getIterator());
7465 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7466 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7467 SI->getMetadata(LLVMContext::MD_unpredictable));
7468 OtherSucc->removePredecessor(BB);
7469 if (Unreachable)
7470 Unreachable->removePredecessor(BB);
7471 SI->eraseFromParent();
7472 Cmp->eraseFromParent();
7473 if (DTU && Unreachable)
7474 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7475 return true;
7476}
7477
7478/// Checking whether two cases of SI are equal depends on the contents of the
7479/// BasicBlock and the incoming values of their successor PHINodes.
7480/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7481/// calling this function on each BasicBlock every time isEqual is called,
7482/// especially since the same BasicBlock may be passed as an argument multiple
7483/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7484/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7485/// of the incoming values.
7489};
7490
7491 namespace llvm {
// DenseMapInfo specialization so SwitchSuccWrapper pointers can key a
// DenseSet/DenseMap while comparing by *content* (destination block plus the
// incoming PHI values of its successor), not by pointer identity.
7492 template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
// NOTE(review): the surrounding lines of getEmptyKey/getTombstoneKey (7493,
// 7495, 7497, 7499) are missing (extraction artifact); they presumably
// delegate to DenseMapInfo<void *> — confirm against upstream.
7494 return static_cast<SwitchSuccWrapper *>(
7496 }
7498 return static_cast<SwitchSuccWrapper *>(
7500 }
7501 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7502 BasicBlock *Succ = SSW->Dest;
7503 BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
7504 assert(BI->isUnconditional() &&
7505 "Only supporting unconditional branches for now");
7506 assert(BI->getNumSuccessors() == 1 &&
7507 "Expected unconditional branches to have one successor");
7508 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7509
7510 // Since we assume the BB is just a single BranchInst with a single
7511 // successor, we hash as the BB and the incoming Values of its successor
7512 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7513 // including the incoming PHI values leads to better performance.
7514 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7515 // time and passing it in SwitchSuccWrapper, but this slowed down the
7516 // average compile time without having any impact on the worst case compile
7517 // time.
7518 BasicBlock *BB = BI->getSuccessor(0);
7519 SmallVector<Value *> PhiValsForBB;
7520 for (PHINode &Phi : BB->phis())
7521 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7522
7523 return hash_combine(
7524 BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
7525 }
7526 static bool isEqual(const SwitchSuccWrapper *LHS,
7527 const SwitchSuccWrapper *RHS) {
// NOTE(review): lines 7528-7529 are missing (extraction artifact); presumably
// the local EKey/TKey (empty/tombstone key) declarations — confirm upstream.
7530 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7531 return LHS == RHS;
7532
7533 BasicBlock *A = LHS->Dest;
7534 BasicBlock *B = RHS->Dest;
7535
7536 // FIXME: we checked that the size of A and B are both 1 in
7537 // simplifyDuplicateSwitchArms to make the Case list smaller to
7538 // improve performance. If we decide to support BasicBlocks with more
7539 // than just a single instruction, we need to check that A.size() ==
7540 // B.size() here, and we need to check more than just the BranchInsts
7541 // for equality.
7542
7543 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7544 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7545 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7546 "Only supporting unconditional branches for now");
7547 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7548 return false;
7549
7550 // Need to check that PHIs in successor have matching values
7551 BasicBlock *Succ = ABI->getSuccessor(0);
7552 for (PHINode &Phi : Succ->phis()) {
7553 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7554 if (PredIVs[A] != PredIVs[B])
7555 return false;
7556 }
7557
7558 return true;
7559 }
7560 };
7561 } // namespace llvm
7562
// Deduplicate switch arms that are single-branch blocks with identical
// destinations and identical incoming PHI values, redirecting duplicate case
// successors to one canonical block. Returns true if any arm was redirected.
7563 bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7564 DomTreeUpdater *DTU) {
7565 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7566 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7567 // an entire PHI at once after the loop, opposed to calling
7568 // getIncomingValueForBlock inside this loop, since each call to
7569 // getIncomingValueForBlock is O(|Preds|).
// NOTE(review): declaration lines 7570-7574 are missing (extraction
// artifact); they presumably declared BBToSuccessorIndexes, PhiPredIVs,
// Phis, Seen, and Cases used below — confirm against upstream.
7575 Cases.reserve(SI->getNumSuccessors());
7576
7577 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7578 BasicBlock *BB = SI->getSuccessor(I);
7579
7580 // FIXME: Support more than just a single BranchInst. One way we could do
7581 // this is by taking a hashing approach of all insts in BB.
7582 if (BB->size() != 1)
7583 continue;
7584
7585 // FIXME: This case needs some extra care because the terminators other than
7586 // SI need to be updated. For now, consider only backedges to the SI.
7587 if (BB->hasNPredecessorsOrMore(4) ||
7588 BB->getUniquePredecessor() != SI->getParent())
7589 continue;
7590
7591 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7592 // on other kinds of terminators. We decide to only support unconditional
7593 // branches for now for compile time reasons.
7594 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7595 if (!BI || BI->isConditional())
7596 continue;
7597
7598 if (Seen.insert(BB).second) {
7599 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7600 for (BasicBlock *Succ : BI->successors())
7601 for (PHINode &Phi : Succ->phis())
7602 Phis.insert(&Phi);
7603 // Add the successor only if not previously visited.
7604 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7605 }
7606
// A block can back several case values; remember every successor index.
7607 BBToSuccessorIndexes[BB].emplace_back(I);
7608 }
7609
7610 // Precompute a data structure to improve performance of isEqual for
7611 // SwitchSuccWrapper.
7612 PhiPredIVs.reserve(Phis.size());
7613 for (PHINode *Phi : Phis) {
7614 PhiPredIVs[Phi] =
7615 SmallDenseMap<BasicBlock *, Value *, 8>(Phi->getNumIncomingValues());
7616 for (auto &IV : Phi->incoming_values())
7617 PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()});
7618 }
7619
7620 // Build a set such that if the SwitchSuccWrapper exists in the set and
7621 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7622 // which is not in the set should be replaced with the one in the set. If the
7623 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7624 // other SwitchSuccWrappers can check against it in the same manner. We use
7625 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7626 // around information to isEquality, getHashValue, and when doing the
7627 // replacement with better performance.
// NOTE(review): lines 7628 and 7631 are missing (extraction artifact);
// presumably the ReplaceWith set and Updates vector declarations.
7629 ReplaceWith.reserve(Cases.size());
7630
7632 Updates.reserve(ReplaceWith.size());
7633 bool MadeChange = false;
7634 for (auto &SSW : Cases) {
7635 // SSW is a candidate for simplification. If we find a duplicate BB,
7636 // replace it.
7637 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7638 if (!Inserted) {
7639 // We know that SI's parent BB no longer dominates the old case successor
7640 // since we are making it dead.
7641 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7642 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7643 for (unsigned Idx : Successors)
7644 SI->setSuccessor(Idx, (*It)->Dest);
7645 MadeChange = true;
7646 }
7647 }
7648
7649 if (DTU)
7650 DTU->applyUpdates(Updates);
7651
7652 return MadeChange;
7653 }
7654
7655bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7656 BasicBlock *BB = SI->getParent();
7657
7658 if (isValueEqualityComparison(SI)) {
7659 // If we only have one predecessor, and if it is a branch on this value,
7660 // see if that predecessor totally determines the outcome of this switch.
7661 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7662 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7663 return requestResimplify();
7664
7665 Value *Cond = SI->getCondition();
7666 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7667 if (simplifySwitchOnSelect(SI, Select))
7668 return requestResimplify();
7669
7670 // If the block only contains the switch, see if we can fold the block
7671 // away into any preds.
7672 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7673 if (foldValueComparisonIntoPredecessors(SI, Builder))
7674 return requestResimplify();
7675 }
7676
7677 // Try to transform the switch into an icmp and a branch.
7678 // The conversion from switch to comparison may lose information on
7679 // impossible switch values, so disable it early in the pipeline.
7680 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7681 return requestResimplify();
7682
7683 // Remove unreachable cases.
7684 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7685 return requestResimplify();
7686
7687 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7688 return requestResimplify();
7689
7690 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7691 return requestResimplify();
7692
7693 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7694 return requestResimplify();
7695
7696 // The conversion from switch to lookup tables results in difficult-to-analyze
7697 // code and makes pruning branches much harder. This is a problem if the
7698 // switch expression itself can still be restricted as a result of inlining or
7699 // CVP. Therefore, only apply this transformation during late stages of the
7700 // optimisation pipeline.
7701 if (Options.ConvertSwitchToLookupTable &&
7702 switchToLookupTable(SI, Builder, DTU, DL, TTI))
7703 return requestResimplify();
7704
7705 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7706 return requestResimplify();
7707
7708 if (reduceSwitchRange(SI, Builder, DL, TTI))
7709 return requestResimplify();
7710
7711 if (HoistCommon &&
7712 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7713 return requestResimplify();
7714
7715 if (simplifyDuplicateSwitchArms(SI, DTU))
7716 return requestResimplify();
7717
7718 return false;
7719}
7720
// Simplify an `indirectbr`: drop redundant destinations, then degrade the
// terminator when few destinations remain, and fold indirectbr-on-select.
7721bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7722 BasicBlock *BB = IBI->getParent();
7723 bool Changed = false;
7724
7725 // Eliminate redundant destinations.
// NOTE(review): the declarations of `Succs` (set detecting duplicate
// destinations) and `RemovedSuccs` (edges deleted from the CFG) sit on
// lines elided from this extraction — confirm against upstream.
7728 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7729 BasicBlock *Dest = IBI->getDestination(i);
// A destination is removable if its address is no longer taken (it can
// never actually be jumped to) or it already appeared earlier in the list.
7730 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7731 if (!Dest->hasAddressTaken())
7732 RemovedSuccs.insert(Dest);
7733 Dest->removePredecessor(BB);
7734 IBI->removeDestination(i);
// Compensate for the in-place removal so index i is revisited.
7735 --i;
7736 --e;
7737 Changed = true;
7738 }
7739 }
7740
// Keep the dominator tree in sync with the CFG edges deleted above.
7741 if (DTU) {
7742 std::vector<DominatorTree::UpdateType> Updates;
7743 Updates.reserve(RemovedSuccs.size());
7744 for (auto *RemovedSucc : RemovedSuccs)
7745 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7746 DTU->applyUpdates(Updates);
7747 }
7748
7749 if (IBI->getNumDestinations() == 0) {
7750 // If the indirectbr has no successors, change it to unreachable.
7751 new UnreachableInst(IBI->getContext(), IBI->getIterator());
// NOTE(review): the erase of IBI appears on a line elided here — verify.
7753 return true;
7754 }
7755
7756 if (IBI->getNumDestinations() == 1) {
7757 // If the indirectbr has one successor, change it to a direct branch.
// NOTE(review): the branch creation / IBI erase lines are elided here.
7760 return true;
7761 }
7762
// If the target address is a select, split into two direct indirectbrs.
7763 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7764 if (simplifyIndirectBrOnSelect(IBI, SI))
7765 return requestResimplify();
7766 }
7767 return Changed;
7768}
7769
7770/// Given an block with only a single landing pad and a unconditional branch
7771/// try to find another basic block which this one can be merged with. This
7772/// handles cases where we have multiple invokes with unique landing pads, but
7773/// a shared handler.
7774///
7775/// We specifically choose to not worry about merging non-empty blocks
7776/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7777/// practice, the optimizer produces empty landing pad blocks quite frequently
7778/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7779/// sinking in this file)
7780///
7781/// This is primarily a code size optimization. We need to avoid performing
7782/// any transform which might inhibit optimization (such as our ability to
7783/// specialize a particular handler via tail commoning). We do this by not
7784/// merging any blocks which require us to introduce a phi. Since the same
7785/// values are flowing through both blocks, we don't lose any ability to
7786/// specialize. If anything, we make such specialization more likely.
7787///
7788/// TODO - This transformation could remove entries from a phi in the target
7789/// block when the inputs in the phi are the same for the two blocks being
7790/// merged. In some cases, this could result in removal of the PHI entirely.
// NOTE(review): the first line of this signature (taking the block's
// LandingPadInst `LPad` and terminating BranchInst `BI`) is elided from
// this extraction — confirm against upstream.
7792 BasicBlock *BB, DomTreeUpdater *DTU) {
7793 auto Succ = BB->getUniqueSuccessor();
7794 assert(Succ);
7795 // If there's a phi in the successor block, we'd likely have to introduce
7796 // a phi into the merged landing pad block.
7797 if (isa<PHINode>(*Succ->begin()))
7798 return false;
7799
// Look for another predecessor of Succ that is an identical landing pad
// block: same landingpad instruction, same branch, nothing else but debug.
7800 for (BasicBlock *OtherPred : predecessors(Succ)) {
7801 if (BB == OtherPred)
7802 continue;
7803 BasicBlock::iterator I = OtherPred->begin();
7804 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7805 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7806 continue;
// Skip over debug intrinsics to reach the candidate terminator.
7807 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7808 ;
7809 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7810 if (!BI2 || !BI2->isIdenticalTo(BI))
7811 continue;
7812
7813 std::vector<DominatorTree::UpdateType> Updates;
7814
7815 // We've found an identical block. Update our predecessors to take that
7816 // path instead and make ourselves dead.
// NOTE(review): the declaration of `UniquePreds` (deduplicated predecessor
// set of BB) is on a line elided from this extraction — confirm upstream.
7818 for (BasicBlock *Pred : UniquePreds) {
// Every predecessor of a landing pad block must be an invoke unwinding
// to BB; retarget its unwind edge at the identical block.
7819 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7820 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7821 "unexpected successor");
7822 II->setUnwindDest(OtherPred);
7823 if (DTU) {
7824 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7825 Updates.push_back({DominatorTree::Delete, Pred, BB});
7826 }
7827 }
7828
7829 // The debug info in OtherPred doesn't cover the merged control flow that
7830 // used to go through BB. We need to delete it or update it.
7831 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7832 if (isa<DbgInfoIntrinsic>(Inst))
7833 Inst.eraseFromParent();
7834
// NOTE(review): the declaration of `UniqueSuccs` is on a line elided
// from this extraction — confirm upstream.
7836 for (BasicBlock *Succ : UniqueSuccs) {
7837 Succ->removePredecessor(BB);
7838 if (DTU)
7839 Updates.push_back({DominatorTree::Delete, BB, Succ});
7840 }
7841
// BB is now unreachable from its former predecessors; replace its branch
// with unreachable so later cleanup can delete the block.
7842 IRBuilder<> Builder(BI);
7843 Builder.CreateUnreachable();
7844 BI->eraseFromParent();
7845 if (DTU)
7846 DTU->applyUpdates(Updates);
7847 return true;
7848 }
7849 return false;
7850}
7851
7852bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7853 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7854 : simplifyCondBranch(Branch, Builder);
7855}
7856
// Simplify a block ending in an unconditional branch: fold empty blocks into
// successors, handle icmp-only and landingpad-only blocks, and fold the
// branch into a common destination.
7857bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7858 IRBuilder<> &Builder) {
7859 BasicBlock *BB = BI->getParent();
7860 BasicBlock *Succ = BI->getSuccessor(0);
7861
7862 // If the Terminator is the only non-phi instruction, simplify the block.
7863 // If LoopHeader is provided, check if the block or its successor is a loop
7864 // header. (This is for early invocations before loop simplify and
7865 // vectorization to keep canonical loop forms for nested loops. These blocks
7866 // can be eliminated when the pass is invoked later in the back-end.)
7867 // Note that if BB has only one predecessor then we do not introduce new
7868 // backedge, so we can eliminate BB.
7869 bool NeedCanonicalLoop =
7870 Options.NeedCanonicalLoop &&
7871 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7872 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
// NOTE(review): the declaration of iterator `I` (presumably the first
// non-PHI, non-debug instruction of BB) is on a line elided from this
// extraction — confirm against upstream.
7874 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7875 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7876 return true;
7877
7878 // If the only instruction in the block is a seteq/setne comparison against a
7879 // constant, try to simplify the block.
7880 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7881 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
// Skip debug intrinsics; the icmp must be followed only by the branch.
7882 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7883 ;
7884 if (I->isTerminator() &&
7885 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7886 return true;
7887 }
7888
7889 // See if we can merge an empty landing pad block with another which is
7890 // equivalent.
7891 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7892 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7893 ;
7894 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7895 return true;
7896 }
7897
7898 // If this basic block is ONLY a compare and a branch, and if a predecessor
7899 // branches to us and our successor, fold the comparison into the
7900 // predecessor and use logical operations to update the incoming value
7901 // for PHI nodes in common successor.
7902 if (Options.SpeculateBlocks &&
7903 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7904 Options.BonusInstThreshold))
7905 return requestResimplify();
7906 return false;
7907}
7908
// NOTE(review): the signature line (per the index, this is
// `static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB)`)
// is elided from this extraction — confirm against upstream.
// Returns the unique block that is the single predecessor of every
// predecessor of BB, or nullptr if no such common "grandparent" exists.
7910 BasicBlock *PredPred = nullptr;
7911 for (auto *P : predecessors(BB)) {
7912 BasicBlock *PPred = P->getSinglePredecessor();
// Bail out if some predecessor has multiple predecessors, or if two
// predecessors disagree on their single predecessor.
7913 if (!PPred || (PredPred && PredPred != PPred))
7914 return nullptr;
7915 PredPred = PPred;
7916 }
7917 return PredPred;
7918}
7919
7920/// Fold the following pattern:
7921/// bb0:
7922/// br i1 %cond1, label %bb1, label %bb2
7923/// bb1:
7924/// br i1 %cond2, label %bb3, label %bb4
7925/// bb2:
7926/// br i1 %cond2, label %bb4, label %bb3
7927/// bb3:
7928/// ...
7929/// bb4:
7930/// ...
7931/// into
7932/// bb0:
7933/// %cond = xor i1 %cond1, %cond2
7934/// br i1 %cond, label %bb4, label %bb3
7935/// bb3:
7936/// ...
7937/// bb4:
7938/// ...
7939/// NOTE: %cond2 always dominates the terminator of bb0.
// NOTE(review): the signature line (taking `BranchInst *BI` and
// `DomTreeUpdater *DTU` per the uses below) is elided from this
// extraction — confirm against upstream.
7941 BasicBlock *BB = BI->getParent();
7942 BasicBlock *BB1 = BI->getSuccessor(0);
7943 BasicBlock *BB2 = BI->getSuccessor(1);
// A "simple" successor contains only a conditional branch, does not loop
// back to itself or BB, and neither of its targets starts with a PHI
// (which would need incoming-value surgery after rewiring).
7944 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7945 if (Succ == BB)
7946 return false;
7947 if (&Succ->front() != Succ->getTerminator())
7948 return false;
7949 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
7950 if (!SuccBI || !SuccBI->isConditional())
7951 return false;
7952 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
7953 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
7954 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7955 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
7956 };
7957 BranchInst *BB1BI, *BB2BI;
7958 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7959 return false;
7960
// Both inner branches must test the same condition with swapped targets —
// the pattern documented above this function.
7961 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7962 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
7963 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
7964 return false;
7965
7966 BasicBlock *BB3 = BB1BI->getSuccessor(0);
7967 BasicBlock *BB4 = BB1BI->getSuccessor(1);
7968 IRBuilder<> Builder(BI);
// Replace the two-level branch with a single branch on cond1 ^ cond2.
7969 BI->setCondition(
7970 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
7971 BB1->removePredecessor(BB);
7972 BI->setSuccessor(0, BB4);
7973 BB2->removePredecessor(BB);
7974 BI->setSuccessor(1, BB3);
7975 if (DTU) {
// NOTE(review): the declaration of `Updates` is on a line elided from
// this extraction — confirm against upstream.
7977 Updates.push_back({DominatorTree::Delete, BB, BB1});
7978 Updates.push_back({DominatorTree::Insert, BB, BB4});
7979 Updates.push_back({DominatorTree::Delete, BB, BB2});
7980 Updates.push_back({DominatorTree::Insert, BB, BB3});
7981
7982 DTU->applyUpdates(Updates);
7983 }
// Recompute branch weights for the merged branch. If any of the three
// branches carried profile metadata, emit combined weights; branches
// without metadata contribute a neutral 1:1 ratio.
7984 bool HasWeight = false;
7985 uint64_t BBTWeight, BBFWeight;
7986 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
7987 HasWeight = true;
7988 else
7989 BBTWeight = BBFWeight = 1;
7990 uint64_t BB1TWeight, BB1FWeight;
7991 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
7992 HasWeight = true;
7993 else
7994 BB1TWeight = BB1FWeight = 1;
7995 uint64_t BB2TWeight, BB2FWeight;
7996 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
7997 HasWeight = true;
7998 else
7999 BB2TWeight = BB2FWeight = 1;
8000 if (HasWeight) {
// P(BB4) = P(bb1)*P(bb1->false) + P(bb2)*P(bb2->true), and symmetrically
// for BB3; fitWeights rescales so the sums stay within 32 bits.
8001 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8002 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8003 fitWeights(Weights);
8004 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8005 }
8006 return true;
8007}
8008
// Simplify a block ending in a conditional branch: equality-comparison
// folding, icmp-chain-to-switch, dominating-condition folding, hoisting and
// speculation of successor code, and various branch-merging patterns.
8009bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8010 assert(
8011 !isa<ConstantInt>(BI->getCondition()) &&
8012 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8013 "Tautological conditional branch should have been eliminated already.");
8014
8015 BasicBlock *BB = BI->getParent();
8016 if (!Options.SimplifyCondBranch ||
8017 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8018 return false;
8019
8020 // Conditional branch
8021 if (isValueEqualityComparison(BI)) {
8022 // If we only have one predecessor, and if it is a branch on this value,
8023 // see if that predecessor totally determines the outcome of this
8024 // switch.
8025 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8026 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8027 return requestResimplify();
8028
8029 // This block must be empty, except for the setcond inst, if it exists.
8030 // Ignore dbg and pseudo intrinsics.
8031 auto I = BB->instructionsWithoutDebug(true).begin();
8032 if (&*I == BI) {
8033 if (foldValueComparisonIntoPredecessors(BI, Builder))
8034 return requestResimplify();
8035 } else if (&*I == cast<Instruction>(BI->getCondition())) {
// The condition is the sole non-branch instruction; the branch must
// come immediately after it.
8036 ++I;
8037 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8038 return requestResimplify();
8039 }
8040 }
8041
8042 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8043 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8044 return true;
8045
8046 // If this basic block has dominating predecessor blocks and the dominating
8047 // blocks' conditions imply BI's condition, we know the direction of BI.
8048 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8049 if (Imp) {
8050 // Turn this into a branch on constant.
8051 auto *OldCond = BI->getCondition();
8052 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8053 : ConstantInt::getFalse(BB->getContext());
8054 BI->setCondition(TorF);
// NOTE(review): the cleanup of the now-dead `OldCond` appears on a line
// elided from this extraction — confirm against upstream.
8056 return requestResimplify();
8057 }
8058
8059 // If this basic block is ONLY a compare and a branch, and if a predecessor
8060 // branches to us and one of our successors, fold the comparison into the
8061 // predecessor and use logical operations to pick the right destination.
8062 if (Options.SpeculateBlocks &&
8063 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8064 Options.BonusInstThreshold))
8065 return requestResimplify();
8066
8067 // We have a conditional branch to two blocks that are only reachable
8068 // from BI. We know that the condbr dominates the two blocks, so see if
8069 // there is any identical code in the "then" and "else" blocks. If so, we
8070 // can hoist it up to the branching block.
8071 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8072 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8073 if (HoistCommon &&
8074 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8075 return requestResimplify();
8076
// NOTE(review): the guard opening this branch (checking the
// cond-faulting hoist option) begins on a line elided from this
// extraction — confirm against upstream.
8078 Options.HoistLoadsStoresWithCondFaulting &&
8079 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8080 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
// Collect loads/stores from both successors that are safe and cheap
// to execute conditionally; give up on any other instruction.
8081 auto CanSpeculateConditionalLoadsStores = [&]() {
8082 for (auto *Succ : successors(BB)) {
8083 for (Instruction &I : *Succ) {
8084 if (I.isTerminator()) {
8085 if (I.getNumSuccessors() > 1)
8086 return false;
8087 continue;
8088 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8089 SpeculatedConditionalLoadsStores.size() ==
// NOTE(review): the size limit compared against here is on an
// elided line — confirm against upstream.
8091 return false;
8092 }
8093 SpeculatedConditionalLoadsStores.push_back(&I);
8094 }
8095 }
8096 return !SpeculatedConditionalLoadsStores.empty();
8097 };
8098
8099 if (CanSpeculateConditionalLoadsStores()) {
8100 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8101 std::nullopt);
8102 return requestResimplify();
8103 }
8104 }
8105 } else {
8106 // If Successor #1 has multiple preds, we may be able to conditionally
8107 // execute Successor #0 if it branches to Successor #1.
8108 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8109 if (Succ0TI->getNumSuccessors() == 1 &&
8110 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8111 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8112 return requestResimplify();
8113 }
8114 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8115 // If Successor #0 has multiple preds, we may be able to conditionally
8116 // execute Successor #1 if it branches to Successor #0.
8117 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8118 if (Succ1TI->getNumSuccessors() == 1 &&
8119 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8120 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8121 return requestResimplify();
8122 }
8123
8124 // If this is a branch on something for which we know the constant value in
8125 // predecessors (e.g. a phi node in the current block), thread control
8126 // through this block.
// NOTE(review): the condition guarding this return (the fold-on-known-
// value call) is on a line elided from this extraction — confirm upstream.
8128 return requestResimplify();
8129
8130 // Scan predecessor blocks for conditional branches.
8131 for (BasicBlock *Pred : predecessors(BB))
8132 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8133 if (PBI != BI && PBI->isConditional())
8134 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8135 return requestResimplify();
8136
8137 // Look for diamond patterns.
8138 if (MergeCondStores)
// NOTE(review): the binding of `PrevBB` (presumably via
// allPredecessorsComeFromSameSource) is on an elided line — confirm.
8140 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8141 if (PBI != BI && PBI->isConditional())
8142 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8143 return requestResimplify();
8144
8145 // Look for nested conditional branches.
8146 if (mergeNestedCondBranch(BI, DTU))
8147 return requestResimplify();
8148
8149 return false;
8150}
8151
8152/// Check if passing a value to an instruction will cause undefined behavior.
// Returns true if passing the constant V (null or undef) into instruction I
// provably triggers immediate undefined behavior via one of I's users in the
// same block. PtrValueMayBeModified tracks whether a null base pointer may
// have been offset by a GEP on the way.
8153static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8154 Constant *C = dyn_cast<Constant>(V);
8155 if (!C)
8156 return false;
8157
8158 if (I->use_empty())
8159 return false;
8160
8161 if (C->isNullValue() || isa<UndefValue>(C)) {
8162 // Only look at the first use we can handle, avoid hurting compile time with
8163 // long uselists
8164 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8165 auto *Use = cast<Instruction>(U.getUser());
8166 // Change this list when we want to add new instructions.
8167 switch (Use->getOpcode()) {
8168 default:
8169 return false;
8170 case Instruction::GetElementPtr:
8171 case Instruction::Ret:
8172 case Instruction::BitCast:
8173 case Instruction::Load:
8174 case Instruction::Store:
8175 case Instruction::Call:
8176 case Instruction::CallBr:
8177 case Instruction::Invoke:
8178 case Instruction::UDiv:
8179 case Instruction::URem:
8180 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8181 // implemented to avoid code complexity as it is unclear how useful such
8182 // logic is.
8183 case Instruction::SDiv:
8184 case Instruction::SRem:
8185 return true;
8186 }
8187 });
8188 if (FindUse == I->use_end())
8189 return false;
8190 auto &Use = *FindUse;
8191 auto *User = cast<Instruction>(Use.getUser());
8192 // Bail out if User is not in the same BB as I or User == I or User comes
8193 // before I in the block. The latter two can be the case if User is a
8194 // PHI node.
8195 if (User->getParent() != I->getParent() || User == I ||
8196 User->comesBefore(I))
8197 return false;
8198
8199 // Now make sure that there are no instructions in between that can alter
8200 // control flow (eg. calls)
8201 auto InstrRange =
8202 make_range(std::next(I->getIterator()), User->getIterator());
8203 if (any_of(InstrRange, [](Instruction &I) {
// NOTE(review): the predicate body (presumably checking guaranteed
// transfer of execution to the successor instruction) is on a line
// elided from this extraction — confirm against upstream.
8205 }))
8206 return false;
8207
8208 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8209 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User))
8210 if (GEP->getPointerOperand() == I) {
8211 // The current base address is null, there are four cases to consider:
8212 // getelementptr (TY, null, 0) -> null
8213 // getelementptr (TY, null, not zero) -> may be modified
8214 // getelementptr inbounds (TY, null, 0) -> null
8215 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8216 // undefined?
8217 if (!GEP->hasAllZeroIndices() &&
8218 (!GEP->isInBounds() ||
8219 NullPointerIsDefined(GEP->getFunction(),
8220 GEP->getPointerAddressSpace())))
8221 PtrValueMayBeModified = true;
// Recurse through the GEP: its result is still null/offset-from-null.
8222 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8223 }
8224
8225 // Look through return.
8226 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8227 bool HasNoUndefAttr =
8228 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8229 // Return undefined to a noundef return value is undefined.
8230 if (isa<UndefValue>(C) && HasNoUndefAttr)
8231 return true;
8232 // Return null to a nonnull+noundef return value is undefined.
8233 if (C->isNullValue() && HasNoUndefAttr &&
8234 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8235 return !PtrValueMayBeModified;
8236 }
8237 }
8238
8239 // Load from null is undefined.
8240 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8241 if (!LI->isVolatile())
8242 return !NullPointerIsDefined(LI->getFunction(),
8243 LI->getPointerAddressSpace());
8244
8245 // Store to null is undefined.
8246 if (StoreInst *SI = dyn_cast<StoreInst>(User))
8247 if (!SI->isVolatile())
8248 return (!NullPointerIsDefined(SI->getFunction(),
8249 SI->getPointerAddressSpace())) &&
8250 SI->getPointerOperand() == I;
8251
8252 // llvm.assume(false/undef) always triggers immediate UB.
8253 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8254 // Ignore assume operand bundles.
8255 if (I == Assume->getArgOperand(0))
8256 return true;
8257 }
8258
8259 if (auto *CB = dyn_cast<CallBase>(User)) {
8260 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8261 return false;
8262 // A call to null is undefined.
8263 if (CB->getCalledOperand() == I)
8264 return true;
8265
8266 if (CB->isArgOperand(&Use)) {
8267 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8268 // Passing null to a nonnnull+noundef argument is undefined.
8269 if (C->isNullValue() && CB->isPassingUndefUB(ArgIdx) &&
8270 CB->paramHasAttr(ArgIdx, Attribute::NonNull))
8271 return !PtrValueMayBeModified;
8272 // Passing undef to a noundef argument is undefined.
8273 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8274 return true;
8275 }
8276 }
8277 // Div/Rem by zero is immediate UB
8278 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8279 return true;
8280 }
8281 return false;
8282}
8283
8284/// If BB has an incoming value that will always trigger undefined behavior
8285/// (eg. null pointer dereference), remove the branch leading here.
// NOTE(review): the first line of this signature (per the comment above,
// taking the `BasicBlock *BB` whose PHIs are inspected) is elided from
// this extraction — confirm against upstream.
8287 DomTreeUpdater *DTU,
8288 AssumptionCache *AC) {
8289 for (PHINode &PHI : BB->phis())
8290 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8291 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8292 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8293 Instruction *T = Predecessor->getTerminator();
8294 IRBuilder<> Builder(T);
8295 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8296 BB->removePredecessor(Predecessor);
8297 // Turn unconditional branches into unreachables and remove the dead
8298 // destination from conditional branches.
8299 if (BI->isUnconditional())
8300 Builder.CreateUnreachable();
8301 else {
8302 // Preserve guarding condition in assume, because it might not be
8303 // inferrable from any dominating condition.
8304 Value *Cond = BI->getCondition();
8305 CallInst *Assumption;
// The edge into BB is dead, so the condition is known to steer away
// from BB; record that fact for later passes via llvm.assume.
8306 if (BI->getSuccessor(0) == BB)
8307 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8308 else
8309 Assumption = Builder.CreateAssumption(Cond);
8310 if (AC)
8311 AC->registerAssumption(cast<AssumeInst>(Assumption));
8312 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8313 : BI->getSuccessor(0));
8314 }
8315 BI->eraseFromParent();
8316 if (DTU)
8317 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8318 return true;
8319 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8320 // Redirect all branches leading to UB into
8321 // a newly created unreachable block.
8322 BasicBlock *Unreachable = BasicBlock::Create(
8323 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8324 Builder.SetInsertPoint(Unreachable);
8325 // The new block contains only one instruction: Unreachable
8326 Builder.CreateUnreachable();
8327 for (const auto &Case : SI->cases())
8328 if (Case.getCaseSuccessor() == BB) {
8329 BB->removePredecessor(Predecessor);
8330 Case.setSuccessor(Unreachable);
8331 }
8332 if (SI->getDefaultDest() == BB) {
8333 BB->removePredecessor(Predecessor);
8334 SI->setDefaultDest(Unreachable);
8335 }
8336
8337 if (DTU)
8338 DTU->applyUpdates(
8339 { { DominatorTree::Insert, Predecessor, Unreachable },
8340 { DominatorTree::Delete, Predecessor, BB } });
8341 return true;
8342 }
8343 }
8344
8345 return false;
8346}
8347
// Run one round of CFG simplification on BB: delete dead/self-looping
// blocks, constant-fold the terminator, deduplicate PHIs, merge into the
// predecessor, sink common code, fold two-entry PHIs, and finally dispatch
// on the terminator kind. Returns true if anything changed.
8348bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8349 bool Changed = false;
8350
8351 assert(BB && BB->getParent() && "Block not embedded in function!");
8352 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8353
8354 // Remove basic blocks that have no predecessors (except the entry block)...
8355 // or that just have themself as a predecessor. These are unreachable.
8356 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8357 BB->getSinglePredecessor() == BB) {
8358 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8359 DeleteDeadBlock(BB, DTU);
8360 return true;
8361 }
8362
8363 // Check to see if we can constant propagate this terminator instruction
8364 // away...
8365 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8366 /*TLI=*/nullptr, DTU);
8367
8368 // Check for and eliminate duplicate PHI nodes in this block.
8369 Changed |= EliminateDuplicatePHINodes(BB);
8370
8371 // Check for and remove branches that will always cause undefined behavior.
// NOTE(review): the condition guarding this return (presumably the call to
// removeUndefIntroducingPredecessor) is on a line elided from this
// extraction — confirm against upstream.
8373 return requestResimplify();
8374
8375 // Merge basic blocks into their predecessor if there is only one distinct
8376 // pred, and if there is only one distinct successor of the predecessor, and
8377 // if there are no PHI nodes.
8378 if (MergeBlockIntoPredecessor(BB, DTU))
8379 return true;
8380
8381 if (SinkCommon && Options.SinkCommonInsts)
8382 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8383 mergeCompatibleInvokes(BB, DTU)) {
8384 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8385 // so we may now how duplicate PHI's.
8386 // Let's rerun EliminateDuplicatePHINodes() first,
8387 // before foldTwoEntryPHINode() potentially converts them into select's,
8388 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8389 return true;
8390 }
8391
8392 IRBuilder<> Builder(BB);
8393
8394 if (Options.SpeculateBlocks &&
8395 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8396 // If there is a trivial two-entry PHI node in this basic block, and we can
8397 // eliminate it, do so now.
8398 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8399 if (PN->getNumIncomingValues() == 2)
8400 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8401 Options.SpeculateUnpredictables))
8402 return true;
8403 }
8404
// NOTE(review): the binding of `Terminator` (the block's terminator
// instruction) is on a line elided from this extraction — confirm.
8406 Builder.SetInsertPoint(Terminator);
// Dispatch to the terminator-specific simplification routine.
8407 switch (Terminator->getOpcode()) {
8408 case Instruction::Br:
8409 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8410 break;
8411 case Instruction::Resume:
8412 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8413 break;
8414 case Instruction::CleanupRet:
8415 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8416 break;
8417 case Instruction::Switch:
8418 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8419 break;
8420 case Instruction::Unreachable:
8421 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8422 break;
8423 case Instruction::IndirectBr:
8424 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8425 break;
8426 }
8427
8428 return Changed;
8429}
8430
8431bool SimplifyCFGOpt::run(BasicBlock *BB) {
8432 bool Changed = false;
8433
8434 // Repeated simplify BB as long as resimplification is requested.
8435 do {
8436 Resimplify = false;
8437
8438 // Perform one round of simplifcation. Resimplify flag will be set if
8439 // another iteration is requested.
8440 Changed |= simplifyOnce(BB);
8441 } while (Resimplify);
8442
8443 return Changed;
8444}
8445
// NOTE(review): the first lines of this signature (the free-function
// `llvm::simplifyCFG` entry point taking the block, TTI, DTU and options)
// are elided from this extraction — confirm against upstream.
// Thin wrapper: construct a SimplifyCFGOpt with the block's data layout and
// run it to a fixed point.
8448 ArrayRef<WeakVH> LoopHeaders) {
8449 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8450 Options)
8451 .run(BB);
8452}
#define Fail
#define Success
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static cl::opt< bool > HoistLoadsStoresWithCondFaulting("simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads/stores if the target supports " "conditional faulting"))
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights, bool IsExpected)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert)
If the target supports conditional faulting, we look for the following pattern:
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1649
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition: APInt.h:1554
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1915
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:177
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:382
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:530
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:437
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:671
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:381
const Instruction & front() const
Definition: BasicBlock.h:484
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:213
InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:398
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:503
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:519
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:481
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:331
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:489
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:511
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:739
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:220
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
size_t size() const
Definition: BasicBlock.h:482
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:699
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:507
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:644
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:538
The address of a basic block.
Definition: Constants.h:893
BasicBlock * getBasicBlock() const
Definition: Constants.h:924
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
Definition: InstrTypes.h:1568
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2626
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isNegative() const
Definition: Constants.h:203
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:258
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:187
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:873
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:151
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition: DenseMap.h:103
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
const BasicBlock & getEntryBlock() const
Definition: Function.h:821
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:716
iterator begin()
Definition: Function.h:865
size_t size() const
Definition: Function.h:870
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
bool hasPostDomTree() const
Returns true if it holds a PostDomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2286
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2051
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1306
Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1058
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:521
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2574
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1480
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:239
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1882
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:252
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1187
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2270
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2152
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1164
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1798
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1518
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1811
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1370
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2142
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1158
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2225
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1562
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1694
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1404
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:104
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:511
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:68
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:426
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:313
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
Definition: Instruction.h:942
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1750
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:949
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:508
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:42
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:176
static unsigned getPointerOperandIndex()
Definition: Instructions.h:257
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1073
Helper class to manipulate !mmra metadata nodes.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:704
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Align getAlign() const
Definition: Instructions.h:333
bool isSimple() const
Definition: Instructions.h:370
Value * getValueOperand()
Definition: Instructions.h:378
bool isUnordered() const
Definition: Instructions.h:372
static unsigned getPointerOperandIndex()
Definition: Instructions.h:383
Value * getPointerOperand()
Definition: Instructions.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:234
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
void set(Value *Val)
Definition: Value.h:892
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:64
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
op_range operands()
Definition: User.h:288
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:241
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
static constexpr uint64_t MaximumAlignment
Definition: Value.h:817
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1094
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition: DenseSet.h:90
size_type size() const
Definition: DenseSet.h:81
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:507
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:864
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
Definition: DebugInfo.cpp:1867
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:240
void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
Definition: DebugInfo.cpp:1881
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:854
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1732
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:543
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:136
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2204
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition: ValueMapper.h:299
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2055
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1785
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2107
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:341
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1153
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:96
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:78
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1187
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:277
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1439
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataSetTy *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
Definition: ValueMapper.h:288
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3233
auto succ_size(const MachineBasicBlock *BB)
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3436
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3703
@ And
Bitwise or logical AND of integers.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:261
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4206
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2014
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:237
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1624
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:590
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2067
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1521
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:383
void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
BasicBlock * Dest
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254