LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <optional>
88#include <set>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "simplifycfg"
97
99 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
100
101 cl::desc(
102 "Temporary development switch used to gradually uplift SimplifyCFG "
103 "into preserving DomTree,"));
104
105// Chosen as 2 so as to be cheap, but still to have enough power to fold
106// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
107// To catch this, we need to fold a compare and a select, hence '2' being the
108// minimum reasonable default.
110 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
111 cl::desc(
112 "Control the amount of phi node folding to perform (default = 2)"));
113
115 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
116 cl::desc("Control the maximal total instruction cost that we are willing "
117 "to speculatively execute to fold a 2-entry PHI node into a "
118 "select (default = 4)"));
119
120static cl::opt<bool>
121 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
122 cl::desc("Hoist common instructions up to the parent block"));
123
125 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
126 cl::desc("Hoist loads if the target supports conditional faulting"));
127
129 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
130 cl::desc("Hoist stores if the target supports conditional faulting"));
131
133 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
134 cl::desc("Control the maximal conditional load/store that we are willing "
135 "to speculatively execute to eliminate conditional branch "
136 "(default = 6)"));
137
139 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
140 cl::init(20),
141 cl::desc("Allow reordering across at most this many "
142 "instructions when hoisting"));
143
144static cl::opt<bool>
145 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
146 cl::desc("Sink common instructions down to the end block"));
147
149 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
150 cl::desc("Hoist conditional stores if an unconditional store precedes"));
151
153 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
154 cl::desc("Hoist conditional stores even if an unconditional store does not "
155 "precede - hoist multiple conditional stores into a single "
156 "predicated store"));
157
159 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
160 cl::desc("When merging conditional stores, do so even if the resultant "
161 "basic blocks are unlikely to be if-converted as a result"));
162
164 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
165 cl::desc("Allow exactly one expensive instruction to be speculatively "
166 "executed"));
167
169 "max-speculation-depth", cl::Hidden, cl::init(10),
170 cl::desc("Limit maximum recursion depth when calculating costs of "
171 "speculatively executed instructions"));
172
173static cl::opt<int>
174 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
175 cl::init(10),
176 cl::desc("Max size of a block which is still considered "
177 "small enough to thread through"));
178
179// Two is chosen to allow one negation and a logical combine.
181 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
182 cl::init(2),
183 cl::desc("Maximum cost of combining conditions when "
184 "folding branches"));
185
187 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
188 cl::init(2),
189 cl::desc("Multiplier to apply to threshold when determining whether or not "
190 "to fold branch to common destination when vector operations are "
191 "present"));
192
194 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
195 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
196
198 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
199 cl::desc("Limit cases to analyze when converting a switch to select"));
200
202 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
203 cl::desc("Limit number of blocks a define in a threaded block is allowed "
204 "to be live in"));
205
207
208STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
209STATISTIC(NumLinearMaps,
210 "Number of switch instructions turned into linear mapping");
211STATISTIC(NumLookupTables,
212 "Number of switch instructions turned into lookup tables");
214 NumLookupTablesHoles,
215 "Number of switch instructions turned into lookup tables (holes checked)");
216STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
217STATISTIC(NumFoldValueComparisonIntoPredecessors,
218 "Number of value comparisons folded into predecessor basic blocks");
219STATISTIC(NumFoldBranchToCommonDest,
220 "Number of branches folded into predecessor basic block");
222 NumHoistCommonCode,
223 "Number of common instruction 'blocks' hoisted up to the begin block");
224STATISTIC(NumHoistCommonInstrs,
225 "Number of common instructions hoisted up to the begin block");
226STATISTIC(NumSinkCommonCode,
227 "Number of common instruction 'blocks' sunk down to the end block");
228STATISTIC(NumSinkCommonInstrs,
229 "Number of common instructions sunk down to the end block");
230STATISTIC(NumSpeculations, "Number of speculative executed instructions");
231STATISTIC(NumInvokes,
232 "Number of invokes with empty resume blocks simplified into calls");
233STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
234STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
235
236namespace {
237
238// The first field contains the value that the switch produces when a certain
239// case group is selected, and the second field is a vector containing the
240// cases composing the case group.
241using SwitchCaseResultVectorTy =
243
244// The first field contains the phi node that generates a result of the switch
245// and the second field contains the value generated for a certain case in the
246// switch for that PHI.
247using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
248
249/// ValueEqualityComparisonCase - Represents a case of a switch.
250struct ValueEqualityComparisonCase {
252 BasicBlock *Dest;
253
254 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
255 : Value(Value), Dest(Dest) {}
256
257 bool operator<(ValueEqualityComparisonCase RHS) const {
258 // Comparing pointers is ok as we only rely on the order for uniquing.
259 return Value < RHS.Value;
260 }
261
262 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
263};
264
265class SimplifyCFGOpt {
266 const TargetTransformInfo &TTI;
267 DomTreeUpdater *DTU;
268 const DataLayout &DL;
269 ArrayRef<WeakVH> LoopHeaders;
270 const SimplifyCFGOptions &Options;
271 bool Resimplify;
272
273 Value *isValueEqualityComparison(Instruction *TI);
274 BasicBlock *getValueEqualityComparisonCases(
275 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
276 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
277 BasicBlock *Pred,
278 IRBuilder<> &Builder);
279 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
280 Instruction *PTI,
281 IRBuilder<> &Builder);
282 bool foldValueComparisonIntoPredecessors(Instruction *TI,
283 IRBuilder<> &Builder);
284
285 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
286 bool simplifySingleResume(ResumeInst *RI);
287 bool simplifyCommonResume(ResumeInst *RI);
288 bool simplifyCleanupReturn(CleanupReturnInst *RI);
289 bool simplifyUnreachable(UnreachableInst *UI);
290 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
291 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
292 bool simplifyIndirectBr(IndirectBrInst *IBI);
293 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
294 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
295 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
296 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
297
298 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
299 IRBuilder<> &Builder);
300
301 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
302 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
303 Instruction *TI, Instruction *I1,
304 SmallVectorImpl<Instruction *> &OtherSuccTIs);
305 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
306 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
307 BasicBlock *TrueBB, BasicBlock *FalseBB,
308 uint32_t TrueWeight, uint32_t FalseWeight);
309 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
310 const DataLayout &DL);
311 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
312 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
313 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
314
315public:
316 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
317 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
318 const SimplifyCFGOptions &Opts)
319 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
320 assert((!DTU || !DTU->hasPostDomTree()) &&
321 "SimplifyCFG is not yet capable of maintaining validity of a "
322 "PostDomTree, so don't ask for it.");
323 }
324
325 bool simplifyOnce(BasicBlock *BB);
326 bool run(BasicBlock *BB);
327
328 // Helper to set Resimplify and return change indication.
329 bool requestResimplify() {
330 Resimplify = true;
331 return true;
332 }
333};
334
335// we synthesize a || b as select a, true, b
336// we synthesize a && b as select a, b, false
337// this function determines if SI is playing one of those roles.
338[[maybe_unused]] bool
339isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
340 return ((isa<ConstantInt>(SI->getTrueValue()) &&
341 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
342 (isa<ConstantInt>(SI->getFalseValue()) &&
343 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
344}
345
346} // end anonymous namespace
347
348/// Return true if all the PHI nodes in the basic block \p BB
349/// receive compatible (identical) incoming values when coming from
350/// all of the predecessor blocks that are specified in \p IncomingBlocks.
351///
352/// Note that if the values aren't exactly identical, but \p EquivalenceSet
353/// is provided, and *both* of the values are present in the set,
354/// then they are considered equal.
356 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
357 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
358 assert(IncomingBlocks.size() == 2 &&
359 "Only for a pair of incoming blocks at the time!");
360
361 // FIXME: it is okay if one of the incoming values is an `undef` value,
362 // iff the other incoming value is guaranteed to be a non-poison value.
363 // FIXME: it is okay if one of the incoming values is a `poison` value.
364 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
365 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
366 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
367 if (IV0 == IV1)
368 return true;
369 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
370 EquivalenceSet->contains(IV1))
371 return true;
372 return false;
373 });
374}
375
376/// Return true if it is safe to merge these two
377/// terminator instructions together.
378static bool
380 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
381 if (SI1 == SI2)
382 return false; // Can't merge with self!
383
384 // It is not safe to merge these two switch instructions if they have a common
385 // successor, and if that successor has a PHI node, and if *that* PHI node has
386 // conflicting incoming values from the two switch blocks.
387 BasicBlock *SI1BB = SI1->getParent();
388 BasicBlock *SI2BB = SI2->getParent();
389
391 bool Fail = false;
392 for (BasicBlock *Succ : successors(SI2BB)) {
393 if (!SI1Succs.count(Succ))
394 continue;
395 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
396 continue;
397 Fail = true;
398 if (FailBlocks)
399 FailBlocks->insert(Succ);
400 else
401 break;
402 }
403
404 return !Fail;
405}
406
407/// Update PHI nodes in Succ to indicate that there will now be entries in it
408/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
409/// will be the same as those coming in from ExistPred, an existing predecessor
410/// of Succ.
411static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
412 BasicBlock *ExistPred,
413 MemorySSAUpdater *MSSAU = nullptr) {
414 for (PHINode &PN : Succ->phis())
415 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
416 if (MSSAU)
417 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
418 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
419}
420
421/// Compute an abstract "cost" of speculating the given instruction,
422/// which is assumed to be safe to speculate. TCC_Free means cheap,
423/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
424/// expensive.
426 const TargetTransformInfo &TTI) {
427 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
428}
429
430/// If we have a merge point of an "if condition" as accepted above,
431/// return true if the specified value dominates the block. We don't handle
432/// the true generality of domination here, just a special case which works
433/// well enough for us.
434///
435/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
436/// see if V (which must be an instruction) and its recursive operands
437/// that do not dominate BB have a combined cost lower than Budget and
438/// are non-trapping. If both are true, the instruction is inserted into the
439/// set and true is returned.
440///
441/// The cost for most non-trapping instructions is defined as 1 except for
442/// Select whose cost is 2.
443///
444/// After this function returns, Cost is increased by the cost of
445/// V plus its non-dominating operands. If that cost is greater than
446/// Budget, false is returned and Cost is undefined.
448 Value *V, BasicBlock *BB, Instruction *InsertPt,
449 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
451 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
452 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
453 // so limit the recursion depth.
454 // TODO: While this recursion limit does prevent pathological behavior, it
455 // would be better to track visited instructions to avoid cycles.
457 return false;
458
460 if (!I) {
461 // Non-instructions dominate all instructions and can be executed
462 // unconditionally.
463 return true;
464 }
465 BasicBlock *PBB = I->getParent();
466
467 // We don't want to allow weird loops that might have the "if condition" in
468 // the bottom of this block.
469 if (PBB == BB)
470 return false;
471
472 // If this instruction is defined in a block that contains an unconditional
473 // branch to BB, then it must be in the 'conditional' part of the "if
474 // statement". If not, it definitely dominates the region.
476 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
477 return true;
478
479 // If we have seen this instruction before, don't count it again.
480 if (AggressiveInsts.count(I))
481 return true;
482
483 // Okay, it looks like the instruction IS in the "condition". Check to
484 // see if it's a cheap instruction to unconditionally compute, and if it
485 // only uses stuff defined outside of the condition. If so, hoist it out.
486 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
487 return false;
488
489 // Overflow arithmetic instruction plus extract value are usually generated
490 // when a division is being replaced. But, in this case, the zero check may
491 // still be kept in the code. In that case it would be worth to hoist these
492 // two instruction out of the basic block. Let's treat this pattern as one
493 // single cheap instruction here!
494 WithOverflowInst *OverflowInst;
495 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
496 ZeroCostInstructions.insert(OverflowInst);
497 Cost += 1;
498 } else if (!ZeroCostInstructions.contains(I))
499 Cost += computeSpeculationCost(I, TTI);
500
501 // Allow exactly one instruction to be speculated regardless of its cost
502 // (as long as it is safe to do so).
503 // This is intended to flatten the CFG even if the instruction is a division
504 // or other expensive operation. The speculation of an expensive instruction
505 // is expected to be undone in CodeGenPrepare if the speculation has not
506 // enabled further IR optimizations.
507 if (Cost > Budget &&
508 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
509 !Cost.isValid()))
510 return false;
511
512 // Okay, we can only really hoist these out if their operands do
513 // not take us over the cost threshold.
514 for (Use &Op : I->operands())
515 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
516 TTI, AC, ZeroCostInstructions, Depth + 1))
517 return false;
518 // Okay, it's safe to do this! Remember this instruction.
519 AggressiveInsts.insert(I);
520 return true;
521}
522
523/// Extract ConstantInt from value, looking through IntToPtr
524/// and PointerNullValue. Return NULL if value is not a constant int.
526 // Normal constant int.
528 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
529 DL.isNonIntegralPointerType(V->getType()))
530 return CI;
531
532 // This is some kind of pointer constant. Turn it into a pointer-sized
533 // ConstantInt if possible.
534 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
535
536 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
538 return ConstantInt::get(PtrTy, 0);
539
540 // IntToPtr const int.
542 if (CE->getOpcode() == Instruction::IntToPtr)
543 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
544 // The constant is very likely to have the right type already.
545 if (CI->getType() == PtrTy)
546 return CI;
547 else
548 return cast<ConstantInt>(
549 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
550 }
551 return nullptr;
552}
553
554namespace {
555
556/// Given a chain of or (||) or and (&&) comparison of a value against a
557/// constant, this will try to recover the information required for a switch
558/// structure.
559/// It will depth-first traverse the chain of comparison, seeking for patterns
560/// like %a == 12 or %a < 4 and combine them to produce a set of integer
561/// representing the different cases for the switch.
562/// Note that if the chain is composed of '||' it will build the set of elements
563/// that matches the comparisons (i.e. any of this value validate the chain)
564/// while for a chain of '&&' it will build the set elements that make the test
565/// fail.
566struct ConstantComparesGatherer {
567 const DataLayout &DL;
568
569 /// Value found for the switch comparison
570 Value *CompValue = nullptr;
571
572 /// Extra clause to be checked before the switch
573 Value *Extra = nullptr;
574
575 /// Set of integers to match in switch
577
578 /// Number of comparisons matched in the and/or chain
579 unsigned UsedICmps = 0;
580
581 /// If the elements in Vals matches the comparisons
582 bool IsEq = false;
583
584 // Used to check if the first matched CompValue shall be the Extra check.
585 bool IgnoreFirstMatch = false;
586 bool MultipleMatches = false;
587
588 /// Construct and compute the result for the comparison instruction Cond
589 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
590 gather(Cond);
591 if (CompValue || !MultipleMatches)
592 return;
593 Extra = nullptr;
594 Vals.clear();
595 UsedICmps = 0;
596 IgnoreFirstMatch = true;
597 gather(Cond);
598 }
599
600 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
601 ConstantComparesGatherer &
602 operator=(const ConstantComparesGatherer &) = delete;
603
604private:
605 /// Try to set the current value used for the comparison, it succeeds only if
606 /// it wasn't set before or if the new value is the same as the old one
607 bool setValueOnce(Value *NewVal) {
608 if (IgnoreFirstMatch) {
609 IgnoreFirstMatch = false;
610 return false;
611 }
612 if (CompValue && CompValue != NewVal) {
613 MultipleMatches = true;
614 return false;
615 }
616 CompValue = NewVal;
617 return true;
618 }
619
620 /// Try to match Instruction "I" as a comparison against a constant and
621 /// populates the array Vals with the set of values that match (or do not
622 /// match depending on isEQ).
623 /// Return false on failure. On success, the Value the comparison matched
624 /// against is placed in CompValue.
625 /// If CompValue is already set, the function is expected to fail if a match
626 /// is found but the value compared to is different.
627 bool matchInstruction(Instruction *I, bool isEQ) {
628 if (match(I, m_Not(m_Instruction(I))))
629 isEQ = !isEQ;
630
631 Value *Val;
632 if (match(I, m_NUWTrunc(m_Value(Val)))) {
633 // If we already have a value for the switch, it has to match!
634 if (!setValueOnce(Val))
635 return false;
636 UsedICmps++;
637 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
638 return true;
639 }
640 // If this is an icmp against a constant, handle this as one of the cases.
641 ICmpInst *ICI;
642 ConstantInt *C;
643 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
644 (C = getConstantInt(I->getOperand(1), DL)))) {
645 return false;
646 }
647
648 Value *RHSVal;
649 const APInt *RHSC;
650
651 // Pattern match a special case
652 // (x & ~2^z) == y --> x == y || x == y|2^z
653 // This undoes a transformation done by instcombine to fuse 2 compares.
654 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
655 // It's a little bit hard to see why the following transformations are
656 // correct. Here is a CVC3 program to verify them for 64-bit values:
657
658 /*
659 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
660 x : BITVECTOR(64);
661 y : BITVECTOR(64);
662 z : BITVECTOR(64);
663 mask : BITVECTOR(64) = BVSHL(ONE, z);
664 QUERY( (y & ~mask = y) =>
665 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
666 );
667 QUERY( (y | mask = y) =>
668 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
669 );
670 */
671
672 // Please note that each pattern must be a dual implication (<--> or
673 // iff). One directional implication can create spurious matches. If the
674 // implication is only one-way, an unsatisfiable condition on the left
675 // side can imply a satisfiable condition on the right side. Dual
676 // implication ensures that satisfiable conditions are transformed to
677 // other satisfiable conditions and unsatisfiable conditions are
678 // transformed to other unsatisfiable conditions.
679
680 // Here is a concrete example of a unsatisfiable condition on the left
681 // implying a satisfiable condition on the right:
682 //
683 // mask = (1 << z)
684 // (x & ~mask) == y --> (x == y || x == (y | mask))
685 //
686 // Substituting y = 3, z = 0 yields:
687 // (x & -2) == 3 --> (x == 3 || x == 2)
688
689 // Pattern match a special case:
690 /*
691 QUERY( (y & ~mask = y) =>
692 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
693 );
694 */
695 if (match(ICI->getOperand(0),
696 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
697 APInt Mask = ~*RHSC;
698 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
699 // If we already have a value for the switch, it has to match!
700 if (!setValueOnce(RHSVal))
701 return false;
702
703 Vals.push_back(C);
704 Vals.push_back(
705 ConstantInt::get(C->getContext(),
706 C->getValue() | Mask));
707 UsedICmps++;
708 return true;
709 }
710 }
711
712 // Pattern match a special case:
713 /*
714 QUERY( (y | mask = y) =>
715 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
716 );
717 */
718 if (match(ICI->getOperand(0),
719 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
720 APInt Mask = *RHSC;
721 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
722 // If we already have a value for the switch, it has to match!
723 if (!setValueOnce(RHSVal))
724 return false;
725
726 Vals.push_back(C);
727 Vals.push_back(ConstantInt::get(C->getContext(),
728 C->getValue() & ~Mask));
729 UsedICmps++;
730 return true;
731 }
732 }
733
734 // If we already have a value for the switch, it has to match!
735 if (!setValueOnce(ICI->getOperand(0)))
736 return false;
737
738 UsedICmps++;
739 Vals.push_back(C);
740 return true;
741 }
742
743 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
744 ConstantRange Span =
746
747 // Shift the range if the compare is fed by an add. This is the range
748 // compare idiom as emitted by instcombine.
749 Value *CandidateVal = I->getOperand(0);
750 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
751 Span = Span.subtract(*RHSC);
752 CandidateVal = RHSVal;
753 }
754
755 // If this is an and/!= check, then we are looking to build the set of
756 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
757 // x != 0 && x != 1.
758 if (!isEQ)
759 Span = Span.inverse();
760
761 // If there are a ton of values, we don't want to make a ginormous switch.
762 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
763 return false;
764 }
765
766 // If we already have a value for the switch, it has to match!
767 if (!setValueOnce(CandidateVal))
768 return false;
769
770 // Add all values from the range to the set
771 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
772 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
773
774 UsedICmps++;
775 return true;
776 }
777
778 /// Given a potentially 'or'd or 'and'd together collection of icmp
779 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
780 /// the value being compared, and stick the list constants into the Vals
781 /// vector.
782 /// One "Extra" case is allowed to differ from the other.
783 void gather(Value *V) {
784 Value *Op0, *Op1;
785 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
786 IsEq = true;
787 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
788 IsEq = false;
789 else
790 return;
791 // Keep a stack (SmallVector for efficiency) for depth-first traversal
792 SmallVector<Value *, 8> DFT{Op0, Op1};
793 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
794
795 while (!DFT.empty()) {
796 V = DFT.pop_back_val();
797
798 if (Instruction *I = dyn_cast<Instruction>(V)) {
799 // If it is a || (or && depending on isEQ), process the operands.
800 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
801 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
802 if (Visited.insert(Op1).second)
803 DFT.push_back(Op1);
804 if (Visited.insert(Op0).second)
805 DFT.push_back(Op0);
806
807 continue;
808 }
809
810 // Try to match the current instruction
811 if (matchInstruction(I, IsEq))
812 // Match succeed, continue the loop
813 continue;
814 }
815
816 // One element of the sequence of || (or &&) could not be match as a
817 // comparison against the same value as the others.
818 // We allow only one "Extra" case to be checked before the switch
819 if (!Extra) {
820 Extra = V;
821 continue;
822 }
823 // Failed to parse a proper sequence, abort now
824 CompValue = nullptr;
825 break;
826 }
827 }
828};
829
830} // end anonymous namespace
831
833 MemorySSAUpdater *MSSAU = nullptr) {
834 Instruction *Cond = nullptr;
836 Cond = dyn_cast<Instruction>(SI->getCondition());
837 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
838 if (BI->isConditional())
839 Cond = dyn_cast<Instruction>(BI->getCondition());
840 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
841 Cond = dyn_cast<Instruction>(IBI->getAddress());
842 }
843
844 TI->eraseFromParent();
845 if (Cond)
847}
848
849/// Return true if the specified terminator checks
850/// to see if a value is equal to constant integer value.
851Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
852 Value *CV = nullptr;
853 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
854 // Do not permit merging of large switch instructions into their
855 // predecessors unless there is only one predecessor.
856 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
857 CV = SI->getCondition();
858 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
859 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
860 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
861 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
862 CV = ICI->getOperand(0);
863 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
864 if (Trunc->hasNoUnsignedWrap())
865 CV = Trunc->getOperand(0);
866 }
867 }
868
869 // Unwrap any lossless ptrtoint cast.
870 if (CV) {
871 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
872 Value *Ptr = PTII->getPointerOperand();
873 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
874 CV = Ptr;
875 }
876 }
877 return CV;
878}
879
880/// Given a value comparison instruction,
881/// decode all of the 'cases' that it represents and return the 'default' block.
882BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
883 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
884 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
885 Cases.reserve(SI->getNumCases());
886 for (auto Case : SI->cases())
887 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
888 Case.getCaseSuccessor()));
889 return SI->getDefaultDest();
890 }
891
892 BranchInst *BI = cast<BranchInst>(TI);
893 Value *Cond = BI->getCondition();
894 ICmpInst::Predicate Pred;
895 ConstantInt *C;
896 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
897 Pred = ICI->getPredicate();
898 C = getConstantInt(ICI->getOperand(1), DL);
899 } else {
900 Pred = ICmpInst::ICMP_NE;
901 auto *Trunc = cast<TruncInst>(Cond);
902 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
903 }
904 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
905 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
906 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
907}
908
909/// Given a vector of bb/value pairs, remove any entries
910/// in the list that match the specified block.
911static void
913 std::vector<ValueEqualityComparisonCase> &Cases) {
914 llvm::erase(Cases, BB);
915}
916
917/// Return true if there are any keys in C1 that exist in C2 as well.
918static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
919 std::vector<ValueEqualityComparisonCase> &C2) {
920 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
921
922 // Make V1 be smaller than V2.
923 if (V1->size() > V2->size())
924 std::swap(V1, V2);
925
926 if (V1->empty())
927 return false;
928 if (V1->size() == 1) {
929 // Just scan V2.
930 ConstantInt *TheVal = (*V1)[0].Value;
931 for (const ValueEqualityComparisonCase &VECC : *V2)
932 if (TheVal == VECC.Value)
933 return true;
934 }
935
936 // Otherwise, just sort both lists and compare element by element.
937 array_pod_sort(V1->begin(), V1->end());
938 array_pod_sort(V2->begin(), V2->end());
939 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
940 while (i1 != e1 && i2 != e2) {
941 if ((*V1)[i1].Value == (*V2)[i2].Value)
942 return true;
943 if ((*V1)[i1].Value < (*V2)[i2].Value)
944 ++i1;
945 else
946 ++i2;
947 }
948 return false;
949}
950
951// Set branch weights on SwitchInst. This sets the metadata if there is at
952// least one non-zero weight.
954 bool IsExpected) {
955 // Check that there is at least one non-zero weight. Otherwise, pass
956 // nullptr to setMetadata which will erase the existing metadata.
957 MDNode *N = nullptr;
958 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
959 N = MDBuilder(SI->getParent()->getContext())
960 .createBranchWeights(Weights, IsExpected);
961 SI->setMetadata(LLVMContext::MD_prof, N);
962}
963
964// Similar to the above, but for branch and select instructions that take
965// exactly 2 weights.
966static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
967 uint32_t FalseWeight, bool IsExpected) {
969 // Check that there is at least one non-zero weight. Otherwise, pass
970 // nullptr to setMetadata which will erase the existing metadata.
971 MDNode *N = nullptr;
972 if (TrueWeight || FalseWeight)
973 N = MDBuilder(I->getParent()->getContext())
974 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
975 I->setMetadata(LLVMContext::MD_prof, N);
976}
977
978/// If TI is known to be a terminator instruction and its block is known to
979/// only have a single predecessor block, check to see if that predecessor is
980/// also a value comparison with the same value, and if that comparison
981/// determines the outcome of this comparison. If so, simplify TI. This does a
982/// very limited form of jump threading.
983bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
984 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
985 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
986 if (!PredVal)
987 return false; // Not a value comparison in predecessor.
988
989 Value *ThisVal = isValueEqualityComparison(TI);
990 assert(ThisVal && "This isn't a value comparison!!");
991 if (ThisVal != PredVal)
992 return false; // Different predicates.
993
994 // TODO: Preserve branch weight metadata, similarly to how
995 // foldValueComparisonIntoPredecessors preserves it.
996
997 // Find out information about when control will move from Pred to TI's block.
998 std::vector<ValueEqualityComparisonCase> PredCases;
999 BasicBlock *PredDef =
1000 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
1001 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
1002
1003 // Find information about how control leaves this block.
1004 std::vector<ValueEqualityComparisonCase> ThisCases;
1005 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
1006 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
1007
1008 // If TI's block is the default block from Pred's comparison, potentially
1009 // simplify TI based on this knowledge.
1010 if (PredDef == TI->getParent()) {
1011 // If we are here, we know that the value is none of those cases listed in
1012 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1013 // can simplify TI.
1014 if (!valuesOverlap(PredCases, ThisCases))
1015 return false;
1016
1017 if (isa<BranchInst>(TI)) {
1018 // Okay, one of the successors of this condbr is dead. Convert it to a
1019 // uncond br.
1020 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1021 // Insert the new branch.
1022 Instruction *NI = Builder.CreateBr(ThisDef);
1023 (void)NI;
1024
1025 // Remove PHI node entries for the dead edge.
1026 ThisCases[0].Dest->removePredecessor(PredDef);
1027
1028 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1029 << "Through successor TI: " << *TI << "Leaving: " << *NI
1030 << "\n");
1031
1033
1034 if (DTU)
1035 DTU->applyUpdates(
1036 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1037
1038 return true;
1039 }
1040
1041 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1042 // Okay, TI has cases that are statically dead, prune them away.
1043 SmallPtrSet<Constant *, 16> DeadCases;
1044 for (const ValueEqualityComparisonCase &Case : PredCases)
1045 DeadCases.insert(Case.Value);
1046
1047 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1048 << "Through successor TI: " << *TI);
1049
1050 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1051 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1052 --i;
1053 auto *Successor = i->getCaseSuccessor();
1054 if (DTU)
1055 ++NumPerSuccessorCases[Successor];
1056 if (DeadCases.count(i->getCaseValue())) {
1057 Successor->removePredecessor(PredDef);
1058 SI.removeCase(i);
1059 if (DTU)
1060 --NumPerSuccessorCases[Successor];
1061 }
1062 }
1063
1064 if (DTU) {
1065 std::vector<DominatorTree::UpdateType> Updates;
1066 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1067 if (I.second == 0)
1068 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1069 DTU->applyUpdates(Updates);
1070 }
1071
1072 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1073 return true;
1074 }
1075
1076 // Otherwise, TI's block must correspond to some matched value. Find out
1077 // which value (or set of values) this is.
1078 ConstantInt *TIV = nullptr;
1079 BasicBlock *TIBB = TI->getParent();
1080 for (const auto &[Value, Dest] : PredCases)
1081 if (Dest == TIBB) {
1082 if (TIV)
1083 return false; // Cannot handle multiple values coming to this block.
1084 TIV = Value;
1085 }
1086 assert(TIV && "No edge from pred to succ?");
1087
1088 // Okay, we found the one constant that our value can be if we get into TI's
1089 // BB. Find out which successor will unconditionally be branched to.
1090 BasicBlock *TheRealDest = nullptr;
1091 for (const auto &[Value, Dest] : ThisCases)
1092 if (Value == TIV) {
1093 TheRealDest = Dest;
1094 break;
1095 }
1096
1097 // If not handled by any explicit cases, it is handled by the default case.
1098 if (!TheRealDest)
1099 TheRealDest = ThisDef;
1100
1101 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1102
1103 // Remove PHI node entries for dead edges.
1104 BasicBlock *CheckEdge = TheRealDest;
1105 for (BasicBlock *Succ : successors(TIBB))
1106 if (Succ != CheckEdge) {
1107 if (Succ != TheRealDest)
1108 RemovedSuccs.insert(Succ);
1109 Succ->removePredecessor(TIBB);
1110 } else
1111 CheckEdge = nullptr;
1112
1113 // Insert the new branch.
1114 Instruction *NI = Builder.CreateBr(TheRealDest);
1115 (void)NI;
1116
1117 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1118 << "Through successor TI: " << *TI << "Leaving: " << *NI
1119 << "\n");
1120
1122 if (DTU) {
1123 SmallVector<DominatorTree::UpdateType, 2> Updates;
1124 Updates.reserve(RemovedSuccs.size());
1125 for (auto *RemovedSucc : RemovedSuccs)
1126 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1127 DTU->applyUpdates(Updates);
1128 }
1129 return true;
1130}
1131
1132namespace {
1133
1134/// This class implements a stable ordering of constant
1135/// integers that does not depend on their address. This is important for
1136/// applications that sort ConstantInt's to ensure uniqueness.
1137struct ConstantIntOrdering {
1138 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1139 return LHS->getValue().ult(RHS->getValue());
1140 }
1141};
1142
1143} // end anonymous namespace
1144
1146 ConstantInt *const *P2) {
1147 const ConstantInt *LHS = *P1;
1148 const ConstantInt *RHS = *P2;
1149 if (LHS == RHS)
1150 return 0;
1151 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1152}
1153
1154/// Get Weights of a given terminator, the default weight is at the front
1155/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1156/// metadata.
1158 SmallVectorImpl<uint64_t> &Weights) {
1159 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1160 assert(MD && "Invalid branch-weight metadata");
1161 extractFromBranchWeightMD64(MD, Weights);
1162
1163 // If TI is a conditional eq, the default case is the false case,
1164 // and the corresponding branch-weight data is at index 2. We swap the
1165 // default weight to be the first entry.
1166 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1167 assert(Weights.size() == 2);
1168 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1169 if (!ICI)
1170 return;
1171
1172 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1173 std::swap(Weights.front(), Weights.back());
1174 }
1175}
1176
1177/// Keep halving the weights until all can fit in uint32_t.
1179 uint64_t Max = *llvm::max_element(Weights);
1180 if (Max > UINT_MAX) {
1181 unsigned Offset = 32 - llvm::countl_zero(Max);
1182 for (uint64_t &I : Weights)
1183 I >>= Offset;
1184 }
1185}
1186
1188 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1189 Instruction *PTI = PredBlock->getTerminator();
1190
1191 // If we have bonus instructions, clone them into the predecessor block.
1192 // Note that there may be multiple predecessor blocks, so we cannot move
1193 // bonus instructions to a predecessor block.
1194 for (Instruction &BonusInst : *BB) {
1195 if (BonusInst.isTerminator())
1196 continue;
1197
1198 Instruction *NewBonusInst = BonusInst.clone();
1199
1200 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1201 // Unless the instruction has the same !dbg location as the original
1202 // branch, drop it. When we fold the bonus instructions we want to make
1203 // sure we reset their debug locations in order to avoid stepping on
1204 // dead code caused by folding dead branches.
1205 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1206 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1207 mapAtomInstance(DL, VMap);
1208 }
1209
1210 RemapInstruction(NewBonusInst, VMap,
1212
1213 // If we speculated an instruction, we need to drop any metadata that may
1214 // result in undefined behavior, as the metadata might have been valid
1215 // only given the branch precondition.
1216 // Similarly strip attributes on call parameters that may cause UB in
1217 // location the call is moved to.
1218 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1219
1220 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1221 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1222 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1224
1225 NewBonusInst->takeName(&BonusInst);
1226 BonusInst.setName(NewBonusInst->getName() + ".old");
1227 VMap[&BonusInst] = NewBonusInst;
1228
1229 // Update (liveout) uses of bonus instructions,
1230 // now that the bonus instruction has been cloned into predecessor.
1231 // Note that we expect to be in a block-closed SSA form for this to work!
1232 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1233 auto *UI = cast<Instruction>(U.getUser());
1234 auto *PN = dyn_cast<PHINode>(UI);
1235 if (!PN) {
1236 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1237 "If the user is not a PHI node, then it should be in the same "
1238 "block as, and come after, the original bonus instruction.");
1239 continue; // Keep using the original bonus instruction.
1240 }
1241 // Is this the block-closed SSA form PHI node?
1242 if (PN->getIncomingBlock(U) == BB)
1243 continue; // Great, keep using the original bonus instruction.
1244 // The only other alternative is an "use" when coming from
1245 // the predecessor block - here we should refer to the cloned bonus instr.
1246 assert(PN->getIncomingBlock(U) == PredBlock &&
1247 "Not in block-closed SSA form?");
1248 U.set(NewBonusInst);
1249 }
1250 }
1251
1252 // Key Instructions: We may have propagated atom info into the pred. If the
1253 // pred's terminator already has atom info do nothing as merging would drop
1254 // one atom group anyway. If it doesn't, propagte the remapped atom group
1255 // from BB's terminator.
1256 if (auto &PredDL = PTI->getDebugLoc()) {
1257 auto &DL = BB->getTerminator()->getDebugLoc();
1258 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1259 PredDL.isSameSourceLocation(DL)) {
1260 PTI->setDebugLoc(DL);
1261 RemapSourceAtom(PTI, VMap);
1262 }
1263 }
1264}
1265
1266bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1267 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1268 BasicBlock *BB = TI->getParent();
1269 BasicBlock *Pred = PTI->getParent();
1270
1272
1273 // Figure out which 'cases' to copy from SI to PSI.
1274 std::vector<ValueEqualityComparisonCase> BBCases;
1275 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1276
1277 std::vector<ValueEqualityComparisonCase> PredCases;
1278 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1279
1280 // Based on whether the default edge from PTI goes to BB or not, fill in
1281 // PredCases and PredDefault with the new switch cases we would like to
1282 // build.
1283 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1284
1285 // Update the branch weight metadata along the way
1286 SmallVector<uint64_t, 8> Weights;
1287 bool PredHasWeights = hasBranchWeightMD(*PTI);
1288 bool SuccHasWeights = hasBranchWeightMD(*TI);
1289
1290 if (PredHasWeights) {
1291 getBranchWeights(PTI, Weights);
1292 // branch-weight metadata is inconsistent here.
1293 if (Weights.size() != 1 + PredCases.size())
1294 PredHasWeights = SuccHasWeights = false;
1295 } else if (SuccHasWeights)
1296 // If there are no predecessor weights but there are successor weights,
1297 // populate Weights with 1, which will later be scaled to the sum of
1298 // successor's weights
1299 Weights.assign(1 + PredCases.size(), 1);
1300
1301 SmallVector<uint64_t, 8> SuccWeights;
1302 if (SuccHasWeights) {
1303 getBranchWeights(TI, SuccWeights);
1304 // branch-weight metadata is inconsistent here.
1305 if (SuccWeights.size() != 1 + BBCases.size())
1306 PredHasWeights = SuccHasWeights = false;
1307 } else if (PredHasWeights)
1308 SuccWeights.assign(1 + BBCases.size(), 1);
1309
1310 if (PredDefault == BB) {
1311 // If this is the default destination from PTI, only the edges in TI
1312 // that don't occur in PTI, or that branch to BB will be activated.
1313 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1314 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1315 if (PredCases[i].Dest != BB)
1316 PTIHandled.insert(PredCases[i].Value);
1317 else {
1318 // The default destination is BB, we don't need explicit targets.
1319 std::swap(PredCases[i], PredCases.back());
1320
1321 if (PredHasWeights || SuccHasWeights) {
1322 // Increase weight for the default case.
1323 Weights[0] += Weights[i + 1];
1324 std::swap(Weights[i + 1], Weights.back());
1325 Weights.pop_back();
1326 }
1327
1328 PredCases.pop_back();
1329 --i;
1330 --e;
1331 }
1332
1333 // Reconstruct the new switch statement we will be building.
1334 if (PredDefault != BBDefault) {
1335 PredDefault->removePredecessor(Pred);
1336 if (DTU && PredDefault != BB)
1337 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1338 PredDefault = BBDefault;
1339 ++NewSuccessors[BBDefault];
1340 }
1341
1342 unsigned CasesFromPred = Weights.size();
1343 uint64_t ValidTotalSuccWeight = 0;
1344 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1345 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1346 PredCases.push_back(BBCases[i]);
1347 ++NewSuccessors[BBCases[i].Dest];
1348 if (SuccHasWeights || PredHasWeights) {
1349 // The default weight is at index 0, so weight for the ith case
1350 // should be at index i+1. Scale the cases from successor by
1351 // PredDefaultWeight (Weights[0]).
1352 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1353 ValidTotalSuccWeight += SuccWeights[i + 1];
1354 }
1355 }
1356
1357 if (SuccHasWeights || PredHasWeights) {
1358 ValidTotalSuccWeight += SuccWeights[0];
1359 // Scale the cases from predecessor by ValidTotalSuccWeight.
1360 for (unsigned i = 1; i < CasesFromPred; ++i)
1361 Weights[i] *= ValidTotalSuccWeight;
1362 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1363 Weights[0] *= SuccWeights[0];
1364 }
1365 } else {
1366 // If this is not the default destination from PSI, only the edges
1367 // in SI that occur in PSI with a destination of BB will be
1368 // activated.
1369 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1370 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1371 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1372 if (PredCases[i].Dest == BB) {
1373 PTIHandled.insert(PredCases[i].Value);
1374
1375 if (PredHasWeights || SuccHasWeights) {
1376 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1377 std::swap(Weights[i + 1], Weights.back());
1378 Weights.pop_back();
1379 }
1380
1381 std::swap(PredCases[i], PredCases.back());
1382 PredCases.pop_back();
1383 --i;
1384 --e;
1385 }
1386
1387 // Okay, now we know which constants were sent to BB from the
1388 // predecessor. Figure out where they will all go now.
1389 for (const ValueEqualityComparisonCase &Case : BBCases)
1390 if (PTIHandled.count(Case.Value)) {
1391 // If this is one we are capable of getting...
1392 if (PredHasWeights || SuccHasWeights)
1393 Weights.push_back(WeightsForHandled[Case.Value]);
1394 PredCases.push_back(Case);
1395 ++NewSuccessors[Case.Dest];
1396 PTIHandled.erase(Case.Value); // This constant is taken care of
1397 }
1398
1399 // If there are any constants vectored to BB that TI doesn't handle,
1400 // they must go to the default destination of TI.
1401 for (ConstantInt *I : PTIHandled) {
1402 if (PredHasWeights || SuccHasWeights)
1403 Weights.push_back(WeightsForHandled[I]);
1404 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1405 ++NewSuccessors[BBDefault];
1406 }
1407 }
1408
1409 // Okay, at this point, we know which new successor Pred will get. Make
1410 // sure we update the number of entries in the PHI nodes for these
1411 // successors.
1412 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1413 if (DTU) {
1414 SuccsOfPred = {llvm::from_range, successors(Pred)};
1415 Updates.reserve(Updates.size() + NewSuccessors.size());
1416 }
1417 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1418 NewSuccessors) {
1419 for (auto I : seq(NewSuccessor.second)) {
1420 (void)I;
1421 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1422 }
1423 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1424 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1425 }
1426
1427 Builder.SetInsertPoint(PTI);
1428 // Convert pointer to int before we switch.
1429 if (CV->getType()->isPointerTy()) {
1430 CV =
1431 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1432 }
1433
1434 // Now that the successors are updated, create the new Switch instruction.
1435 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1436 NewSI->setDebugLoc(PTI->getDebugLoc());
1437 for (ValueEqualityComparisonCase &V : PredCases)
1438 NewSI->addCase(V.Value, V.Dest);
1439
1440 if (PredHasWeights || SuccHasWeights) {
1441 // Halve the weights if any of them cannot fit in an uint32_t
1442 fitWeights(Weights);
1443
1444 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1445
1446 setBranchWeights(NewSI, MDWeights, /*IsExpected=*/false);
1447 }
1448
1450
1451 // Okay, last check. If BB is still a successor of PSI, then we must
1452 // have an infinite loop case. If so, add an infinitely looping block
1453 // to handle the case to preserve the behavior of the code.
1454 BasicBlock *InfLoopBlock = nullptr;
1455 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1456 if (NewSI->getSuccessor(i) == BB) {
1457 if (!InfLoopBlock) {
1458 // Insert it at the end of the function, because it's either code,
1459 // or it won't matter if it's hot. :)
1460 InfLoopBlock =
1461 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1462 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1463 if (DTU)
1464 Updates.push_back(
1465 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1466 }
1467 NewSI->setSuccessor(i, InfLoopBlock);
1468 }
1469
1470 if (DTU) {
1471 if (InfLoopBlock)
1472 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1473
1474 Updates.push_back({DominatorTree::Delete, Pred, BB});
1475
1476 DTU->applyUpdates(Updates);
1477 }
1478
1479 ++NumFoldValueComparisonIntoPredecessors;
1480 return true;
1481}
1482
1483/// The specified terminator is a value equality comparison instruction
1484/// (either a switch or a branch on "X == c").
1485/// See if any of the predecessors of the terminator block are value comparisons
1486/// on the same value. If so, and if safe to do so, fold them together.
1487bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1488 IRBuilder<> &Builder) {
1489 BasicBlock *BB = TI->getParent();
1490 Value *CV = isValueEqualityComparison(TI); // CondVal
1491 assert(CV && "Not a comparison?");
1492
1493 bool Changed = false;
1494
1495 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1496 while (!Preds.empty()) {
1497 BasicBlock *Pred = Preds.pop_back_val();
1498 Instruction *PTI = Pred->getTerminator();
1499
1500 // Don't try to fold into itself.
1501 if (Pred == BB)
1502 continue;
1503
1504 // See if the predecessor is a comparison with the same value.
1505 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1506 if (PCV != CV)
1507 continue;
1508
1509 SmallSetVector<BasicBlock *, 4> FailBlocks;
1510 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1511 for (auto *Succ : FailBlocks) {
1512 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1513 return false;
1514 }
1515 }
1516
1517 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1518 Changed = true;
1519 }
1520 return Changed;
1521}
1522
1523// If we would need to insert a select that uses the value of this invoke
1524// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1525// need to do this), we can't hoist the invoke, as there is nowhere to put the
1526// select in this case.
1528 Instruction *I1, Instruction *I2) {
1529 for (BasicBlock *Succ : successors(BB1)) {
1530 for (const PHINode &PN : Succ->phis()) {
1531 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1532 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1533 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1534 return false;
1535 }
1536 }
1537 }
1538 return true;
1539}
1540
1541// Get interesting characteristics of instructions that
1542// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1543// instructions can be reordered across.
1549
1551 unsigned Flags = 0;
1552 if (I->mayReadFromMemory())
1553 Flags |= SkipReadMem;
1554 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1555 // inalloca) across stacksave/stackrestore boundaries.
1556 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1557 Flags |= SkipSideEffect;
1559 Flags |= SkipImplicitControlFlow;
1560 return Flags;
1561}
1562
1563// Returns true if it is safe to reorder an instruction across preceding
1564// instructions in a basic block.
1565static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1566 // Don't reorder a store over a load.
1567 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1568 return false;
1569
1570 // If we have seen an instruction with side effects, it's unsafe to reorder an
1571 // instruction which reads memory or itself has side effects.
1572 if ((Flags & SkipSideEffect) &&
1573 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1574 return false;
1575
1576 // Reordering across an instruction which does not necessarily transfer
1577 // control to the next instruction is speculation.
1579 return false;
1580
1581 // Hoisting of llvm.deoptimize is only legal together with the next return
1582 // instruction, which this pass is not always able to do.
1583 if (auto *CB = dyn_cast<CallBase>(I))
1584 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1585 return false;
1586
1587 // It's also unsafe/illegal to hoist an instruction above its instruction
1588 // operands
1589 BasicBlock *BB = I->getParent();
1590 for (Value *Op : I->operands()) {
1591 if (auto *J = dyn_cast<Instruction>(Op))
1592 if (J->getParent() == BB)
1593 return false;
1594 }
1595
1596 return true;
1597}
1598
1599static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1600
1601/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1602/// instructions \p I1 and \p I2 can and should be hoisted.
1604 const TargetTransformInfo &TTI) {
1605 // If we're going to hoist a call, make sure that the two instructions
1606 // we're commoning/hoisting are both marked with musttail, or neither of
1607 // them is marked as such. Otherwise, we might end up in a situation where
1608 // we hoist from a block where the terminator is a `ret` to a block where
1609 // the terminator is a `br`, and `musttail` calls expect to be followed by
1610 // a return.
1611 auto *C1 = dyn_cast<CallInst>(I1);
1612 auto *C2 = dyn_cast<CallInst>(I2);
1613 if (C1 && C2)
1614 if (C1->isMustTailCall() != C2->isMustTailCall())
1615 return false;
1616
1617 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1618 return false;
1619
1620 // If any of the two call sites has nomerge or convergent attribute, stop
1621 // hoisting.
1622 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1623 if (CB1->cannotMerge() || CB1->isConvergent())
1624 return false;
1625 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1626 if (CB2->cannotMerge() || CB2->isConvergent())
1627 return false;
1628
1629 return true;
1630}
1631
1632/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1633/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1634/// hoistCommonCodeFromSuccessors. e.g. The input:
1635/// I1 DVRs: { x, z },
1636/// OtherInsts: { I2 DVRs: { x, y, z } }
1637/// would result in hoisting only DbgVariableRecord x.
1639 Instruction *TI, Instruction *I1,
1640 SmallVectorImpl<Instruction *> &OtherInsts) {
1641 if (!I1->hasDbgRecords())
1642 return;
1643 using CurrentAndEndIt =
1644 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1645 // Vector of {Current, End} iterators.
1647 Itrs.reserve(OtherInsts.size() + 1);
1648 // Helper lambdas for lock-step checks:
1649 // Return true if this Current == End.
1650 auto atEnd = [](const CurrentAndEndIt &Pair) {
1651 return Pair.first == Pair.second;
1652 };
1653 // Return true if all Current are identical.
1654 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1655 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1657 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1658 });
1659 };
1660
1661 // Collect the iterators.
1662 Itrs.push_back(
1663 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1664 for (Instruction *Other : OtherInsts) {
1665 if (!Other->hasDbgRecords())
1666 return;
1667 Itrs.push_back(
1668 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1669 }
1670
1671 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1672 // the lock-step DbgRecord are identical, hoist all of them to TI.
1673 // This replicates the dbg.* intrinsic behaviour in
1674 // hoistCommonCodeFromSuccessors.
1675 while (none_of(Itrs, atEnd)) {
1676 bool HoistDVRs = allIdentical(Itrs);
1677 for (CurrentAndEndIt &Pair : Itrs) {
1678 // Increment Current iterator now as we may be about to move the
1679 // DbgRecord.
1680 DbgRecord &DR = *Pair.first++;
1681 if (HoistDVRs) {
1682 DR.removeFromParent();
1683 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1684 }
1685 }
1686 }
1687}
1688
1690 const Instruction *I2) {
1691 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1692 return true;
1693
1694 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1695 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1696 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1697 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1698 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1699
1700 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1701 return I1->getOperand(0) == I2->getOperand(1) &&
1702 I1->getOperand(1) == I2->getOperand(0) &&
1703 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1704 }
1705
1706 return false;
1707}
1708
1709/// If the target supports conditional faulting,
1710/// we look for the following pattern:
1711/// \code
1712/// BB:
1713/// ...
1714/// %cond = icmp ult %x, %y
1715/// br i1 %cond, label %TrueBB, label %FalseBB
1716/// FalseBB:
1717/// store i32 1, ptr %q, align 4
1718/// ...
1719/// TrueBB:
1720/// %maskedloadstore = load i32, ptr %b, align 4
1721/// store i32 %maskedloadstore, ptr %p, align 4
1722/// ...
1723/// \endcode
1724///
1725/// and transform it into:
1726///
1727/// \code
1728/// BB:
1729/// ...
1730/// %cond = icmp ult %x, %y
1731/// %maskedloadstore = cload i32, ptr %b, %cond
1732/// cstore i32 %maskedloadstore, ptr %p, %cond
1733/// cstore i32 1, ptr %q, ~%cond
1734/// br i1 %cond, label %TrueBB, label %FalseBB
1735/// FalseBB:
1736/// ...
1737/// TrueBB:
1738/// ...
1739/// \endcode
1740///
1741/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1742/// e.g.
1743///
1744/// \code
1745/// %vcond = bitcast i1 %cond to <1 x i1>
1746/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1747/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1748/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1749/// call void @llvm.masked.store.v1i32.p0
1750/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1751/// %cond.not = xor i1 %cond, true
1752/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1753/// call void @llvm.masked.store.v1i32.p0
1754/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1755/// \endcode
1756///
1757/// So we need to turn hoisted load/store into cload/cstore.
1758///
1759/// \param BI The branch instruction.
1760/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1761/// will be speculated.
1762/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
    BranchInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  // Masked load/store intrinsics take a <1 x i1> mask for the scalar case.
  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
  auto *Cond = BI->getOperand(0);
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // One mask shared by all speculated accesses; negate the branch condition
    // when speculating the false destination (*Invert == true).
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
        VCondTy);
  } else {
    // No Invert given: build both masks up front; each hoisted access picks
    // MaskTrue or MaskFalse below based on which successor it came from.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
  }
  // Strip any chain of bitcasts to reach the underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(V))
      V = BitCast->getOperand(0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          if ((PN = dyn_cast<PHINode>(U))) {
            // When the load is masked off, the masked.load yields the value
            // the PHI would have received from this block.
            PassThru = Builder.CreateBitCast(
                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                FixedVectorType::get(Ty, 1));
          } else if (auto *Ins = cast<Instruction>(U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
      // Cast the <1 x Ty> result back to scalar Ty for existing users.
      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
      if (PN)
        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
      I->replaceAllUsesWith(NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not support pointer type, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(*I);
    I->eraseFromParent();
  }
}
1855
    const TargetTransformInfo &TTI) {
  // Not handle volatile or atomic.
  // Only simple loads/stores are candidates, and only when the corresponding
  // conditional-faulting hoisting option is enabled.
  bool IsStore = false;
  if (auto *L = dyn_cast<LoadInst>(I)) {
    if (!L->isSimple() || !HoistLoadsWithCondFaulting)
      return false;
  } else if (auto *S = dyn_cast<StoreInst>(I)) {
    if (!S->isSimple() || !HoistStoresWithCondFaulting)
      return false;
    IsStore = true;
  } else
    // Anything other than a load or store is never a candidate.
    return false;

  // llvm.masked.load/store use i32 for alignment while load/store use i64.
  // That's why we have the alignment limitation.
  // FIXME: Update the prototype of the intrinsics?
  // The target must support conditional (masked) load/store for this type.
  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
}
1876
1877/// Hoist any common code in the successor blocks up into the block. This
1878/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1879/// given, only perform hoisting in case all successors blocks contain matching
1880/// instructions only. In that case, all instructions can be hoisted and the
1881/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  for (auto *Succ : successors(BB))
    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
      return false;

  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // A PHI at the head of a successor would need select insertion; give up.
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.
    // Check if sizes and terminators of all successors match.
    bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
      Instruction *Term0 = Succs[0]->getTerminator();
      Instruction *Term = Succ->getTerminator();
      return !Term->isSameOperationAs(Term0) ||
             !equal(Term->operands(), Term0->operands()) ||
             Succs[0]->size() != Succ->size();
    });
    if (!AllSame)
      return false;
    if (AllSame) {
      // Walk all successors backwards in lockstep; every position must hold
      // pairwise-identical (up to commutativity) instructions.
      LockstepReverseIterator<true> LRI(Succs);
      while (LRI.isValid()) {
        Instruction *I0 = (*LRI)[0];
        if (any_of(*LRI, [I0](Instruction *I) {
              return !areIdenticalUpToCommutativity(I0, I);
            })) {
          return false;
        }
        --LRI;
      }
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  for (;;) {
    // Compare the first successor's current instruction (I1) against the
    // instruction at the same position in every other successor.
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(I1);
        // Merge wrapping/fast-math flags conservatively.
        I1->andIRFlags(I2);
        if (auto *CB = dyn_cast<CallBase>(I1)) {
          bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(I1, I2, true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
        I2->eraseFromParent();
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2062
/// Hoist the identical terminators of TI's successor blocks into TI's block:
/// clone one of them before TI, redirect uses of the originals to the clone,
/// insert selects for PHI inputs that differ between the two arms (when TI is
/// a conditional branch), and record the required dominator-tree updates.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs) {

  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow than converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    // Redirect all uses of the soon-to-be-deleted terminators to the clone.
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache selects by (true-value, false-value) so each distinct pair is
    // materialized only once.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
              BI->getCondition(), BB1V, BB2V,
              isa<FPMathOperator>(PN) ? &PN : nullptr,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    addPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(TI))
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});

  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2188
2189// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2190// into variables.
                                                int OpIdx) {
  // Divide/Remainder by constant is typically much cheaper than by variable.
  // Operand 1 is the divisor; everything else is fine to turn variable.
  if (I->isIntDivRem())
    return OpIdx != 1;
  // Intrinsic operands are often required or expected to be immediates
  // (e.g. memcpy sizes, per the TODO above); conservatively refuse them all.
  return !isa<IntrinsicInst>(I);
}
2198
2199// All instructions in Insts belong to different blocks that all unconditionally
2200// branch to a common successor. Analyze each instruction and return true if it
2201// would be possible to sink them into their successor, creating one common
2202// instruction instead. For every value that would be required to be provided by
2203// PHI node (because an operand varies in each input block), add to PHIOperands.
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  // All candidates must be the same operation (attributes intersected for
  // calls) and agree on memory-model-relaxation annotations.
  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(&U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(Insts, It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // Finally, record every operand position where the candidates disagree;
  // each such position will need a PHI in the sink target.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
      // We can't create a PHI from this GEP.
      return false;
      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
      for (auto *I : Insts)
        Ops.push_back(I->getOperand(OI));
    }
  }
  return true;
}
2304
2305// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2306// instruction of every block in Blocks to their common successor, commoning
2307// into one instruction.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    // Collect the last non-terminator instruction of each block.
    I = I->getPrevNode();
    Insts.push_back(I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      // All instructions agree on this operand; reuse it directly.
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      // Intersect wrapping/fast-math flags so the merged result is valid for
      // every original instruction.
      I0->andIRFlags(I);
      if (auto *CB = dyn_cast<CallBase>(I0)) {
        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(U);
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }
}
2399
2400/// Check whether BB's predecessors end with unconditional branches. If it is
2401/// true, sink any common code from the predecessors to BB.
2403 DomTreeUpdater *DTU) {
2404 // We support two situations:
2405 // (1) all incoming arcs are unconditional
2406 // (2) there are non-unconditional incoming arcs
2407 //
2408 // (2) is very common in switch defaults and
2409 // else-if patterns;
2410 //
2411 // if (a) f(1);
2412 // else if (b) f(2);
2413 //
2414 // produces:
2415 //
2416 // [if]
2417 // / \
2418 // [f(1)] [if]
2419 // | | \
2420 // | | |
2421 // | [f(2)]|
2422 // \ | /
2423 // [ end ]
2424 //
2425 // [end] has two unconditional predecessor arcs and one conditional. The
2426 // conditional refers to the implicit empty 'else' arc. This conditional
2427 // arc can also be caused by an empty default block in a switch.
2428 //
2429 // In this case, we attempt to sink code from all *unconditional* arcs.
2430 // If we can sink instructions from these arcs (determined during the scan
2431 // phase below) we insert a common successor for all unconditional arcs and
2432 // connect that to [end], to enable sinking:
2433 //
2434 // [if]
2435 // / \
2436 // [x(1)] [if]
2437 // | | \
2438 // | | \
2439 // | [x(2)] |
2440 // \ / |
2441 // [sink.split] |
2442 // \ /
2443 // [ end ]
2444 //
2445 SmallVector<BasicBlock*,4> UnconditionalPreds;
2446 bool HaveNonUnconditionalPredecessors = false;
2447 for (auto *PredBB : predecessors(BB)) {
2448 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2449 if (PredBr && PredBr->isUnconditional())
2450 UnconditionalPreds.push_back(PredBB);
2451 else
2452 HaveNonUnconditionalPredecessors = true;
2453 }
2454 if (UnconditionalPreds.size() < 2)
2455 return false;
2456
2457 // We take a two-step approach to tail sinking. First we scan from the end of
2458 // each block upwards in lockstep. If the n'th instruction from the end of each
2459 // block can be sunk, those instructions are added to ValuesToSink and we
2460 // carry on. If we can sink an instruction but need to PHI-merge some operands
2461 // (because they're not identical in each instruction) we add these to
2462 // PHIOperands.
2463 // We prepopulate PHIOperands with the phis that already exist in BB.
2465 for (PHINode &PN : BB->phis()) {
2467 for (const Use &U : PN.incoming_values())
2468 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2469 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2470 for (BasicBlock *Pred : UnconditionalPreds)
2471 Ops.push_back(*IncomingVals[Pred]);
2472 }
2473
2474 int ScanIdx = 0;
2475 SmallPtrSet<Value*,4> InstructionsToSink;
2476 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2477 while (LRI.isValid() &&
2478 canSinkInstructions(*LRI, PHIOperands)) {
2479 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2480 << "\n");
2481 InstructionsToSink.insert_range(*LRI);
2482 ++ScanIdx;
2483 --LRI;
2484 }
2485
2486 // If no instructions can be sunk, early-return.
2487 if (ScanIdx == 0)
2488 return false;
2489
2490 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2491
2492 if (!followedByDeoptOrUnreachable) {
2493 // Check whether this is the pointer operand of a load/store.
2494 auto IsMemOperand = [](Use &U) {
2495 auto *I = cast<Instruction>(U.getUser());
2496 if (isa<LoadInst>(I))
2497 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2498 if (isa<StoreInst>(I))
2499 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2500 return false;
2501 };
2502
2503 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2504 // actually sink before encountering instruction that is unprofitable to
2505 // sink?
2506 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2507 unsigned NumPHIInsts = 0;
2508 for (Use &U : (*LRI)[0]->operands()) {
2509 auto It = PHIOperands.find(&U);
2510 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2511 return InstructionsToSink.contains(V);
2512 })) {
2513 ++NumPHIInsts;
2514 // Do not separate a load/store from the gep producing the address.
2515 // The gep can likely be folded into the load/store as an addressing
2516 // mode. Additionally, a load of a gep is easier to analyze than a
2517 // load of a phi.
2518 if (IsMemOperand(U) &&
2519 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2520 return false;
2521 // FIXME: this check is overly optimistic. We may end up not sinking
2522 // said instruction, due to the very same profitability check.
2523 // See @creating_too_many_phis in sink-common-code.ll.
2524 }
2525 }
2526 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2527 return NumPHIInsts <= 1;
2528 };
2529
2530 // We've determined that we are going to sink last ScanIdx instructions,
2531 // and recorded them in InstructionsToSink. Now, some instructions may be
2532 // unprofitable to sink. But that determination depends on the instructions
2533 // that we are going to sink.
2534
2535 // First, forward scan: find the first instruction unprofitable to sink,
2536 // recording all the ones that are profitable to sink.
2537 // FIXME: would it be better, after we detect that not all are profitable.
2538 // to either record the profitable ones, or erase the unprofitable ones?
2539 // Maybe we need to choose (at runtime) the one that will touch least
2540 // instrs?
2541 LRI.reset();
2542 int Idx = 0;
2543 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2544 while (Idx < ScanIdx) {
2545 if (!ProfitableToSinkInstruction(LRI)) {
2546 // Too many PHIs would be created.
2547 LLVM_DEBUG(
2548 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2549 break;
2550 }
2551 InstructionsProfitableToSink.insert_range(*LRI);
2552 --LRI;
2553 ++Idx;
2554 }
2555
2556 // If no instructions can be sunk, early-return.
2557 if (Idx == 0)
2558 return false;
2559
2560 // Did we determine that (only) some instructions are unprofitable to sink?
2561 if (Idx < ScanIdx) {
2562 // Okay, some instructions are unprofitable.
2563 ScanIdx = Idx;
2564 InstructionsToSink = InstructionsProfitableToSink;
2565
2566 // But, that may make other instructions unprofitable, too.
2567 // So, do a backward scan, do any earlier instructions become
2568 // unprofitable?
2569 assert(
2570 !ProfitableToSinkInstruction(LRI) &&
2571 "We already know that the last instruction is unprofitable to sink");
2572 ++LRI;
2573 --Idx;
2574 while (Idx >= 0) {
2575 // If we detect that an instruction becomes unprofitable to sink,
2576 // all earlier instructions won't be sunk either,
2577 // so preemptively keep InstructionsProfitableToSink in sync.
2578 // FIXME: is this the most performant approach?
2579 for (auto *I : *LRI)
2580 InstructionsProfitableToSink.erase(I);
2581 if (!ProfitableToSinkInstruction(LRI)) {
2582 // Everything starting with this instruction won't be sunk.
2583 ScanIdx = Idx;
2584 InstructionsToSink = InstructionsProfitableToSink;
2585 }
2586 ++LRI;
2587 --Idx;
2588 }
2589 }
2590
2591 // If no instructions can be sunk, early-return.
2592 if (ScanIdx == 0)
2593 return false;
2594 }
2595
2596 bool Changed = false;
2597
2598 if (HaveNonUnconditionalPredecessors) {
2599 if (!followedByDeoptOrUnreachable) {
2600 // It is always legal to sink common instructions from unconditional
2601 // predecessors. However, if not all predecessors are unconditional,
2602 // this transformation might be pessimizing. So as a rule of thumb,
2603 // don't do it unless we'd sink at least one non-speculatable instruction.
2604 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2605 LRI.reset();
2606 int Idx = 0;
2607 bool Profitable = false;
2608 while (Idx < ScanIdx) {
2609 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2610 Profitable = true;
2611 break;
2612 }
2613 --LRI;
2614 ++Idx;
2615 }
2616 if (!Profitable)
2617 return false;
2618 }
2619
2620 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2621 // We have a conditional edge and we're going to sink some instructions.
2622 // Insert a new block postdominating all blocks we're going to sink from.
2623 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2624 // Edges couldn't be split.
2625 return false;
2626 Changed = true;
2627 }
2628
2629 // Now that we've analyzed all potential sinking candidates, perform the
2630 // actual sink. We iteratively sink the last non-terminator of the source
2631 // blocks into their common successor unless doing so would require too
2632 // many PHI instructions to be generated (currently only one PHI is allowed
2633 // per sunk instruction).
2634 //
2635 // We can use InstructionsToSink to discount values needing PHI-merging that will
2636 // actually be sunk in a later iteration. This allows us to be more
2637 // aggressive in what we sink. This does allow a false positive where we
2638 // sink presuming a later value will also be sunk, but stop half way through
2639 // and never actually sink it which means we produce more PHIs than intended.
2640 // This is unlikely in practice though.
2641 int SinkIdx = 0;
2642 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2643 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2644 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2645 << "\n");
2646
2647 // Because we've sunk every instruction in turn, the current instruction to
2648 // sink is always at index 0.
2649 LRI.reset();
2650
2651 sinkLastInstruction(UnconditionalPreds);
2652 NumSinkCommonInstrs++;
2653 Changed = true;
2654 }
2655 if (SinkIdx != 0)
2656 ++NumSinkCommonCode;
2657 return Changed;
2658}
2659
2660namespace {
2661
// Groups the `invoke` predecessors of a landingpad into sets whose members
// may legally be merged into a single `invoke`.
struct CompatibleSets {
  // One set of mutually-mergeable invokes.
  using SetTy = SmallVector<InvokeInst *, 2>;

  // NOTE(review): the member holding the collection of sets appears to have
  // been lost in extraction here; getCompatibleSet() below iterates over
  // `Sets` (presumably `SmallVector<SetTy, 2> Sets;`) - confirm upstream.

  // Returns true iff the two candidates (always exactly two) may be merged.
  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  // Returns the existing set II is compatible with, or a fresh empty set.
  SetTy &getCompatibleSet(InvokeInst *II);

  // Files II into its compatible set.
  void insert(InvokeInst *II);
};
2673
2674CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2675 // Perform a linear scan over all the existing sets, see if the new `invoke`
2676 // is compatible with any particular set. Since we know that all the `invokes`
2677 // within a set are compatible, only check the first `invoke` in each set.
2678 // WARNING: at worst, this has quadratic complexity.
2679 for (CompatibleSets::SetTy &Set : Sets) {
2680 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2681 return Set;
2682 }
2683
2684 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2685 return Sets.emplace_back();
2686}
2687
2688void CompatibleSets::insert(InvokeInst *II) {
2689 getCompatibleSet(II).emplace_back(II);
2690}
2691
2692bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2693 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2694
2695 // Can we theoretically merge these `invoke`s?
2696 auto IsIllegalToMerge = [](InvokeInst *II) {
2697 return II->cannotMerge() || II->isInlineAsm();
2698 };
2699 if (any_of(Invokes, IsIllegalToMerge))
2700 return false;
2701
2702 // Either both `invoke`s must be direct,
2703 // or both `invoke`s must be indirect.
2704 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2705 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2706 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2707 if (HaveIndirectCalls) {
2708 if (!AllCallsAreIndirect)
2709 return false;
2710 } else {
2711 // All callees must be identical.
2712 Value *Callee = nullptr;
2713 for (InvokeInst *II : Invokes) {
2714 Value *CurrCallee = II->getCalledOperand();
2715 assert(CurrCallee && "There is always a called operand.");
2716 if (!Callee)
2717 Callee = CurrCallee;
2718 else if (Callee != CurrCallee)
2719 return false;
2720 }
2721 }
2722
2723 // Either both `invoke`s must not have a normal destination,
2724 // or both `invoke`s must have a normal destination,
2725 auto HasNormalDest = [](InvokeInst *II) {
2726 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2727 };
2728 if (any_of(Invokes, HasNormalDest)) {
2729 // Do not merge `invoke` that does not have a normal destination with one
2730 // that does have a normal destination, even though doing so would be legal.
2731 if (!all_of(Invokes, HasNormalDest))
2732 return false;
2733
2734 // All normal destinations must be identical.
2735 BasicBlock *NormalBB = nullptr;
2736 for (InvokeInst *II : Invokes) {
2737 BasicBlock *CurrNormalBB = II->getNormalDest();
2738 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2739 if (!NormalBB)
2740 NormalBB = CurrNormalBB;
2741 else if (NormalBB != CurrNormalBB)
2742 return false;
2743 }
2744
2745 // In the normal destination, the incoming values for these two `invoke`s
2746 // must be compatible.
2747 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2749 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2750 &EquivalenceSet))
2751 return false;
2752 }
2753
2754#ifndef NDEBUG
2755 // All unwind destinations must be identical.
2756 // We know that because we have started from said unwind destination.
2757 BasicBlock *UnwindBB = nullptr;
2758 for (InvokeInst *II : Invokes) {
2759 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2760 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2761 if (!UnwindBB)
2762 UnwindBB = CurrUnwindBB;
2763 else
2764 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2765 }
2766#endif
2767
2768 // In the unwind destination, the incoming values for these two `invoke`s
2769 // must be compatible.
2771 Invokes.front()->getUnwindDest(),
2772 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2773 return false;
2774
2775 // Ignoring arguments, these `invoke`s must be identical,
2776 // including operand bundles.
2777 const InvokeInst *II0 = Invokes.front();
2778 for (auto *II : Invokes.drop_front())
2779 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2780 return false;
2781
2782 // Can we theoretically form the data operands for the merged `invoke`?
2783 auto IsIllegalToMergeArguments = [](auto Ops) {
2784 Use &U0 = std::get<0>(Ops);
2785 Use &U1 = std::get<1>(Ops);
2786 if (U0 == U1)
2787 return false;
2789 U0.getOperandNo());
2790 };
2791 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2792 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2793 IsIllegalToMergeArguments))
2794 return false;
2795
2796 return true;
2797}
2798
2799} // namespace
2800
2801// Merge all invokes in the provided set, all of which are compatible
2802// as per the `CompatibleSets::shouldBelongToSameSet()`.
2804 DomTreeUpdater *DTU) {
2805 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2806
2808 if (DTU)
2809 Updates.reserve(2 + 3 * Invokes.size());
2810
2811 bool HasNormalDest =
2812 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2813
2814 // Clone one of the invokes into a new basic block.
2815 // Since they are all compatible, it doesn't matter which invoke is cloned.
2816 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2817 InvokeInst *II0 = Invokes.front();
2818 BasicBlock *II0BB = II0->getParent();
2819 BasicBlock *InsertBeforeBlock =
2820 II0->getParent()->getIterator()->getNextNode();
2821 Function *Func = II0BB->getParent();
2822 LLVMContext &Ctx = II0->getContext();
2823
2824 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2825 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2826
2827 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2828 // NOTE: all invokes have the same attributes, so no handling needed.
2829 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2830
2831 if (!HasNormalDest) {
2832 // This set does not have a normal destination,
2833 // so just form a new block with unreachable terminator.
2834 BasicBlock *MergedNormalDest = BasicBlock::Create(
2835 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2836 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2837 UI->setDebugLoc(DebugLoc::getTemporary());
2838 MergedInvoke->setNormalDest(MergedNormalDest);
2839 }
2840
2841 // The unwind destination, however, remainds identical for all invokes here.
2842
2843 return MergedInvoke;
2844 }();
2845
2846 if (DTU) {
2847 // Predecessor blocks that contained these invokes will now branch to
2848 // the new block that contains the merged invoke, ...
2849 for (InvokeInst *II : Invokes)
2850 Updates.push_back(
2851 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2852
2853 // ... which has the new `unreachable` block as normal destination,
2854 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2855 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2856 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2857 SuccBBOfMergedInvoke});
2858
2859 // Since predecessor blocks now unconditionally branch to a new block,
2860 // they no longer branch to their original successors.
2861 for (InvokeInst *II : Invokes)
2862 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2863 Updates.push_back(
2864 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2865 }
2866
2867 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2868
2869 // Form the merged operands for the merged invoke.
2870 for (Use &U : MergedInvoke->operands()) {
2871 // Only PHI together the indirect callees and data operands.
2872 if (MergedInvoke->isCallee(&U)) {
2873 if (!IsIndirectCall)
2874 continue;
2875 } else if (!MergedInvoke->isDataOperand(&U))
2876 continue;
2877
2878 // Don't create trivial PHI's with all-identical incoming values.
2879 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2880 return II->getOperand(U.getOperandNo()) != U.get();
2881 });
2882 if (!NeedPHI)
2883 continue;
2884
2885 // Form a PHI out of all the data ops under this index.
2887 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2888 for (InvokeInst *II : Invokes)
2889 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2890
2891 U.set(PN);
2892 }
2893
2894 // We've ensured that each PHI node has compatible (identical) incoming values
2895 // when coming from each of the `invoke`s in the current merge set,
2896 // so update the PHI nodes accordingly.
2897 for (BasicBlock *Succ : successors(MergedInvoke))
2898 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2899 /*ExistPred=*/Invokes.front()->getParent());
2900
2901 // And finally, replace the original `invoke`s with an unconditional branch
2902 // to the block with the merged `invoke`. Also, give that merged `invoke`
2903 // the merged debugloc of all the original `invoke`s.
2904 DILocation *MergedDebugLoc = nullptr;
2905 for (InvokeInst *II : Invokes) {
2906 // Compute the debug location common to all the original `invoke`s.
2907 if (!MergedDebugLoc)
2908 MergedDebugLoc = II->getDebugLoc();
2909 else
2910 MergedDebugLoc =
2911 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2912
2913 // And replace the old `invoke` with an unconditionally branch
2914 // to the block with the merged `invoke`.
2915 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2916 OrigSuccBB->removePredecessor(II->getParent());
2917 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2918 // The unconditional branch is part of the replacement for the original
2919 // invoke, so should use its DebugLoc.
2920 BI->setDebugLoc(II->getDebugLoc());
2921 bool Success = MergedInvoke->tryIntersectAttributes(II);
2922 assert(Success && "Merged invokes with incompatible attributes");
2923 // For NDEBUG Compile
2924 (void)Success;
2925 II->replaceAllUsesWith(MergedInvoke);
2926 II->eraseFromParent();
2927 ++NumInvokesMerged;
2928 }
2929 MergedInvoke->setDebugLoc(MergedDebugLoc);
2930 ++NumInvokeSetsFormed;
2931
2932 if (DTU)
2933 DTU->applyUpdates(Updates);
2934}
2935
2936/// If this block is a `landingpad` exception handling block, categorize all
2937/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2938/// being "mergeable" together, and then merge invokes in each set together.
2939///
2940/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2941/// [...] [...]
2942/// | |
2943/// [invoke0] [invoke1]
2944/// / \ / \
2945/// [cont0] [landingpad] [cont1]
2946/// to:
2947/// [...] [...]
2948/// \ /
2949/// [invoke]
2950/// / \
2951/// [cont] [landingpad]
2952///
2953/// But of course we can only do that if the invokes share the `landingpad`,
2954/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2955/// and the invoked functions are "compatible".
2958 return false;
2959
2960 bool Changed = false;
2961
2962 // FIXME: generalize to all exception handling blocks?
2963 if (!BB->isLandingPad())
2964 return Changed;
2965
2966 CompatibleSets Grouper;
2967
2968 // Record all the predecessors of this `landingpad`. As per verifier,
2969 // the only allowed predecessor is the unwind edge of an `invoke`.
2970 // We want to group "compatible" `invokes` into the same set to be merged.
2971 for (BasicBlock *PredBB : predecessors(BB))
2972 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2973
2974 // And now, merge `invoke`s that were grouped togeter.
2975 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2976 if (Invokes.size() < 2)
2977 continue;
2978 Changed = true;
2979 mergeCompatibleInvokesImpl(Invokes, DTU);
2980 }
2981
2982 return Changed;
2983}
2984
2985namespace {
2986/// Track ephemeral values, which should be ignored for cost-modelling
2987/// purposes. Requires walking instructions in reverse order.
2988class EphemeralValueTracker {
2989 SmallPtrSet<const Instruction *, 32> EphValues;
2990
2991 bool isEphemeral(const Instruction *I) {
2992 if (isa<AssumeInst>(I))
2993 return true;
2994 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2995 all_of(I->users(), [&](const User *U) {
2996 return EphValues.count(cast<Instruction>(U));
2997 });
2998 }
2999
3000public:
3001 bool track(const Instruction *I) {
3002 if (isEphemeral(I)) {
3003 EphValues.insert(I);
3004 return true;
3005 }
3006 return false;
3007 }
3008
3009 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3010};
3011} // namespace
3012
/// Determine if we can hoist sink a sole store instruction out of a
/// conditional block.
///
/// We are looking for code like the following:
/// BrBB:
/// store i32 %add, i32* %arrayidx2
/// ... // No other stores or function calls (we could be calling a memory
/// ... // function).
/// %cmp = icmp ult %x, %y
/// br i1 %cmp, label %EndBB, label %ThenBB
/// ThenBB:
/// store i32 %add5, i32* %arrayidx2
/// br label EndBB
/// EndBB:
/// ...
/// We are going to transform this into:
/// BrBB:
/// store i32 %add, i32* %arrayidx2
/// ... //
/// %cmp = icmp ult %x, %y
/// %add.add5 = select i1 %cmp, i32 %add, %add5
/// store i32 %add.add5, i32* %arrayidx2
/// ...
///
/// \return The pointer to the value of the previous store if the store can be
/// hoisted into the predecessor block. 0 otherwise.
// NOTE(review): the opening line of this signature (presumably
// `static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,`)
// and two continuation lines of the pointer-capture condition below were lost
// in extraction - reconstruct from upstream before building this text.
                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
  // Only a plain StoreInst is a hoisting candidate.
  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
  if (!StoreToHoist)
    return nullptr;

  // Volatile or atomic.
  if (!StoreToHoist->isSimple())
    return nullptr;

  Value *StorePtr = StoreToHoist->getPointerOperand();
  Type *StoreTy = StoreToHoist->getValueOperand()->getType();

  // Look for a store to the same pointer in BrBB.
  unsigned MaxNumInstToLookAt = 9;
  // Skip pseudo probe intrinsic calls which are not really killing any memory
  // accesses.
  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
    // Bounded backward scan: give up after MaxNumInstToLookAt instructions.
    if (!MaxNumInstToLookAt)
      break;
    --MaxNumInstToLookAt;

    // Could be calling an instruction that affects memory like free().
    if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
      return nullptr;

    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
      // Found the previous store to same location and type. Make sure it is
      // simple, to avoid introducing a spurious non-atomic write after an
      // atomic write.
      if (SI->getPointerOperand() == StorePtr &&
          SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
          SI->getAlign() >= StoreToHoist->getAlign())
        // Found the previous store, return its value operand.
        return SI->getValueOperand();
      return nullptr; // Unknown store.
    }

    if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
      // A prior simple load of the same pointer and type can stand in for the
      // previously-stored value, provided the underlying object is writable
      // (and, if only explicitly dereferenceable, the pointer is known
      // dereferenceable). Part of this condition was lost in extraction - see
      // the NOTE above the signature.
      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
          LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
        Value *Obj = getUnderlyingObject(StorePtr);
        bool ExplicitlyDereferenceableOnly;
        if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
            PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
            (!ExplicitlyDereferenceableOnly ||
             isDereferenceablePointer(StorePtr, StoreTy,
                                      LI->getDataLayout()))) {
          // Found a previous load, return it.
          return LI;
        }
      }
      // The load didn't work out, but we may still find a store.
    }
  }

  return nullptr;
}
3099
/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
/// converted to selects.
// NOTE(review): several lines of this function were lost in extraction: the
// opening line of the signature (presumably
// `static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock
// *ThenBB,`), the cost-kind selection following `hasMinSize()`, the trailing
// arguments of `getCmpSelInstrCost`, the ThenV arm of the
// `passingValueIsAlwaysUndefined` check, and the MaxCost initializer.
// Reconstruct from upstream before building this text.
                                 BasicBlock *EndBB,
                                 unsigned &SpeculatedInstructions,
                                 InstructionCost &Cost,
                                 const TargetTransformInfo &TTI) {
      BB->getParent()->hasMinSize()

  bool HaveRewritablePHIs = false;
  // Walk the PHIs of the merge block; each non-trivial PHI becomes a select.
  for (PHINode &PN : EndBB->phis()) {
    Value *OrigV = PN.getIncomingValueForBlock(BB);
    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);

    // FIXME: Try to remove some of the duplication with
    // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
    if (ThenV == OrigV)
      continue;

    // Accumulate the cost of the select this PHI would become.
    Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
                                   CmpInst::makeCmpResultType(PN.getType()),

    // Don't convert to selects if we could remove undefined behavior instead.
    if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
      return false;

    HaveRewritablePHIs = true;
    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
    if (!OrigCE && !ThenCE)
      continue; // Known cheap (FIXME: Maybe not true for aggregates).

    // Constant expressions may be expanded into real instructions; charge
    // for their speculation.
    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
    InstructionCost MaxCost =
    if (OrigCost + ThenCost > MaxCost)
      return false;

    // Account for the cost of an unfolded ConstantExpr which could end up
    // getting expanded into Instructions.
    // FIXME: This doesn't account for how many operations are combined in the
    // constant expression.
    ++SpeculatedInstructions;
    if (SpeculatedInstructions > 1)
      return false;
  }

  return HaveRewritablePHIs;
}
3155
// Uses branch-weight metadata to decide whether flattening the conditional
// block guarded by \p BI is likely to pay off.
// NOTE(review): the opening line of this signature (presumably
// `static bool isProfitableToSpeculate(const BranchInst *BI,`) was lost in
// extraction - confirm against upstream.
                                    std::optional<bool> Invert,
                                    const TargetTransformInfo &TTI) {
  // If the branch is non-unpredictable, and is predicted to *not* branch to
  // the `then` block, then avoid speculating it.
  // An explicitly `unpredictable` branch is always considered profitable.
  if (BI->getMetadata(LLVMContext::MD_unpredictable))
    return true;

  // Without usable (non-zero) branch weights we cannot argue either way.
  uint64_t TWeight, FWeight;
  if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
    return true;

  // Callers that do not indicate which successor is the `then` block get a
  // conservative answer.
  if (!Invert.has_value())
    return false;

  // Profitable only while the probability of the edge that bypasses the
  // `then` block (straight to the end block) stays below the target's
  // predictable-branch threshold.
  uint64_t EndWeight = *Invert ? TWeight : FWeight;
  BranchProbability BIEndProb =
      BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
  BranchProbability Likely = TTI.getPredictableBranchThreshold();
  return BIEndProb < Likely;
}
3177
3178/// Speculate a conditional basic block flattening the CFG.
3179///
3180/// Note that this is a very risky transform currently. Speculating
3181/// instructions like this is most often not desirable. Instead, there is an MI
3182/// pass which can do it with full awareness of the resource constraints.
3183/// However, some cases are "obvious" and we should do directly. An example of
3184/// this is speculating a single, reasonably cheap instruction.
3185///
3186/// There is only one distinct advantage to flattening the CFG at the IR level:
3187/// it makes very common but simplistic optimizations such as are common in
3188/// instcombine and the DAG combiner more powerful by removing CFG edges and
3189/// modeling their effects with easier to reason about SSA value graphs.
3190///
3191///
3192/// An illustration of this transform is turning this IR:
3193/// \code
3194/// BB:
3195/// %cmp = icmp ult %x, %y
3196/// br i1 %cmp, label %EndBB, label %ThenBB
3197/// ThenBB:
3198/// %sub = sub %x, %y
3199/// br label BB2
3200/// EndBB:
3201/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3202/// ...
3203/// \endcode
3204///
3205/// Into this IR:
3206/// \code
3207/// BB:
3208/// %cmp = icmp ult %x, %y
3209/// %sub = sub %x, %y
3210/// %cond = select i1 %cmp, 0, %sub
3211/// ...
3212/// \endcode
3213///
3214/// \returns true if the conditional block is removed.
// NOTE(review): several continuation lines inside this function were lost in
// extraction (the Budget initializer, the isSafeCheapLoadStore call, the
// speculation-safety/cost checks, the operand dyn_cast, and the Cost
// initializer). Reconstruct from upstream before building this text.
bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
                                            BasicBlock *ThenBB) {
  if (!Options.SpeculateBlocks)
    return false;

  // Be conservative for now. FP select instruction can often be expensive.
  Value *BrCond = BI->getCondition();
  if (isa<FCmpInst>(BrCond))
    return false;

  BasicBlock *BB = BI->getParent();
  // The block after the conditional region, where the PHIs live.
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
  InstructionCost Budget =

  // If ThenBB is actually on the false edge of the conditional branch, remember
  // to swap the select operands later.
  bool Invert = false;
  if (ThenBB != BI->getSuccessor(0)) {
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
    Invert = true;
  }
  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");

  if (!isProfitableToSpeculate(BI, Invert, TTI))
    return false;

  // Keep a count of how many times instructions are used within ThenBB when
  // they are candidates for sinking into ThenBB. Specifically:
  // - They are defined in BB, and
  // - They have no side effects, and
  // - All of their uses are in ThenBB.
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;

  SmallVector<Instruction *, 4> SpeculatedPseudoProbes;

  unsigned SpeculatedInstructions = 0;
  bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
  SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
  Value *SpeculatedStoreValue = nullptr;
  StoreInst *SpeculatedStore = nullptr;
  EphemeralValueTracker EphTracker;
  // Reverse walk (terminator excluded) so ephemeral values are seen after
  // their users, as EphemeralValueTracker requires.
  for (Instruction &I : reverse(drop_end(*ThenBB))) {
    // Skip pseudo probes. The consequence is we lose track of the branch
    // probability for ThenBB, which is fine since the optimization here takes
    // place regardless of the branch probability.
    if (isa<PseudoProbeInst>(I)) {
      // The probe should be deleted so that it will not be over-counted when
      // the samples collected on the non-conditional path are counted towards
      // the conditional path. We leave it for the counts inference algorithm to
      // figure out a proper count for an unknown probe.
      SpeculatedPseudoProbes.push_back(&I);
      continue;
    }

    // Ignore ephemeral values, they will be dropped by the transform.
    if (EphTracker.track(&I))
      continue;

    // Only speculatively execute a single instruction (not counting the
    // terminator) for now.
    bool IsSafeCheapLoadStore = HoistLoadsStores &&
                                SpeculatedConditionalLoadsStores.size() <
    // Not count load/store into cost if target supports conditional faulting
    // b/c it's cheap to speculate it.
    if (IsSafeCheapLoadStore)
      SpeculatedConditionalLoadsStores.push_back(&I);
    else
      ++SpeculatedInstructions;

    if (SpeculatedInstructions > 1)
      return false;

    // Don't hoist the instruction if it's unsafe or expensive.
    if (!IsSafeCheapLoadStore &&
        !(HoistCondStores && !SpeculatedStoreValue &&
          (SpeculatedStoreValue =
               isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
      return false;
    if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
      return false;

    // Store the store speculation candidate.
    if (!SpeculatedStore && SpeculatedStoreValue)
      SpeculatedStore = cast<StoreInst>(&I);

    // Do not hoist the instruction if any of its operands are defined but not
    // used in BB. The transformation will prevent the operand from
    // being sunk into the use block.
    for (Use &Op : I.operands()) {
      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
        continue; // Not a candidate for sinking.

      ++SinkCandidateUseCounts[OpI];
    }
  }

  // Consider any sink candidates which are only used in ThenBB as costs for
  // speculation. Note, while we iterate over a DenseMap here, we are summing
  // and so iteration order isn't significant.
  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
    if (Inst->hasNUses(Count)) {
      ++SpeculatedInstructions;
      if (SpeculatedInstructions > 1)
        return false;
    }

  // Check that we can insert the selects and that it's not too expensive to do
  // so.
  bool Convert =
      SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
                                            SpeculatedInstructions, Cost, TTI);
  if (!Convert || Cost > Budget)
    return false;

  // If we get here, we can hoist the instruction and if-convert.
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

  Instruction *Sel = nullptr;
  // Insert a select of the value of the speculated store.
  if (SpeculatedStoreValue) {
    IRBuilder<NoFolder> Builder(BI);
    Value *OrigV = SpeculatedStore->getValueOperand();
    Value *TrueV = SpeculatedStore->getValueOperand();
    Value *FalseV = SpeculatedStoreValue;
    if (Invert)
      std::swap(TrueV, FalseV);
    Value *S = Builder.CreateSelect(
        BrCond, TrueV, FalseV, "spec.store.select", BI);
    Sel = cast<Instruction>(S);
    SpeculatedStore->setOperand(0, S);
    SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
                                         SpeculatedStore->getDebugLoc());
    // The value stored is still conditional, but the store itself is now
    // unconditonally executed, so we must be sure that any linked dbg.assign
    // intrinsics are tracking the new stored value (the result of the
    // select). If we don't, and the store were to be removed by another pass
    // (e.g. DSE), then we'd eventually end up emitting a location describing
    // the conditional value, unconditionally.
    //
    // === Before this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1, ...
    //   br %cond if.then
    //
    // if.then:
    //   store %two, %x.dest, !DIAssignID !2
    //   dbg.assign %two, "x", ..., !2, ...
    //
    // === After this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1
    //   ...
    //   %merge = select %cond, %two, %one
    //   store %merge, %x.dest, !DIAssignID !2
    //   dbg.assign %merge, "x", ..., !2
    for (DbgVariableRecord *DbgAssign :
         at::getDVRAssignmentMarkers(SpeculatedStore))
      if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
        DbgAssign->replaceVariableLocationOp(OrigV, S);
  }

  // Metadata can be dependent on the condition we are hoisting above.
  // Strip all UB-implying metadata on the instruction. Drop the debug loc
  // to avoid making it appear as if the condition is a constant, which would
  // be misleading while debugging.
  // Similarly strip attributes that maybe dependent on condition we are
  // hoisting above.
  for (auto &I : make_early_inc_range(*ThenBB)) {
    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
      I.setDebugLoc(DebugLoc::getDropped());
    }
    I.dropUBImplyingAttrsAndMetadata();

    // Drop ephemeral values.
    if (EphTracker.contains(&I)) {
      I.replaceAllUsesWith(PoisonValue::get(I.getType()));
      I.eraseFromParent();
    }
  }

  // Hoist the instructions.
  // Drop DbgVariableRecords attached to these instructions.
  for (auto &It : *ThenBB)
    for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
      // equivalent).
      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
          !DVR || !DVR->isDbgAssign())
        It.dropOneDbgRecord(&DR);
  BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
             std::prev(ThenBB->end()));

  if (!SpeculatedConditionalLoadsStores.empty())
    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
                                Sel);

  // Insert selects and rewrite the PHI operands.
  IRBuilder<NoFolder> Builder(BI);
  for (PHINode &PN : EndBB->phis()) {
    unsigned OrigI = PN.getBasicBlockIndex(BB);
    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
    Value *OrigV = PN.getIncomingValue(OrigI);
    Value *ThenV = PN.getIncomingValue(ThenI);

    // Skip PHIs which are trivial.
    if (OrigV == ThenV)
      continue;

    // Create a select whose true value is the speculatively executed value and
    // false value is the pre-existing value. Swap them if the branch
    // destinations were inverted.
    Value *TrueV = ThenV, *FalseV = OrigV;
    if (Invert)
      std::swap(TrueV, FalseV);
    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
    PN.setIncomingValue(OrigI, V);
    PN.setIncomingValue(ThenI, V);
  }

  // Remove speculated pseudo probes.
  for (Instruction *I : SpeculatedPseudoProbes)
    I->eraseFromParent();

  ++NumSpeculations;
  return true;
}
3452
3454
3455// Return false if number of blocks searched is too much.
3456static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3457 BlocksSet &ReachesNonLocalUses) {
3458 if (BB == DefBB)
3459 return true;
3460 if (!ReachesNonLocalUses.insert(BB).second)
3461 return true;
3462
3463 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3464 return false;
3465 for (BasicBlock *Pred : predecessors(BB))
3466 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3467 return false;
3468 return true;
3469}
3470
/// Return true if we can thread a branch across this block.
// NOTE(review): the opening line of this signature (presumably
// `static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,`) and the
// definition of `UI` inside the users loop (presumably
// `Instruction *UI = cast<Instruction>(U);`) were lost in extraction -
// reconstruct from upstream before building this text.
                              BlocksSet &NonLocalUseBlocks) {
  int Size = 0;
  EphemeralValueTracker EphTracker;

  // Walk the loop in reverse so that we can identify ephemeral values properly
  // (values only feeding assumes).
  for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
    // Can't fold blocks that contain noduplicate or convergent calls.
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (CI->cannotDuplicate() || CI->isConvergent())
        return false;

    // Ignore ephemeral values which are deleted during codegen.
    // We will delete Phis while threading, so Phis should not be accounted in
    // block's size.
    if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
      if (Size++ > MaxSmallBlockSize)
        return false; // Don't clone large BB's.
    }

    // Record blocks with non-local uses of values defined in the current basic
    // block.
    for (User *U : I.users()) {
      BasicBlock *UsedInBB = UI->getParent();
      if (UsedInBB == BB) {
        // A PHI in this block using a value from this block blocks threading.
        if (isa<PHINode>(UI))
          return false;
      } else
        NonLocalUseBlocks.insert(UsedInBB);
    }

    // Looks ok, continue checking.
  }

  return true;
}
3510
3512 BasicBlock *To) {
3513 // Don't look past the block defining the value, we might get the value from
3514 // a previous loop iteration.
3515 auto *I = dyn_cast<Instruction>(V);
3516 if (I && I->getParent() == To)
3517 return nullptr;
3518
3519 // We know the value if the From block branches on it.
3520 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3521 if (BI && BI->isConditional() && BI->getCondition() == V &&
3522 BI->getSuccessor(0) != BI->getSuccessor(1))
3523 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3525
3526 return nullptr;
3527}
3528
3529/// If we have a conditional branch on something for which we know the constant
3530/// value in predecessors (e.g. a phi node in the current block), thread edges
3531/// from the predecessor to their ultimate destination.
3532static std::optional<bool>
3534 const DataLayout &DL,
3535 AssumptionCache *AC) {
3537 BasicBlock *BB = BI->getParent();
3538 Value *Cond = BI->getCondition();
3540 if (PN && PN->getParent() == BB) {
3541 // Degenerate case of a single entry PHI.
3542 if (PN->getNumIncomingValues() == 1) {
3544 return true;
3545 }
3546
3547 for (Use &U : PN->incoming_values())
3548 if (auto *CB = dyn_cast<ConstantInt>(U))
3549 KnownValues[CB].insert(PN->getIncomingBlock(U));
3550 } else {
3551 for (BasicBlock *Pred : predecessors(BB)) {
3552 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3553 KnownValues[CB].insert(Pred);
3554 }
3555 }
3556
3557 if (KnownValues.empty())
3558 return false;
3559
3560 // Now we know that this block has multiple preds and two succs.
3561 // Check that the block is small enough and record which non-local blocks use
3562 // values defined in the block.
3563
3564 BlocksSet NonLocalUseBlocks;
3565 BlocksSet ReachesNonLocalUseBlocks;
3566 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3567 return false;
3568
3569 // Jump-threading can only be done to destinations where no values defined
3570 // in BB are live.
3571
3572 // Quickly check if both destinations have uses. If so, jump-threading cannot
3573 // be done.
3574 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3575 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3576 return false;
3577
3578 // Search backward from NonLocalUseBlocks to find which blocks
3579 // reach non-local uses.
3580 for (BasicBlock *UseBB : NonLocalUseBlocks)
3581 // Give up if too many blocks are searched.
3582 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3583 return false;
3584
3585 for (const auto &Pair : KnownValues) {
3586 ConstantInt *CB = Pair.first;
3587 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3588 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3589
3590 // Okay, we now know that all edges from PredBB should be revectored to
3591 // branch to RealDest.
3592 if (RealDest == BB)
3593 continue; // Skip self loops.
3594
3595 // Skip if the predecessor's terminator is an indirect branch.
3596 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3597 return isa<IndirectBrInst>(PredBB->getTerminator());
3598 }))
3599 continue;
3600
3601 // Only revector to RealDest if no values defined in BB are live.
3602 if (ReachesNonLocalUseBlocks.contains(RealDest))
3603 continue;
3604
3605 LLVM_DEBUG({
3606 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3607 << " has value " << *Pair.first << " in predecessors:\n";
3608 for (const BasicBlock *PredBB : Pair.second)
3609 dbgs() << " " << PredBB->getName() << "\n";
3610 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3611 });
3612
3613 // Split the predecessors we are threading into a new edge block. We'll
3614 // clone the instructions into this block, and then redirect it to RealDest.
3615 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3616
3617 // TODO: These just exist to reduce test diff, we can drop them if we like.
3618 EdgeBB->setName(RealDest->getName() + ".critedge");
3619 EdgeBB->moveBefore(RealDest);
3620
3621 // Update PHI nodes.
3622 addPredecessorToBlock(RealDest, EdgeBB, BB);
3623
3624 // BB may have instructions that are being threaded over. Clone these
3625 // instructions into EdgeBB. We know that there will be no uses of the
3626 // cloned instructions outside of EdgeBB.
3627 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3628 ValueToValueMapTy TranslateMap; // Track translated values.
3629 TranslateMap[Cond] = CB;
3630
3631 // RemoveDIs: track instructions that we optimise away while folding, so
3632 // that we can copy DbgVariableRecords from them later.
3633 BasicBlock::iterator SrcDbgCursor = BB->begin();
3634 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3635 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3636 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3637 continue;
3638 }
3639 // Clone the instruction.
3640 Instruction *N = BBI->clone();
3641 // Insert the new instruction into its new home.
3642 N->insertInto(EdgeBB, InsertPt);
3643
3644 if (BBI->hasName())
3645 N->setName(BBI->getName() + ".c");
3646
3647 // Update operands due to translation.
3648 // Key Instructions: Remap all the atom groups.
3649 if (const DebugLoc &DL = BBI->getDebugLoc())
3650 mapAtomInstance(DL, TranslateMap);
3651 RemapInstruction(N, TranslateMap,
3653
3654 // Check for trivial simplification.
3655 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3656 if (!BBI->use_empty())
3657 TranslateMap[&*BBI] = V;
3658 if (!N->mayHaveSideEffects()) {
3659 N->eraseFromParent(); // Instruction folded away, don't need actual
3660 // inst
3661 N = nullptr;
3662 }
3663 } else {
3664 if (!BBI->use_empty())
3665 TranslateMap[&*BBI] = N;
3666 }
3667 if (N) {
3668 // Copy all debug-info attached to instructions from the last we
3669 // successfully clone, up to this instruction (they might have been
3670 // folded away).
3671 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3672 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3673 SrcDbgCursor = std::next(BBI);
3674 // Clone debug-info on this instruction too.
3675 N->cloneDebugInfoFrom(&*BBI);
3676
3677 // Register the new instruction with the assumption cache if necessary.
3678 if (auto *Assume = dyn_cast<AssumeInst>(N))
3679 if (AC)
3680 AC->registerAssumption(Assume);
3681 }
3682 }
3683
3684 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3685 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3686 InsertPt->cloneDebugInfoFrom(BI);
3687
3688 BB->removePredecessor(EdgeBB);
3689 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3690 EdgeBI->setSuccessor(0, RealDest);
3691 EdgeBI->setDebugLoc(BI->getDebugLoc());
3692
3693 if (DTU) {
3695 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3696 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3697 DTU->applyUpdates(Updates);
3698 }
3699
3700 // For simplicity, we created a separate basic block for the edge. Merge
3701 // it back into the predecessor if possible. This not only avoids
3702 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3703 // bypass the check for trivial cycles above.
3704 MergeBlockIntoPredecessor(EdgeBB, DTU);
3705
3706 // Signal repeat, simplifying any other constants.
3707 return std::nullopt;
3708 }
3709
3710 return false;
3711}
3712
3713bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3714 // Note: If BB is a loop header then there is a risk that threading introduces
3715 // a non-canonical loop by moving a back edge. So we avoid this optimization
3716 // for loop headers if NeedCanonicalLoop is set.
3717 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3718 return false;
3719
3720 std::optional<bool> Result;
3721 bool EverChanged = false;
3722 do {
3723 // Note that None means "we changed things, but recurse further."
3724 Result =
3726 EverChanged |= Result == std::nullopt || *Result;
3727 } while (Result == std::nullopt);
3728 return EverChanged;
3729}
3730
3731/// Given a BB that starts with the specified two-entry PHI node,
3732/// see if we can eliminate it.
3735 const DataLayout &DL,
3736 bool SpeculateUnpredictables) {
3737 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3738 // statement", which has a very simple dominance structure. Basically, we
3739 // are trying to find the condition that is being branched on, which
3740 // subsequently causes this merge to happen. We really want control
3741 // dependence information for this check, but simplifycfg can't keep it up
3742 // to date, and this catches most of the cases we care about anyway.
3743 BasicBlock *BB = PN->getParent();
3744
3745 BasicBlock *IfTrue, *IfFalse;
3746 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3747 if (!DomBI)
3748 return false;
3749 Value *IfCond = DomBI->getCondition();
3750 // Don't bother if the branch will be constant folded trivially.
3751 if (isa<ConstantInt>(IfCond))
3752 return false;
3753
3754 BasicBlock *DomBlock = DomBI->getParent();
3757 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3758 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3759 });
3760 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3761 "Will have either one or two blocks to speculate.");
3762
3763 // If the branch is non-unpredictable, see if we either predictably jump to
3764 // the merge bb (if we have only a single 'then' block), or if we predictably
3765 // jump to one specific 'then' block (if we have two of them).
3766 // It isn't beneficial to speculatively execute the code
3767 // from the block that we know is predictably not entered.
3768 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3769 if (!IsUnpredictable) {
3770 uint64_t TWeight, FWeight;
3771 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3772 (TWeight + FWeight) != 0) {
3773 BranchProbability BITrueProb =
3774 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3775 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3776 BranchProbability BIFalseProb = BITrueProb.getCompl();
3777 if (IfBlocks.size() == 1) {
3778 BranchProbability BIBBProb =
3779 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3780 if (BIBBProb >= Likely)
3781 return false;
3782 } else {
3783 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3784 return false;
3785 }
3786 }
3787 }
3788
3789 // Don't try to fold an unreachable block. For example, the phi node itself
3790 // can't be the candidate if-condition for a select that we want to form.
3791 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3792 if (IfCondPhiInst->getParent() == BB)
3793 return false;
3794
3795 // Okay, we found that we can merge this two-entry phi node into a select.
3796 // Doing so would require us to fold *all* two entry phi nodes in this block.
3797 // At some point this becomes non-profitable (particularly if the target
3798 // doesn't support cmov's). Only do this transformation if there are two or
3799 // fewer PHI nodes in this block.
3800 unsigned NumPhis = 0;
3801 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3802 if (NumPhis > 2)
3803 return false;
3804
3805 // Loop over the PHI's seeing if we can promote them all to select
3806 // instructions. While we are at it, keep track of the instructions
3807 // that need to be moved to the dominating block.
3808 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3809 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3810 InstructionCost Cost = 0;
3811 InstructionCost Budget =
3813 if (SpeculateUnpredictables && IsUnpredictable)
3814 Budget += TTI.getBranchMispredictPenalty();
3815
3816 bool Changed = false;
3817 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3818 PHINode *PN = cast<PHINode>(II++);
3819 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3820 PN->replaceAllUsesWith(V);
3821 PN->eraseFromParent();
3822 Changed = true;
3823 continue;
3824 }
3825
3826 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3827 AggressiveInsts, Cost, Budget, TTI, AC,
3828 ZeroCostInstructions) ||
3829 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3830 AggressiveInsts, Cost, Budget, TTI, AC,
3831 ZeroCostInstructions))
3832 return Changed;
3833 }
3834
3835 // If we folded the first phi, PN dangles at this point. Refresh it. If
3836 // we ran out of PHIs then we simplified them all.
3837 PN = dyn_cast<PHINode>(BB->begin());
3838 if (!PN)
3839 return true;
3840
3841 // Return true if at least one of these is a 'not', and another is either
3842 // a 'not' too, or a constant.
3843 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3844 if (!match(V0, m_Not(m_Value())))
3845 std::swap(V0, V1);
3846 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3847 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3848 };
3849
3850 // Don't fold i1 branches on PHIs which contain binary operators or
3851 // (possibly inverted) select form of or/ands, unless one of
3852 // the incoming values is an 'not' and another one is freely invertible.
3853 // These can often be turned into switches and other things.
3854 auto IsBinOpOrAnd = [](Value *V) {
3855 return match(
3857 };
3858 if (PN->getType()->isIntegerTy(1) &&
3859 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3860 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3861 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3862 PN->getIncomingValue(1)))
3863 return Changed;
3864
3865 // If all PHI nodes are promotable, check to make sure that all instructions
3866 // in the predecessor blocks can be promoted as well. If not, we won't be able
3867 // to get rid of the control flow, so it's not worth promoting to select
3868 // instructions.
3869 for (BasicBlock *IfBlock : IfBlocks)
3870 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3871 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3872 // This is not an aggressive instruction that we can promote.
3873 // Because of this, we won't be able to get rid of the control flow, so
3874 // the xform is not worth it.
3875 return Changed;
3876 }
3877
3878 // If either of the blocks has it's address taken, we can't do this fold.
3879 if (any_of(IfBlocks,
3880 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3881 return Changed;
3882
3883 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3884 if (IsUnpredictable) dbgs() << " (unpredictable)";
3885 dbgs() << " T: " << IfTrue->getName()
3886 << " F: " << IfFalse->getName() << "\n");
3887
3888 // If we can still promote the PHI nodes after this gauntlet of tests,
3889 // do all of the PHI's now.
3890
3891 // Move all 'aggressive' instructions, which are defined in the
3892 // conditional parts of the if's up to the dominating block.
3893 for (BasicBlock *IfBlock : IfBlocks)
3894 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3895
3896 IRBuilder<NoFolder> Builder(DomBI);
3897 // Propagate fast-math-flags from phi nodes to replacement selects.
3898 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3899 // Change the PHI node into a select instruction.
3900 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3901 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3902
3903 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3904 isa<FPMathOperator>(PN) ? PN : nullptr,
3905 "", DomBI);
3906 PN->replaceAllUsesWith(Sel);
3907 Sel->takeName(PN);
3908 PN->eraseFromParent();
3909 }
3910
3911 // At this point, all IfBlocks are empty, so our if statement
3912 // has been flattened. Change DomBlock to jump directly to our new block to
3913 // avoid other simplifycfg's kicking in on the diamond.
3914 Builder.CreateBr(BB);
3915
3917 if (DTU) {
3918 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3919 for (auto *Successor : successors(DomBlock))
3920 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3921 }
3922
3923 DomBI->eraseFromParent();
3924 if (DTU)
3925 DTU->applyUpdates(Updates);
3926
3927 return true;
3928}
3929
3932 Value *RHS, const Twine &Name = "") {
3933 // Try to relax logical op to binary op.
3934 if (impliesPoison(RHS, LHS))
3935 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3936 if (Opc == Instruction::And)
3937 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3938 if (Opc == Instruction::Or)
3939 return Builder.CreateLogicalOr(LHS, RHS, Name);
3940 llvm_unreachable("Invalid logical opcode");
3941}
3942
3943/// Return true if either PBI or BI has branch weight available, and store
3944/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3945/// not have branch weight, use 1:1 as its weight.
3947 uint64_t &PredTrueWeight,
3948 uint64_t &PredFalseWeight,
3949 uint64_t &SuccTrueWeight,
3950 uint64_t &SuccFalseWeight) {
3951 bool PredHasWeights =
3952 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3953 bool SuccHasWeights =
3954 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3955 if (PredHasWeights || SuccHasWeights) {
3956 if (!PredHasWeights)
3957 PredTrueWeight = PredFalseWeight = 1;
3958 if (!SuccHasWeights)
3959 SuccTrueWeight = SuccFalseWeight = 1;
3960 return true;
3961 } else {
3962 return false;
3963 }
3964}
3965
3966/// Determine if the two branches share a common destination and deduce a glue
3967/// that joins the branches' conditions to arrive at the common destination if
3968/// that would be profitable.
3969static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3971 const TargetTransformInfo *TTI) {
3972 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3973 "Both blocks must end with a conditional branches.");
3975 "PredBB must be a predecessor of BB.");
3976
3977 // We have the potential to fold the conditions together, but if the
3978 // predecessor branch is predictable, we may not want to merge them.
3979 uint64_t PTWeight, PFWeight;
3980 BranchProbability PBITrueProb, Likely;
3981 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3982 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3983 (PTWeight + PFWeight) != 0) {
3984 PBITrueProb =
3985 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3986 Likely = TTI->getPredictableBranchThreshold();
3987 }
3988
3989 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3990 // Speculate the 2nd condition unless the 1st is probably true.
3991 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3992 return {{BI->getSuccessor(0), Instruction::Or, false}};
3993 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3994 // Speculate the 2nd condition unless the 1st is probably false.
3995 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3996 return {{BI->getSuccessor(1), Instruction::And, false}};
3997 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3998 // Speculate the 2nd condition unless the 1st is probably true.
3999 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4000 return {{BI->getSuccessor(1), Instruction::And, true}};
4001 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4002 // Speculate the 2nd condition unless the 1st is probably false.
4003 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4004 return {{BI->getSuccessor(0), Instruction::Or, true}};
4005 }
4006 return std::nullopt;
4007}
4008
4010 DomTreeUpdater *DTU,
4011 MemorySSAUpdater *MSSAU,
4012 const TargetTransformInfo *TTI) {
4013 BasicBlock *BB = BI->getParent();
4014 BasicBlock *PredBlock = PBI->getParent();
4015
4016 // Determine if the two branches share a common destination.
4017 BasicBlock *CommonSucc;
4019 bool InvertPredCond;
4020 std::tie(CommonSucc, Opc, InvertPredCond) =
4022
4023 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4024
4025 IRBuilder<> Builder(PBI);
4026 // The builder is used to create instructions to eliminate the branch in BB.
4027 // If BB's terminator has !annotation metadata, add it to the new
4028 // instructions.
4029 Builder.CollectMetadataToCopy(BB->getTerminator(),
4030 {LLVMContext::MD_annotation});
4031
4032 // If we need to invert the condition in the pred block to match, do so now.
4033 if (InvertPredCond) {
4034 InvertBranch(PBI, Builder);
4035 }
4036
4037 BasicBlock *UniqueSucc =
4038 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4039
4040 // Before cloning instructions, notify the successor basic block that it
4041 // is about to have a new predecessor. This will update PHI nodes,
4042 // which will allow us to update live-out uses of bonus instructions.
4043 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4044
4045 // Try to update branch weights.
4046 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4047 SmallVector<uint32_t, 2> MDWeights;
4048 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4049 SuccTrueWeight, SuccFalseWeight)) {
4050 SmallVector<uint64_t, 8> NewWeights;
4051
4052 if (PBI->getSuccessor(0) == BB) {
4053 // PBI: br i1 %x, BB, FalseDest
4054 // BI: br i1 %y, UniqueSucc, FalseDest
4055 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4056 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
4057 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4058 // TrueWeight for PBI * FalseWeight for BI.
4059 // We assume that total weights of a BranchInst can fit into 32 bits.
4060 // Therefore, we will not have overflow using 64-bit arithmetic.
4061 NewWeights.push_back(PredFalseWeight *
4062 (SuccFalseWeight + SuccTrueWeight) +
4063 PredTrueWeight * SuccFalseWeight);
4064 } else {
4065 // PBI: br i1 %x, TrueDest, BB
4066 // BI: br i1 %y, TrueDest, UniqueSucc
4067 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4068 // FalseWeight for PBI * TrueWeight for BI.
4069 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4070 PredFalseWeight * SuccTrueWeight);
4071 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4072 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
4073 }
4074
4075 // Halve the weights if any of them cannot fit in an uint32_t
4076 fitWeights(NewWeights);
4077
4078 append_range(MDWeights, NewWeights);
4079 setBranchWeights(PBI, MDWeights[0], MDWeights[1], /*IsExpected=*/false);
4080
4081 // TODO: If BB is reachable from all paths through PredBlock, then we
4082 // could replace PBI's branch probabilities with BI's.
4083 } else
4084 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4085
4086 // Now, update the CFG.
4087 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4088
4089 if (DTU)
4090 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4091 {DominatorTree::Delete, PredBlock, BB}});
4092
4093 // If BI was a loop latch, it may have had associated loop metadata.
4094 // We need to copy it to the new latch, that is, PBI.
4095 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4096 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4097
4098 ValueToValueMapTy VMap; // maps original values to cloned values
4100
4101 Module *M = BB->getModule();
4102
4103 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4104 for (DbgVariableRecord &DVR :
4106 RemapDbgRecord(M, &DVR, VMap,
4108 }
4109
4110 // Now that the Cond was cloned into the predecessor basic block,
4111 // or/and the two conditions together.
4112 Value *BICond = VMap[BI->getCondition()];
4113 PBI->setCondition(
4114 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4116 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4117 if (!MDWeights.empty()) {
4118 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4119 setBranchWeights(SI, MDWeights[0], MDWeights[1],
4120 /*IsExpected=*/false);
4121 }
4122
4123 ++NumFoldBranchToCommonDest;
4124 return true;
4125}
4126
4127/// Return if an instruction's type or any of its operands' types are a vector
4128/// type.
4129static bool isVectorOp(Instruction &I) {
4130 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4131 return U->getType()->isVectorTy();
4132 });
4133}
4134
4135/// If this basic block is simple enough, and if a predecessor branches to us
4136/// and one of our successors, fold the block into the predecessor and use
4137/// logical operations to pick the right destination.
4139 MemorySSAUpdater *MSSAU,
4140 const TargetTransformInfo *TTI,
4141 unsigned BonusInstThreshold) {
4142 // If this block ends with an unconditional branch,
4143 // let speculativelyExecuteBB() deal with it.
4144 if (!BI->isConditional())
4145 return false;
4146
4147 BasicBlock *BB = BI->getParent();
4151
4153
4155 Cond->getParent() != BB || !Cond->hasOneUse())
4156 return false;
4157
4158 // Finally, don't infinitely unroll conditional loops.
4159 if (is_contained(successors(BB), BB))
4160 return false;
4161
4162 // With which predecessors will we want to deal with?
4164 for (BasicBlock *PredBlock : predecessors(BB)) {
4165 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
4166
4167 // Check that we have two conditional branches. If there is a PHI node in
4168 // the common successor, verify that the same value flows in from both
4169 // blocks.
4170 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
4171 continue;
4172
4173 // Determine if the two branches share a common destination.
4174 BasicBlock *CommonSucc;
4176 bool InvertPredCond;
4177 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4178 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4179 else
4180 continue;
4181
4182 // Check the cost of inserting the necessary logic before performing the
4183 // transformation.
4184 if (TTI) {
4185 Type *Ty = BI->getCondition()->getType();
4186 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4187 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4188 !isa<CmpInst>(PBI->getCondition())))
4189 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4190
4192 continue;
4193 }
4194
4195 // Ok, we do want to deal with this predecessor. Record it.
4196 Preds.emplace_back(PredBlock);
4197 }
4198
4199 // If there aren't any predecessors into which we can fold,
4200 // don't bother checking the cost.
4201 if (Preds.empty())
4202 return false;
4203
4204 // Only allow this transformation if computing the condition doesn't involve
4205 // too many instructions and these involved instructions can be executed
4206 // unconditionally. We denote all involved instructions except the condition
4207 // as "bonus instructions", and only allow this transformation when the
4208 // number of the bonus instructions we'll need to create when cloning into
4209 // each predecessor does not exceed a certain threshold.
4210 unsigned NumBonusInsts = 0;
4211 bool SawVectorOp = false;
4212 const unsigned PredCount = Preds.size();
4213 for (Instruction &I : *BB) {
4214 // Don't check the branch condition comparison itself.
4215 if (&I == Cond)
4216 continue;
4217 // Ignore the terminator.
4218 if (isa<BranchInst>(I))
4219 continue;
4220 // I must be safe to execute unconditionally.
4222 return false;
4223 SawVectorOp |= isVectorOp(I);
4224
4225 // Account for the cost of duplicating this instruction into each
4226 // predecessor. Ignore free instructions.
4227 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4229 NumBonusInsts += PredCount;
4230
4231 // Early exits once we reach the limit.
4232 if (NumBonusInsts >
4233 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4234 return false;
4235 }
4236
4237 auto IsBCSSAUse = [BB, &I](Use &U) {
4238 auto *UI = cast<Instruction>(U.getUser());
4239 if (auto *PN = dyn_cast<PHINode>(UI))
4240 return PN->getIncomingBlock(U) == BB;
4241 return UI->getParent() == BB && I.comesBefore(UI);
4242 };
4243
4244 // Does this instruction require rewriting of uses?
4245 if (!all_of(I.uses(), IsBCSSAUse))
4246 return false;
4247 }
4248 if (NumBonusInsts >
4249 BonusInstThreshold *
4250 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4251 return false;
4252
4253 // Ok, we have the budget. Perform the transformation.
4254 for (BasicBlock *PredBlock : Preds) {
4255 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4256 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4257 }
4258 return false;
4259}
4260
4261// If there is only one store in BB1 and BB2, return it, otherwise return
4262// nullptr.
4264 StoreInst *S = nullptr;
4265 for (auto *BB : {BB1, BB2}) {
4266 if (!BB)
4267 continue;
4268 for (auto &I : *BB)
4269 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4270 if (S)
4271 // Multiple stores seen.
4272 return nullptr;
4273 else
4274 S = SI;
4275 }
4276 }
4277 return S;
4278}
4279
4281 Value *AlternativeV = nullptr) {
4282 // PHI is going to be a PHI node that allows the value V that is defined in
4283 // BB to be referenced in BB's only successor.
4284 //
4285 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4286 // doesn't matter to us what the other operand is (it'll never get used). We
4287 // could just create a new PHI with an undef incoming value, but that could
4288 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4289 // other PHI. So here we directly look for some PHI in BB's successor with V
4290 // as an incoming operand. If we find one, we use it, else we create a new
4291 // one.
4292 //
4293 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4294 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4295 // where OtherBB is the single other predecessor of BB's only successor.
4296 PHINode *PHI = nullptr;
4297 BasicBlock *Succ = BB->getSingleSuccessor();
4298
4299 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4300 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4301 PHI = cast<PHINode>(I);
4302 if (!AlternativeV)
4303 break;
4304
4305 assert(Succ->hasNPredecessors(2));
4306 auto PredI = pred_begin(Succ);
4307 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4308 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4309 break;
4310 PHI = nullptr;
4311 }
4312 if (PHI)
4313 return PHI;
4314
4315 // If V is not an instruction defined in BB, just return it.
4316 if (!AlternativeV &&
4317 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4318 return V;
4319
4320 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4321 PHI->insertBefore(Succ->begin());
4322 PHI->addIncoming(V, BB);
4323 for (BasicBlock *PredBB : predecessors(Succ))
4324 if (PredBB != BB)
4325 PHI->addIncoming(
4326 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4327 return PHI;
4328}
4329
4331 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4332 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4333 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4334 // For every pointer, there must be exactly two stores, one coming from
4335 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4336 // store (to any address) in PTB,PFB or QTB,QFB.
4337 // FIXME: We could relax this restriction with a bit more work and performance
4338 // testing.
4339 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4340 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4341 if (!PStore || !QStore)
4342 return false;
4343
4344 // Now check the stores are compatible.
  // Only simple (unordered) stores of the same value type can be merged into
  // one conditional store; atomics/volatiles with ordering must stay put.
4345 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4346 PStore->getValueOperand()->getType() !=
4347 QStore->getValueOperand()->getType())
4348 return false;
4349
4350 // Check that sinking the store won't cause program behavior changes. Sinking
4351 // the store out of the Q blocks won't change any behavior as we're sinking
4352 // from a block to its unconditional successor. But we're moving a store from
4353 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4354 // So we need to check that there are no aliasing loads or stores in
4355 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4356 // operations between PStore and the end of its parent block.
4357 //
4358 // The ideal way to do this is to query AliasAnalysis, but we don't
4359 // preserve AA currently so that is dangerous. Be super safe and just
4360 // check there are no other memory operations at all.
4361 for (auto &I : *QFB->getSinglePredecessor())
4362 if (I.mayReadOrWriteMemory())
4363 return false;
4364 for (auto &I : *QFB)
4365 if (&I != QStore && I.mayReadOrWriteMemory())
4366 return false;
4367 if (QTB)
4368 for (auto &I : *QTB)
4369 if (&I != QStore && I.mayReadOrWriteMemory())
4370 return false;
4371 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4372 I != E; ++I)
4373 if (&*I != PStore && I->mayReadOrWriteMemory())
4374 return false;
4375
4376 // If we're not in aggressive mode, we only optimize if we have some
4377 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4378 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4379 if (!BB)
4380 return true;
4381 // Heuristic: if the block can be if-converted/phi-folded and the
4382 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4383 // thread this store.
4384 InstructionCost Cost = 0;
4385 InstructionCost Budget =
4387 for (auto &I : BB->instructionsWithoutDebug(false)) {
4388 // Consider terminator instruction to be free.
4389 if (I.isTerminator())
4390 continue;
4391 // If this is one of the stores that we want to speculate out of this BB,
4392 // then don't count its cost, consider it to be free.
4393 if (auto *S = dyn_cast<StoreInst>(&I))
4394 if (llvm::find(FreeStores, S))
4395 continue;
4396 // Else, we have a white-list of instructions that we are okay speculating.
4398 return false; // Not in white-list - not worthwhile folding.
4399 // And finally, if this is a non-free instruction that we are okay
4400 // speculating, ensure that we consider the speculation budget.
4401 Cost +=
4402 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4403 if (Cost > Budget)
4404 return false; // Eagerly refuse to fold as soon as we're out of budget.
4405 }
4406 assert(Cost <= Budget &&
4407 "When we run out of budget we will eagerly return from within the "
4408 "per-instruction loop.");
4409 return true;
4410 };
4411
  // The two stores being sunk are never charged against the budget above.
4412 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4414 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4415 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4416 return false;
4417
4418 // If PostBB has more than two predecessors, we need to split it so we can
4419 // sink the store.
4420 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4421 // We know that QFB's only successor is PostBB. And QFB has a single
4422 // predecessor. If QTB exists, then its only successor is also PostBB.
4423 // If QTB does not exist, then QFB's only predecessor has a conditional
4424 // branch to QFB and PostBB.
4425 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4426 BasicBlock *NewBB =
4427 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4428 if (!NewBB)
4429 return false;
4430 PostBB = NewBB;
4431 }
4432
4433 // OK, we're going to sink the stores to PostBB. The store has to be
4434 // conditional though, so first create the predicate.
4435 BranchInst *PBranch =
4437 BranchInst *QBranch =
4439 Value *PCond = PBranch->getCondition();
4440 Value *QCond = QBranch->getCondition();
4441
4443 PStore->getParent());
4445 QStore->getParent(), PPHI);
4446
4447 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4448 IRBuilder<> QB(PostBB, PostBBFirst);
4449 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4450
  // A store may live in the *false* block of its diamond; in that case the
  // predicate guarding it is the inverse of the branch condition.
4451 InvertPCond ^= (PStore->getParent() != PTB);
4452 InvertQCond ^= (QStore->getParent() != QTB);
4453 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4454 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4455
  // The merged store must execute whenever either original store would have.
4456 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4457
4458 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4459 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4460 /*Unreachable=*/false,
4461 /*BranchWeights=*/nullptr, DTU);
  // Recombine profile data for the new guard, mirroring any condition
  // inversions applied above.
4462 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4464 SmallVector<uint32_t, 2> PWeights, QWeights;
4465 extractBranchWeights(*PBranch, PWeights);
4466 extractBranchWeights(*QBranch, QWeights);
4467 if (InvertPCond)
4468 std::swap(PWeights[0], PWeights[1]);
4469 if (InvertQCond)
4470 std::swap(QWeights[0], QWeights[1]);
4471 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4472 setBranchWeights(PostBB->getTerminator(), CombinedWeights[0],
4473 CombinedWeights[1],
4474 /*IsExpected=*/false);
4475 }
4476
4477 QB.SetInsertPoint(T);
4478 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4479 SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4480 // Choose the minimum alignment. If we could prove both stores execute, we
4481 // could use biggest one. In this case, though, we only know that one of the
4482 // stores executes. And we don't know it's safe to take the alignment from a
4483 // store that doesn't execute.
4484 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4485
  // The merged conditional store now subsumes both originals.
4486 QStore->eraseFromParent();
4487 PStore->eraseFromParent();
4488
4489 return true;
4490}
4491
4493 DomTreeUpdater *DTU, const DataLayout &DL,
4494 const TargetTransformInfo &TTI) {
4495 // The intention here is to find diamonds or triangles (see below) where each
4496 // conditional block contains a store to the same address. Both of these
4497 // stores are conditional, so they can't be unconditionally sunk. But it may
4498 // be profitable to speculatively sink the stores into one merged store at the
4499 // end, and predicate the merged store on the union of the two conditions of
4500 // PBI and QBI.
4501 //
4502 // This can reduce the number of stores executed if both of the conditions are
4503 // true, and can allow the blocks to become small enough to be if-converted.
4504 // This optimization will also chain, so that ladders of test-and-set
4505 // sequences can be if-converted away.
4506 //
4507 // We only deal with simple diamonds or triangles:
4508 //
4509 // PBI or PBI or a combination of the two
4510 // / \ | \
4511 // PTB PFB | PFB
4512 // \ / | /
4513 // QBI QBI
4514 // / \ | \
4515 // QTB QFB | QFB
4516 // \ / | /
4517 // PostBB PostBB
4518 //
4519 // We model triangles as a type of diamond with a nullptr "true" block.
4520 // Triangles are canonicalized so that the fallthrough edge is represented by
4521 // a true condition, as in the diagram above.
4522 BasicBlock *PTB = PBI->getSuccessor(0);
4523 BasicBlock *PFB = PBI->getSuccessor(1);
4524 BasicBlock *QTB = QBI->getSuccessor(0);
4525 BasicBlock *QFB = QBI->getSuccessor(1);
4526 BasicBlock *PostBB = QFB->getSingleSuccessor();
4527
4528 // Make sure we have a good guess for PostBB. If QTB's only successor is
4529 // QFB, then QFB is a better PostBB.
4530 if (QTB->getSingleSuccessor() == QFB)
4531 PostBB = QFB;
4532
4533 // If we couldn't find a good PostBB, stop.
4534 if (!PostBB)
4535 return false;
4536
4537 bool InvertPCond = false, InvertQCond = false;
4538 // Canonicalize fallthroughs to the true branches.
4539 if (PFB == QBI->getParent()) {
4540 std::swap(PFB, PTB);
4541 InvertPCond = true;
4542 }
4543 if (QFB == PostBB) {
4544 std::swap(QFB, QTB);
4545 InvertQCond = true;
4546 }
4547
4548 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4549 // and QFB may not. Model fallthroughs as a nullptr block.
4550 if (PTB == QBI->getParent())
4551 PTB = nullptr;
4552 if (QTB == PostBB)
4553 QTB = nullptr;
4554
4555 // Legality bailouts. We must have at least the non-fallthrough blocks and
4556 // the post-dominating block, and the non-fallthroughs must only have one
4557 // predecessor.
4558 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4559 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4560 };
4561 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4562 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4563 return false;
4564 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4565 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4566 return false;
  // NOTE(review): the middle block is expected to be referenced only by the
  // two edges coming out of the P diamond — confirm against callers.
4567 if (!QBI->getParent()->hasNUses(2))
4568 return false;
4569
4570 // OK, this is a sequence of two diamonds or triangles.
4571 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4572 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4573 for (auto *BB : {PTB, PFB}) {
4574 if (!BB)
4575 continue;
4576 for (auto &I : *BB)
4578 PStoreAddresses.insert(SI->getPointerOperand());
4579 }
4580 for (auto *BB : {QTB, QFB}) {
4581 if (!BB)
4582 continue;
4583 for (auto &I : *BB)
4585 QStoreAddresses.insert(SI->getPointerOperand());
4586 }
4587
4588 set_intersect(PStoreAddresses, QStoreAddresses);
4589 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4590 // clear what it contains.
4591 auto &CommonAddresses = PStoreAddresses;
4592
  // Attempt the merge once per address stored to on both sides of the ladder.
4593 bool Changed = false;
4594 for (auto *Address : CommonAddresses)
4595 Changed |=
4596 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4597 InvertPCond, InvertQCond, DTU, DL, TTI);
4598 return Changed;
4599}
4600
4601/// If the previous block ended with a widenable branch, determine if reusing
4602/// the target block is profitable and legal. This will have the effect of
4603/// "widening" PBI, but doesn't require us to reason about hoisting safety.
4605 DomTreeUpdater *DTU) {
4606 // TODO: This can be generalized in two important ways:
4607 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4608 // values from the PBI edge.
4609 // 2) We can sink side effecting instructions into BI's fallthrough
4610 // successor provided they doesn't contribute to computation of
4611 // BI's condition.
4612 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4613 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4614 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4615 !BI->getParent()->getSinglePredecessor())
4616 return false;
4617 if (!IfFalseBB->phis().empty())
4618 return false; // TODO
4619 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4620 // may undo the transform done here.
4621 // TODO: There might be a more fine-grained solution to this.
4622 if (!llvm::succ_empty(IfFalseBB))
4623 return false;
4624 // Use lambda to lazily compute expensive condition after cheap ones.
4625 auto NoSideEffects = [](BasicBlock &BB) {
4626 return llvm::none_of(BB, [](const Instruction &I) {
4627 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4628 });
4629 };
  // Case 1: BI's false successor deoptimizes; retarget that edge to the
  // widenable branch's false block instead, effectively widening PBI.
4630 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4631 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4632 NoSideEffects(*BI->getParent())) {
4633 auto *OldSuccessor = BI->getSuccessor(1);
4634 OldSuccessor->removePredecessor(BI->getParent());
4635 BI->setSuccessor(1, IfFalseBB);
4636 if (DTU)
4637 DTU->applyUpdates(
4638 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4639 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4640 return true;
4641 }
  // Case 2: symmetric handling when the deoptimizing exit is BI's true
  // successor.
4642 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4643 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4644 NoSideEffects(*BI->getParent())) {
4645 auto *OldSuccessor = BI->getSuccessor(0);
4646 OldSuccessor->removePredecessor(BI->getParent());
4647 BI->setSuccessor(0, IfFalseBB);
4648 if (DTU)
4649 DTU->applyUpdates(
4650 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4651 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4652 return true;
4653 }
4654 return false;
4655}
4656
4657/// If we have a conditional branch as a predecessor of another block,
4658/// this function tries to simplify it. We know
4659/// that PBI and BI are both conditional branches, and BI is in one of the
4660/// successor blocks of PBI - PBI branches to BI.
4662 DomTreeUpdater *DTU,
4663 const DataLayout &DL,
4664 const TargetTransformInfo &TTI) {
4665 assert(PBI->isConditional() && BI->isConditional());
4666 BasicBlock *BB = BI->getParent();
4667
4668 // If this block ends with a branch instruction, and if there is a
4669 // predecessor that ends on a branch of the same condition, make
4670 // this conditional branch redundant.
4671 if (PBI->getCondition() == BI->getCondition() &&
4672 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4673 // Okay, the outcome of this conditional branch is statically
4674 // knowable. If this block had a single pred, handle specially, otherwise
4675 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4676 if (BB->getSinglePredecessor()) {
4677 // Turn this into a branch on constant.
4678 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4679 BI->setCondition(
4680 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4681 return true; // Nuke the branch on constant.
4682 }
4683 }
4684
4685 // If the previous block ended with a widenable branch, determine if reusing
4686 // the target block is profitable and legal. This will have the effect of
4687 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4688 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4689 return true;
4690
4691 // If both branches are conditional and both contain stores to the same
4692 // address, remove the stores from the conditionals and create a conditional
4693 // merged store at the end.
4694 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4695 return true;
4696
4697 // If this is a conditional branch in an empty block, and if any
4698 // predecessors are a conditional branch to one of our destinations,
4699 // fold the conditions into logical ops and one cond br.
4700
4701 // Ignore dbg intrinsics.
4702 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4703 return false;
4704
  // Identify which successor of PBI and of BI is the shared destination.
4705 int PBIOp, BIOp;
4706 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4707 PBIOp = 0;
4708 BIOp = 0;
4709 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4710 PBIOp = 0;
4711 BIOp = 1;
4712 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4713 PBIOp = 1;
4714 BIOp = 0;
4715 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4716 PBIOp = 1;
4717 BIOp = 1;
4718 } else {
4719 return false;
4720 }
4721
4722 // Check to make sure that the other destination of this branch
4723 // isn't BB itself. If so, this is an infinite loop that will
4724 // keep getting unwound.
4725 if (PBI->getSuccessor(PBIOp) == BB)
4726 return false;
4727
4728 // If predecessor's branch probability to BB is too low don't merge branches.
4729 SmallVector<uint32_t, 2> PredWeights;
4730 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4731 extractBranchWeights(*PBI, PredWeights) &&
4732 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4733
4735 PredWeights[PBIOp],
4736 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4737
4738 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4739 if (CommonDestProb >= Likely)
4740 return false;
4741 }
4742
4743 // Do not perform this transformation if it would require
4744 // insertion of a large number of select instructions. For targets
4745 // without predication/cmovs, this is a big pessimization.
4746
4747 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4748 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4749 unsigned NumPhis = 0;
4750 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4751 ++II, ++NumPhis) {
4752 if (NumPhis > 2) // Disable this xform.
4753 return false;
4754 }
4755
4756 // Finally, if everything is ok, fold the branches to logical ops.
4757 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4758
4759 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4760 << "AND: " << *BI->getParent());
4761
4763
4764 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4765 // branch in it, where one edge (OtherDest) goes back to itself but the other
4766 // exits. We don't *know* that the program avoids the infinite loop
4767 // (even though that seems likely). If we do this xform naively, we'll end up
4768 // recursively unpeeling the loop. Since we know that (after the xform is
4769 // done) that the block *is* infinite if reached, we just make it an obviously
4770 // infinite loop with no cond branch.
4771 if (OtherDest == BB) {
4772 // Insert it at the end of the function, because it's either code,
4773 // or it won't matter if it's hot. :)
4774 BasicBlock *InfLoopBlock =
4775 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4776 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4777 if (DTU)
4778 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4779 OtherDest = InfLoopBlock;
4780 }
4781
4782 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4783
4784 // BI may have other predecessors. Because of this, we leave
4785 // it alone, but modify PBI.
4786
4787 // Make sure we get to CommonDest on True&True directions.
4788 Value *PBICond = PBI->getCondition();
4789 IRBuilder<NoFolder> Builder(PBI);
4790 if (PBIOp)
4791 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4792
4793 Value *BICond = BI->getCondition();
4794 if (BIOp)
4795 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4796
4797 // Merge the conditions.
4798 Value *Cond =
4799 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4800
4801 // Modify PBI to branch on the new condition to the new dests.
4802 PBI->setCondition(Cond);
4803 PBI->setSuccessor(0, CommonDest);
4804 PBI->setSuccessor(1, OtherDest);
4805
4806 if (DTU) {
4807 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4808 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4809
4810 DTU->applyUpdates(Updates);
4811 }
4812
4813 // Update branch weight for PBI.
4814 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4815 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4816 bool HasWeights =
4817 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4818 SuccTrueWeight, SuccFalseWeight);
4819 if (HasWeights) {
4820 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4821 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4822 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4823 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4824 // The weight to CommonDest should be PredCommon * SuccTotal +
4825 // PredOther * SuccCommon.
4826 // The weight to OtherDest should be PredOther * SuccOther.
4827 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4828 PredOther * SuccCommon,
4829 PredOther * SuccOther};
4830 // Halve the weights if any of them cannot fit in an uint32_t
4831 fitWeights(NewWeights);
4832
4833 setBranchWeights(PBI, NewWeights[0], NewWeights[1], /*IsExpected=*/false);
4834 // Cond may be a select instruction with the first operand set to "true", or
4835 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4837 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4838 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4839 // The select is predicated on PBICond
4840 assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond);
4841 // The corresponding probabilities are what was referred to above as
4842 // PredCommon and PredOther.
4843 setBranchWeights(SI, PredCommon, PredOther,
4844 /*IsExpected=*/false);
4845 }
4846 }
4847
4848 // OtherDest may have phi nodes. If so, add an entry from PBI's
4849 // block that are identical to the entries for BI's block.
4850 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4851
4852 // We know that the CommonDest already had an edge from PBI to
4853 // it. If it has PHIs though, the PHIs may have different
4854 // entries for BB and PBI's BB. If so, insert a select to make
4855 // them agree.
4856 for (PHINode &PN : CommonDest->phis()) {
4857 Value *BIV = PN.getIncomingValueForBlock(BB);
4858 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4859 Value *PBIV = PN.getIncomingValue(PBBIdx);
4860 if (BIV != PBIV) {
4861 // Insert a select in PBI to pick the right value.
4863 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4864 PN.setIncomingValue(PBBIdx, NV);
4865 // The select has the same condition as PBI, in the same BB. The
4866 // probabilities don't change.
4867 if (HasWeights) {
4868 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4869 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4870 setBranchWeights(NV, TrueWeight, FalseWeight,
4871 /*IsExpected=*/false);
4872 }
4873 }
4874 }
4875
4876 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4877 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4878
4879 // This basic block is probably dead. We know it has at least
4880 // one fewer predecessor.
4881 return true;
4882}
4883
4884// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4885// true or to FalseBB if Cond is false.
4886// Takes care of updating the successors and removing the old terminator.
4887// Also makes sure not to introduce new successors by assuming that edges to
4888// non-successor TrueBBs and FalseBBs aren't reachable.
4889bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4890 Value *Cond, BasicBlock *TrueBB,
4891 BasicBlock *FalseBB,
4892 uint32_t TrueWeight,
4893 uint32_t FalseWeight) {
4894 auto *BB = OldTerm->getParent();
4895 // Remove any superfluous successor edges from the CFG.
4896 // First, figure out which successors to preserve.
4897 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4898 // successor.
4899 BasicBlock *KeepEdge1 = TrueBB;
4900 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4901
  // Successors fully disconnected below, remembered for DomTree updates.
4902 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4903
4904 // Then remove the rest.
4905 for (BasicBlock *Succ : successors(OldTerm)) {
4906 // Make sure only to keep exactly one copy of each edge.
4907 if (Succ == KeepEdge1)
4908 KeepEdge1 = nullptr;
4909 else if (Succ == KeepEdge2)
4910 KeepEdge2 = nullptr;
4911 else {
4912 Succ->removePredecessor(BB,
4913 /*KeepOneInputPHIs=*/true);
4914
4915 if (Succ != TrueBB && Succ != FalseBB)
4916 RemovedSuccessors.insert(Succ);
4917 }
4918 }
4919
4920 IRBuilder<> Builder(OldTerm);
4921 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4922
4923 // Insert an appropriate new terminator.
4924 if (!KeepEdge1 && !KeepEdge2) {
4925 if (TrueBB == FalseBB) {
4926 // We were only looking for one successor, and it was present.
4927 // Create an unconditional branch to it.
4928 Builder.CreateBr(TrueBB);
4929 } else {
4930 // We found both of the successors we were looking for.
4931 // Create a conditional branch sharing the condition of the select.
4932 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4933 if (TrueWeight != FalseWeight)
4934 setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4935 }
4936 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4937 // Neither of the selected blocks were successors, so this
4938 // terminator must be unreachable.
4939 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4940 } else {
4941 // One of the selected values was a successor, but the other wasn't.
4942 // Insert an unconditional branch to the one that was found;
4943 // the edge to the one that wasn't must be unreachable.
4944 if (!KeepEdge1) {
4945 // Only TrueBB was found.
4946 Builder.CreateBr(TrueBB);
4947 } else {
4948 // Only FalseBB was found.
4949 Builder.CreateBr(FalseBB);
4950 }
4951 }
4952
4954
  // Inform the DomTree updater of every CFG edge removed above.
4955 if (DTU) {
4956 SmallVector<DominatorTree::UpdateType, 2> Updates;
4957 Updates.reserve(RemovedSuccessors.size());
4958 for (auto *RemovedSuccessor : RemovedSuccessors)
4959 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4960 DTU->applyUpdates(Updates);
4961 }
4962
4963 return true;
4964}
4965
4966// Replaces
4967// (switch (select cond, X, Y)) on constant X, Y
4968// with a branch - conditional if X and Y lead to distinct BBs,
4969// unconditional otherwise.
4970bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4971 SelectInst *Select) {
4972 // Check for constant integer values in the select.
4973 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4974 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4975 if (!TrueVal || !FalseVal)
4976 return false;
4977
4978 // Find the relevant condition and destinations.
4979 Value *Condition = Select->getCondition();
4980 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4981 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4982
4983 // Get weight for TrueBB and FalseBB.
4984 uint32_t TrueWeight = 0, FalseWeight = 0;
4985 SmallVector<uint64_t, 8> Weights;
4986 bool HasWeights = hasBranchWeightMD(*SI);
4987 if (HasWeights) {
4988 getBranchWeights(SI, Weights);
4989 if (Weights.size() == 1 + SI->getNumCases()) {
4990 TrueWeight =
4991 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4992 FalseWeight =
4993 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4994 }
4995 }
4996
4997 // Perform the actual simplification.
4998 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4999 FalseWeight);
5000}
5001
5002// Replaces
5003// (indirectbr (select cond, blockaddress(@fn, BlockA),
5004// blockaddress(@fn, BlockB)))
5005// with
5006// (br cond, BlockA, BlockB).
5007bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
5008 SelectInst *SI) {
5009 // Check that both operands of the select are block addresses.
5010 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
5011 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
5012 if (!TBA || !FBA)
5013 return false;
5014
5015 // Extract the actual blocks.
5016 BasicBlock *TrueBB = TBA->getBasicBlock();
5017 BasicBlock *FalseBB = FBA->getBasicBlock();
5018
5019 // Perform the actual simplification.
5020 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
5021 0);
5022}
5023
5024/// This is called when we find an icmp instruction
5025/// (a seteq/setne with a constant) as the only instruction in a
5026/// block that ends with an uncond branch. We are looking for a very specific
5027/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5028/// this case, we merge the first two "or's of icmp" into a switch, but then the
5029/// default value goes to an uncond block with a seteq in it, we get something
5030/// like:
5031///
5032/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5033/// DEFAULT:
5034/// %tmp = icmp eq i8 %A, 92
5035/// br label %end
5036/// end:
5037/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5038///
5039/// We prefer to split the edge to 'end' so that there is a true/false entry to
5040/// the PHI, merging the third icmp into the switch.
5041bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5042 ICmpInst *ICI, IRBuilder<> &Builder) {
5043 BasicBlock *BB = ICI->getParent();
5044
5045 // If the block has any PHIs in it or the icmp has multiple uses, it is too
5046 // complex.
5047 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
5048 return false;
5049
  // The unchecked cast is justified by the precondition above: callers only
  // pass an icmp against a constant.
5050 Value *V = ICI->getOperand(0);
5051 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
5052
5053 // The pattern we're looking for is where our only predecessor is a switch on
5054 // 'V' and this block is the default case for the switch. In this case we can
5055 // fold the compared value into the switch to simplify things.
5056 BasicBlock *Pred = BB->getSinglePredecessor();
5057 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5058 return false;
5059
5060 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5061 if (SI->getCondition() != V)
5062 return false;
5063
5064 // If BB is reachable on a non-default case, then we simply know the value of
5065 // V in this block. Substitute it and constant fold the icmp instruction
5066 // away.
5067 if (SI->getDefaultDest() != BB) {
5068 ConstantInt *VVal = SI->findCaseDest(BB);
5069 assert(VVal && "Should have a unique destination value");
5070 ICI->setOperand(0, VVal);
5071
5072 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5073 ICI->replaceAllUsesWith(V);
5074 ICI->eraseFromParent();
5075 }
5076 // BB is now empty, so it is likely to simplify away.
5077 return requestResimplify();
5078 }
5079
5080 // Ok, the block is reachable from the default dest. If the constant we're
5081 // comparing exists in one of the other edges, then we can constant fold ICI
5082 // and zap it.
5083 if (SI->findCaseValue(Cst) != SI->case_default()) {
5084 Value *V;
5085 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5087 else
5089
5090 ICI->replaceAllUsesWith(V);
5091 ICI->eraseFromParent();
5092 // BB is now empty, so it is likely to simplify away.
5093 return requestResimplify();
5094 }
5095
5096 // The use of the icmp has to be in the 'end' block, by the only PHI node in
5097 // the block.
5098 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5099 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
5100 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5102 return false;
5103
5104 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
5105 // true in the PHI.
5106 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
5107 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
5108
5109 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
5110 std::swap(DefaultCst, NewCst);
5111
5112 // Replace ICI (which is used by the PHI for the default value) with true or
5113 // false depending on if it is EQ or NE.
5114 ICI->replaceAllUsesWith(DefaultCst);
5115 ICI->eraseFromParent();
5116
5117 SmallVector<DominatorTree::UpdateType, 2> Updates;
5118
5119 // Okay, the switch goes to this block on a default value. Add an edge from
5120 // the switch to the merge point on the compared value.
5121 BasicBlock *NewBB =
5122 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  // Split the default edge's profile weight between the old default edge and
  // the newly added case edge.
5123 {
5124 SwitchInstProfUpdateWrapper SIW(*SI);
5125 auto W0 = SIW.getSuccessorWeight(0);
5127 if (W0) {
5128 NewW = ((uint64_t(*W0) + 1) >> 1);
5129 SIW.setSuccessorWeight(0, *NewW);
5130 }
5131 SIW.addCase(Cst, NewBB, NewW);
5132 if (DTU)
5133 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5134 }
5135
5136 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5137 Builder.SetInsertPoint(NewBB);
5138 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5139 Builder.CreateBr(SuccBlock);
5140 PHIUse->addIncoming(NewCst, NewBB);
5141 if (DTU) {
5142 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5143 DTU->applyUpdates(Updates);
5144 }
5145 return true;
5146}
5147
/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  // NOTE(review): the declaration of 'Cond' (presumably taken from BI's
  // condition) is elided from this excerpt — confirm against upstream.
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual)
    std::swap(DefaultBB, EdgeBB);

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    // Branch on the unfoldable comparison first; equality goes straight to
    // the common edge block, otherwise fall through into the new block that
    // will hold the switch.
    if (TrueWhenEqual)
      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
    else
      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Create the new switch instruction now.
  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());

  // Add all of the 'cases' to the switch instruction.
  for (ConstantInt *Val : Values)
    New->addCase(Val, EdgeBB);

  // We added edges from PI to the EdgeBB. As such, if there were any
  // PHI nodes in EdgeBB, they need entries to be added corresponding to
  // the number of edges added.
  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
    PHINode *PN = cast<PHINode>(BBI);
    Value *InVal = PN->getIncomingValueForBlock(BB);
    // One incoming entry for BB already exists; add entries only for the
    // additional switch edges (hence Values.size() - 1).
    for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
      PN->addIncoming(InVal, BB);
  }

  // Erase the old branch instruction.
  // NOTE(review): the erasure statement itself is elided from this excerpt —
  // confirm against upstream.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5278
5279bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5280 if (isa<PHINode>(RI->getValue()))
5281 return simplifyCommonResume(RI);
5282 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5283 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5284 // The resume must unwind the exception that caused control to branch here.
5285 return simplifySingleResume(RI);
5286
5287 return false;
5288}
5289
// Check if cleanup block is empty: it may contain only debug intrinsics and
// lifetime.end markers; any other instruction makes it non-empty.
// NOTE(review): the function signature (taking an instruction range R) is
// elided from this excerpt — confirm against upstream.
  for (Instruction &I : R) {
    // Any non-intrinsic instruction is real work, so the block isn't empty.
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      break; // Benign; keep scanning.
    default:
      return false;
    }
  }
  return true;
}
5310
// Simplify resume that is shared by several landing pads (phi of landing pad).
// Trivial incoming landing-pad blocks are cut off from the resume block, and
// invokes unwinding into them are rewritten as calls. Returns true if any
// block was simplified.
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    // NOTE(review): the guard (presumably a call to isCleanupBlockEmpty on
    // this range) is partially elided from this excerpt — confirm upstream.
        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    // NOTE(review): the range expression of this loop (presumably the
    // predecessors of TrivialBB) is elided from this excerpt.
    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  return !TrivialUnwindBlocks.empty();
}
5380
// Simplify resume that is only used by a single (non-phi) landing pad.
// Rewrites every invoke unwinding to this block into a plain call, then
// deletes the now-unreachable landing pad block. Returns true on success.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
  // NOTE(review): the guard (presumably a call to isCleanupBlockEmpty) is
  // partially elided from this excerpt — confirm against upstream.
      make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5403
// NOTE(review): this is the body of a static helper (removeEmptyCleanup,
// taking the CleanupReturnInst RI and a DomTreeUpdater DTU); its signature
// is elided from this excerpt — confirm against upstream.
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  // NOTE(review): the guard (presumably a call to isCleanupBlockEmpty) is
  // partially elided from this excerpt — confirm against upstream.
      make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // If the incoming value is a phi in BB, translate it per-predecessor.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  // NOTE(review): the loop header (presumably iterating over PredBB in the
  // predecessors of BB) is elided from this excerpt — confirm upstream.
    if (UnwindDest == nullptr) {
      if (DTU) {
        // Flush pending updates before removeUnwindEdge mutates the CFG so
        // the DomTreeUpdater sees a consistent view.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      // Re-point the predecessor's terminator straight at the unwind dest.
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5519
// Try to merge two cleanuppads together.
// If RI's cleanuppad unwinds to another cleanuppad that has no other
// predecessors, the successor pad is folded into the predecessor pad and the
// cleanupret is replaced by a plain branch. Returns true on success.
// NOTE(review): the function signature (taking the CleanupReturnInst RI) is
// elided from this excerpt — confirm against upstream.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanupad with the predecessor pad
  // The only cleanuppad uses should be this cleanupret, it's cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5552
5553bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5554 // It is possible to transiantly have an undef cleanuppad operand because we
5555 // have deleted some, but not all, dead blocks.
5556 // Eventually, this block will be deleted.
5557 if (isa<UndefValue>(RI->getOperand(0)))
5558 return false;
5559
5560 if (mergeCleanupPad(RI))
5561 return true;
5562
5563 if (removeEmptyCleanup(RI, DTU))
5564 return true;
5565
5566 return false;
5567}
5568
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Shrinks the block containing UI down to just the unreachable, then rewrites
// every predecessor terminator so it can no longer reach this block; deletes
// the block if it becomes predecessor-free. Returns true if anything changed.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  // NOTE(review): the statement performing this move is elided from this
  // excerpt — confirm against upstream.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    // NOTE(review): the declaration of the iterator BBI and the guard that
    // stops at instructions which may not transfer execution to their
    // successor are elided from this excerpt — confirm against upstream.
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // The edge into BB is never taken, so record the condition value
        // that avoids BB as an assumption and branch unconditionally the
        // other way.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        // NOTE(review): the statement removing the old conditional branch is
        // elided from this excerpt — confirm against upstream.
        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        // removeCase invalidates iterators past the removed case, so refresh
        // both the current iterator and the end iterator.
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // Drop every handler entry that targets the unreachable block.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5752
// NOTE(review): the function signature (casesAreContiguous, taking a vector
// of case constants) and one interior line are elided from this excerpt —
// confirm against upstream.
  assert(Cases.size() >= 1);

  // Adjacent values must differ by exactly one for the set to form a
  // contiguous range; the comparison direction implies the vector is in
  // descending order — presumably sorted by the elided line above; verify.
  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
      return false;
  }
  return true;
}
5763
// NOTE(review): the first signature line (createUnreachableSwitchDefault,
// taking the SwitchInst) is elided from this excerpt — confirm upstream.
// Replaces the switch's default destination with a fresh block containing
// only an unreachable, for switches whose default can never be taken.
                                           DomTreeUpdater *DTU,
                                           bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  // Drop BB from the old default's predecessor bookkeeping unless the caller
  // asked to keep it (e.g. when the old default is being re-added as a case).
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    // NOTE(review): the declaration of 'Updates' is elided from this excerpt.
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no remaining case still targets the
    // original default block.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5787
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  // NOTE(review): the declarations of the CasesA/CasesB case-value vectors
  // are elided from this excerpt — confirm against upstream.

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && casesAreContiguous(CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (casesAreContiguous(CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  // Rebase the condition so the contiguous range starts at zero; a single
  // unsigned < NumCases then covers the whole range.
  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Offset->getType(), ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI)) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both weights down together until each fits in 32 bits,
      // preserving their ratio.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    // The new branch supplies exactly one edge; drop all the extra entries.
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  // NOTE(review): the statement executed when there is no live default is
  // elided from this excerpt — confirm against upstream.
  if (!HasDefault)

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
5916
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
// NOTE(review): the first signature line (eliminateDeadSwitchCases, taking
// the SwitchInst and a DomTreeUpdater) is elided from this excerpt.
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, AC, SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  // NOTE(review): the initializer expression is elided from this excerpt —
  // confirm against upstream.
  unsigned MaxSignificantBitsInCond =

  // Gather dead cases.
  // NOTE(review): the declaration of DeadCases is elided from this excerpt.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      // Track per-successor case counts so we can tell later when removing
      // dead cases disconnects a successor entirely.
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Successor);
      ++It->second;
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // A case is dead if it conflicts with a known-zero or known-one bit of
    // the condition, or needs more significant bits than the condition has.
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */) {
    uint64_t AllNumCases = 1ULL << NumUnknownBits;
    if (SI->getNumCases() == AllNumCases) {
      // NOTE(review): the call that marks the default unreachable is elided
      // from this excerpt — confirm against upstream.
      return true;
    }
    // When only one case value is missing, replace default with that case.
    // Eliminating the default branch will provide more opportunities for
    // optimization, such as lookup tables.
    if (SI->getNumCases() == AllNumCases - 1) {
      assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
      IntegerType *CondTy = cast<IntegerType>(Cond->getType());
      if (CondTy->getIntegerBitWidth() > 64 ||
          !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
        return false;

      // XOR-folding all present case values leaves the single missing one
      // (the XOR of all possible values cancels out when more than one bit
      // is unknown, per the assert above).
      uint64_t MissingCaseVal = 0;
      for (const auto &Case : SI->cases())
        MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
      auto *MissingCase =
          cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
      // NOTE(review): the declaration of the profile-update wrapper SIW is
      // elided from this excerpt — confirm against upstream.
      SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0));
      createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);
      SIW.setSuccessorWeight(0, 0);
      return true;
    }
  }

  if (DeadCases.empty())
    return false;

  // NOTE(review): the declaration of SIW is elided from this excerpt.
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    // Successors that lost their last case edge are now disconnected from
    // the switch block.
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
6015
6016/// If BB would be eligible for simplification by
6017/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6018/// by an unconditional branch), look at the phi node for BB in the successor
6019/// block and see if the incoming value is equal to CaseValue. If so, return
6020/// the phi node, and set PhiIndex to BB's index in the phi node.
6022 BasicBlock *BB, int *PhiIndex) {
6023 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6024 return nullptr; // BB must be empty to be a candidate for simplification.
6025 if (!BB->getSinglePredecessor())
6026 return nullptr; // BB must be dominated by the switch.
6027
6029 if (!Branch || !Branch->isUnconditional())
6030 return nullptr; // Terminator must be unconditional branch.
6031
6032 BasicBlock *Succ = Branch->getSuccessor(0);
6033
6034 for (PHINode &PHI : Succ->phis()) {
6035 int Idx = PHI.getBasicBlockIndex(BB);
6036 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6037
6038 Value *InValue = PHI.getIncomingValue(Idx);
6039 if (InValue != CaseValue)
6040 continue;
6041
6042 *PhiIndex = Idx;
6043 return &PHI;
6044 }
6045
6046 return nullptr;
6047}
6048
6049/// Try to forward the condition of a switch instruction to a phi node
6050/// dominated by the switch, if that would mean that some of the destination
6051/// blocks of the switch can be folded away. Return true if a change is made.
6053 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6054
6055 ForwardingNodesMap ForwardingNodes;
6056 BasicBlock *SwitchBlock = SI->getParent();
6057 bool Changed = false;
6058 for (const auto &Case : SI->cases()) {
6059 ConstantInt *CaseValue = Case.getCaseValue();
6060 BasicBlock *CaseDest = Case.getCaseSuccessor();
6061
6062 // Replace phi operands in successor blocks that are using the constant case
6063 // value rather than the switch condition variable:
6064 // switchbb:
6065 // switch i32 %x, label %default [
6066 // i32 17, label %succ
6067 // ...
6068 // succ:
6069 // %r = phi i32 ... [ 17, %switchbb ] ...
6070 // -->
6071 // %r = phi i32 ... [ %x, %switchbb ] ...
6072
6073 for (PHINode &Phi : CaseDest->phis()) {
6074 // This only works if there is exactly 1 incoming edge from the switch to
6075 // a phi. If there is >1, that means multiple cases of the switch map to 1
6076 // value in the phi, and that phi value is not the switch condition. Thus,
6077 // this transform would not make sense (the phi would be invalid because
6078 // a phi can't have different incoming values from the same block).
6079 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6080 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6081 count(Phi.blocks(), SwitchBlock) == 1) {
6082 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6083 Changed = true;
6084 }
6085 }
6086
6087 // Collect phi nodes that are indirectly using this switch's case constants.
6088 int PhiIdx;
6089 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6090 ForwardingNodes[Phi].push_back(PhiIdx);
6091 }
6092
6093 for (auto &ForwardingNode : ForwardingNodes) {
6094 PHINode *Phi = ForwardingNode.first;
6095 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6096 // Check if it helps to fold PHI.
6097 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6098 continue;
6099
6100 for (int Index : Indexes)
6101 Phi->setIncomingValue(Index, SI->getCondition());
6102 Changed = true;
6103 }
6104
6105 return Changed;
6106}
6107
6108/// Return true if the backend will be able to handle
6109/// initializing an array of constants like C.
6111 if (C->isThreadDependent())
6112 return false;
6113 if (C->isDLLImportDependent())
6114 return false;
6115
6116 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
6119 return false;
6120
6122 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6123 // materializing the array of constants.
6124 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6125 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6126 return false;
6127 }
6128
6129 if (!TTI.shouldBuildLookupTablesForConstant(C))
6130 return false;
6131
6132 return true;
6133}
6134
6135/// If V is a Constant, return it. Otherwise, try to look up
6136/// its constant value in ConstantPool, returning 0 if it's not there.
6137static Constant *
6140 if (Constant *C = dyn_cast<Constant>(V))
6141 return C;
6142 return ConstantPool.lookup(V);
6143}
6144
6145/// Try to fold instruction I into a constant. This works for
6146/// simple instructions such as binary operations where both operands are
6147/// constant or can be replaced by constants from the ConstantPool. Returns the
6148/// resulting constant on success, 0 otherwise.
6149static Constant *
6153 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6154 if (!A)
6155 return nullptr;
6156 if (A->isAllOnesValue())
6157 return lookupConstant(Select->getTrueValue(), ConstantPool);
6158 if (A->isNullValue())
6159 return lookupConstant(Select->getFalseValue(), ConstantPool);
6160 return nullptr;
6161 }
6162
6164 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6165 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6166 COps.push_back(A);
6167 else
6168 return nullptr;
6169 }
6170
6171 return ConstantFoldInstOperands(I, COps, DL);
6172}
6173
6174/// Try to determine the resulting constant values in phi nodes
6175/// at the common destination basic block, *CommonDest, for one of the case
6176/// destionations CaseDest corresponding to value CaseVal (0 for the default
6177/// case), of a switch instruction SI.
6178static bool
6180 BasicBlock **CommonDest,
6181 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6182 const DataLayout &DL, const TargetTransformInfo &TTI) {
6183 // The block from which we enter the common destination.
6184 BasicBlock *Pred = SI->getParent();
6185
6186 // If CaseDest is empty except for some side-effect free instructions through
6187 // which we can constant-propagate the CaseVal, continue to its successor.
6189 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6190 for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
6191 if (I.isTerminator()) {
6192 // If the terminator is a simple branch, continue to the next block.
6193 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6194 return false;
6195 Pred = CaseDest;
6196 CaseDest = I.getSuccessor(0);
6197 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6198 // Instruction is side-effect free and constant.
6199
6200 // If the instruction has uses outside this block or a phi node slot for
6201 // the block, it is not safe to bypass the instruction since it would then
6202 // no longer dominate all its uses.
6203 for (auto &Use : I.uses()) {
6204 User *User = Use.getUser();
6206 if (I->getParent() == CaseDest)
6207 continue;
6208 if (PHINode *Phi = dyn_cast<PHINode>(User))
6209 if (Phi->getIncomingBlock(Use) == CaseDest)
6210 continue;
6211 return false;
6212 }
6213
6214 ConstantPool.insert(std::make_pair(&I, C));
6215 } else {
6216 break;
6217 }
6218 }
6219
6220 // If we did not have a CommonDest before, use the current one.
6221 if (!*CommonDest)
6222 *CommonDest = CaseDest;
6223 // If the destination isn't the common one, abort.
6224 if (CaseDest != *CommonDest)
6225 return false;
6226
6227 // Get the values for this case from phi nodes in the destination block.
6228 for (PHINode &PHI : (*CommonDest)->phis()) {
6229 int Idx = PHI.getBasicBlockIndex(Pred);
6230 if (Idx == -1)
6231 continue;
6232
6233 Constant *ConstVal =
6234 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6235 if (!ConstVal)
6236 return false;
6237
6238 // Be conservative about which kinds of constants we support.
6239 if (!validLookupTableConstant(ConstVal, TTI))
6240 return false;
6241
6242 Res.push_back(std::make_pair(&PHI, ConstVal));
6243 }
6244
6245 return Res.size() > 0;
6246}
6247
6248// Helper function used to add CaseVal to the list of cases that generate
6249// Result. Returns the updated number of cases that generate this result.
6250static size_t mapCaseToResult(ConstantInt *CaseVal,
6251 SwitchCaseResultVectorTy &UniqueResults,
6252 Constant *Result) {
6253 for (auto &I : UniqueResults) {
6254 if (I.first == Result) {
6255 I.second.push_back(CaseVal);
6256 return I.second.size();
6257 }
6258 }
6259 UniqueResults.push_back(
6260 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6261 return 1;
6262}
6263
6264// Helper function that initializes a map containing
6265// results for the PHI node of the common destination block for a switch
6266// instruction. Returns false if multiple PHI nodes have been found or if
6267// there is not a common destination block for the switch.
6269 BasicBlock *&CommonDest,
6270 SwitchCaseResultVectorTy &UniqueResults,
6271 Constant *&DefaultResult,
6272 const DataLayout &DL,
6273 const TargetTransformInfo &TTI,
6274 uintptr_t MaxUniqueResults) {
6275 for (const auto &I : SI->cases()) {
6276 ConstantInt *CaseVal = I.getCaseValue();
6277
6278 // Resulting value at phi nodes for this case value.
6279 SwitchCaseResultsTy Results;
6280 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6281 DL, TTI))
6282 return false;
6283
6284 // Only one value per case is permitted.
6285 if (Results.size() > 1)
6286 return false;
6287
6288 // Add the case->result mapping to UniqueResults.
6289 const size_t NumCasesForResult =
6290 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6291
6292 // Early out if there are too many cases for this result.
6293 if (NumCasesForResult > MaxSwitchCasesPerResult)
6294 return false;
6295
6296 // Early out if there are too many unique results.
6297 if (UniqueResults.size() > MaxUniqueResults)
6298 return false;
6299
6300 // Check the PHI consistency.
6301 if (!PHI)
6302 PHI = Results[0].first;
6303 else if (PHI != Results[0].first)
6304 return false;
6305 }
6306 // Find the default result value.
6308 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6309 DL, TTI);
6310 // If the default value is not found abort unless the default destination
6311 // is unreachable.
6312 DefaultResult =
6313 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6314
6315 return DefaultResult || SI->defaultDestUnreachable();
6316}
6317
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// Returns the new select instruction on success, nullptr if no fold applies.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) { %0 = icmp eq i32 %a, 10
  // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
  // default: return 4; %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    // Without a default result the second arm of the outer select is the
    // second case's result directly; with a default we need an inner select.
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the case where all cases map to one result (plus the default).
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          return Builder.CreateSelect(Cmp, ResultVector[0].first,
                                      DefaultResult);
        }
      }

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits. If so, rebase on MinCaseVal and compare the
      // untouched bits against zero.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where two cases map to the same result:
    // compare against both values and OR the comparisons.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  return nullptr;
}
6417
6418// Helper function to cleanup a switch instruction that has been converted into
6419// a select, fixing up PHI nodes and basic blocks.
6421 Value *SelectValue,
6422 IRBuilder<> &Builder,
6423 DomTreeUpdater *DTU) {
6424 std::vector<DominatorTree::UpdateType> Updates;
6425
6426 BasicBlock *SelectBB = SI->getParent();
6427 BasicBlock *DestBB = PHI->getParent();
6428
6429 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6430 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6431 Builder.CreateBr(DestBB);
6432
6433 // Remove the switch.
6434
6435 PHI->removeIncomingValueIf(
6436 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6437 PHI->addIncoming(SelectValue, SelectBB);
6438
6439 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6440 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6441 BasicBlock *Succ = SI->getSuccessor(i);
6442
6443 if (Succ == DestBB)
6444 continue;
6445 Succ->removePredecessor(SelectBB);
6446 if (DTU && RemovedSuccessors.insert(Succ).second)
6447 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6448 }
6449 SI->eraseFromParent();
6450 if (DTU)
6451 DTU->applyUpdates(Updates);
6452}
6453
6454/// If a switch is only used to initialize one or more phi nodes in a common
6455/// successor block with only two different constant values, try to replace the
6456/// switch with a select. Returns true if the fold was made.
6458 DomTreeUpdater *DTU, const DataLayout &DL,
6459 const TargetTransformInfo &TTI) {
6460 Value *const Cond = SI->getCondition();
6461 PHINode *PHI = nullptr;
6462 BasicBlock *CommonDest = nullptr;
6463 Constant *DefaultResult;
6464 SwitchCaseResultVectorTy UniqueResults;
6465 // Collect all the cases that will deliver the same value from the switch.
6466 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6467 DL, TTI, /*MaxUniqueResults*/ 2))
6468 return false;
6469
6470 assert(PHI != nullptr && "PHI for value select not found");
6471 Builder.SetInsertPoint(SI);
6472 Value *SelectValue =
6473 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder, DL);
6474 if (!SelectValue)
6475 return false;
6476
6477 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6478 return true;
6479}
6480
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

private:
  // Depending on the switch, there are different alternatives. The constructor
  // picks the cheapest representation that can encode the case results.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True if the linear map may wrap; when false, nsw can be put on the
  // generated mul/add.
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6556
6557SwitchReplacement::SwitchReplacement(
6558 Module &M, uint64_t TableSize, ConstantInt *Offset,
6559 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6560 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
6561 : DefaultValue(DefaultValue) {
6562 assert(Values.size() && "Can't build lookup table without values!");
6563 assert(TableSize >= Values.size() && "Can't fit values in table!");
6564
6565 // If all values in the table are equal, this is that value.
6566 SingleValue = Values.begin()->second;
6567
6568 ValueType = Values.begin()->second->getType();
6569
6570 // Build up the table contents.
6571 SmallVector<Constant *, 64> TableContents(TableSize);
6572 for (const auto &[CaseVal, CaseRes] : Values) {
6573 assert(CaseRes->getType() == ValueType);
6574
6575 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6576 TableContents[Idx] = CaseRes;
6577
6578 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6579 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6580 }
6581
6582 // Fill in any holes in the table with the default result.
6583 if (Values.size() < TableSize) {
6584 assert(DefaultValue &&
6585 "Need a default value to fill the lookup table holes.");
6586 assert(DefaultValue->getType() == ValueType);
6587 for (uint64_t I = 0; I < TableSize; ++I) {
6588 if (!TableContents[I])
6589 TableContents[I] = DefaultValue;
6590 }
6591
6592 // If the default value is poison, all the holes are poison.
6593 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6594
6595 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6596 SingleValue = nullptr;
6597 }
6598
6599 // If each element in the table contains the same value, we only need to store
6600 // that single value.
6601 if (SingleValue) {
6602 Kind = SingleValueKind;
6603 return;
6604 }
6605
6606 // Check if we can derive the value with a linear transformation from the
6607 // table index.
6609 bool LinearMappingPossible = true;
6610 APInt PrevVal;
6611 APInt DistToPrev;
6612 // When linear map is monotonic and signed overflow doesn't happen on
6613 // maximum index, we can attach nsw on Add and Mul.
6614 bool NonMonotonic = false;
6615 assert(TableSize >= 2 && "Should be a SingleValue table.");
6616 // Check if there is the same distance between two consecutive values.
6617 for (uint64_t I = 0; I < TableSize; ++I) {
6618 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6619
6620 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6621 // This is an poison, so it's (probably) a lookup table hole.
6622 // To prevent any regressions from before we switched to using poison as
6623 // the default value, holes will fall back to using the first value.
6624 // This can be removed once we add proper handling for poisons in lookup
6625 // tables.
6626 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6627 }
6628
6629 if (!ConstVal) {
6630 // This is an undef. We could deal with it, but undefs in lookup tables
6631 // are very seldom. It's probably not worth the additional complexity.
6632 LinearMappingPossible = false;
6633 break;
6634 }
6635 const APInt &Val = ConstVal->getValue();
6636 if (I != 0) {
6637 APInt Dist = Val - PrevVal;
6638 if (I == 1) {
6639 DistToPrev = Dist;
6640 } else if (Dist != DistToPrev) {
6641 LinearMappingPossible = false;
6642 break;
6643 }
6644 NonMonotonic |=
6645 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6646 }
6647 PrevVal = Val;
6648 }
6649 if (LinearMappingPossible) {
6650 LinearOffset = cast<ConstantInt>(TableContents[0]);
6651 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6652 APInt M = LinearMultiplier->getValue();
6653 bool MayWrap = true;
6654 if (isIntN(M.getBitWidth(), TableSize - 1))
6655 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6656 LinearMapValWrapped = NonMonotonic || MayWrap;
6657 Kind = LinearMapKind;
6658 return;
6659 }
6660 }
6661
6662 // If the type is integer and the table fits in a register, build a bitmap.
6663 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6665 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6666 for (uint64_t I = TableSize; I > 0; --I) {
6667 TableInt <<= IT->getBitWidth();
6668 // Insert values into the bitmap. Undef values are set to zero.
6669 if (!isa<UndefValue>(TableContents[I - 1])) {
6670 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6671 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6672 }
6673 }
6674 BitMap = ConstantInt::get(M.getContext(), TableInt);
6675 BitMapElementTy = IT;
6676 Kind = BitMapKind;
6677 return;
6678 }
6679
6680 // Store the table in an array.
6681 auto *TableTy = ArrayType::get(ValueType, TableSize);
6682 Initializer = ConstantArray::get(TableTy, TableContents);
6683
6684 Kind = LookupTableKind;
6685}
6686
// Emit the instructions that compute the replacement value for Index,
// according to the representation Kind chosen by the constructor.
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // Every table entry is the same constant; no code needed.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    // Materialize the table as a private constant global and load from it.
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array element. We will be only loading
    // one value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(ValueType));
    Type *IndexTy = DL.getIndexType(Table->getType());
    auto *ArrayTy = cast<ArrayType>(Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // Mark the extension non-negative when the table size proves the old
      // value's sign bit was clear.
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
6760
6761bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6762 uint64_t TableSize,
6763 Type *ElementType) {
6764 auto *IT = dyn_cast<IntegerType>(ElementType);
6765 if (!IT)
6766 return false;
6767 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6768 // are <= 15, we could try to narrow the type.
6769
6770 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6771 if (TableSize >= UINT_MAX / IT->getBitWidth())
6772 return false;
6773 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6774}
6775
6777 const DataLayout &DL) {
6778 // Allow any legal type.
6779 if (TTI.isTypeLegal(Ty))
6780 return true;
6781
6782 auto *IT = dyn_cast<IntegerType>(Ty);
6783 if (!IT)
6784 return false;
6785
6786 // Also allow power of 2 integer types that have at least 8 bits and fit in
6787 // a register. These types are common in frontend languages and targets
6788 // usually support loads of these types.
6789 // TODO: We could relax this to any integer that fits in a register and rely
6790 // on ABI alignment and padding in the table to allow the load to be widened.
6791 // Or we could widen the constants and truncate the load.
6792 unsigned BitWidth = IT->getBitWidth();
6793 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6794 DL.fitsInLegalInteger(IT->getBitWidth());
6795}
6796
6797Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
6798
6799bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
6800
// Decide whether NumCases cases spread over a range of CaseRange values are
// dense enough to profit from a table-based lowering.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Reject huge ranges up front; the multiplications below could overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Equivalent to NumCases / CaseRange >= 40%, without the division.
  return NumCases * 100 >= CaseRange * MinDensity;
}
6812
6814 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6815 uint64_t Range = Diff + 1;
6816 if (Range < Diff)
6817 return false; // Overflow.
6818
6819 return isSwitchDense(Values.size(), Range);
6820}
6821
6822/// Determine whether a lookup table should be built for this switch, based on
6823/// the number of cases, size of the table, and the types of the results.
6824// TODO: We could support larger than legal types by limiting based on the
6825// number of loads required and/or table size. If the constants are small we
6826// could use smaller table entries and extend after the load.
6828 const TargetTransformInfo &TTI,
6829 const DataLayout &DL,
6830 const SmallVector<Type *> &ResultTypes) {
6831 if (SI->getNumCases() > TableSize)
6832 return false; // TableSize overflowed.
6833
6834 bool AllTablesFitInRegister = true;
6835 bool HasIllegalType = false;
6836 for (const auto &Ty : ResultTypes) {
6837 // Saturate this flag to true.
6838 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6839
6840 // Saturate this flag to false.
6841 AllTablesFitInRegister =
6842 AllTablesFitInRegister &&
6843 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
6844
6845 // If both flags saturate, we're done. NOTE: This *only* works with
6846 // saturating flags, and all flags have to saturate first due to the
6847 // non-deterministic behavior of iterating over a dense map.
6848 if (HasIllegalType && !AllTablesFitInRegister)
6849 break;
6850 }
6851
6852 // If each table would fit in a register, we should build it anyway.
6853 if (AllTablesFitInRegister)
6854 return true;
6855
6856 // Don't build a table that doesn't fit in-register if it has illegal types.
6857 if (HasIllegalType)
6858 return false;
6859
6860 return isSwitchDense(SI->getNumCases(), TableSize);
6861}
6862
/// Decide whether the switch condition itself can be used to index the lookup
/// table directly, instead of first subtracting the minimum case value.
// NOTE(review): the declaration line naming this function was dropped by the
// source extraction; the call site below invokes it as
// shouldUseSwitchConditionAsTableIndex(*MinCaseVal, *MaxCaseVal,
// HasDefaultResults, ResultTypes, DL, TTI).
6864 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6865 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
6866 const DataLayout &DL, const TargetTransformInfo &TTI) {
// If the smallest case value is already zero, the condition is a valid index.
6867 if (MinCaseVal.isNullValue())
6868 return true;
// Bail on negative minimums, a maximal case value (the +1 table size below
// would overflow), or when there is no constant default result to fill holes.
6869 if (MinCaseVal.isNegative() ||
6870 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6871 !HasDefaultResults)
6872 return false;
// Only worthwhile if the enlarged (0..MaxCaseVal) table still fits in a
// register for every phi result type.
6873 return all_of(ResultTypes, [&](const auto &ResultType) {
6874 return SwitchReplacement::wouldFitInRegister(
6875 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
6876 });
6877}
6878
6879/// Try to reuse the switch table index compare. Following pattern:
6880/// \code
6881/// if (idx < tablesize)
6882/// r = table[idx]; // table does not contain default_value
6883/// else
6884/// r = default_value;
6885/// if (r != default_value)
6886/// ...
6887/// \endcode
6888/// Is optimized to:
6889/// \code
6890/// cond = idx < tablesize;
6891/// if (cond)
6892/// r = table[idx];
6893/// else
6894/// r = default_value;
6895/// if (cond)
6896/// ...
6897/// \endcode
6898/// Jump threading will then eliminate the second if(cond).
// NOTE(review): the extraction dropped the line with this function's name (it
// is called below as reuseTableCompare(User, PhiBlock, RangeCheckBranch,
// DefaultValue, ResultList)) and several declaration lines inside the body;
// the hedged notes below mark where they stood.
6900 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6901 Constant *DefaultValue,
6902 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
// (Dropped line: presumably a dyn_cast of PhiUser to an icmp, bound to
// CmpInst, judging by the uses below — TODO confirm against upstream.)
6904 if (!CmpInst)
6905 return;
6906
6907 // We require that the compare is in the same block as the phi so that jump
6908 // threading can do its work afterwards.
6909 if (CmpInst->getParent() != PhiBlock)
6910 return;
6911
// (Dropped line: presumably the constant second operand of the compare,
// bound to CmpOp1 — TODO confirm.)
6913 if (!CmpOp1)
6914 return;
6915
6916 Value *RangeCmp = RangeCheckBranch->getCondition();
6917 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6918 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6919
6920 // Check if the compare with the default value is constant true or false.
6921 const DataLayout &DL = PhiBlock->getDataLayout();
// (Dropped line: presumably the constant-folded compare of DefaultValue with
// CmpOp1, bound to DefaultConst — TODO confirm.)
6923 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
6924 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6925 return;
6926
6927 // Check if the compare with the case values is distinct from the default
6928 // compare result.
6929 for (auto ValuePair : Values) {
// (Dropped line: presumably the folded per-case compare, bound to CaseConst.)
6931 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
6932 if (!CaseConst || CaseConst == DefaultConst ||
6933 (CaseConst != TrueConst && CaseConst != FalseConst))
6934 return;
6935 }
6936
6937 // Check if the branch instruction dominates the phi node. It's a simple
6938 // dominance check, but sufficient for our needs.
6939 // Although this check is invariant in the calling loops, it's better to do it
6940 // at this late stage. Practically we do it at most once for a switch.
6941 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6942 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6943 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6944 return;
6945 }
6946
6947 if (DefaultConst == FalseConst) {
6948 // The compare yields the same result. We can replace it.
6949 CmpInst->replaceAllUsesWith(RangeCmp);
6950 ++NumTableCmpReuses;
6951 } else {
6952 // The compare yields the same result, just inverted. We can replace it.
6953 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6954 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6955 RangeCheckBranch->getIterator());
6956 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6957 ++NumTableCmpReuses;
6958 }
6959}
6960
6961/// If the switch is only used to initialize one or more phi nodes in a common
6962/// successor block with different constant values, replace the switch with
6963/// lookup tables.
// NOTE(review): the extraction dropped the line carrying this function's name
// and several declaration lines in the body (the containers later used as
// ResultLists, DefaultResults, PHIs and PhiToReplacementMap, a ResultsTy
// alias, and a DefaultVal binding); the surviving bare numbers below mark
// where they stood — confirm against upstream before editing code.
6965 DomTreeUpdater *DTU, const DataLayout &DL,
6966 const TargetTransformInfo &TTI) {
6967 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6968
6969 BasicBlock *BB = SI->getParent();
6970 Function *Fn = BB->getParent();
6971
6972 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6973 // split off a dense part and build a lookup table for that.
6974
6975 // FIXME: This creates arrays of GEPs to constant strings, which means each
6976 // GEP needs a runtime relocation in PIC code. We should just build one big
6977 // string and lookup indices into that.
6978
6979 // Ignore switches with less than three cases. Lookup tables will not make
6980 // them faster, so we don't analyze them.
6981 if (SI->getNumCases() < 3)
6982 return false;
6983
6984 // Figure out the corresponding result for each case value and phi node in the
6985 // common destination, as well as the min and max case values.
6986 assert(!SI->cases().empty());
6987 SwitchInst::CaseIt CI = SI->case_begin();
6988 ConstantInt *MinCaseVal = CI->getCaseValue();
6989 ConstantInt *MaxCaseVal = CI->getCaseValue();
6990
6991 BasicBlock *CommonDest = nullptr;
6992
6993 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6995
6997 SmallVector<Type *> ResultTypes;
6999
7000 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7001 ConstantInt *CaseVal = CI->getCaseValue();
7002 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7003 MinCaseVal = CaseVal;
7004 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7005 MaxCaseVal = CaseVal;
7006
7007 // Resulting value at phi nodes for this case value.
7009 ResultsTy Results;
7010 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7011 Results, DL, TTI))
7012 return false;
7013
7014 // Append the result and result types from this case to the list for each
7015 // phi.
7016 for (const auto &I : Results) {
7017 PHINode *PHI = I.first;
7018 Constant *Value = I.second;
7019 auto [It, Inserted] = ResultLists.try_emplace(PHI);
7020 if (Inserted)
7021 PHIs.push_back(PHI);
7022 It->second.push_back(std::make_pair(CaseVal, Value));
7023 ResultTypes.push_back(PHI->getType());
7024 }
7025 }
7026
7027 // If the table has holes, we need a constant result for the default case
7028 // or a bitmask that fits in a register.
7029 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7030 bool HasDefaultResults =
7031 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7032 DefaultResultsList, DL, TTI);
7033 for (const auto &I : DefaultResultsList) {
7034 PHINode *PHI = I.first;
7035 Constant *Result = I.second;
7036 DefaultResults[PHI] = Result;
7037 }
7038
7039 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7040 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7041 uint64_t TableSize;
7042 ConstantInt *TableIndexOffset;
7043 if (UseSwitchConditionAsTableIndex) {
7044 TableSize = MaxCaseVal->getLimitedValue() + 1;
7045 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7046 } else {
7047 TableSize =
7048 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7049
7050 TableIndexOffset = MinCaseVal;
7051 }
7052
7053 // If the default destination is unreachable, or if the lookup table covers
7054 // all values of the conditional variable, branch directly to the lookup table
7055 // BB. Otherwise, check that the condition is within the case range.
7056 uint64_t NumResults = ResultLists[PHIs[0]].size();
7057 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7058
7059 bool TableHasHoles = (NumResults < TableSize);
7060
7061 // If the table has holes but the default destination doesn't produce any
7062 // constant results, the lookup table entries corresponding to the holes will
7063 // contain poison.
7064 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7065
7066 // If the default destination doesn't produce a constant result but is still
7067 // reachable, and the lookup table has holes, we need to use a mask to
7068 // determine if the current index should load from the lookup table or jump
7069 // to the default case.
7070 // The mask is unnecessary if the table has holes but the default destination
7071 // is unreachable, as in that case the holes must also be unreachable.
7072 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7073 if (NeedMask) {
7074 // As an extra penalty for the validity test we require more cases.
7075 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7076 return false;
7077 if (!DL.fitsInLegalInteger(TableSize))
7078 return false;
7079 }
7080
7081 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7082 return false;
7083
7084 // Compute the table index value.
7085 Value *TableIndex;
7086 if (UseSwitchConditionAsTableIndex) {
7087 TableIndex = SI->getCondition();
7088 if (HasDefaultResults) {
7089 // Grow the table to cover all possible index values to avoid the range
7090 // check. It will use the default result to fill in the table hole later,
7091 // so make sure it exists.
7092 ConstantRange CR =
7093 computeConstantRange(TableIndex, /* ForSigned */ false);
7094 // Growing the table shouldn't have any size impact by checking
7095 // wouldFitInRegister.
7096 // TODO: Consider growing the table also when it doesn't fit in a register
7097 // if no optsize is specified.
7098 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7099 if (!CR.isUpperWrapped() &&
7100 all_of(ResultTypes, [&](const auto &ResultType) {
7101 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7102 ResultType);
7103 })) {
7104 // There may be some case index larger than the UpperBound (unreachable
7105 // case), so make sure the table size does not get smaller.
7106 TableSize = std::max(UpperBound, TableSize);
7107 // The default branch is unreachable after we enlarge the lookup table.
7108 // Adjust DefaultIsReachable to reuse code path.
7109 DefaultIsReachable = false;
7110 }
7111 }
7112 }
7113
7114 // Keep track of the switch replacement for each phi
7116 for (PHINode *PHI : PHIs) {
7117 const auto &ResultList = ResultLists[PHI];
7118
7119 Type *ResultType = ResultList.begin()->second->getType();
7120 // Use any value to fill the lookup table holes.
7122 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7123 StringRef FuncName = Fn->getName();
7124 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7125 ResultList, DefaultVal, DL, FuncName);
7126 PhiToReplacementMap.insert({PHI, Replacement});
7127 }
7128
7129 bool AnyLookupTables = any_of(
7130 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7131
7132 // A few conditions prevent the generation of lookup tables:
7133 // 1. The target does not support lookup tables.
7134 // 2. The "no-jump-tables" function attribute is set.
7135 // However, these objections do not apply to other switch replacements, like
7136 // the bitmap, so we only stop here if any of these conditions are met and we
7137 // want to create a LUT. Otherwise, continue with the switch replacement.
7138 if (AnyLookupTables &&
7139 (!TTI.shouldBuildLookupTables() ||
7140 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7141 return false;
7142
7143 Builder.SetInsertPoint(SI);
7144 // TableIndex is the switch condition - TableIndexOffset if we don't
7145 // use the condition directly
7146 if (!UseSwitchConditionAsTableIndex) {
7147 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7148 // we can try to attach nsw.
7149 bool MayWrap = true;
7150 if (!DefaultIsReachable) {
7151 APInt Res =
7152 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7153 (void)Res;
7154 }
7155 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7156 "switch.tableidx", /*HasNUW =*/false,
7157 /*HasNSW =*/!MayWrap);
7158 }
7159
7160 std::vector<DominatorTree::UpdateType> Updates;
7161
7162 // Compute the maximum table size representable by the integer type we are
7163 // switching upon.
7164 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7165 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7166 assert(MaxTableSize >= TableSize &&
7167 "It is impossible for a switch to have more entries than the max "
7168 "representable value of its input integer type's size.");
7169
7170 // Create the BB that does the lookups.
7171 Module &Mod = *CommonDest->getParent()->getParent();
7172 BasicBlock *LookupBB = BasicBlock::Create(
7173 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7174
7175 BranchInst *RangeCheckBranch = nullptr;
7176
7177 Builder.SetInsertPoint(SI);
7178 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7179 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7180 Builder.CreateBr(LookupBB);
7181 if (DTU)
7182 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7183 // Note: We call removePredecessor later since we need to be able to get the
7184 // PHI value for the default case in case we're using a bit mask.
7185 } else {
7186 Value *Cmp = Builder.CreateICmpULT(
7187 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7188 RangeCheckBranch =
7189 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7190 if (DTU)
7191 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7192 }
7193
7194 // Populate the BB that does the lookups.
7195 Builder.SetInsertPoint(LookupBB);
7196
7197 if (NeedMask) {
7198 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7199 // re-purposed to do the hole check, and we create a new LookupBB.
7200 BasicBlock *MaskBB = LookupBB;
7201 MaskBB->setName("switch.hole_check");
7202 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7203 CommonDest->getParent(), CommonDest);
7204
7205 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7206 // unnecessary illegal types.
7207 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7208 APInt MaskInt(TableSizePowOf2, 0);
7209 APInt One(TableSizePowOf2, 1);
7210 // Build bitmask; fill in a 1 bit for every case.
7211 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7212 for (const auto &Result : ResultList) {
7213 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7214 .getLimitedValue();
7215 MaskInt |= One << Idx;
7216 }
7217 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7218
7219 // Get the TableIndex'th bit of the bitmask.
7220 // If this bit is 0 (meaning hole) jump to the default destination,
7221 // else continue with table lookup.
7222 IntegerType *MapTy = TableMask->getIntegerType();
7223 Value *MaskIndex =
7224 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7225 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7226 Value *LoBit = Builder.CreateTrunc(
7227 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7228 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7229 if (DTU) {
7230 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7231 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7232 }
7233 Builder.SetInsertPoint(LookupBB);
7234 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7235 }
7236
7237 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7238 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7239 // do not delete PHINodes here.
7240 SI->getDefaultDest()->removePredecessor(BB,
7241 /*KeepOneInputPHIs=*/true);
7242 if (DTU)
7243 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7244 }
7245
7246 for (PHINode *PHI : PHIs) {
7247 const ResultListTy &ResultList = ResultLists[PHI];
7248 auto Replacement = PhiToReplacementMap.at(PHI);
7249 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7250 // Do a small peephole optimization: re-use the switch table compare if
7251 // possible.
7252 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7253 BasicBlock *PhiBlock = PHI->getParent();
7254 // Search for compare instructions which use the phi.
7255 for (auto *User : PHI->users()) {
7256 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7257 Replacement.getDefaultValue(), ResultList);
7258 }
7259 }
7260
7261 PHI->addIncoming(Result, LookupBB);
7262 }
7263
7264 Builder.CreateBr(CommonDest);
7265 if (DTU)
7266 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7267
7268 // Remove the switch.
7269 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7270 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7271 BasicBlock *Succ = SI->getSuccessor(i);
7272
7273 if (Succ == SI->getDefaultDest())
7274 continue;
7275 Succ->removePredecessor(BB);
7276 if (DTU && RemovedSuccessors.insert(Succ).second)
7277 Updates.push_back({DominatorTree::Delete, BB, Succ});
7278 }
7279 SI->eraseFromParent();
7280
7281 if (DTU)
7282 DTU->applyUpdates(Updates);
7283
7284 if (NeedMask)
7285 ++NumLookupTablesHoles;
7286 return true;
7287}
7288
7289/// Try to transform a switch that has "holes" in it to a contiguous sequence
7290/// of cases.
7291///
7292/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7293/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7294///
7295/// This converts a sparse switch into a dense switch which allows better
7296/// lowering and could also allow transforming into a lookup table.
// NOTE(review): the declaration line naming this helper was dropped by the
// source extraction, as was the declaration of the Values vector used below
// (presumably SmallVector<int64_t, N>) — TODO confirm against upstream.
7298 const DataLayout &DL,
7299 const TargetTransformInfo &TTI) {
7300 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
7301 if (CondTy->getIntegerBitWidth() > 64 ||
7302 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7303 return false;
7304 // Only bother with this optimization if there are more than 3 switch cases;
7305 // SDAG will only bother creating jump tables for 4 or more cases.
7306 if (SI->getNumCases() < 4)
7307 return false;
7308
7309 // This transform is agnostic to the signedness of the input or case values. We
7310 // can treat the case values as signed or unsigned. We can optimize more common
7311 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7312 // as signed.
7314 for (const auto &C : SI->cases())
7315 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7316 llvm::sort(Values);
7317
7318 // If the switch is already dense, there's nothing useful to do here.
7319 if (isSwitchDense(Values))
7320 return false;
7321
7322 // First, transform the values such that they start at zero and ascend.
7323 int64_t Base = Values[0];
7324 for (auto &V : Values)
7325 V -= (uint64_t)(Base);
7326
7327 // Now we have signed numbers that have been shifted so that, given enough
7328 // precision, there are no negative values. Since the rest of the transform
7329 // is bitwise only, we switch now to an unsigned representation.
7330
7331 // This transform can be done speculatively because it is so cheap - it
7332 // results in a single rotate operation being inserted.
7333
7334 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7335 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7336 // less than 64.
7337 unsigned Shift = 64;
7338 for (auto &V : Values)
7339 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7340 assert(Shift < 64);
7341 if (Shift > 0)
7342 for (auto &V : Values)
7343 V = (int64_t)((uint64_t)V >> Shift);
7344
7345 if (!isSwitchDense(Values))
7346 // Transform didn't create a dense switch.
7347 return false;
7348
7349 // The obvious transform is to shift the switch condition right and emit a
7350 // check that the condition actually cleanly divided by GCD, i.e.
7351 // C & (1 << Shift - 1) == 0
7352 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7353 //
7354 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7355 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7356 // are nonzero then the switch condition will be very large and will hit the
7357 // default case.
7358
7359 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7360 Builder.SetInsertPoint(SI);
7361 Value *Sub =
7362 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
7363 Value *Rot = Builder.CreateIntrinsic(
7364 Ty, Intrinsic::fshl,
7365 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7366 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7367
7368 for (auto Case : SI->cases()) {
7369 auto *Orig = Case.getCaseValue();
7370 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7371 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7372 }
7373 return true;
7374}
7375
7376/// Tries to transform switch of powers of two to reduce switch range.
7377/// For example, switch like:
7378/// switch (C) { case 1: case 2: case 64: case 128: }
7379/// will be transformed to:
7380/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7381///
7382/// This transformation allows better lowering and may transform the switch
7383/// instruction into a sequence of bit manipulation and a smaller
7384/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7385/// address of the jump target, and indirectly jump to it).
// NOTE(review): the line naming this helper and the declaration of the Values
// vector used below (presumably SmallVector<uint64_t, N>) were dropped by the
// source extraction — TODO confirm against upstream.
7387 const DataLayout &DL,
7388 const TargetTransformInfo &TTI) {
7389 Value *Condition = SI->getCondition();
7390 LLVMContext &Context = SI->getContext();
7391 auto *CondTy = cast<IntegerType>(Condition->getType());
7392
7393 if (CondTy->getIntegerBitWidth() > 64 ||
7394 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7395 return false;
7396
7397 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7398 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7399 {Condition, ConstantInt::getTrue(Context)});
7400 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7401 TTI::TCC_Basic * 2)
7402 return false;
7403
7404 // Only bother with this optimization if there are more than 3 switch cases.
7405 // SDAG will start emitting jump tables for 4 or more cases.
7406 if (SI->getNumCases() < 4)
7407 return false;
7408
7409 // We perform this optimization only for switches with
7410 // unreachable default case.
7411 // This assumption will save us from checking if `Condition` is a power of two.
7412 if (!SI->defaultDestUnreachable())
7413 return false;
7414
7415 // Check that switch cases are powers of two.
7417 for (const auto &Case : SI->cases()) {
7418 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7419 if (llvm::has_single_bit(CaseValue))
7420 Values.push_back(CaseValue);
7421 else
7422 return false;
7423 }
7424
7425 // isSwitchDense requires case values to be sorted.
7426 llvm::sort(Values);
7427 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7428 llvm::countr_zero(Values.front()) + 1))
7429 // Transform is unable to generate dense switch.
7430 return false;
7431
7432 Builder.SetInsertPoint(SI);
7433
7434 // Replace each case with its trailing zeros number.
7435 for (auto &Case : SI->cases()) {
7436 auto *OrigValue = Case.getCaseValue();
7437 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7438 OrigValue->getValue().countr_zero()));
7439 }
7440
7441 // Replace condition with its trailing zeros number.
7442 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7443 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7444
7445 SI->setCondition(ConditionTrailingZeros);
7446
7447 return true;
7448}
7449
7450/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7451/// the same destination.
// NOTE(review): the line naming this helper was dropped by the source
// extraction, as were the declarations of the branch-weight vector (Weights,
// presumably SmallVector<uint32_t, N>) and of the predicate variable (Pred)
// used below — TODO confirm against upstream.
7453 DomTreeUpdater *DTU) {
7454 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7455 if (!Cmp || !Cmp->hasOneUse())
7456 return false;
7457
7459 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7460 if (!HasWeights)
7461 Weights.resize(4); // Avoid checking HasWeights everywhere.
7462
7463 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7464 int64_t Res;
7465 BasicBlock *Succ, *OtherSucc;
7466 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7467 BasicBlock *Unreachable = nullptr;
7468
7469 if (SI->getNumCases() == 2) {
7470 // Find which of 1, 0 or -1 is missing (handled by default dest).
7471 SmallSet<int64_t, 3> Missing;
7472 Missing.insert(1);
7473 Missing.insert(0);
7474 Missing.insert(-1);
7475
7476 Succ = SI->getDefaultDest();
7477 SuccWeight = Weights[0];
7478 OtherSucc = nullptr;
7479 for (auto &Case : SI->cases()) {
7480 std::optional<int64_t> Val =
7481 Case.getCaseValue()->getValue().trySExtValue();
7482 if (!Val)
7483 return false;
7484 if (!Missing.erase(*Val))
7485 return false;
7486 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7487 return false;
7488 OtherSucc = Case.getCaseSuccessor();
7489 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7490 }
7491
7492 assert(Missing.size() == 1 && "Should have one case left");
7493 Res = *Missing.begin();
7494 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7495 // Normalize so that Succ is taken once and OtherSucc twice.
7496 Unreachable = SI->getDefaultDest();
7497 Succ = OtherSucc = nullptr;
7498 for (auto &Case : SI->cases()) {
7499 BasicBlock *NewSucc = Case.getCaseSuccessor();
7500 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7501 if (!OtherSucc || OtherSucc == NewSucc) {
7502 OtherSucc = NewSucc;
7503 OtherSuccWeight += Weight;
7504 } else if (!Succ) {
7505 Succ = NewSucc;
7506 SuccWeight = Weight;
7507 } else if (Succ == NewSucc) {
7508 std::swap(Succ, OtherSucc);
7509 std::swap(SuccWeight, OtherSuccWeight);
7510 } else
7511 return false;
7512 }
7513 for (auto &Case : SI->cases()) {
7514 std::optional<int64_t> Val =
7515 Case.getCaseValue()->getValue().trySExtValue();
7516 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7517 return false;
7518 if (Case.getCaseSuccessor() == Succ) {
7519 Res = *Val;
7520 break;
7521 }
7522 }
7523 } else {
7524 return false;
7525 }
7526
7527 // Determine predicate for the missing case.
7529 switch (Res) {
7530 case 1:
7531 Pred = ICmpInst::ICMP_UGT;
7532 break;
7533 case 0:
7534 Pred = ICmpInst::ICMP_EQ;
7535 break;
7536 case -1:
7537 Pred = ICmpInst::ICMP_ULT;
7538 break;
7539 }
7540 if (Cmp->isSigned())
7541 Pred = ICmpInst::getSignedPredicate(Pred);
7542
7543 MDNode *NewWeights = nullptr;
7544 if (HasWeights)
7545 NewWeights = MDBuilder(SI->getContext())
7546 .createBranchWeights(SuccWeight, OtherSuccWeight);
7547
7548 BasicBlock *BB = SI->getParent();
7549 Builder.SetInsertPoint(SI->getIterator());
7550 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7551 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7552 SI->getMetadata(LLVMContext::MD_unpredictable));
7553 OtherSucc->removePredecessor(BB);
7554 if (Unreachable)
7555 Unreachable->removePredecessor(BB);
7556 SI->eraseFromParent();
7557 Cmp->eraseFromParent();
7558 if (DTU && Unreachable)
7559 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7560 return true;
7561}
7562
7563/// Checking whether two cases of SI are equal depends on the contents of the
7564/// BasicBlock and the incoming values of their successor PHINodes.
7565/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7566/// calling this function on each BasicBlock every time isEqual is called,
7567/// especially since the same BasicBlock may be passed as an argument multiple
7568/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7569/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7570/// of the incoming values.
// NOTE(review): the definition of struct SwitchSuccWrapper itself was dropped
// by the source extraction; from the uses below it carries at least a
// BasicBlock *Dest and a pointer-to-map member PhiPredIVs — TODO confirm.
7575
7576namespace llvm {
// DenseMapInfo specialization so SwitchSuccWrapper pointers can be used as
// DenseSet/DenseMap keys, hashed and compared by case-arm *contents* rather
// than by pointer identity.
7577template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
// (The getEmptyKey/getTombstoneKey signatures and their inner expressions
// were partially dropped; only the static_cast lines survive.)
7579 return static_cast<SwitchSuccWrapper *>(
7581 }
7583 return static_cast<SwitchSuccWrapper *>(
7585 }
7586 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7587 BasicBlock *Succ = SSW->Dest;
7589 assert(BI->isUnconditional() &&
7590 "Only supporting unconditional branches for now");
7591 assert(BI->getNumSuccessors() == 1 &&
7592 "Expected unconditional branches to have one successor");
7593 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7594
7595 // Since we assume the BB is just a single BranchInst with a single
7596 // successor, we hash as the BB and the incoming Values of its successor
7597 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7598 // including the incoming PHI values leads to better performance.
7599 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7600 // time and passing it in SwitchSuccWrapper, but this slowed down the
7601 // average compile time without having any impact on the worst case compile
7602 // time.
7603 BasicBlock *BB = BI->getSuccessor(0);
7604 SmallVector<Value *> PhiValsForBB;
7605 for (PHINode &Phi : BB->phis())
7606 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
7607
7608 return hash_combine(BB, hash_combine_range(PhiValsForBB));
7609 }
7610 static bool isEqual(const SwitchSuccWrapper *LHS,
7611 const SwitchSuccWrapper *RHS) {
// (Dropped lines: presumably the empty/tombstone keys bound to EKey/TKey.)
7614 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7615 return LHS == RHS;
7616
7617 BasicBlock *A = LHS->Dest;
7618 BasicBlock *B = RHS->Dest;
7619
7620 // FIXME: we checked that the size of A and B are both 1 in
7621 // simplifyDuplicateSwitchArms to make the Case list smaller to
7622 // improve performance. If we decide to support BasicBlocks with more
7623 // than just a single instruction, we need to check that A.size() ==
7624 // B.size() here, and we need to check more than just the BranchInsts
7625 // for equality.
7626
7627 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
7628 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
7629 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7630 "Only supporting unconditional branches for now");
7631 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
7632 return false;
7633
7634 // Need to check that PHIs in successor have matching values
7635 BasicBlock *Succ = ABI->getSuccessor(0);
7636 for (PHINode &Phi : Succ->phis()) {
7637 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7638 if (PredIVs[A] != PredIVs[B])
7639 return false;
7640 }
7641
7642 return true;
7643 }
7644};
7645} // namespace llvm
7646
/// Merge switch arms that are single-BB, unconditional-branch duplicates of
/// one another, redirecting the duplicate case edges to one representative.
// NOTE(review): the extraction dropped a few declaration lines in this body
// (the Cases vector of SwitchSuccWrapper, the Phis.insert loop body, and the
// Updates vector) — the surviving bare numbers mark where they stood.
7647bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7648 DomTreeUpdater *DTU) {
7649 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7650 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7651 // an entire PHI at once after the loop, opposed to calling
7652 // getIncomingValueForBlock inside this loop, since each call to
7653 // getIncomingValueForBlock is O(|Preds|).
7654 SmallPtrSet<PHINode *, 8> Phis;
7655 SmallPtrSet<BasicBlock *, 8> Seen;
7656 DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
7657 DenseMap<BasicBlock *, SmallVector<unsigned, 32>> BBToSuccessorIndexes;
7659 Cases.reserve(SI->getNumSuccessors());
7660
7661 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7662 BasicBlock *BB = SI->getSuccessor(I);
7663
7664 // FIXME: Support more than just a single BranchInst. One way we could do
7665 // this is by taking a hashing approach of all insts in BB.
7666 if (BB->size() != 1)
7667 continue;
7668
7669 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7670 // on other kinds of terminators. We decide to only support unconditional
7671 // branches for now for compile time reasons.
7672 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
7673 if (!BI || BI->isConditional())
7674 continue;
7675
7676 if (!Seen.insert(BB).second) {
7677 auto It = BBToSuccessorIndexes.find(BB);
7678 if (It != BBToSuccessorIndexes.end())
7679 It->second.emplace_back(I);
7680 continue;
7681 }
7682
7683 // FIXME: This case needs some extra care because the terminators other than
7684 // SI need to be updated. For now, consider only backedges to the SI.
7685 if (BB->getUniquePredecessor() != SI->getParent())
7686 continue;
7687
7688 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7689 for (BasicBlock *Succ : BI->successors())
7691
7692 // Add the successor only if not previously visited.
7693 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
7694 BBToSuccessorIndexes[BB].emplace_back(I);
7695 }
7696
7697 // Precompute a data structure to improve performance of isEqual for
7698 // SwitchSuccWrapper.
7699 PhiPredIVs.reserve(Phis.size());
7700 for (PHINode *Phi : Phis) {
7701 auto &IVs =
7702 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
7703 for (auto &IV : Phi->incoming_values())
7704 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
7705 }
7706
7707 // Build a set such that if the SwitchSuccWrapper exists in the set and
7708 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7709 // which is not in the set should be replaced with the one in the set. If the
7710 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7711 // other SwitchSuccWrappers can check against it in the same manner. We use
7712 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7713 // around information to isEquality, getHashValue, and when doing the
7714 // replacement with better performance.
7715 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
7716 ReplaceWith.reserve(Cases.size());
7717
7718
7719 Updates.reserve(ReplaceWith.size());
7720 bool MadeChange = false;
7721 for (auto &SSW : Cases) {
7722 // SSW is a candidate for simplification. If we find a duplicate BB,
7723 // replace it.
7724 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
7725 if (!Inserted) {
7726 // We know that SI's parent BB no longer dominates the old case successor
7727 // since we are making it dead.
7728 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
7729 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
7730 for (unsigned Idx : Successors)
7731 SI->setSuccessor(Idx, (*It)->Dest);
7732 MadeChange = true;
7733 }
7734 }
7735
7736 if (DTU)
7737 DTU->applyUpdates(Updates);
7738
7739 return MadeChange;
7740}
7741
// Top-level driver for switch simplification. Applies an ordered pipeline of
// switch transformations; each one that fires requests a resimplification of
// the block (requestResimplify) rather than continuing, so the order of the
// checks below is semantically significant — do not reorder casually.
7742bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7743 BasicBlock *BB = SI->getParent();
7744
7745 if (isValueEqualityComparison(SI)) {
7746 // If we only have one predecessor, and if it is a branch on this value,
7747 // see if that predecessor totally determines the outcome of this switch.
7748 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7749 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7750 return requestResimplify();
7751
// A switch on a select can be rewritten by pushing the switch into the
// select's arms.
7752 Value *Cond = SI->getCondition();
7753 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7754 if (simplifySwitchOnSelect(SI, Select))
7755 return requestResimplify();
7756
7757 // If the block only contains the switch, see if we can fold the block
7758 // away into any preds.
7759 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7760 if (foldValueComparisonIntoPredecessors(SI, Builder))
7761 return requestResimplify();
7762 }
7763
7764 // Try to transform the switch into an icmp and a branch.
7765 // The conversion from switch to comparison may lose information on
7766 // impossible switch values, so disable it early in the pipeline.
7767 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7768 return requestResimplify();
7769
7770 // Remove unreachable cases.
7771 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7772 return requestResimplify();
7773
7774 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7775 return requestResimplify();
7776
7777 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7778 return requestResimplify();
7779
7780 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7781 return requestResimplify();
7782
7783 // The conversion from switch to lookup tables results in difficult-to-analyze
7784 // code and makes pruning branches much harder. This is a problem if the
7785 // switch expression itself can still be restricted as a result of inlining or
7786 // CVP. Therefore, only apply this transformation during late stages of the
7787 // optimisation pipeline.
7788 if (Options.ConvertSwitchToLookupTable &&
7789 simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
7790 return requestResimplify();
7791
7792 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7793 return requestResimplify();
7794
7795 if (reduceSwitchRange(SI, Builder, DL, TTI))
7796 return requestResimplify();
7797
// Hoisting of common code from the case successors, gated by the HoistCommon
// cl::opt and the pass options.
7798 if (HoistCommon &&
7799 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
7800 return requestResimplify();
7801
7802 if (simplifyDuplicateSwitchArms(SI, DTU))
7803 return requestResimplify();
7804
7805 return false;
7806}
7807
// Simplify an indirectbr terminator: drop duplicate destinations and
// destinations whose address is no longer taken, then degenerate the
// instruction when zero or one destination remains, and finally try to fold
// an indirectbr on a select. Returns true if the IR changed.
// NOTE(review): the doxygen extraction dropped original lines 7839 and
// 7845-7846 — the statements that complete the zero- and one-destination
// rewrites below; confirm against upstream SimplifyCFG.cpp.
7808bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7809 BasicBlock *BB = IBI->getParent();
7810 bool Changed = false;
7811
7812 // Eliminate redundant destinations.
// A destination is removed when it is a duplicate (Succs insert fails) or its
// address is not taken anywhere (so the indirectbr can never reach it).
7813 SmallPtrSet<Value *, 8> Succs;
7814 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
7815 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7816 BasicBlock *Dest = IBI->getDestination(i);
7817 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7818 if (!Dest->hasAddressTaken())
7819 RemovedSuccs.insert(Dest);
7820 Dest->removePredecessor(BB);
7821 IBI->removeDestination(i);
// Compensate the loop indices for the removed destination.
7822 --i;
7823 --e;
7824 Changed = true;
7825 }
7826 }
7827
7828 if (DTU) {
7829 std::vector<DominatorTree::UpdateType> Updates;
7830 Updates.reserve(RemovedSuccs.size());
7831 for (auto *RemovedSucc : RemovedSuccs)
7832 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7833 DTU->applyUpdates(Updates);
7834 }
7835
7836 if (IBI->getNumDestinations() == 0) {
7837 // If the indirectbr has no successors, change it to unreachable.
7838 new UnreachableInst(IBI->getContext(), IBI->getIterator());
// NOTE(review): original line 7839 is missing here; it presumably erases the
// now-dead indirectbr (and DCEs its address operand) — confirm upstream.
7840 return true;
7841 }
7842
7843 if (IBI->getNumDestinations() == 1) {
7844 // If the indirectbr has one successor, change it to a direct branch.
// NOTE(review): original lines 7845-7846 are missing; they presumably create
// the unconditional BranchInst and erase the indirectbr — confirm upstream.
7847 return true;
7848 }
7849
// indirectbr on a select of two block addresses can become a conditional br.
7850 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7851 if (simplifyIndirectBrOnSelect(IBI, SI))
7852 return requestResimplify();
7853 }
7854 return Changed;
7855}
7856
7857/// Given a block with only a single landing pad and an unconditional branch
7858/// try to find another basic block which this one can be merged with. This
7859/// handles cases where we have multiple invokes with unique landing pads, but
7860/// a shared handler.
7861///
7862/// We specifically choose to not worry about merging non-empty blocks
7863/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7864/// practice, the optimizer produces empty landing pad blocks quite frequently
7865/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7866/// sinking in this file)
7867///
7868/// This is primarily a code size optimization. We need to avoid performing
7869/// any transform which might inhibit optimization (such as our ability to
7870/// specialize a particular handler via tail commoning). We do this by not
7871/// merging any blocks which require us to introduce a phi. Since the same
7872/// values are flowing through both blocks, we don't lose any ability to
7873/// specialize. If anything, we make such specialization more likely.
7874///
7875/// TODO - This transformation could remove entries from a phi in the target
7876/// block when the inputs in the phi are the same for the two blocks being
7877/// merged. In some cases, this could result in removal of the PHI entirely.
// NOTE(review): the doxygen extraction dropped original line 7878 — the start
// of this function's signature. From the visible parameters and the call site
// in simplifyUncondBranch (tryToMergeLandingPad(LPad, BI, BB, DTU)), it
// presumably reads `static bool tryToMergeLandingPad(LandingPadInst *LPad,
// BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU) {` — confirm upstream.
// Lines 7891, 7895, 7903 and 7915 (the LPad2/BI2 dyn_casts and the
// UniquePreds/UniqueSuccs declarations) are also missing; gaps marked below.
 7879 BasicBlock *BB, DomTreeUpdater *DTU) {
7880 auto Succ = BB->getUniqueSuccessor();
7881 assert(Succ);
7882 // If there's a phi in the successor block, we'd likely have to introduce
7883 // a phi into the merged landing pad block.
7884 if (isa<PHINode>(*Succ->begin()))
7885 return false;
7886
// Search the other predecessors of our successor for a block with an
// identical landing pad and an identical branch.
7887 for (BasicBlock *OtherPred : predecessors(Succ)) {
7888 if (BB == OtherPred)
7889 continue;
7890 BasicBlock::iterator I = OtherPred->begin();
// NOTE(review): original line 7891 is missing; it presumably dyn_casts *I to
// a LandingPadInst named LPad2 — confirm upstream.
7892 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7893 continue;
7894 ++I;
// NOTE(review): original line 7895 is missing; it presumably dyn_casts *I to
// a BranchInst named BI2 — confirm upstream.
7896 if (!BI2 || !BI2->isIdenticalTo(BI))
7897 continue;
7898
7899 std::vector<DominatorTree::UpdateType> Updates;
7900
7901 // We've found an identical block. Update our predecessors to take that
7902 // path instead and make ourselves dead.
// NOTE(review): original line 7903 is missing; it presumably collects the
// unique predecessors of BB into `UniquePreds` — confirm upstream.
7904 for (BasicBlock *Pred : UniquePreds) {
// Every predecessor must be an invoke unwinding to this landing pad.
7905 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7906 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7907 "unexpected successor");
7908 II->setUnwindDest(OtherPred);
7909 if (DTU) {
7910 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7911 Updates.push_back({DominatorTree::Delete, Pred, BB});
7912 }
7913 }
7914
// NOTE(review): original line 7915 is missing; it presumably collects the
// unique successors of BB into `UniqueSuccs` — confirm upstream.
7916 for (BasicBlock *Succ : UniqueSuccs) {
7917 Succ->removePredecessor(BB);
7918 if (DTU)
7919 Updates.push_back({DominatorTree::Delete, BB, Succ});
7920 }
7921
// BB is now dead: replace its branch with unreachable.
7922 IRBuilder<> Builder(BI);
7923 Builder.CreateUnreachable();
7924 BI->eraseFromParent();
7925 if (DTU)
7926 DTU->applyUpdates(Updates);
7927 return true;
7928 }
7929 return false;
7930}
7931
7932bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7933 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7934 : simplifyCondBranch(Branch, Builder);
7935}
7936
// Simplify a block ending in an unconditional branch: remove empty blocks,
// fold a lone icmp-eq/ne against a constant, merge duplicate empty landing
// pads, and fold a compare-and-branch block into its predecessor.
// NOTE(review): the doxygen extraction dropped original line 7953, which
// presumably declares the iterator `I` (first non-PHI/non-debug instruction of
// BB) that all of the checks below advance — confirm upstream.
7937bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7938 IRBuilder<> &Builder) {
7939 BasicBlock *BB = BI->getParent();
7940 BasicBlock *Succ = BI->getSuccessor(0);
7941
7942 // If the Terminator is the only non-phi instruction, simplify the block.
7943 // If LoopHeader is provided, check if the block or its successor is a loop
7944 // header. (This is for early invocations before loop simplify and
7945 // vectorization to keep canonical loop forms for nested loops. These blocks
7946 // can be eliminated when the pass is invoked later in the back-end.)
7947 // Note that if BB has only one predecessor then we do not introduce new
7948 // backedge, so we can eliminate BB.
7949 bool NeedCanonicalLoop =
7950 Options.NeedCanonicalLoop &&
7951 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7952 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7954 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7955 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7956 return true;
7957
7958 // If the only instruction in the block is a seteq/setne comparison against a
7959 // constant, try to simplify the block.
7960 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7961 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7962 ++I;
7963 if (I->isTerminator() &&
7964 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7965 return true;
7966 }
7967
7968 // See if we can merge an empty landing pad block with another which is
7969 // equivalent.
7970 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7971 ++I;
7972 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7973 return true;
7974 }
7975
7976 // If this basic block is ONLY a compare and a branch, and if a predecessor
7977 // branches to us and our successor, fold the comparison into the
7978 // predecessor and use logical operations to update the incoming value
7979 // for PHI nodes in common successor.
7980 if (Options.SpeculateBlocks &&
7981 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7982 Options.BonusInstThreshold))
7983 return requestResimplify();
7984 return false;
7985}
7986
// NOTE(review): the doxygen extraction dropped original line 7987, this
// function's signature; from the body and the call site in simplifyCondBranch
// it presumably reads `static BasicBlock *allPredecessorsComeFromSameSource(
// BasicBlock *BB) {` — confirm upstream.
// Returns the unique block that is the single predecessor of every
// predecessor of BB (the apex of a diamond), or nullptr if any predecessor
// has multiple predecessors or they disagree.
 7988 BasicBlock *PredPred = nullptr;
7989 for (auto *P : predecessors(BB)) {
7990 BasicBlock *PPred = P->getSinglePredecessor();
// Bail if P has no single predecessor, or a different one than seen so far.
7991 if (!PPred || (PredPred && PredPred != PPred))
7992 return nullptr;
7993 PredPred = PPred;
7994 }
7995 return PredPred;
7996}
7997
7998/// Fold the following pattern:
7999/// bb0:
8000/// br i1 %cond1, label %bb1, label %bb2
8001/// bb1:
8002/// br i1 %cond2, label %bb3, label %bb4
8003/// bb2:
8004/// br i1 %cond2, label %bb4, label %bb3
8005/// bb3:
8006/// ...
8007/// bb4:
8008/// ...
8009/// into
8010/// bb0:
8011/// %cond = xor i1 %cond1, %cond2
8012/// br i1 %cond, label %bb4, label %bb3
8013/// bb3:
8014/// ...
8015/// bb4:
8016/// ...
8017/// NOTE: %cond2 always dominates the terminator of bb0.
// NOTE(review): the doxygen extraction dropped original line 8018, this
// function's signature; from the body and the call site in simplifyCondBranch
// it presumably reads `static bool mergeNestedCondBranch(BranchInst *BI,
// DomTreeUpdater *DTU) {` — confirm upstream. Implements the bb0/bb1/bb2 →
// xor fold documented in the comment block above this function.
 8019 BasicBlock *BB = BI->getParent();
8020 BasicBlock *BB1 = BI->getSuccessor(0);
8021 BasicBlock *BB2 = BI->getSuccessor(1);
// A "simple" successor holds nothing but a conditional branch whose targets
// are distinct from itself and from BB and start with no PHIs.
8022 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8023 if (Succ == BB)
8024 return false;
8025 if (&Succ->front() != Succ->getTerminator())
8026 return false;
8027 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8028 if (!SuccBI || !SuccBI->isConditional())
8029 return false;
8030 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8031 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8032 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8033 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8034 };
8035 BranchInst *BB1BI, *BB2BI;
8036 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8037 return false;
8038
// Both inner branches must test the same condition with swapped targets.
8039 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8040 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8041 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8042 return false;
8043
8044 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8045 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8046 IRBuilder<> Builder(BI);
// Replace the outer branch with `br (cond1 ^ cond2), bb4, bb3`.
8047 BI->setCondition(
8048 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8049 BB1->removePredecessor(BB);
8050 BI->setSuccessor(0, BB4);
8051 BB2->removePredecessor(BB);
8052 BI->setSuccessor(1, BB3);
8053 if (DTU) {
// NOTE(review): original line 8054 is missing here; it presumably declares
// the `Updates` vector of DominatorTree::UpdateType — confirm upstream.
8055 Updates.push_back({DominatorTree::Delete, BB, BB1});
8056 Updates.push_back({DominatorTree::Insert, BB, BB4});
8057 Updates.push_back({DominatorTree::Delete, BB, BB2});
8058 Updates.push_back({DominatorTree::Insert, BB, BB3});
8059
8060 DTU->applyUpdates(Updates);
8061 }
// Combine branch weights; only emit metadata if at least one of the three
// branches carried weights, defaulting the rest to 1.
8062 bool HasWeight = false;
8063 uint64_t BBTWeight, BBFWeight;
8064 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8065 HasWeight = true;
8066 else
8067 BBTWeight = BBFWeight = 1;
8068 uint64_t BB1TWeight, BB1FWeight;
8069 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8070 HasWeight = true;
8071 else
8072 BB1TWeight = BB1FWeight = 1;
8073 uint64_t BB2TWeight, BB2FWeight;
8074 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8075 HasWeight = true;
8076 else
8077 BB2TWeight = BB2FWeight = 1;
8078 if (HasWeight) {
8079 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8080 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8081 fitWeights(Weights);
8082 setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
8083 }
8084 return true;
8085}
8086
// Top-level driver for conditional-branch simplification. Like
// simplifySwitch, this is an ordered pipeline: each transformation that fires
// requests resimplification instead of continuing.
// NOTE(review): the doxygen extraction dropped original lines 8089, 8133 and
// 8167 — gaps marked inline below; confirm against upstream SimplifyCFG.cpp.
8087bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8088 assert(
// NOTE(review): original line 8089 (the assert's first conjunct, presumably
// `BI->isConditional() &&`) is missing here — confirm upstream.
8090 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8091 "Tautological conditional branch should have been eliminated already.");
8092
8093 BasicBlock *BB = BI->getParent();
8094 if (!Options.SimplifyCondBranch ||
8095 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8096 return false;
8097
8098 // Conditional branch
8099 if (isValueEqualityComparison(BI)) {
8100 // If we only have one predecessor, and if it is a branch on this value,
8101 // see if that predecessor totally determines the outcome of this
8102 // switch.
8103 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8104 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8105 return requestResimplify();
8106
8107 // This block must be empty, except for the setcond inst, if it exists.
8108 // Ignore dbg and pseudo intrinsics.
8109 auto I = BB->instructionsWithoutDebug(true).begin();
8110 if (&*I == BI) {
8111 if (foldValueComparisonIntoPredecessors(BI, Builder))
8112 return requestResimplify();
8113 } else if (&*I == cast<Instruction>(BI->getCondition())) {
8114 ++I;
8115 if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8116 return requestResimplify();
8117 }
8118 }
8119
8120 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8121 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8122 return true;
8123
8124 // If this basic block has dominating predecessor blocks and the dominating
8125 // blocks' conditions imply BI's condition, we know the direction of BI.
8126 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8127 if (Imp) {
8128 // Turn this into a branch on constant.
8129 auto *OldCond = BI->getCondition();
8130 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8131 : ConstantInt::getFalse(BB->getContext());
8132 BI->setCondition(TorF);
// NOTE(review): original line 8133 is missing; given that OldCond is
// otherwise unused, it presumably deletes the now-dead old condition —
// confirm upstream.
8134 return requestResimplify();
8135 }
8136
8137 // If this basic block is ONLY a compare and a branch, and if a predecessor
8138 // branches to us and one of our successors, fold the comparison into the
8139 // predecessor and use logical operations to pick the right destination.
8140 if (Options.SpeculateBlocks &&
8141 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8142 Options.BonusInstThreshold))
8143 return requestResimplify();
8144
8145 // We have a conditional branch to two blocks that are only reachable
8146 // from BI. We know that the condbr dominates the two blocks, so see if
8147 // there is any identical code in the "then" and "else" blocks. If so, we
8148 // can hoist it up to the branching block.
8149 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8150 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8151 if (HoistCommon &&
8152 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8153 return requestResimplify();
8154
// Optionally speculate cheap loads/stores from both successors using
// conditional-faulting support.
8155 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8156 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8157 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8158 auto CanSpeculateConditionalLoadsStores = [&]() {
8159 for (auto *Succ : successors(BB)) {
8160 for (Instruction &I : *Succ) {
8161 if (I.isTerminator()) {
8162 if (I.getNumSuccessors() > 1)
8163 return false;
8164 continue;
8165 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8166 SpeculatedConditionalLoadsStores.size() ==
// NOTE(review): original line 8167 is missing; it presumably names the
// speculation-count threshold this size is compared against — confirm
// upstream.
8168 return false;
8169 }
8170 SpeculatedConditionalLoadsStores.push_back(&I);
8171 }
8172 }
8173 return !SpeculatedConditionalLoadsStores.empty();
8174 };
8175
8176 if (CanSpeculateConditionalLoadsStores()) {
8177 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8178 std::nullopt, nullptr);
8179 return requestResimplify();
8180 }
8181 }
8182 } else {
8183 // If Successor #1 has multiple preds, we may be able to conditionally
8184 // execute Successor #0 if it branches to Successor #1.
8185 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8186 if (Succ0TI->getNumSuccessors() == 1 &&
8187 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8188 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8189 return requestResimplify();
8190 }
8191 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8192 // If Successor #0 has multiple preds, we may be able to conditionally
8193 // execute Successor #1 if it branches to Successor #0.
8194 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8195 if (Succ1TI->getNumSuccessors() == 1 &&
8196 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8197 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8198 return requestResimplify();
8199 }
8200
8201 // If this is a branch on something for which we know the constant value in
8202 // predecessors (e.g. a phi node in the current block), thread control
8203 // through this block.
8204 if (foldCondBranchOnValueKnownInPredecessor(BI))
8205 return requestResimplify();
8206
8207 // Scan predecessor blocks for conditional branches.
8208 for (BasicBlock *Pred : predecessors(BB))
8209 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8210 if (PBI != BI && PBI->isConditional())
8211 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8212 return requestResimplify();
8213
8214 // Look for diamond patterns.
8215 if (MergeCondStores)
8216 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8217 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8218 if (PBI != BI && PBI->isConditional())
8219 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8220 return requestResimplify();
8221
8222 // Look for nested conditional branches.
8223 if (mergeNestedCondBranch(BI, DTU))
8224 return requestResimplify();
8225
8226 return false;
8227}
8228
8229/// Check if passing a value to an instruction will cause undefined behavior.
// Only null/undef constants are considered; the first interesting use of I in
// the same block (with no control-flow-altering instruction in between) is
// inspected for patterns that make the value immediate UB: deref of null,
// call through null, noundef/nonnull violations, div/rem by zero, etc.
// PtrValueMayBeModified tracks whether a GEP along the way may have moved the
// pointer off null, which weakens the nonnull-based conclusions.
// NOTE(review): the doxygen extraction dropped original lines 8232, 8282,
// 8287, 8328 and 8351 — gaps marked inline; confirm against upstream.
8230static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8231 assert(V->getType() == I->getType() && "Mismatched types");
// NOTE(review): original line 8232 is missing; it presumably dyn_casts V to a
// Constant named `C` — confirm upstream (callers pass only two arguments, so
// PtrValueMayBeModified presumably defaults to false in the declaration).
8233 if (!C)
8234 return false;
8235
8236 if (I->use_empty())
8237 return false;
8238
8239 if (C->isNullValue() || isa<UndefValue>(C)) {
8240 // Only look at the first use we can handle, avoid hurting compile time with
8241 // long uselists
8242 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8243 auto *Use = cast<Instruction>(U.getUser());
8244 // Change this list when we want to add new instructions.
8245 switch (Use->getOpcode()) {
8246 default:
8247 return false;
8248 case Instruction::GetElementPtr:
8249 case Instruction::Ret:
8250 case Instruction::BitCast:
8251 case Instruction::Load:
8252 case Instruction::Store:
8253 case Instruction::Call:
8254 case Instruction::CallBr:
8255 case Instruction::Invoke:
8256 case Instruction::UDiv:
8257 case Instruction::URem:
8258 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8259 // implemented to avoid code complexity as it is unclear how useful such
8260 // logic is.
8261 case Instruction::SDiv:
8262 case Instruction::SRem:
8263 return true;
8264 }
8265 });
8266 if (FindUse == I->use_end())
8267 return false;
8268 auto &Use = *FindUse;
8269 auto *User = cast<Instruction>(Use.getUser());
8270 // Bail out if User is not in the same BB as I or User == I or User comes
8271 // before I in the block. The latter two can be the case if User is a
8272 // PHI node.
8273 if (User->getParent() != I->getParent() || User == I ||
8274 User->comesBefore(I))
8275 return false;
8276
8277 // Now make sure that there are no instructions in between that can alter
8278 // control flow (eg. calls)
8279 auto InstrRange =
8280 make_range(std::next(I->getIterator()), User->getIterator());
8281 if (any_of(InstrRange, [](Instruction &I) {
// NOTE(review): original line 8282 (the lambda body — presumably a
// side-effect/control-flow check on I) is missing here — confirm upstream.
8283 }))
8284 return false;
8285
8286 // Look through GEPs. A load from a GEP derived from NULL is still undefined
// NOTE(review): original line 8287 is missing; it presumably dyn_casts User
// to a GetElementPtrInst named `GEP` — confirm upstream.
8288 if (GEP->getPointerOperand() == I) {
8289 // The type of GEP may differ from the type of base pointer.
8290 // Bail out on vector GEPs, as they are not handled by other checks.
8291 if (GEP->getType()->isVectorTy())
8292 return false;
8293 // The current base address is null, there are four cases to consider:
8294 // getelementptr (TY, null, 0) -> null
8295 // getelementptr (TY, null, not zero) -> may be modified
8296 // getelementptr inbounds (TY, null, 0) -> null
8297 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8298 // undefined?
8299 if (!GEP->hasAllZeroIndices() &&
8300 (!GEP->isInBounds() ||
8301 NullPointerIsDefined(GEP->getFunction(),
8302 GEP->getPointerAddressSpace())))
8303 PtrValueMayBeModified = true;
8304 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8305 }
8306
8307 // Look through return.
8308 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8309 bool HasNoUndefAttr =
8310 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8311 // Return undefined to a noundef return value is undefined.
8312 if (isa<UndefValue>(C) && HasNoUndefAttr)
8313 return true;
8314 // Return null to a nonnull+noundef return value is undefined.
8315 if (C->isNullValue() && HasNoUndefAttr &&
8316 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8317 return !PtrValueMayBeModified;
8318 }
8319 }
8320
8321 // Load from null is undefined.
8322 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8323 if (!LI->isVolatile())
8324 return !NullPointerIsDefined(LI->getFunction(),
8325 LI->getPointerAddressSpace());
8326
8327 // Store to null is undefined.
// NOTE(review): original line 8328 is missing; it presumably dyn_casts User
// to a StoreInst named `SI` — confirm upstream.
8329 if (!SI->isVolatile())
8330 return (!NullPointerIsDefined(SI->getFunction(),
8331 SI->getPointerAddressSpace())) &&
8332 SI->getPointerOperand() == I;
8333
8334 // llvm.assume(false/undef) always triggers immediate UB.
8335 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8336 // Ignore assume operand bundles.
8337 if (I == Assume->getArgOperand(0))
8338 return true;
8339 }
8340
8341 if (auto *CB = dyn_cast<CallBase>(User)) {
8342 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8343 return false;
8344 // A call to null is undefined.
8345 if (CB->getCalledOperand() == I)
8346 return true;
8347
8348 if (CB->isArgOperand(&Use)) {
8349 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8350 // Passing null to a nonnull+noundef argument is undefined.
// NOTE(review): original line 8351 is missing; it presumably begins the
// condition with a null-value test (`if (C->isNullValue() &&`) — confirm
// upstream.
8352 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8353 return !PtrValueMayBeModified;
8354 // Passing undef to a noundef argument is undefined.
8355 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8356 return true;
8357 }
8358 }
8359 // Div/Rem by zero is immediate UB
8360 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8361 return true;
8362 }
8363 return false;
8364}
8365
8366/// If BB has an incoming value that will always trigger undefined behavior
8367/// (eg. null pointer dereference), remove the branch leading here.
// NOTE(review): the doxygen extraction dropped original line 8368, the start
// of this function's signature; from the visible parameter list it presumably
// reads `static bool removeUndefIntroducingPredecessor(BasicBlock *BB,` —
// confirm upstream. Returns true if a predecessor edge was removed.
 8369 DomTreeUpdater *DTU,
8370 AssumptionCache *AC) {
// For each PHI incoming value that provably triggers UB when it flows into
// the PHI, kill the edge from the corresponding predecessor.
8371 for (PHINode &PHI : BB->phis())
8372 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8373 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8374 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8375 Instruction *T = Predecessor->getTerminator();
8376 IRBuilder<> Builder(T);
8377 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8378 BB->removePredecessor(Predecessor);
8379 // Turn unconditional branches into unreachables and remove the dead
8380 // destination from conditional branches.
8381 if (BI->isUnconditional())
8382 Builder.CreateUnreachable();
8383 else {
8384 // Preserve guarding condition in assume, because it might not be
8385 // inferrable from any dominating condition.
8386 Value *Cond = BI->getCondition();
8387 CallInst *Assumption;
8388 if (BI->getSuccessor(0) == BB)
8389 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8390 else
8391 Assumption = Builder.CreateAssumption(Cond);
8392 if (AC)
8393 AC->registerAssumption(cast<AssumeInst>(Assumption));
8394 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8395 : BI->getSuccessor(0));
8396 }
8397 BI->eraseFromParent();
8398 if (DTU)
8399 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8400 return true;
8401 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8402 // Redirect all branches leading to UB into
8403 // a newly created unreachable block.
8404 BasicBlock *Unreachable = BasicBlock::Create(
8405 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8406 Builder.SetInsertPoint(Unreachable);
8407 // The new block contains only one instruction: Unreachable
8408 Builder.CreateUnreachable();
8409 for (const auto &Case : SI->cases())
8410 if (Case.getCaseSuccessor() == BB) {
8411 BB->removePredecessor(Predecessor);
8412 Case.setSuccessor(Unreachable);
8413 }
8414 if (SI->getDefaultDest() == BB) {
8415 BB->removePredecessor(Predecessor);
8416 SI->setDefaultDest(Unreachable);
8417 }
8418
8419 if (DTU)
8420 DTU->applyUpdates(
8421 { { DominatorTree::Insert, Predecessor, Unreachable },
8422 { DominatorTree::Delete, Predecessor, BB } });
8423 return true;
8424 }
8425 }
8426
8427 return false;
8428}
8429
// Perform one round of CFG simplification on BB: delete dead blocks, constant
// fold the terminator, run block-level cleanups, then dispatch on the
// terminator opcode to the specialized simplify* routines. Returns true if
// the IR changed; run() loops while requestResimplify() was called.
// NOTE(review): the doxygen extraction dropped original lines 8451, 8454 and
// 8487 — gaps marked inline; confirm against upstream SimplifyCFG.cpp.
8430bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8431 bool Changed = false;
8432
8433 assert(BB && BB->getParent() && "Block not embedded in function!");
8434 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8435
8436 // Remove basic blocks that have no predecessors (except the entry block)...
8437 // or that just have themself as a predecessor. These are unreachable.
8438 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8439 BB->getSinglePredecessor() == BB) {
8440 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8441 DeleteDeadBlock(BB, DTU);
8442 return true;
8443 }
8444
8445 // Check to see if we can constant propagate this terminator instruction
8446 // away...
8447 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8448 /*TLI=*/nullptr, DTU);
8449
8450 // Check for and eliminate duplicate PHI nodes in this block.
// NOTE(review): original line 8451 is missing; per the comment above it
// presumably ORs EliminateDuplicatePHINodes(BB) into Changed — confirm
// upstream.
8452
8453 // Check for and remove branches that will always cause undefined behavior.
// NOTE(review): original line 8454 is missing; per the comment above it
// presumably guards the return below with a call to
// removeUndefIntroducingPredecessor(BB, DTU, Options.AC) — confirm upstream.
8455 return requestResimplify();
8456
8457 // Merge basic blocks into their predecessor if there is only one distinct
8458 // pred, and if there is only one distinct successor of the predecessor, and
8459 // if there are no PHI nodes.
8460 if (MergeBlockIntoPredecessor(BB, DTU))
8461 return true;
8462
8463 if (SinkCommon && Options.SinkCommonInsts)
8464 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8465 mergeCompatibleInvokes(BB, DTU)) {
8466 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8467 // so we may now have duplicate PHI's.
8468 // Let's rerun EliminateDuplicatePHINodes() first,
8469 // before foldTwoEntryPHINode() potentially converts them into select's,
8470 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8471 return true;
8472 }
8473
8474 IRBuilder<> Builder(BB);
8475
8476 if (Options.SpeculateBlocks &&
8477 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8478 // If there is a trivial two-entry PHI node in this basic block, and we can
8479 // eliminate it, do so now.
8480 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8481 if (PN->getNumIncomingValues() == 2)
8482 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8483 Options.SpeculateUnpredictables))
8484 return true;
8485 }
8486
// NOTE(review): original line 8487 is missing; it presumably declares
// `Terminator` as BB's terminator instruction, used below — confirm upstream.
8488 Builder.SetInsertPoint(Terminator);
8489 switch (Terminator->getOpcode()) {
8490 case Instruction::Br:
8491 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8492 break;
8493 case Instruction::Resume:
8494 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8495 break;
8496 case Instruction::CleanupRet:
8497 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8498 break;
8499 case Instruction::Switch:
8500 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8501 break;
8502 case Instruction::Unreachable:
8503 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8504 break;
8505 case Instruction::IndirectBr:
8506 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8507 break;
8508 }
8509
8510 return Changed;
8511}
8512
8513bool SimplifyCFGOpt::run(BasicBlock *BB) {
8514 bool Changed = false;
8515
8516 // Repeated simplify BB as long as resimplification is requested.
8517 do {
8518 Resimplify = false;
8519
8520 // Perform one round of simplifcation. Resimplify flag will be set if
8521 // another iteration is requested.
8522 Changed |= simplifyOnce(BB);
8523 } while (Resimplify);
8524
8525 return Changed;
8526}
8527
// NOTE(review): the doxygen extraction dropped original lines 8528-8529, the
// start of this function's signature; from the visible tail and the
// construction below it is presumably the public llvm::simplifyCFG entry
// point taking BB, TTI, DTU and the SimplifyCFGOptions — confirm upstream.
 8530 ArrayRef<WeakVH> LoopHeaders) {
// Thin wrapper: build a SimplifyCFGOpt with the block's data layout and run
// the iterate-until-stable driver on BB.
8531 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8532 Options)
8533 .run(BB);
8534}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL)
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static void fitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool casesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1166
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1574
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition ArrayRef.h:156
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:666
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isNegative() const
Definition Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:256
static DebugLoc getTemporary()
Definition DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:183
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:170
static DebugLoc getDropped()
Definition DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
iterator end()
Definition DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:205
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:112
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2345
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2100
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2637
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1931
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1805
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1220
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1847
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1860
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2194
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2068
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2277
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1077
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:115
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void) from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:359
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:201
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition MathExtras.h:47
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:310
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:823
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1733
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1707
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:307
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:256
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2118
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:626
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2058
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1759
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:157
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2110
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:203
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:400
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< uint32_t, 2 > &B1, const SmallVector< uint32_t, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1632
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1721
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1407
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3081
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:317
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3339
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1936
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3842
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2012
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1740
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2102
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1879
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1592
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:591
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2070
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:465
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:384
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:289
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:138
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:249