LLVM 22.0.0git
ControlHeightReduction.cpp
Go to the documentation of this file.
1//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges conditional blocks of code and reduces the number of
10// conditional branches in the hot paths based on profiles.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/StringSet.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/PassManager.h"
40
41#include <optional>
42#include <set>
43#include <sstream>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "chr"
48
49#define CHR_DEBUG(X) LLVM_DEBUG(X)
50
51static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
52 cl::desc("Disable CHR for all functions"));
53
54static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
55 cl::desc("Apply CHR for all functions"));
56
58 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
59 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
60
62 "chr-merge-threshold", cl::init(2), cl::Hidden,
63 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
64
66 "chr-module-list", cl::init(""), cl::Hidden,
67 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
68
70 "chr-function-list", cl::init(""), cl::Hidden,
71 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
72
74 "chr-dup-threshold", cl::init(3), cl::Hidden,
75 cl::desc("Max number of duplications by CHR for a region"));
76
79
80static void parseCHRFilterFiles() {
81 if (!CHRModuleList.empty()) {
82 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
83 if (!FileOrErr) {
84 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
85 std::exit(1);
86 }
87 StringRef Buf = FileOrErr->get()->getBuffer();
89 Buf.split(Lines, '\n');
90 for (StringRef Line : Lines) {
91 Line = Line.trim();
92 if (!Line.empty())
93 CHRModules.insert(Line);
94 }
95 }
96 if (!CHRFunctionList.empty()) {
97 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
98 if (!FileOrErr) {
99 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
100 std::exit(1);
101 }
102 StringRef Buf = FileOrErr->get()->getBuffer();
104 Buf.split(Lines, '\n');
105 for (StringRef Line : Lines) {
106 Line = Line.trim();
107 if (!Line.empty())
108 CHRFunctions.insert(Line);
109 }
110 }
111}
112
113namespace {
114
115struct CHRStats {
116 CHRStats() = default;
117 void print(raw_ostream &OS) const {
118 OS << "CHRStats: NumBranches " << NumBranches
119 << " NumBranchesDelta " << NumBranchesDelta
120 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
121 }
122 // The original number of conditional branches / selects
123 uint64_t NumBranches = 0;
124 // The decrease of the number of conditional branches / selects in the hot
125 // paths due to CHR.
126 uint64_t NumBranchesDelta = 0;
127 // NumBranchesDelta weighted by the profile count at the scope entry.
128 uint64_t WeightedNumBranchesDelta = 0;
129};
130
131// RegInfo - some properties of a Region.
132struct RegInfo {
133 RegInfo() = default;
134 RegInfo(Region *RegionIn) : R(RegionIn) {}
135 Region *R = nullptr;
136 bool HasBranch = false;
138};
139
140typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
141
142// CHRScope - a sequence of regions to CHR together. It corresponds to a
143// sequence of conditional blocks. It can have subscopes which correspond to
144// nested conditional blocks. Nested CHRScopes form a tree.
145class CHRScope {
146 public:
147 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
148 assert(RI.R && "Null RegionIn");
149 RegInfos.push_back(RI);
150 }
151
152 Region *getParentRegion() {
153 assert(RegInfos.size() > 0 && "Empty CHRScope");
154 Region *Parent = RegInfos[0].R->getParent();
155 assert(Parent && "Unexpected to call this on the top-level region");
156 return Parent;
157 }
158
159 BasicBlock *getEntryBlock() {
160 assert(RegInfos.size() > 0 && "Empty CHRScope");
161 return RegInfos.front().R->getEntry();
162 }
163
164 BasicBlock *getExitBlock() {
165 assert(RegInfos.size() > 0 && "Empty CHRScope");
166 return RegInfos.back().R->getExit();
167 }
168
169 bool appendable(CHRScope *Next) {
170 // The next scope is appendable only if this scope is directly connected to
171 // it (which implies it post-dominates this scope) and this scope dominates
172 // it (no edge to the next scope outside this scope).
173 BasicBlock *NextEntry = Next->getEntryBlock();
174 if (getExitBlock() != NextEntry)
175 // Not directly connected.
176 return false;
177 Region *LastRegion = RegInfos.back().R;
178 for (BasicBlock *Pred : predecessors(NextEntry))
179 if (!LastRegion->contains(Pred))
180 // There's an edge going into the entry of the next scope from outside
181 // of this scope.
182 return false;
183 return true;
184 }
185
186 void append(CHRScope *Next) {
187 assert(RegInfos.size() > 0 && "Empty CHRScope");
188 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
189 assert(getParentRegion() == Next->getParentRegion() &&
190 "Must be siblings");
191 assert(getExitBlock() == Next->getEntryBlock() &&
192 "Must be adjacent");
193 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
194 Subs.append(Next->Subs.begin(), Next->Subs.end());
195 }
196
197 void addSub(CHRScope *SubIn) {
198#ifndef NDEBUG
199 bool IsChild = false;
200 for (RegInfo &RI : RegInfos)
201 if (RI.R == SubIn->getParentRegion()) {
202 IsChild = true;
203 break;
204 }
205 assert(IsChild && "Must be a child");
206#endif
207 Subs.push_back(SubIn);
208 }
209
210 // Split this scope at the boundary region into two, which will belong to the
211 // tail and returns the tail.
212 CHRScope *split(Region *Boundary) {
213 assert(Boundary && "Boundary null");
214 assert(RegInfos.begin()->R != Boundary &&
215 "Can't be split at beginning");
216 auto BoundaryIt = llvm::find_if(
217 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
218 if (BoundaryIt == RegInfos.end())
219 return nullptr;
220 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
221 DenseSet<Region *> TailRegionSet;
222 for (const RegInfo &RI : TailRegInfos)
223 TailRegionSet.insert(RI.R);
224
225 auto TailIt =
226 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
227 assert(Sub && "null Sub");
228 Region *Parent = Sub->getParentRegion();
229 if (TailRegionSet.count(Parent))
230 return false;
231
232 assert(llvm::any_of(
233 RegInfos,
234 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
235 "Must be in head");
236 return true;
237 });
238 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
239
240 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
241 auto *Scope = new CHRScope(TailRegInfos, TailSubs);
242 RegInfos.erase(BoundaryIt, RegInfos.end());
243 Subs.erase(TailIt, Subs.end());
244 return Scope;
245 }
246
247 bool contains(Instruction *I) const {
248 BasicBlock *Parent = I->getParent();
249 for (const RegInfo &RI : RegInfos)
250 if (RI.R->contains(Parent))
251 return true;
252 return false;
253 }
254
255 void print(raw_ostream &OS) const;
256
257 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
258 SmallVector<CHRScope *, 8> Subs; // Subscopes.
259
260 // The instruction at which to insert the CHR conditional branch (and hoist
261 // the dependent condition values).
262 Instruction *BranchInsertPoint;
263
264 // True-biased and false-biased regions (conditional blocks),
265 // respectively. Used only for the outermost scope and includes regions in
266 // subscopes. The rest are unbiased.
267 DenseSet<Region *> TrueBiasedRegions;
268 DenseSet<Region *> FalseBiasedRegions;
269 // Among the biased regions, the regions that get CHRed.
270 SmallVector<RegInfo, 8> CHRRegions;
271
272 // True-biased and false-biased selects, respectively. Used only for the
273 // outermost scope and includes ones in subscopes.
274 DenseSet<SelectInst *> TrueBiasedSelects;
275 DenseSet<SelectInst *> FalseBiasedSelects;
276
277 // Map from one of the above regions to the instructions to stop
278 // hoisting instructions at through use-def chains.
279 HoistStopMapTy HoistStopMap;
280
281 private:
282 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
283 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
284};
285
286class CHR {
287 public:
288 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
289 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
291 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
292
293 ~CHR() {
294 for (CHRScope *Scope : Scopes) {
295 delete Scope;
296 }
297 }
298
299 bool run();
300
301 private:
302 // See the comments in CHR::run() for the high level flow of the algorithm and
303 // what the following functions do.
304
305 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
306 Region *R = RI.getTopLevelRegion();
307 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
308 Output.push_back(Scope);
309 }
310 }
311 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
313 CHRScope *findScope(Region *R);
314 void checkScopeHoistable(CHRScope *Scope);
315
316 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
318 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
319 CHRScope *Outer,
320 DenseSet<Value *> *OuterConditionValues,
321 Instruction *OuterInsertPoint,
323 DenseSet<Instruction *> &Unhoistables);
324
325 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
326 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
327
328 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
330
331 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
333 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
334
335 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
337
338 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
339 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
340 void cloneScopeBlocks(CHRScope *Scope,
341 BasicBlock *PreEntryBlock,
342 BasicBlock *ExitBlock,
343 Region *LastRegion,
344 ValueToValueMapTy &VMap);
345 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
346 BasicBlock *EntryBlock,
347 BasicBlock *NewEntryBlock,
348 ValueToValueMapTy &VMap);
349 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
350 BranchInst *MergedBR, uint64_t ProfileCount);
351 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
352 Value *&MergedCondition, BranchProbability &CHRBranchBias);
353 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
354 Value *&MergedCondition, BranchProbability &CHRBranchBias);
355 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
356 Instruction *BranchOrSelect, CHRScope *Scope,
357 IRBuilder<> &IRB, Value *&MergedCondition);
358 unsigned getRegionDuplicationCount(const Region *R) {
359 unsigned Count = 0;
360 // Find out how many times region R is cloned. Note that if the parent
361 // of R is cloned, R is also cloned, but R's clone count is not updated
362 // from the clone of the parent. We need to accumlate all the counts
363 // from the ancestors to get the clone count.
364 while (R) {
365 Count += DuplicationCount[R];
366 R = R->getParent();
367 }
368 return Count;
369 }
370
371 Function &F;
373 DominatorTree &DT;
375 RegionInfo &RI;
377 CHRStats Stats;
378
379 // All the true-biased regions in the function
380 DenseSet<Region *> TrueBiasedRegionsGlobal;
381 // All the false-biased regions in the function
382 DenseSet<Region *> FalseBiasedRegionsGlobal;
383 // All the true-biased selects in the function
384 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
385 // All the false-biased selects in the function
386 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
387 // A map from biased regions to their branch bias
389 // A map from biased selects to their branch bias
391 // All the scopes.
393 // This maps records how many times this region is cloned.
394 DenseMap<const Region *, unsigned> DuplicationCount;
395};
396
397} // end anonymous namespace
398
399static inline
401 const CHRStats &Stats) {
402 Stats.print(OS);
403 return OS;
404}
405
406static inline
407raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
408 Scope.print(OS);
409 return OS;
410}
411
413 if (DisableCHR)
414 return false;
415
416 if (ForceCHR)
417 return true;
418
419 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
420 if (CHRModules.count(F.getParent()->getName()))
421 return true;
422 return CHRFunctions.count(F.getName());
423 }
424
425 return PSI.isFunctionEntryHot(&F);
426}
427
428static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
429 CHRStats *Stats) {
430 StringRef FuncName = F.getName();
431 StringRef ModuleName = F.getParent()->getName();
432 (void)(FuncName); // Unused in release build.
433 (void)(ModuleName); // Unused in release build.
434 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
435 << FuncName);
436 if (Stats)
437 CHR_DEBUG(dbgs() << " " << *Stats);
438 CHR_DEBUG(dbgs() << "\n");
439 CHR_DEBUG(F.dump());
440}
441
442void CHRScope::print(raw_ostream &OS) const {
443 assert(RegInfos.size() > 0 && "Empty CHRScope");
444 OS << "CHRScope[";
445 OS << RegInfos.size() << ", Regions[";
446 for (const RegInfo &RI : RegInfos) {
447 OS << RI.R->getNameStr();
448 if (RI.HasBranch)
449 OS << " B";
450 if (RI.Selects.size() > 0)
451 OS << " S" << RI.Selects.size();
452 OS << ", ";
453 }
454 if (RegInfos[0].R->getParent()) {
455 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
456 } else {
457 // top level region
458 OS << "]";
459 }
460 OS << ", Subs[";
461 for (CHRScope *Sub : Subs) {
462 OS << *Sub << ", ";
463 }
464 OS << "]]";
465}
466
467// Return true if the given instruction type can be hoisted by CHR.
469 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
470 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
471 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
472 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
473 isa<InsertValueInst>(I);
474}
475
476// Return true if the given instruction can be hoisted by CHR.
479 return false;
480 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
481}
482
483// Recursively traverse the use-def chains of the given value and return a set
484// of the unhoistable base values defined within the scope (excluding the
485// first-region entry block) or the (hoistable or unhoistable) base values that
486// are defined outside (including the first-region entry block) of the
487// scope. The returned set doesn't include constants.
488static const std::set<Value *> &
490 DenseMap<Value *, std::set<Value *>> &Visited) {
491 auto It = Visited.find(V);
492 if (It != Visited.end()) {
493 return It->second;
494 }
495 std::set<Value *> Result;
496 if (auto *I = dyn_cast<Instruction>(V)) {
497 // We don't stop at a block that's not in the Scope because we would miss
498 // some instructions that are based on the same base values if we stop
499 // there.
500 if (!isHoistable(I, DT)) {
501 Result.insert(I);
502 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
503 }
504 // I is hoistable above the Scope.
505 for (Value *Op : I->operands()) {
506 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
507 Result.insert(OpResult.begin(), OpResult.end());
508 }
509 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
510 }
511 if (isa<Argument>(V)) {
512 Result.insert(V);
513 }
514 // We don't include others like constants because those won't lead to any
515 // chance of folding of conditions (eg two bit checks merged into one check)
516 // after CHR.
517 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
518}
519
520// Return true if V is already hoisted or can be hoisted (along with its
521// operands) above the insert point. When it returns true and HoistStops is
522// non-null, the instructions to stop hoisting at through the use-def chains are
523// inserted into HoistStops.
524static bool
526 DenseSet<Instruction *> &Unhoistables,
527 DenseSet<Instruction *> *HoistStops,
529 assert(InsertPoint && "Null InsertPoint");
530 if (auto *I = dyn_cast<Instruction>(V)) {
531 auto It = Visited.find(I);
532 if (It != Visited.end()) {
533 return It->second;
534 }
535 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
536 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
537 if (Unhoistables.count(I)) {
538 // Don't hoist if they are not to be hoisted.
539 Visited[I] = false;
540 return false;
541 }
542 if (DT.dominates(I, InsertPoint)) {
543 // We are already above the insert point. Stop here.
544 if (HoistStops)
545 HoistStops->insert(I);
546 Visited[I] = true;
547 return true;
548 }
549 // We aren't not above the insert point, check if we can hoist it above the
550 // insert point.
551 if (isHoistable(I, DT)) {
552 // Check operands first.
553 DenseSet<Instruction *> OpsHoistStops;
554 bool AllOpsHoisted = true;
555 for (Value *Op : I->operands()) {
556 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
557 Visited)) {
558 AllOpsHoisted = false;
559 break;
560 }
561 }
562 if (AllOpsHoisted) {
563 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
564 if (HoistStops)
565 HoistStops->insert_range(OpsHoistStops);
566 Visited[I] = true;
567 return true;
568 }
569 }
570 Visited[I] = false;
571 return false;
572 }
573 // Non-instructions are considered hoistable.
574 return true;
575}
576
577// Constructs the true and false branch probabilities if the the instruction has
578// valid branch weights. Returns true when this was successful, false otherwise.
580 BranchProbability &TrueProb,
581 BranchProbability &FalseProb) {
582 uint64_t TrueWeight;
583 uint64_t FalseWeight;
584 if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
585 return false;
586 uint64_t SumWeight = TrueWeight + FalseWeight;
587
588 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
589 "Overflow calculating branch probabilities.");
590
591 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
592 if (SumWeight == 0)
593 return false;
594
595 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
596 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
597 return true;
598}
599
602 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
603}
604
605// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
606// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
607// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
608// false.
609template <typename K, typename S, typename M>
610static bool checkBias(K *Key, BranchProbability TrueProb,
611 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
612 M &BiasMap) {
614 if (TrueProb >= Threshold) {
615 TrueSet.insert(Key);
616 BiasMap[Key] = TrueProb;
617 return true;
618 } else if (FalseProb >= Threshold) {
619 FalseSet.insert(Key);
620 BiasMap[Key] = FalseProb;
621 return true;
622 }
623 return false;
624}
625
626// Returns true and insert a region into the right biased set and the map if the
627// branch of the region is biased.
629 DenseSet<Region *> &TrueBiasedRegionsGlobal,
630 DenseSet<Region *> &FalseBiasedRegionsGlobal,
632 if (!BI->isConditional())
633 return false;
634 BranchProbability ThenProb, ElseProb;
635 if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
636 return false;
637 BasicBlock *IfThen = BI->getSuccessor(0);
638 BasicBlock *IfElse = BI->getSuccessor(1);
639 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
640 IfThen != IfElse &&
641 "Invariant from findScopes");
642 if (IfThen == R->getExit()) {
643 // Swap them so that IfThen/ThenProb means going into the conditional code
644 // and IfElse/ElseProb means skipping it.
645 std::swap(IfThen, IfElse);
646 std::swap(ThenProb, ElseProb);
647 }
648 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
649 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
650 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
651 return checkBias(R, ThenProb, ElseProb,
652 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
653 BranchBiasMap);
654}
655
656// Returns true and insert a select into the right biased set and the map if the
657// select is biased.
659 SelectInst *SI, Region *R,
660 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
661 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
663 BranchProbability TrueProb, FalseProb;
664 if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
665 return false;
666 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
667 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
668 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
669 return checkBias(SI, TrueProb, FalseProb,
670 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
671 SelectBiasMap);
672}
673
674// Returns the instruction at which to hoist the dependent condition values and
675// insert the CHR branch for a region. This is the terminator branch in the
676// entry block or the first select in the entry block, if any.
678 Region *R = RI.R;
679 BasicBlock *EntryBB = R->getEntry();
680 // The hoist point is by default the terminator of the entry block, which is
681 // the same as the branch instruction if RI.HasBranch is true.
682 Instruction *HoistPoint = EntryBB->getTerminator();
683 for (SelectInst *SI : RI.Selects) {
684 if (SI->getParent() == EntryBB) {
685 // Pick the first select in Selects in the entry block. Note Selects is
686 // sorted in the instruction order within a block (asserted below).
687 HoistPoint = SI;
688 break;
689 }
690 }
691 assert(HoistPoint && "Null HoistPoint");
692#ifndef NDEBUG
693 // Check that HoistPoint is the first one in Selects in the entry block,
694 // if any.
695 DenseSet<Instruction *> EntryBlockSelectSet;
696 for (SelectInst *SI : RI.Selects) {
697 if (SI->getParent() == EntryBB) {
698 EntryBlockSelectSet.insert(SI);
699 }
700 }
701 for (Instruction &I : *EntryBB) {
702 if (EntryBlockSelectSet.contains(&I)) {
703 assert(&I == HoistPoint &&
704 "HoistPoint must be the first one in Selects");
705 break;
706 }
707 }
708#endif
709 return HoistPoint;
710}
711
712// Find a CHR scope in the given region.
713CHRScope * CHR::findScope(Region *R) {
714 CHRScope *Result = nullptr;
715 BasicBlock *Entry = R->getEntry();
716 BasicBlock *Exit = R->getExit(); // null if top level.
717 assert(Entry && "Entry must not be null");
718 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
719 "Only top level region has a null exit");
720 if (Entry)
721 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
722 else
723 CHR_DEBUG(dbgs() << "Entry null\n");
724 if (Exit)
725 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
726 else
727 CHR_DEBUG(dbgs() << "Exit null\n");
728 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
729 // to this region).
730 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
731 if (EntryInSubregion)
732 return nullptr;
733 // Exclude loops
734 for (BasicBlock *Pred : predecessors(Entry))
735 if (R->contains(Pred))
736 return nullptr;
737 // If any of the basic blocks have address taken, we must skip this region
738 // because we cannot clone basic blocks that have address taken.
739 for (BasicBlock *BB : R->blocks()) {
740 if (BB->hasAddressTaken())
741 return nullptr;
742 // If we encounter llvm.coro.id, skip this region because if the basic block
743 // is cloned, we end up inserting a token type PHI node to the block with
744 // llvm.coro.begin.
745 // FIXME: This could lead to less optimal codegen, because the region is
746 // excluded, it can prevent CHR from merging adjacent regions into bigger
747 // scope and hoisting more branches.
748 for (Instruction &I : *BB)
749 if (auto *II = dyn_cast<IntrinsicInst>(&I))
750 if (II->getIntrinsicID() == Intrinsic::coro_id)
751 return nullptr;
752 }
753
754 if (Exit) {
755 // Try to find an if-then block (check if R is an if-then).
756 // if (cond) {
757 // ...
758 // }
759 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
760 if (BI)
761 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
762 else
763 CHR_DEBUG(dbgs() << "BI null\n");
764 if (BI && BI->isConditional()) {
765 BasicBlock *S0 = BI->getSuccessor(0);
766 BasicBlock *S1 = BI->getSuccessor(1);
767 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
768 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
769 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
770 RegInfo RI(R);
771 RI.HasBranch = checkBiasedBranch(
772 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
773 BranchBiasMap);
774 Result = new CHRScope(RI);
775 Scopes.insert(Result);
776 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
777 ++Stats.NumBranches;
778 if (!RI.HasBranch) {
779 ORE.emit([&]() {
780 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
781 << "Branch not biased";
782 });
783 }
784 }
785 }
786 }
787 {
788 // Try to look for selects in the direct child blocks (as opposed to in
789 // subregions) of R.
790 // ...
791 // if (..) { // Some subregion
792 // ...
793 // }
794 // if (..) { // Some subregion
795 // ...
796 // }
797 // ...
798 // a = cond ? b : c;
799 // ...
801 for (RegionNode *E : R->elements()) {
802 if (E->isSubRegion())
803 continue;
804 // This returns the basic block of E if E is a direct child of R (not a
805 // subregion.)
806 BasicBlock *BB = E->getEntry();
807 // Need to push in the order to make it easier to find the first Select
808 // later.
809 for (Instruction &I : *BB) {
810 if (auto *SI = dyn_cast<SelectInst>(&I)) {
811 Selects.push_back(SI);
812 ++Stats.NumBranches;
813 }
814 }
815 }
816 if (Selects.size() > 0) {
817 auto AddSelects = [&](RegInfo &RI) {
818 for (auto *SI : Selects)
819 if (checkBiasedSelect(SI, RI.R,
820 TrueBiasedSelectsGlobal,
821 FalseBiasedSelectsGlobal,
822 SelectBiasMap))
823 RI.Selects.push_back(SI);
824 else
825 ORE.emit([&]() {
826 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
827 << "Select not biased";
828 });
829 };
830 if (!Result) {
831 CHR_DEBUG(dbgs() << "Found a select-only region\n");
832 RegInfo RI(R);
833 AddSelects(RI);
834 Result = new CHRScope(RI);
835 Scopes.insert(Result);
836 } else {
837 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
838 AddSelects(Result->RegInfos[0]);
839 }
840 }
841 }
842
843 if (Result) {
844 checkScopeHoistable(Result);
845 }
846 return Result;
847}
848
849// Check that any of the branch and the selects in the region could be
850// hoisted above the the CHR branch insert point (the most dominating of
851// them, either the branch (at the end of the first block) or the first
852// select in the first block). If the branch can't be hoisted, drop the
853// selects in the first blocks.
854//
855// For example, for the following scope/region with selects, we want to insert
856// the merged branch right before the first select in the first/entry block by
857// hoisting c1, c2, c3, and c4.
858//
859// // Branch insert point here.
860// a = c1 ? b : c; // Select 1
861// d = c2 ? e : f; // Select 2
862// if (c3) { // Branch
863// ...
864// c4 = foo() // A call.
865// g = c4 ? h : i; // Select 3
866// }
867//
868// But suppose we can't hoist c4 because it's dependent on the preceding
869// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
870// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
871void CHR::checkScopeHoistable(CHRScope *Scope) {
872 RegInfo &RI = Scope->RegInfos[0];
873 Region *R = RI.R;
874 BasicBlock *EntryBB = R->getEntry();
875 auto *Branch = RI.HasBranch ?
876 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
877 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
878 if (RI.HasBranch || !Selects.empty()) {
879 Instruction *InsertPoint = getBranchInsertPoint(RI);
880 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
881 // Avoid a data dependence from a select or a branch to a(nother)
882 // select. Note no instruction can't data-depend on a branch (a branch
883 // instruction doesn't produce a value).
884 // Initialize Unhoistables with the selects.
885 DenseSet<Instruction *> Unhoistables(llvm::from_range, Selects);
886 // Remove Selects that can't be hoisted.
887 for (auto it = Selects.begin(); it != Selects.end(); ) {
888 SelectInst *SI = *it;
889 if (SI == InsertPoint) {
890 ++it;
891 continue;
892 }
894 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
895 DT, Unhoistables, nullptr, Visited);
896 if (!IsHoistable) {
897 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
898 ORE.emit([&]() {
900 "DropUnhoistableSelect", SI)
901 << "Dropped unhoistable select";
902 });
903 it = Selects.erase(it);
904 // Since we are dropping the select here, we also drop it from
905 // Unhoistables.
906 Unhoistables.erase(SI);
907 } else
908 ++it;
909 }
910 // Update InsertPoint after potentially removing selects.
911 InsertPoint = getBranchInsertPoint(RI);
912 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
913 if (RI.HasBranch && InsertPoint != Branch) {
915 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
916 DT, Unhoistables, nullptr, Visited);
917 if (!IsHoistable) {
918 // If the branch isn't hoistable, drop the selects in the entry
919 // block, preferring the branch, which makes the branch the hoist
920 // point.
921 assert(InsertPoint != Branch && "Branch must not be the hoist point");
922 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
923 CHR_DEBUG(
924 for (SelectInst *SI : Selects) {
925 dbgs() << "SI " << *SI << "\n";
926 });
927 for (SelectInst *SI : Selects) {
928 ORE.emit([&]() {
930 "DropSelectUnhoistableBranch", SI)
931 << "Dropped select due to unhoistable branch";
932 });
933 }
934 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
935 return SI->getParent() == EntryBB;
936 });
937 Unhoistables.clear();
938 InsertPoint = Branch;
939 }
940 }
941 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
942#ifndef NDEBUG
943 if (RI.HasBranch) {
944 assert(!DT.dominates(Branch, InsertPoint) &&
945 "Branch can't be already above the hoist point");
947 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
948 DT, Unhoistables, nullptr, Visited) &&
949 "checkHoistValue for branch");
950 }
951 for (auto *SI : Selects) {
952 assert(!DT.dominates(SI, InsertPoint) &&
953 "SI can't be already above the hoist point");
955 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
956 Unhoistables, nullptr, Visited) &&
957 "checkHoistValue for selects");
958 }
959 CHR_DEBUG(dbgs() << "Result\n");
960 if (RI.HasBranch) {
961 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
962 }
963 for (auto *SI : Selects) {
964 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
965 }
966#endif
967 }
968}
969
970// Traverse the region tree, find all nested scopes and merge them if possible.
971CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
973 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
974 CHRScope *Result = findScope(R);
975 // Visit subscopes.
976 CHRScope *ConsecutiveSubscope = nullptr;
978 for (auto It = R->begin(); It != R->end(); ++It) {
979 const std::unique_ptr<Region> &SubR = *It;
980 auto NextIt = std::next(It);
981 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
982 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
983 << "\n");
984 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
985 if (SubCHRScope) {
986 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
987 } else {
988 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
989 }
990 if (SubCHRScope) {
991 if (!ConsecutiveSubscope)
992 ConsecutiveSubscope = SubCHRScope;
993 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
994 Subscopes.push_back(ConsecutiveSubscope);
995 ConsecutiveSubscope = SubCHRScope;
996 } else
997 ConsecutiveSubscope->append(SubCHRScope);
998 } else {
999 if (ConsecutiveSubscope) {
1000 Subscopes.push_back(ConsecutiveSubscope);
1001 }
1002 ConsecutiveSubscope = nullptr;
1003 }
1004 }
1005 if (ConsecutiveSubscope) {
1006 Subscopes.push_back(ConsecutiveSubscope);
1007 }
1008 for (CHRScope *Sub : Subscopes) {
1009 if (Result) {
1010 // Combine it with the parent.
1011 Result->addSub(Sub);
1012 } else {
1013 // Push Subscopes as they won't be combined with the parent.
1014 Scopes.push_back(Sub);
1015 }
1016 }
1017 return Result;
1018}
1019
1021 DenseSet<Value *> ConditionValues;
1022 if (RI.HasBranch) {
1023 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1024 ConditionValues.insert(BI->getCondition());
1025 }
1026 for (SelectInst *SI : RI.Selects) {
1027 ConditionValues.insert(SI->getCondition());
1028 }
1029 return ConditionValues;
1030}
1031
1032
1033// Determine whether to split a scope depending on the sets of the branch
1034// condition values of the previous region and the current region. We split
1035// (return true) it if 1) the condition values of the inner/lower scope can't be
1036// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1037// values have an empty intersection (because the combined branch conditions
1038// won't probably lead to a simpler combined condition).
1039static bool shouldSplit(Instruction *InsertPoint,
1040 DenseSet<Value *> &PrevConditionValues,
1041 DenseSet<Value *> &ConditionValues,
1042 DominatorTree &DT,
1043 DenseSet<Instruction *> &Unhoistables) {
1044 assert(InsertPoint && "Null InsertPoint");
1045 CHR_DEBUG(
1046 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1047 for (Value *V : PrevConditionValues) {
1048 dbgs() << *V << ", ";
1049 }
1050 dbgs() << " ConditionValues ";
1051 for (Value *V : ConditionValues) {
1052 dbgs() << *V << ", ";
1053 }
1054 dbgs() << "\n");
1055 // If any of Bases isn't hoistable to the hoist point, split.
1056 for (Value *V : ConditionValues) {
1058 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1059 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1060 return true; // Not hoistable, split.
1061 }
1062 }
1063 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1064 // unnecessary splits at scopes with no branch/selects. If
1065 // PrevConditionValues and ConditionValues don't intersect at all, split.
1066 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1067 // Use std::set as DenseSet doesn't work with set_intersection.
1068 std::set<Value *> PrevBases, Bases;
1070 for (Value *V : PrevConditionValues) {
1071 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1072 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1073 }
1074 for (Value *V : ConditionValues) {
1075 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1076 Bases.insert(BaseValues.begin(), BaseValues.end());
1077 }
1078 CHR_DEBUG(
1079 dbgs() << "PrevBases ";
1080 for (Value *V : PrevBases) {
1081 dbgs() << *V << ", ";
1082 }
1083 dbgs() << " Bases ";
1084 for (Value *V : Bases) {
1085 dbgs() << *V << ", ";
1086 }
1087 dbgs() << "\n");
1088 std::vector<Value *> Intersection;
1089 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1090 Bases.end(), std::back_inserter(Intersection));
1091 if (Intersection.empty()) {
1092 // Empty intersection, split.
1093 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1094 return true;
1095 }
1096 }
1097 CHR_DEBUG(dbgs() << "No split\n");
1098 return false; // Don't split.
1099}
1100
1101static void getSelectsInScope(CHRScope *Scope,
1102 DenseSet<Instruction *> &Output) {
1103 for (RegInfo &RI : Scope->RegInfos)
1104 Output.insert_range(RI.Selects);
1105 for (CHRScope *Sub : Scope->Subs)
1106 getSelectsInScope(Sub, Output);
1107}
1108
1109void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1111 for (CHRScope *Scope : Input) {
1112 assert(!Scope->BranchInsertPoint &&
1113 "BranchInsertPoint must not be set");
1114 DenseSet<Instruction *> Unhoistables;
1115 getSelectsInScope(Scope, Unhoistables);
1116 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1117 }
1118#ifndef NDEBUG
1119 for (CHRScope *Scope : Output) {
1120 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1121 }
1122#endif
1123}
1124
1125SmallVector<CHRScope *, 8> CHR::splitScope(
1126 CHRScope *Scope,
1127 CHRScope *Outer,
1128 DenseSet<Value *> *OuterConditionValues,
1129 Instruction *OuterInsertPoint,
1131 DenseSet<Instruction *> &Unhoistables) {
1132 if (Outer) {
1133 assert(OuterConditionValues && "Null OuterConditionValues");
1134 assert(OuterInsertPoint && "Null OuterInsertPoint");
1135 }
1136 bool PrevSplitFromOuter = true;
1137 DenseSet<Value *> PrevConditionValues;
1138 Instruction *PrevInsertPoint = nullptr;
1140 SmallVector<bool, 8> SplitsSplitFromOuter;
1141 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1142 SmallVector<Instruction *, 8> SplitsInsertPoints;
1143 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1144 for (RegInfo &RI : RegInfos) {
1145 Instruction *InsertPoint = getBranchInsertPoint(RI);
1147 CHR_DEBUG(
1148 dbgs() << "ConditionValues ";
1149 for (Value *V : ConditionValues) {
1150 dbgs() << *V << ", ";
1151 }
1152 dbgs() << "\n");
1153 if (RI.R == RegInfos[0].R) {
1154 // First iteration. Check to see if we should split from the outer.
1155 if (Outer) {
1156 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1157 CHR_DEBUG(dbgs() << "Should split from outer at "
1158 << RI.R->getNameStr() << "\n");
1159 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1160 ConditionValues, DT, Unhoistables)) {
1161 PrevConditionValues = ConditionValues;
1162 PrevInsertPoint = InsertPoint;
1163 ORE.emit([&]() {
1165 "SplitScopeFromOuter",
1166 RI.R->getEntry()->getTerminator())
1167 << "Split scope from outer due to unhoistable branch/select "
1168 << "and/or lack of common condition values";
1169 });
1170 } else {
1171 // Not splitting from the outer. Use the outer bases and insert
1172 // point. Union the bases.
1173 PrevSplitFromOuter = false;
1174 PrevConditionValues = *OuterConditionValues;
1175 PrevConditionValues.insert_range(ConditionValues);
1176 PrevInsertPoint = OuterInsertPoint;
1177 }
1178 } else {
1179 CHR_DEBUG(dbgs() << "Outer null\n");
1180 PrevConditionValues = ConditionValues;
1181 PrevInsertPoint = InsertPoint;
1182 }
1183 } else {
1184 CHR_DEBUG(dbgs() << "Should split from prev at "
1185 << RI.R->getNameStr() << "\n");
1186 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1187 DT, Unhoistables)) {
1188 CHRScope *Tail = Scope->split(RI.R);
1189 Scopes.insert(Tail);
1190 Splits.push_back(Scope);
1191 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1192 SplitsConditionValues.push_back(PrevConditionValues);
1193 SplitsInsertPoints.push_back(PrevInsertPoint);
1194 Scope = Tail;
1195 PrevConditionValues = ConditionValues;
1196 PrevInsertPoint = InsertPoint;
1197 PrevSplitFromOuter = true;
1198 ORE.emit([&]() {
1200 "SplitScopeFromPrev",
1201 RI.R->getEntry()->getTerminator())
1202 << "Split scope from previous due to unhoistable branch/select "
1203 << "and/or lack of common condition values";
1204 });
1205 } else {
1206 // Not splitting. Union the bases. Keep the hoist point.
1207 PrevConditionValues.insert_range(ConditionValues);
1208 }
1209 }
1210 }
1211 Splits.push_back(Scope);
1212 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1213 SplitsConditionValues.push_back(PrevConditionValues);
1214 assert(PrevInsertPoint && "Null PrevInsertPoint");
1215 SplitsInsertPoints.push_back(PrevInsertPoint);
1216 assert(Splits.size() == SplitsConditionValues.size() &&
1217 Splits.size() == SplitsSplitFromOuter.size() &&
1218 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1219 for (size_t I = 0; I < Splits.size(); ++I) {
1220 CHRScope *Split = Splits[I];
1221 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1222 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1224 DenseSet<Instruction *> SplitUnhoistables;
1225 getSelectsInScope(Split, SplitUnhoistables);
1226 for (CHRScope *Sub : Split->Subs) {
1227 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1228 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1229 SplitUnhoistables);
1230 llvm::append_range(NewSubs, SubSplits);
1231 }
1232 Split->Subs = NewSubs;
1233 }
1235 for (size_t I = 0; I < Splits.size(); ++I) {
1236 CHRScope *Split = Splits[I];
1237 if (SplitsSplitFromOuter[I]) {
1238 // Split from the outer.
1239 Output.push_back(Split);
1240 Split->BranchInsertPoint = SplitsInsertPoints[I];
1241 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1242 << "\n");
1243 } else {
1244 // Connected to the outer.
1245 Result.push_back(Split);
1246 }
1247 }
1248 if (!Outer)
1249 assert(Result.empty() &&
1250 "If no outer (top-level), must return no nested ones");
1251 return Result;
1252}
1253
1254void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1255 for (CHRScope *Scope : Scopes) {
1256 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1257 classifyBiasedScopes(Scope, Scope);
1258 CHR_DEBUG(
1259 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1260 dbgs() << "TrueBiasedRegions ";
1261 for (Region *R : Scope->TrueBiasedRegions) {
1262 dbgs() << R->getNameStr() << ", ";
1263 }
1264 dbgs() << "\n";
1265 dbgs() << "FalseBiasedRegions ";
1266 for (Region *R : Scope->FalseBiasedRegions) {
1267 dbgs() << R->getNameStr() << ", ";
1268 }
1269 dbgs() << "\n";
1270 dbgs() << "TrueBiasedSelects ";
1271 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1272 dbgs() << *SI << ", ";
1273 }
1274 dbgs() << "\n";
1275 dbgs() << "FalseBiasedSelects ";
1276 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1277 dbgs() << *SI << ", ";
1278 }
1279 dbgs() << "\n";);
1280 }
1281}
1282
1283void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1284 for (RegInfo &RI : Scope->RegInfos) {
1285 if (RI.HasBranch) {
1286 Region *R = RI.R;
1287 if (TrueBiasedRegionsGlobal.contains(R))
1288 OutermostScope->TrueBiasedRegions.insert(R);
1289 else if (FalseBiasedRegionsGlobal.contains(R))
1290 OutermostScope->FalseBiasedRegions.insert(R);
1291 else
1292 llvm_unreachable("Must be biased");
1293 }
1294 for (SelectInst *SI : RI.Selects) {
1295 if (TrueBiasedSelectsGlobal.contains(SI))
1296 OutermostScope->TrueBiasedSelects.insert(SI);
1297 else if (FalseBiasedSelectsGlobal.contains(SI))
1298 OutermostScope->FalseBiasedSelects.insert(SI);
1299 else
1300 llvm_unreachable("Must be biased");
1301 }
1302 }
1303 for (CHRScope *Sub : Scope->Subs) {
1304 classifyBiasedScopes(Sub, OutermostScope);
1305 }
1306}
1307
1308static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1309 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1310 Scope->FalseBiasedRegions.size() +
1311 Scope->TrueBiasedSelects.size() +
1312 Scope->FalseBiasedSelects.size();
1313 return NumBiased >= CHRMergeThreshold;
1314}
1315
1316void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1318 for (CHRScope *Scope : Input) {
1319 // Filter out the ones with only one region and no subs.
1320 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1321 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1322 << Scope->TrueBiasedRegions.size()
1323 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1324 << " true-selects " << Scope->TrueBiasedSelects.size()
1325 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1326 ORE.emit([&]() {
1328 DEBUG_TYPE,
1329 "DropScopeWithOneBranchOrSelect",
1330 Scope->RegInfos[0].R->getEntry()->getTerminator())
1331 << "Drop scope with < "
1332 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1333 << " biased branch(es) or select(s)";
1334 });
1335 continue;
1336 }
1337 Output.push_back(Scope);
1338 }
1339}
1340
1341void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1343 for (CHRScope *Scope : Input) {
1344 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1345 "Empty");
1346 setCHRRegions(Scope, Scope);
1347 Output.push_back(Scope);
1348 CHR_DEBUG(
1349 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1350 for (auto pair : Scope->HoistStopMap) {
1351 Region *R = pair.first;
1352 dbgs() << "Region " << R->getNameStr() << "\n";
1353 for (Instruction *I : pair.second) {
1354 dbgs() << "HoistStop " << *I << "\n";
1355 }
1356 }
1357 dbgs() << "CHRRegions" << "\n";
1358 for (RegInfo &RI : Scope->CHRRegions) {
1359 dbgs() << RI.R->getNameStr() << "\n";
1360 });
1361 }
1362}
1363
1364void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1365 DenseSet<Instruction *> Unhoistables;
1366 // Put the biased selects in Unhoistables because they should stay where they
1367 // are and constant-folded after CHR (in case one biased select or a branch
1368 // can depend on another biased select.)
1369 for (RegInfo &RI : Scope->RegInfos)
1370 Unhoistables.insert_range(RI.Selects);
1371 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1372 for (RegInfo &RI : Scope->RegInfos) {
1373 Region *R = RI.R;
1374 DenseSet<Instruction *> HoistStops;
1375 bool IsHoisted = false;
1376 if (RI.HasBranch) {
1377 assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1378 OutermostScope->FalseBiasedRegions.contains(R)) &&
1379 "Must be truthy or falsy");
1380 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1381 // Note checkHoistValue fills in HoistStops.
1383 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1384 Unhoistables, &HoistStops, Visited);
1385 assert(IsHoistable && "Must be hoistable");
1386 (void)(IsHoistable); // Unused in release build
1387 IsHoisted = true;
1388 }
1389 for (SelectInst *SI : RI.Selects) {
1390 assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1391 OutermostScope->FalseBiasedSelects.contains(SI)) &&
1392 "Must be true or false biased");
1393 // Note checkHoistValue fills in HoistStops.
1395 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1396 Unhoistables, &HoistStops, Visited);
1397 assert(IsHoistable && "Must be hoistable");
1398 (void)(IsHoistable); // Unused in release build
1399 IsHoisted = true;
1400 }
1401 if (IsHoisted) {
1402 OutermostScope->CHRRegions.push_back(RI);
1403 OutermostScope->HoistStopMap[R] = HoistStops;
1404 }
1405 }
1406 for (CHRScope *Sub : Scope->Subs)
1407 setCHRRegions(Sub, OutermostScope);
1408}
1409
1410static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1411 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1412}
1413
1414void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1416 Output.resize(Input.size());
1417 llvm::copy(Input, Output.begin());
1419}
1420
1421// Return true if V is already hoisted or was hoisted (along with its operands)
1422// to the insert point.
1423static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1424 HoistStopMapTy &HoistStopMap,
1425 DenseSet<Instruction *> &HoistedSet,
1426 DenseSet<PHINode *> &TrivialPHIs,
1427 DominatorTree &DT) {
1428 auto IT = HoistStopMap.find(R);
1429 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1430 DenseSet<Instruction *> &HoistStops = IT->second;
1431 if (auto *I = dyn_cast<Instruction>(V)) {
1432 if (I == HoistPoint)
1433 return;
1434 if (HoistStops.count(I))
1435 return;
1436 if (auto *PN = dyn_cast<PHINode>(I))
1437 if (TrivialPHIs.count(PN))
1438 // The trivial phi inserted by the previous CHR scope could replace a
1439 // non-phi in HoistStops. Note that since this phi is at the exit of a
1440 // previous CHR scope, which dominates this scope, it's safe to stop
1441 // hoisting there.
1442 return;
1443 if (HoistedSet.count(I))
1444 // Already hoisted, return.
1445 return;
1446 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1447 assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1448 assert(DT.getNode(HoistPoint->getParent()) &&
1449 "DT must contain HoistPoint block");
1450 if (DT.dominates(I, HoistPoint))
1451 // We are already above the hoist point. Stop here. This may be necessary
1452 // when multiple scopes would independently hoist the same
1453 // instruction. Since an outer (dominating) scope would hoist it to its
1454 // entry before an inner (dominated) scope would to its entry, the inner
1455 // scope may see the instruction already hoisted, in which case it
1456 // potentially wrong for the inner scope to hoist it and could cause bad
1457 // IR (non-dominating def), but safe to skip hoisting it instead because
1458 // it's already in a block that dominates the inner scope.
1459 return;
1460 for (Value *Op : I->operands()) {
1461 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1462 }
1463 I->moveBefore(HoistPoint->getIterator());
1464 HoistedSet.insert(I);
1465 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1466 }
1467}
1468
1469// Hoist the dependent condition values of the branches and the selects in the
1470// scope to the insert point.
1471static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1472 DenseSet<PHINode *> &TrivialPHIs,
1473 DominatorTree &DT) {
1474 DenseSet<Instruction *> HoistedSet;
1475 for (const RegInfo &RI : Scope->CHRRegions) {
1476 Region *R = RI.R;
1477 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1478 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1479 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1480 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1481 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1482 HoistedSet, TrivialPHIs, DT);
1483 }
1484 for (SelectInst *SI : RI.Selects) {
1485 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1486 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1487 if (!(IsTrueBiased || IsFalseBiased))
1488 continue;
1489 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1490 HoistedSet, TrivialPHIs, DT);
1491 }
1492 }
1493}
1494
1495// Negate the predicate if an ICmp if it's used only by branches or selects by
1496// swapping the operands of the branches or the selects. Returns true if success.
1498 Instruction *ExcludedUser,
1499 CHRScope *Scope) {
1500 for (User *U : ICmp->users()) {
1501 if (U == ExcludedUser)
1502 continue;
1503 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1504 continue;
1505 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1506 continue;
1507 return false;
1508 }
1509 for (User *U : ICmp->users()) {
1510 if (U == ExcludedUser)
1511 continue;
1512 if (auto *BI = dyn_cast<BranchInst>(U)) {
1513 assert(BI->isConditional() && "Must be conditional");
1514 BI->swapSuccessors();
1515 // Don't need to swap this in terms of
1516 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1517 // mean whehter the branch is likely go into the if-then rather than
1518 // successor0/successor1 and because we can tell which edge is the then or
1519 // the else one by comparing the destination to the region exit block.
1520 continue;
1521 }
1522 if (auto *SI = dyn_cast<SelectInst>(U)) {
1523 // Swap operands
1524 SI->swapValues();
1525 SI->swapProfMetadata();
1526 if (Scope->TrueBiasedSelects.count(SI)) {
1527 assert(!Scope->FalseBiasedSelects.contains(SI) &&
1528 "Must not be already in");
1529 Scope->FalseBiasedSelects.insert(SI);
1530 } else if (Scope->FalseBiasedSelects.count(SI)) {
1531 assert(!Scope->TrueBiasedSelects.contains(SI) &&
1532 "Must not be already in");
1533 Scope->TrueBiasedSelects.insert(SI);
1534 }
1535 continue;
1536 }
1537 llvm_unreachable("Must be a branch or a select");
1538 }
1540 return true;
1541}
1542
1543// A helper for transformScopes. Insert a trivial phi at the scope exit block
1544// for a value that's defined in the scope but used outside it (meaning it's
1545// alive at the exit block).
1546static void insertTrivialPHIs(CHRScope *Scope,
1547 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1548 DenseSet<PHINode *> &TrivialPHIs) {
1549 SmallSetVector<BasicBlock *, 8> BlocksInScope;
1550 for (RegInfo &RI : Scope->RegInfos) {
1551 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1552 // sub-Scopes.
1553 BlocksInScope.insert(BB);
1554 }
1555 }
1556 CHR_DEBUG({
1557 dbgs() << "Inserting redundant phis\n";
1558 for (BasicBlock *BB : BlocksInScope)
1559 dbgs() << "BlockInScope " << BB->getName() << "\n";
1560 });
1561 for (BasicBlock *BB : BlocksInScope) {
1562 for (Instruction &I : *BB) {
1564 for (User *U : I.users()) {
1565 if (auto *UI = dyn_cast<Instruction>(U)) {
1566 if (!BlocksInScope.contains(UI->getParent()) &&
1567 // Unless there's already a phi for I at the exit block.
1568 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1569 CHR_DEBUG(dbgs() << "V " << I << "\n");
1570 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1571 Users.push_back(UI);
1572 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1573 // There's a loop backedge from a block that's dominated by this
1574 // scope to the entry block.
1575 CHR_DEBUG(dbgs() << "V " << I << "\n");
1576 CHR_DEBUG(dbgs()
1577 << "Used at entry block (for a back edge) by a phi user "
1578 << *UI << "\n");
1579 Users.push_back(UI);
1580 }
1581 }
1582 }
1583 if (Users.size() > 0) {
1584 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1585 // ExitBlock. Replace I with the new phi in UI unless UI is another
1586 // phi at ExitBlock.
1587 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
1588 PN->insertBefore(ExitBlock->begin());
1589 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1590 PN->addIncoming(&I, Pred);
1591 }
1592 TrivialPHIs.insert(PN);
1593 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1594 for (Instruction *UI : Users) {
1595 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1596 if (UI->getOperand(J) == &I) {
1597 UI->setOperand(J, PN);
1598 }
1599 }
1600 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1601 }
1602 }
1603 }
1604 }
1605}
1606
1607// Assert that all the CHR regions of the scope have a biased branch or select.
1608static void LLVM_ATTRIBUTE_UNUSED
1610#ifndef NDEBUG
1611 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1612 if (Scope->TrueBiasedRegions.count(RI.R) ||
1613 Scope->FalseBiasedRegions.count(RI.R))
1614 return true;
1615 for (SelectInst *SI : RI.Selects)
1616 if (Scope->TrueBiasedSelects.count(SI) ||
1617 Scope->FalseBiasedSelects.count(SI))
1618 return true;
1619 return false;
1620 };
1621 for (RegInfo &RI : Scope->CHRRegions) {
1622 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1623 "Must have biased branch or select");
1624 }
1625#endif
1626}
1627
1628// Assert that all the condition values of the biased branches and selects have
1629// been hoisted to the pre-entry block or outside of the scope.
1631 CHRScope *Scope, BasicBlock *PreEntryBlock) {
1632 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1633 for (RegInfo &RI : Scope->CHRRegions) {
1634 Region *R = RI.R;
1635 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1636 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1637 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1638 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1639 Value *V = BI->getCondition();
1640 CHR_DEBUG(dbgs() << *V << "\n");
1641 if (auto *I = dyn_cast<Instruction>(V)) {
1642 (void)(I); // Unused in release build.
1643 assert((I->getParent() == PreEntryBlock ||
1644 !Scope->contains(I)) &&
1645 "Must have been hoisted to PreEntryBlock or outside the scope");
1646 }
1647 }
1648 for (SelectInst *SI : RI.Selects) {
1649 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1650 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1651 if (!(IsTrueBiased || IsFalseBiased))
1652 continue;
1653 Value *V = SI->getCondition();
1654 CHR_DEBUG(dbgs() << *V << "\n");
1655 if (auto *I = dyn_cast<Instruction>(V)) {
1656 (void)(I); // Unused in release build.
1657 assert((I->getParent() == PreEntryBlock ||
1658 !Scope->contains(I)) &&
1659 "Must have been hoisted to PreEntryBlock or outside the scope");
1660 }
1661 }
1662 }
1663}
1664
1665void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1666 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1667
1668 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1669
1670 for (RegInfo &RI : Scope->RegInfos) {
1671 const Region *R = RI.R;
1672 unsigned Duplication = getRegionDuplicationCount(R);
1673 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication
1674 << "\n");
1676 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
1677 << " for this region");
1678 ORE.emit([&]() {
1679 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
1680 R->getEntry()->getTerminator())
1681 << "Reached the duplication threshold for the region";
1682 });
1683 return;
1684 }
1685 }
1686 for (RegInfo &RI : Scope->RegInfos) {
1687 DuplicationCount[RI.R]++;
1688 }
1689
1690 Region *FirstRegion = Scope->RegInfos[0].R;
1691 BasicBlock *EntryBlock = FirstRegion->getEntry();
1692 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1693 BasicBlock *ExitBlock = LastRegion->getExit();
1694 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1695
1696 if (ExitBlock) {
1697 // Insert a trivial phi at the exit block (where the CHR hot path and the
1698 // cold path merges) for a value that's defined in the scope but used
1699 // outside it (meaning it's alive at the exit block). We will add the
1700 // incoming values for the CHR cold paths to it below. Without this, we'd
1701 // miss updating phi's for such values unless there happens to already be a
1702 // phi for that value there.
1703 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1704 }
1705
1706 // Split the entry block of the first region. The new block becomes the new
1707 // entry block of the first region. The old entry block becomes the block to
1708 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1709 // through the split, we update the entry of the first region after the split,
1710 // and Region only points to the entry and the exit blocks, rather than
1711 // keeping everything in a list or set, the blocks membership and the
1712 // entry/exit blocks of the region are still valid after the split.
1713 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1714 << " at " << *Scope->BranchInsertPoint << "\n");
1715 BasicBlock *NewEntryBlock =
1716 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1717 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1718 "NewEntryBlock's only pred must be EntryBlock");
1719 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1720 BasicBlock *PreEntryBlock = EntryBlock;
1721
1722 ValueToValueMapTy VMap;
1723 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1724 // hot path (originals) and a cold path (clones) and update the PHIs at the
1725 // exit block.
1726 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1727
1728 // Replace the old (placeholder) branch with the new (merged) conditional
1729 // branch.
1730 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1731 NewEntryBlock, VMap);
1732
1733#ifndef NDEBUG
1735#endif
1736
1737 // Hoist the conditional values of the branches/selects.
1738 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1739
1740#ifndef NDEBUG
1741 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1742#endif
1743
1744 // Create the combined branch condition and constant-fold the branches/selects
1745 // in the hot path.
1746 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1747 ProfileCount.value_or(0));
1748}
1749
1750// A helper for transformScopes. Clone the blocks in the scope (excluding the
1751// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1752// at the exit block.
1753void CHR::cloneScopeBlocks(CHRScope *Scope,
1754 BasicBlock *PreEntryBlock,
1755 BasicBlock *ExitBlock,
1756 Region *LastRegion,
1757 ValueToValueMapTy &VMap) {
1758 // Clone all the blocks. The original blocks will be the hot-path
1759 // CHR-optimized code and the cloned blocks will be the original unoptimized
1760 // code. This is so that the block pointers from the
1761 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1762 // which CHR should apply to.
1764 for (RegInfo &RI : Scope->RegInfos)
1765 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1766 // sub-Scopes.
1767 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1768 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1769 NewBlocks.push_back(NewBB);
1770 VMap[BB] = NewBB;
1771
1772 // Unreachable predecessors will not be cloned and will not have an edge
1773 // to the cloned block. As such, also remove them from any phi nodes.
1774 for (PHINode &PN : make_early_inc_range(NewBB->phis()))
1775 PN.removeIncomingValueIf([&](unsigned Idx) {
1776 return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
1777 });
1778 }
1779
1780 // Place the cloned blocks right after the original blocks (right before the
1781 // exit block of.)
1782 if (ExitBlock)
1783 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
1784 F.end());
1785
1786 // Update the cloned blocks/instructions to refer to themselves.
1787 for (BasicBlock *NewBB : NewBlocks)
1788 for (Instruction &I : *NewBB)
1789 RemapInstruction(&I, VMap,
1791
1792 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1793 // the top-level region but we don't need to add PHIs. The trivial PHIs
1794 // inserted above will be updated here.
1795 if (ExitBlock)
1796 for (PHINode &PN : ExitBlock->phis())
1797 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1798 ++I) {
1799 BasicBlock *Pred = PN.getIncomingBlock(I);
1800 if (LastRegion->contains(Pred)) {
1801 Value *V = PN.getIncomingValue(I);
1802 auto It = VMap.find(V);
1803 if (It != VMap.end()) V = It->second;
1804 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1805 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1806 }
1807 }
1808}
1809
1810// A helper for transformScope. Replace the old (placeholder) branch with the
1811// new (merged) conditional branch.
1812BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1813 BasicBlock *EntryBlock,
1814 BasicBlock *NewEntryBlock,
1815 ValueToValueMapTy &VMap) {
1816 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1817 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1818 "SplitBlock did not work correctly!");
1819 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1820 "NewEntryBlock's only pred must be EntryBlock");
1821 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1822 "NewEntryBlock must have been copied");
1823 OldBR->dropAllReferences();
1824 OldBR->eraseFromParent();
1825 // The true predicate is a placeholder. It will be replaced later in
1826 // fixupBranchesAndSelects().
1827 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1828 cast<BasicBlock>(VMap[NewEntryBlock]),
1829 ConstantInt::getTrue(F.getContext()));
1830 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
1831 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1832 "NewEntryBlock's only pred must be EntryBlock");
1833 return NewBR;
1834}
1835
1836// A helper for transformScopes. Create the combined branch condition and
1837// constant-fold the branches/selects in the hot path.
1838void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1839 BasicBlock *PreEntryBlock,
1840 BranchInst *MergedBR,
1842 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1843 BranchProbability CHRBranchBias(1, 1);
1844 uint64_t NumCHRedBranches = 0;
1845 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1846 for (RegInfo &RI : Scope->CHRRegions) {
1847 Region *R = RI.R;
1848 if (RI.HasBranch) {
1849 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1850 ++NumCHRedBranches;
1851 }
1852 for (SelectInst *SI : RI.Selects) {
1853 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1854 ++NumCHRedBranches;
1855 }
1856 }
1857 assert(NumCHRedBranches > 0);
1858 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1859 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1860 ORE.emit([&]() {
1862 "CHR",
1863 // Refer to the hot (original) path
1864 MergedBR->getSuccessor(0)->getTerminator())
1865 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1866 << " branches or selects";
1867 });
1868 MergedBR->setCondition(MergedCondition);
1869 uint32_t Weights[] = {
1870 static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1871 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1872 };
1873 setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false);
1874 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1875 << "\n");
1876}
1877
1878// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1879// and constant-fold a branch in the hot path.
1880void CHR::fixupBranch(Region *R, CHRScope *Scope,
1881 IRBuilder<> &IRB,
1882 Value *&MergedCondition,
1883 BranchProbability &CHRBranchBias) {
1884 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1885 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1886 "Must be truthy or falsy");
1887 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1888 assert(BranchBiasMap.contains(R) && "Must be in the bias map");
1889 BranchProbability Bias = BranchBiasMap[R];
1890 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1891 // Take the min.
1892 if (CHRBranchBias > Bias)
1893 CHRBranchBias = Bias;
1894 BasicBlock *IfThen = BI->getSuccessor(1);
1895 BasicBlock *IfElse = BI->getSuccessor(0);
1896 BasicBlock *RegionExitBlock = R->getExit();
1897 assert(RegionExitBlock && "Null ExitBlock");
1898 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1899 IfThen != IfElse && "Invariant from findScopes");
1900 if (IfThen == RegionExitBlock) {
1901 // Swap them so that IfThen means going into it and IfElse means skipping
1902 // it.
1903 std::swap(IfThen, IfElse);
1904 }
1905 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1906 << " IfElse " << IfElse->getName() << "\n");
1907 Value *Cond = BI->getCondition();
1908 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1909 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1910 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1911 MergedCondition);
1912 // Constant-fold the branch at ClonedEntryBlock.
1913 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1914 "The successor shouldn't change");
1915 Value *NewCondition = ConditionTrue ?
1916 ConstantInt::getTrue(F.getContext()) :
1917 ConstantInt::getFalse(F.getContext());
1918 BI->setCondition(NewCondition);
1919}
1920
1921// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1922// and constant-fold a select in the hot path.
1923void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1924 IRBuilder<> &IRB,
1925 Value *&MergedCondition,
1926 BranchProbability &CHRBranchBias) {
1927 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1928 assert((IsTrueBiased ||
1929 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1930 assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
1931 BranchProbability Bias = SelectBiasMap[SI];
1932 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1933 // Take the min.
1934 if (CHRBranchBias > Bias)
1935 CHRBranchBias = Bias;
1936 Value *Cond = SI->getCondition();
1937 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1938 MergedCondition);
1939 Value *NewCondition = IsTrueBiased ?
1940 ConstantInt::getTrue(F.getContext()) :
1941 ConstantInt::getFalse(F.getContext());
1942 SI->setCondition(NewCondition);
1943}
1944
1945// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1946// condition.
1947void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1948 Instruction *BranchOrSelect, CHRScope *Scope,
1949 IRBuilder<> &IRB, Value *&MergedCondition) {
1950 if (!IsTrueBiased) {
1951 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1952 // branch, negate the icmp predicate and swap the branch targets and avoid
1953 // inserting an Xor to negate Cond.
1954 auto *ICmp = dyn_cast<ICmpInst>(Cond);
1955 if (!ICmp ||
1956 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
1957 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
1958 }
1959
1960 // Freeze potentially poisonous conditions.
1962 Cond = IRB.CreateFreeze(Cond);
1963
1964 // Use logical and to avoid propagating poison from later conditions.
1965 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
1966}
1967
1968void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1969 unsigned I = 0;
1970 DenseSet<PHINode *> TrivialPHIs;
1971 for (CHRScope *Scope : CHRScopes) {
1972 transformScopes(Scope, TrivialPHIs);
1973 CHR_DEBUG(
1974 std::ostringstream oss;
1975 oss << " after transformScopes " << I++;
1976 dumpIR(F, oss.str().c_str(), nullptr));
1977 (void)I;
1978 }
1979}
1980
1981static void LLVM_ATTRIBUTE_UNUSED
1982dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1983 dbgs() << Label << " " << Scopes.size() << "\n";
1984 for (CHRScope *Scope : Scopes) {
1985 dbgs() << *Scope << "\n";
1986 }
1987}
1988
1989bool CHR::run() {
1990 if (!shouldApply(F, PSI))
1991 return false;
1992
1993 CHR_DEBUG(dumpIR(F, "before", nullptr));
1994
1995 bool Changed = false;
1996 {
1997 CHR_DEBUG(
1998 dbgs() << "RegionInfo:\n";
1999 RI.print(dbgs()));
2000
2001 // Recursively traverse the region tree and find regions that have biased
2002 // branches and/or selects and create scopes.
2004 findScopes(AllScopes);
2005 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2006
2007 // Split the scopes if 1) the conditional values of the biased
2008 // branches/selects of the inner/lower scope can't be hoisted up to the
2009 // outermost/uppermost scope entry, or 2) the condition values of the biased
2010 // branches/selects in a scope (including subscopes) don't share at least
2011 // one common value.
2012 SmallVector<CHRScope *, 8> SplitScopes;
2013 splitScopes(AllScopes, SplitScopes);
2014 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2015
2016 // After splitting, set the biased regions and selects of a scope (a tree
2017 // root) that include those of the subscopes.
2018 classifyBiasedScopes(SplitScopes);
2019 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2020
2021 // Filter out the scopes that has only one biased region or select (CHR
2022 // isn't useful in such a case).
2023 SmallVector<CHRScope *, 8> FilteredScopes;
2024 filterScopes(SplitScopes, FilteredScopes);
2025 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2026
2027 // Set the regions to be CHR'ed and their hoist stops for each scope.
2029 setCHRRegions(FilteredScopes, SetScopes);
2030 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2031
2032 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2033 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2034 // to the hot path, rather than both hot and cold paths.
2035 SmallVector<CHRScope *, 8> SortedScopes;
2036 sortScopes(SetScopes, SortedScopes);
2037 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2038
2039 CHR_DEBUG(
2040 dbgs() << "RegionInfo:\n";
2041 RI.print(dbgs()));
2042
2043 // Apply the CHR transformation.
2044 if (!SortedScopes.empty()) {
2045 transformScopes(SortedScopes);
2046 Changed = true;
2047 }
2048 }
2049
2050 if (Changed) {
2051 CHR_DEBUG(dumpIR(F, "after", &Stats));
2052 ORE.emit([&]() {
2053 return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2054 << ore::NV("Function", &F) << " "
2055 << "Reduced the number of branches in hot paths by "
2056 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2057 << " (static) and "
2058 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2059 << " (weighted by PGO count)";
2060 });
2061 }
2062
2063 return Changed;
2064}
2065
2066namespace llvm {
2067
2070}
2071
2073 Function &F,
2076 auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2077 // If there is no profile summary, we should not do CHR.
2078 if (!PPSI || !PPSI->hasProfileSummary())
2079 return PreservedAnalyses::all();
2080 auto &PSI = *PPSI;
2081 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2082 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2083 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2085 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2086 if (!Changed)
2087 return PreservedAnalyses::all();
2088 return PreservedAnalyses::none();
2089}
2090
2091} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:298
static void insertTrivialPHIs(CHRScope *Scope, BasicBlock *EntryBlock, BasicBlock *ExitBlock, DenseSet< PHINode * > &TrivialPHIs)
static raw_ostream LLVM_ATTRIBUTE_UNUSED & operator<<(raw_ostream &OS, const CHRStats &Stats)
static cl::opt< bool > DisableCHR("disable-chr", cl::init(false), cl::Hidden, cl::desc("Disable CHR for all functions"))
static StringSet CHRFunctions
static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2)
static bool checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables, DenseSet< Instruction * > *HoistStops, DenseMap< Instruction *, bool > &Visited)
static cl::opt< unsigned > CHRMergeThreshold("chr-merge-threshold", cl::init(2), cl::Hidden, cl::desc("CHR merges a group of N branches/selects where N >= this value"))
static bool isHoistable(Instruction *I, DominatorTree &DT)
static cl::opt< unsigned > CHRDupThreshsold("chr-dup-threshold", cl::init(3), cl::Hidden, cl::desc("Max number of duplications by CHR for a region"))
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static bool checkBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, S &TrueSet, S &FalseSet, M &BiasMap)
static StringSet CHRModules
static bool checkBiasedSelect(SelectInst *SI, Region *R, DenseSet< SelectInst * > &TrueBiasedSelectsGlobal, DenseSet< SelectInst * > &FalseBiasedSelectsGlobal, DenseMap< SelectInst *, BranchProbability > &SelectBiasMap)
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, HoistStopMapTy &HoistStopMap, DenseSet< Instruction * > &HoistedSet, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope)
static bool shouldApply(Function &F, ProfileSummaryInfo &PSI)
#define CHR_DEBUG(X)
static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, CHRStats *Stats)
static void getSelectsInScope(CHRScope *Scope, DenseSet< Instruction * > &Output)
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool isHoistableInstructionType(Instruction *I)
static DenseSet< Value * > getCHRConditionValuesForRegion(RegInfo &RI)
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, Instruction *ExcludedUser, CHRScope *Scope)
static Instruction * getBranchInsertPoint(RegInfo &RI)
static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(CHRScope *Scope, BasicBlock *PreEntryBlock)
static cl::opt< std::string > CHRModuleList("chr-module-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of modules to apply CHR to"))
#define DEBUG_TYPE
static void LLVM_ATTRIBUTE_UNUSED assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope)
static cl::opt< double > CHRBiasThreshold("chr-bias-threshold", cl::init(0.99), cl::Hidden, cl::desc("CHR considers a branch bias greater than this ratio as biased"))
static void LLVM_ATTRIBUTE_UNUSED dumpScopes(SmallVectorImpl< CHRScope * > &Scopes, const char *Label)
static BranchProbability getCHRBiasThreshold()
static cl::opt< std::string > CHRFunctionList("chr-function-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of functions to apply CHR to"))
static cl::opt< bool > ForceCHR("force-chr", cl::init(false), cl::Hidden, cl::desc("Apply CHR for all functions"))
static bool checkBiasedBranch(BranchInst *BI, Region *R, DenseSet< Region * > &TrueBiasedRegionsGlobal, DenseSet< Region * > &FalseBiasedRegionsGlobal, DenseMap< Region *, BranchProbability > &BranchBiasMap)
static const std::set< Value * > & getBaseValues(Value *V, DominatorTree &DT, DenseMap< Value *, std::set< Value * > > &Visited)
static bool extractBranchProbabilities(Instruction *I, BranchProbability &TrueProb, BranchProbability &FalseProb)
static void parseCHRFilterFiles()
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
raw_pwrite_stream & OS
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
early Early Tail Duplication
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:437
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:770
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:791
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:767
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:334
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
Class to represent profile counts.
Definition: Function.h:297
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1725
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1599
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
The optimization diagnostic interface.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:716
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionEntryHot(const FuncT *F) const
Returns true if F has hot function entry.
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition: RegionInfo.h:357
void replaceEntryRecursive(BlockT *NewEntry)
Recursively replace the entry basic block of the region.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:362
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition: RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition: RegionInfo.h:965
This class represents the LLVM 'select' instruction.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:269
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:280
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:710
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:25
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:39
void dropAllReferences()
Drop all references to operands.
Definition: User.h:349
iterator find(const KeyT &Val)
Definition: ValueMap.h:160
iterator end()
Definition: ValueMap.h:139
LLVM Value Representation.
Definition: Value.h:75
iterator_range< user_iterator > users()
Definition: Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
void insert_range(Range &&R)
Definition: DenseSet.h:222
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:169
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:174
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:862
@ Exit
Definition: COFF.h:863
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition: Path.cpp:456
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
auto pred_size(const MachineBasicBlock *BB)
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Sub
Subtraction of integers.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:289
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1854
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
auto predecessors(const MachineBasicBlock *BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858