LLVM 21.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
148
149using namespace llvm;
150
152 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
153 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
154 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
155 "Heuristics-based inliner version"),
156 clEnumValN(InliningAdvisorMode::Development, "development",
157 "Use development mode (runtime-loadable model)"),
158 clEnumValN(InliningAdvisorMode::Release, "release",
159 "Use release mode (AOT-compiled model)")));
160
161/// Flag to enable inline deferral during PGO.
162static cl::opt<bool>
163 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
165 cl::desc("Enable inline deferral during PGO"));
166
/// Flag to run a single module-level inliner pass ("module inliner") instead
/// of the CGSCC inliner pipeline; off by default.
167static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
168 cl::init(false), cl::Hidden,
169 cl::desc("Enable module inliner"));
170
172 "mandatory-inlining-first", cl::init(false), cl::Hidden,
173 cl::desc("Perform mandatory inlinings module-wide, before performing "
174 "inlining"));
175
177 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
178 cl::desc("Eagerly invalidate more analyses in default pipelines"));
179
181 "enable-merge-functions", cl::init(false), cl::Hidden,
182 cl::desc("Enable function merging as part of the optimization pipeline"));
183
185 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
186 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
187
189 "enable-global-analyses", cl::init(true), cl::Hidden,
190 cl::desc("Enable inter-procedural analyses"));
191
/// Flag to run the partial inlining pass; off by default.
192static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
193 cl::init(false), cl::Hidden,
194 cl::desc("Run Partial inlining pass"));
195
197 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
198 cl::desc("Run cleanup optimization passes after vectorization"));
199
/// Flag to use NewGVN in place of GVN for redundancy elimination in the
/// function simplification pipeline; off by default.
200static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
201 cl::desc("Run the NewGVN pass"));
202
/// Flag to enable the LoopInterchange pass; off by default.
203static cl::opt<bool>
204 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
205 cl::desc("Enable the LoopInterchange Pass"));
206
/// Flag to enable the Unroll-And-Jam pass; off by default.
207static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
208 cl::init(false), cl::Hidden,
209 cl::desc("Enable Unroll And Jam Pass"));
210
211static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
213 cl::desc("Enable the LoopFlatten Pass"));
214
215// Experimentally allow loop header duplication. This should allow for better
216// optimization at Oz, since loop-idiom recognition can then recognize things
217// like memcpy. If this ends up being useful for many targets, we should drop
218// this flag and make a code generation option that can be controlled
219// independent of the opt level and exposed through the frontend.
221 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
222 cl::desc("Enable loop header duplication at any optimization level"));
223
224static cl::opt<bool>
225 EnableDFAJumpThreading("enable-dfa-jump-thread",
226 cl::desc("Enable DFA jump threading"),
227 cl::init(false), cl::Hidden);
228
229static cl::opt<bool>
230 EnableHotColdSplit("hot-cold-split",
231 cl::desc("Enable hot-cold splitting pass"));
232
233static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
235 cl::desc("Enable ir outliner pass"));
236
/// Flag to disable the pre-instrumentation inline pass; off by default
/// (i.e. the pre-inliner runs).
237static cl::opt<bool>
238 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
239 cl::desc("Disable pre-instrumentation inliner"));
240
242 "preinline-threshold", cl::Hidden, cl::init(75),
243 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244 "(default = 75)"));
245
/// Flag to enable the GVN hoisting pass. No explicit cl::init, so it
/// defaults to a value-initialized (false) bool, matching the desc text.
246static cl::opt<bool>
247 EnableGVNHoist("enable-gvn-hoist",
248 cl::desc("Enable the GVN hoisting pass (default = off)"));
249
/// Flag to enable the GVN sinking pass. No explicit cl::init, so it
/// defaults to a value-initialized (false) bool, matching the desc text.
250static cl::opt<bool>
251 EnableGVNSink("enable-gvn-sink",
252 cl::desc("Enable the GVN sinking pass (default = off)"));
253
255 "enable-jump-table-to-switch",
256 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257
258// This option is used in simplifying testing SampleFDO optimizations for
259// profile loading.
/// Flag to enable control height reduction (CHR); on by default.
260static cl::opt<bool>
261 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
262 cl::desc("Enable control height reduction optimization (CHR)"));
263
265 "flattened-profile-used", cl::init(false), cl::Hidden,
266 cl::desc("Indicate the sample profile being used is flattened, i.e., "
267 "no inline hierarchy exists in the profile"));
268
270 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
271 cl::desc("Enable order file instrumentation (default = off)"));
272
/// Flag to enable lowering of the matrix intrinsics; off by default.
273static cl::opt<bool>
274 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
275 cl::desc("Enable lowering of the matrix intrinsics"));
276
278 "enable-constraint-elimination", cl::init(true), cl::Hidden,
279 cl::desc(
280 "Enable pass to eliminate conditions based on linear constraints"));
281
283 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
284 cl::desc("Enable the attributor inter-procedural deduction pass"),
285 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
286 "enable all attributor runs"),
287 clEnumValN(AttributorRunOption::MODULE, "module",
288 "enable module-wide attributor runs"),
289 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
290 "enable call graph SCC attributor runs"),
291 clEnumValN(AttributorRunOption::NONE, "none",
292 "disable attributor runs")));
293
295 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
296 cl::desc("Enable profile instrumentation sampling (default = off)"));
298 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
299 cl::desc("Enable the experimental Loop Versioning LICM pass"));
300
302 "instrument-cold-function-only-path", cl::init(""),
303 cl::desc("File path for cold function only instrumentation(requires use "
304 "with --pgo-instrument-cold-function-only)"),
305 cl::Hidden);
306
309
310namespace llvm {
312} // namespace llvm
313
315 LoopInterleaving = true;
316 LoopVectorization = true;
317 SLPVectorization = false;
318 LoopUnrolling = true;
323 CallGraphProfile = true;
324 UnifiedLTO = false;
326 InlinerThreshold = -1;
328}
329
330namespace llvm {
332} // namespace llvm
333
335 OptimizationLevel Level) {
336 for (auto &C : PeepholeEPCallbacks)
337 C(FPM, Level);
338}
341 for (auto &C : LateLoopOptimizationsEPCallbacks)
342 C(LPM, Level);
343}
345 OptimizationLevel Level) {
346 for (auto &C : LoopOptimizerEndEPCallbacks)
347 C(LPM, Level);
348}
351 for (auto &C : ScalarOptimizerLateEPCallbacks)
352 C(FPM, Level);
353}
355 OptimizationLevel Level) {
356 for (auto &C : CGSCCOptimizerLateEPCallbacks)
357 C(CGPM, Level);
358}
360 OptimizationLevel Level) {
361 for (auto &C : VectorizerStartEPCallbacks)
362 C(FPM, Level);
363}
365 OptimizationLevel Level) {
366 for (auto &C : VectorizerEndEPCallbacks)
367 C(FPM, Level);
368}
370 OptimizationLevel Level,
372 for (auto &C : OptimizerEarlyEPCallbacks)
373 C(MPM, Level, Phase);
374}
376 OptimizationLevel Level,
378 for (auto &C : OptimizerLastEPCallbacks)
379 C(MPM, Level, Phase);
380}
383 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
384 C(MPM, Level);
385}
388 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
389 C(MPM, Level);
390}
392 OptimizationLevel Level) {
393 for (auto &C : PipelineStartEPCallbacks)
394 C(MPM, Level);
395}
398 for (auto &C : PipelineEarlySimplificationEPCallbacks)
399 C(MPM, Level, Phase);
400}
401
402// Helper to add AnnotationRemarksPass.
405}
406
407// Helper to check if the current compilation phase is preparing for LTO
411}
412
413// Helper to wrap conditionally Coro passes.
415 // TODO: Skip passes according to Phase.
416 ModulePassManager CoroPM;
417 CoroPM.addPass(CoroEarlyPass());
418 CGSCCPassManager CGPM;
419 CGPM.addPass(CoroSplitPass());
420 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
421 CoroPM.addPass(CoroCleanupPass());
422 CoroPM.addPass(GlobalDCEPass());
423 return CoroConditionalWrapper(std::move(CoroPM));
424}
425
426// TODO: Investigate the cost/benefit of tail call elimination on debugging.
428PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
430
432
435
436 // Form SSA out of local memory accesses after breaking apart aggregates into
437 // scalars.
439
440 // Catch trivial redundancies
441 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
442
443 // Hoisting of scalars and load expressions.
444 FPM.addPass(
445 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
447
449
450 invokePeepholeEPCallbacks(FPM, Level);
451
452 FPM.addPass(
453 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
454
455 // Form canonically associated expression trees, and simplify the trees using
456 // basic mathematical properties. For example, this will form (nearly)
457 // minimal multiplication trees.
459
460 // Add the primary loop simplification pipeline.
461 // FIXME: Currently this is split into two loop pass pipelines because we run
462 // some function passes in between them. These can and should be removed
463 // and/or replaced by scheduling the loop pass equivalents in the correct
464 // positions. But those equivalent passes aren't powerful enough yet.
465 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
466 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
467 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
468 // `LoopInstSimplify`.
469 LoopPassManager LPM1, LPM2;
470
471 // Simplify the loop body. We do this initially to clean up after other loop
472 // passes run, either when iterating on a loop or on inner loops with
473 // implications on the outer loop.
476
477 // Try to remove as much code from the loop header as possible,
478 // to reduce amount of IR that will have to be duplicated. However,
479 // do not perform speculative hoisting the first time as LICM
480 // will destroy metadata that may not need to be destroyed if run
481 // after loop rotation.
482 // TODO: Investigate promotion cap for O1.
484 /*AllowSpeculation=*/false));
485
486 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
488 // TODO: Investigate promotion cap for O1.
490 /*AllowSpeculation=*/true));
493 LPM1.addPass(LoopFlattenPass());
494
497
499
501
502 if (PTO.LoopInterchange)
504
505 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
506 // because it changes the IR and makes profile annotation in the back compile
507 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
508 // attributes so we need to make sure and allow the full unroll pass to pay
509 // attention to it.
510 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
511 PGOOpt->Action != PGOOptions::SampleUse)
512 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
513 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
515
517
518 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
519 /*UseMemorySSA=*/true,
520 /*UseBlockFrequencyInfo=*/true));
521 FPM.addPass(
522 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
524 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
525 // *All* loop passes must preserve it, in order to be able to use it.
526 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
527 /*UseMemorySSA=*/false,
528 /*UseBlockFrequencyInfo=*/false));
529
530 // Delete small array after loop unroll.
532
533 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
534 FPM.addPass(MemCpyOptPass());
535
536 // Sparse conditional constant propagation.
537 // FIXME: It isn't clear why we do this *after* loop passes rather than
538 // before...
539 FPM.addPass(SCCPPass());
540
541 // Delete dead bit computations (instcombine runs after to fold away the dead
542 // computations, and then ADCE will run later to exploit any new DCE
543 // opportunities that creates).
544 FPM.addPass(BDCEPass());
545
546 // Run instcombine after redundancy and dead bit elimination to exploit
547 // opportunities opened up by them.
549 invokePeepholeEPCallbacks(FPM, Level);
550
551 FPM.addPass(CoroElidePass());
552
554
555 // Finally, do an expensive DCE pass to catch all the dead code exposed by
556 // the simplifications and basic cleanup after all the simplifications.
557 // TODO: Investigate if this is too expensive.
558 FPM.addPass(ADCEPass());
559 FPM.addPass(
560 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
562 invokePeepholeEPCallbacks(FPM, Level);
563
564 return FPM;
565}
566
570 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
571
572 // The O1 pipeline has a separate pipeline creation function to simplify
573 // construction readability.
574 if (Level.getSpeedupLevel() == 1)
575 return buildO1FunctionSimplificationPipeline(Level, Phase);
576
578
581
582 // Form SSA out of local memory accesses after breaking apart aggregates into
583 // scalars.
585
586 // Catch trivial redundancies
587 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
590
591 // Hoisting of scalars and load expressions.
592 if (EnableGVNHoist)
593 FPM.addPass(GVNHoistPass());
594
595 // Global value numbering based sinking.
596 if (EnableGVNSink) {
597 FPM.addPass(GVNSinkPass());
598 FPM.addPass(
599 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
600 }
601
602 // Speculative execution if the target has divergent branches; otherwise nop.
603 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
604
605 // Optimize based on known information about branches, and cleanup afterward.
608
609 // Jump table to switch conversion.
612
613 FPM.addPass(
614 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
617
618 if (!Level.isOptimizingForSize())
620
621 invokePeepholeEPCallbacks(FPM, Level);
622
623 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
624 // using the size value profile. Don't perform this when optimizing for size.
625 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
626 !Level.isOptimizingForSize())
628
630 FPM.addPass(
631 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
632
633 // Form canonically associated expression trees, and simplify the trees using
634 // basic mathematical properties. For example, this will form (nearly)
635 // minimal multiplication trees.
637
640
641 // Add the primary loop simplification pipeline.
642 // FIXME: Currently this is split into two loop pass pipelines because we run
643 // some function passes in between them. These can and should be removed
644 // and/or replaced by scheduling the loop pass equivalents in the correct
645 // positions. But those equivalent passes aren't powerful enough yet.
646 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
647 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
648 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
649 // `LoopInstSimplify`.
650 LoopPassManager LPM1, LPM2;
651
652 // Simplify the loop body. We do this initially to clean up after other loop
653 // passes run, either when iterating on a loop or on inner loops with
654 // implications on the outer loop.
657
658 // Try to remove as much code from the loop header as possible,
659 // to reduce amount of IR that will have to be duplicated. However,
660 // do not perform speculative hoisting the first time as LICM
661 // will destroy metadata that may not need to be destroyed if run
662 // after loop rotation.
663 // TODO: Investigate promotion cap for O1.
665 /*AllowSpeculation=*/false));
666
667 // Disable header duplication in loop rotation at -Oz.
669 Level != OptimizationLevel::Oz,
671 // TODO: Investigate promotion cap for O1.
673 /*AllowSpeculation=*/true));
674 LPM1.addPass(
675 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
677 LPM1.addPass(LoopFlattenPass());
678
681
682 {
684 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
686 LPM2.addPass(std::move(ExtraPasses));
687 }
688
690
692
693 if (PTO.LoopInterchange)
695
696 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
697 // because it changes the IR and makes profile annotation in the back compile
698 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
699 // attributes so we need to make sure and allow the full unroll pass to pay
700 // attention to it.
701 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
702 PGOOpt->Action != PGOOptions::SampleUse)
703 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
704 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
706
708
709 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
710 /*UseMemorySSA=*/true,
711 /*UseBlockFrequencyInfo=*/true));
712 FPM.addPass(
713 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
715 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
716 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
717 // *All* loop passes must preserve it, in order to be able to use it.
718 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
719 /*UseMemorySSA=*/false,
720 /*UseBlockFrequencyInfo=*/false));
721
722 // Delete small array after loop unroll.
724
725 // Try vectorization/scalarization transforms that are both improvements
726 // themselves and can allow further folds with GVN and InstCombine.
727 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
728
729 // Eliminate redundancies.
731 if (RunNewGVN)
732 FPM.addPass(NewGVNPass());
733 else
734 FPM.addPass(GVNPass());
735
736 // Sparse conditional constant propagation.
737 // FIXME: It isn't clear why we do this *after* loop passes rather than
738 // before...
739 FPM.addPass(SCCPPass());
740
741 // Delete dead bit computations (instcombine runs after to fold away the dead
742 // computations, and then ADCE will run later to exploit any new DCE
743 // opportunities that creates).
744 FPM.addPass(BDCEPass());
745
746 // Run instcombine after redundancy and dead bit elimination to exploit
747 // opportunities opened up by them.
749 invokePeepholeEPCallbacks(FPM, Level);
750
751 // Re-consider control flow based optimizations after redundancy elimination,
752 // redo DCE, etc.
755
758
759 // Finally, do an expensive DCE pass to catch all the dead code exposed by
760 // the simplifications and basic cleanup after all the simplifications.
761 // TODO: Investigate if this is too expensive.
762 FPM.addPass(ADCEPass());
763
764 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
765 FPM.addPass(MemCpyOptPass());
766
767 FPM.addPass(DSEPass());
769
772 /*AllowSpeculation=*/true),
773 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
774
775 FPM.addPass(CoroElidePass());
776
778
780 .convertSwitchRangeToICmp(true)
781 .hoistCommonInsts(true)
782 .sinkCommonInsts(true)));
784 invokePeepholeEPCallbacks(FPM, Level);
785
786 return FPM;
787}
788
789void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
792}
793
794void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
795 OptimizationLevel Level,
796 ThinOrFullLTOPhase LTOPhase) {
797 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
799 return;
800 InlineParams IP;
801
803
804 // FIXME: The hint threshold has the same value used by the regular inliner
805 // when not optimizing for size. This should probably be lowered after
806 // performance testing.
807 // FIXME: this comment is cargo culted from the old pass manager; revisit.
808 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
810 IP, /* MandatoryFirst */ true,
812 CGSCCPassManager &CGPipeline = MIWP.getPM();
813
816 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
817 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
818 true))); // Merge & remove basic blocks.
819 FPM.addPass(InstCombinePass()); // Combine silly sequences.
820 invokePeepholeEPCallbacks(FPM, Level);
821
822 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
823 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
824
825 MPM.addPass(std::move(MIWP));
826
827 // Delete anything that is now dead to make sure that we don't instrument
828 // dead code. Instrumentation can end up keeping dead code around and
829 // dramatically increase code size.
830 MPM.addPass(GlobalDCEPass());
831}
832
833void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
834 OptimizationLevel Level) {
836 // Disable header duplication in loop rotation at -Oz.
840 Level != OptimizationLevel::Oz),
841 /*UseMemorySSA=*/false,
842 /*UseBlockFrequencyInfo=*/false),
844 }
845}
846
847void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
848 OptimizationLevel Level, bool RunProfileGen,
849 bool IsCS, bool AtomicCounterUpdate,
850 std::string ProfileFile,
851 std::string ProfileRemappingFile,
853 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
854
855 if (!RunProfileGen) {
856 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
857 MPM.addPass(
858 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
859 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
860 // RequireAnalysisPass for PSI before subsequent non-module passes.
862 return;
863 }
864
865 // Perform PGO instrumentation.
868
869 addPostPGOLoopRotation(MPM, Level);
870 // Add the profile lowering pass.
872 if (!ProfileFile.empty())
873 Options.InstrProfileOutput = ProfileFile;
874 // Do counter promotion at Level greater than O0.
875 Options.DoCounterPromotion = true;
876 Options.UseBFIInPromotion = IsCS;
877 if (EnableSampledInstr) {
878 Options.Sampling = true;
879 // With sampling, there is little benefit to enabling counter promotion.
880 // But note that sampling does work with counter promotion.
881 Options.DoCounterPromotion = false;
882 }
883 Options.Atomic = AtomicCounterUpdate;
885}
886
888 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
889 bool AtomicCounterUpdate, std::string ProfileFile,
890 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
891 if (!RunProfileGen) {
892 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
893 MPM.addPass(
894 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
895 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
896 // RequireAnalysisPass for PSI before subsequent non-module passes.
898 return;
899 }
900
901 // Perform PGO instrumentation.
904 // Add the profile lowering pass.
906 if (!ProfileFile.empty())
907 Options.InstrProfileOutput = ProfileFile;
908 // Do not do counter promotion at O0.
909 Options.DoCounterPromotion = false;
910 Options.UseBFIInPromotion = IsCS;
911 Options.Atomic = AtomicCounterUpdate;
913}
914
916 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
917}
918
922 InlineParams IP;
923 if (PTO.InlinerThreshold == -1)
924 IP = getInlineParamsFromOptLevel(Level);
925 else
927 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
928 // disable hot callsite inline (as much as possible [1]) because it makes
929 // profile annotation in the backend inaccurate.
930 //
931 // [1] Note the cost of a function could be below zero due to erased
932 // prologue / epilogue.
933 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
934 PGOOpt->Action == PGOOptions::SampleUse)
936
937 if (PGOOpt)
939
943
944 // Require the GlobalsAA analysis for the module so we can query it within
945 // the CGSCC pipeline.
948 // Invalidate AAManager so it can be recreated and pick up the newly
949 // available GlobalsAA.
950 MIWP.addModulePass(
952 }
953
954 // Require the ProfileSummaryAnalysis for the module so we can query it within
955 // the inliner pass.
957
958 // Now begin the main postorder CGSCC pipeline.
959 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
960 // manager and trying to emulate its precise behavior. Much of this doesn't
961 // make a lot of sense and we should revisit the core CGSCC structure.
962 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
963
964 // Note: historically, the PruneEH pass was run first to deduce nounwind and
965 // generally clean up exception handling overhead. It isn't clear this is
966 // valuable as the inliner doesn't currently care whether it is inlining an
967 // invoke or a call.
968
970 MainCGPipeline.addPass(AttributorCGSCCPass());
971
972 // Deduce function attributes. We do another run of this after the function
973 // simplification pipeline, so this only needs to run when it could affect the
974 // function simplification pipeline, which is only the case with recursive
975 // functions.
976 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
977
978 // When at O3 add argument promotion to the pass pipeline.
979 // FIXME: It isn't at all clear why this should be limited to O3.
980 if (Level == OptimizationLevel::O3)
981 MainCGPipeline.addPass(ArgumentPromotionPass());
982
983 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
984 // there are no OpenMP runtime calls present in the module.
985 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
986 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
987
988 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
989
990 // Add the core function simplification pipeline nested inside the
991 // CGSCC walk.
994 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
995
996 // Finally, deduce any function attributes based on the fully simplified
997 // function.
998 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
999
1000 // Mark that the function is fully simplified and that it shouldn't be
1001 // simplified again if we somehow revisit it due to CGSCC mutations unless
1002 // it's been modified since.
1005
1007 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1008 MainCGPipeline.addPass(CoroAnnotationElidePass());
1009 }
1010
1011 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1012 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1014
1015 return MIWP;
1016}
1017
1022
1024 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
1025 // disable hot callsite inline (as much as possible [1]) because it makes
1026 // profile annotation in the backend inaccurate.
1027 //
1028 // [1] Note the cost of a function could be below zero due to erased
1029 // prologue / epilogue.
1030 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
1031 PGOOpt->Action == PGOOptions::SampleUse)
1032 IP.HotCallSiteThreshold = 0;
1033
1034 if (PGOOpt)
1036
1037 // The inline deferral logic is used to avoid losing some
1038 // inlining chance in future. It is helpful in SCC inliner, in which
1039 // inlining is processed in bottom-up order.
1040 // While in module inliner, the inlining order is a priority-based order
1041 // by default. The inline deferral is unnecessary there. So we disable the
1042 // inline deferral logic in module inliner.
1043 IP.EnableDeferral = false;
1044
1047 MPM.addPass(GlobalOptPass());
1048 MPM.addPass(GlobalDCEPass());
1050 }
1051
1055
1059 MPM.addPass(
1061 }
1062
1063 return MPM;
1064}
1065
1069 assert(Level != OptimizationLevel::O0 &&
1070 "Should not be used for O0 pipeline");
1071
1073 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1074
1076
1077 // Place pseudo probe instrumentation as the first pass of the pipeline to
1078 // minimize the impact of optimization changes.
1079 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1082
1083 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1084
1085 // In ThinLTO mode, when flattened profile is used, all the available
1086 // profile information will be annotated in PreLink phase so there is
1087 // no need to load the profile again in PostLink.
1088 bool LoadSampleProfile =
1089 HasSampleProfile &&
1091
1092 // During the ThinLTO backend phase we perform early indirect call promotion
1093 // here, before globalopt. Otherwise imported available_externally functions
1094 // look unreferenced and are removed. If we are going to load the sample
1095 // profile then defer until later.
1096 // TODO: See if we can move later and consolidate with the location where
1097 // we perform ICP when we are loading a sample profile.
1098 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1099 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1100 // determine whether the new direct calls are annotated with prof metadata.
1101 // Ideally this should be determined from whether the IR is annotated with
1102 // sample profile, and not whether the a sample profile was provided on the
1103 // command line. E.g. for flattened profiles where we will not be reloading
1104 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1105 // provide the sample profile file.
1106 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1107 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1108
1109 // Create an early function pass manager to cleanup the output of the
1110 // frontend. Not necessary with LTO post link pipelines since the pre link
1111 // pipeline already cleaned up the frontend output.
1113 // Do basic inference of function attributes from known properties of system
1114 // libraries and other oracles.
1116 MPM.addPass(CoroEarlyPass());
1117
1118 FunctionPassManager EarlyFPM;
1119 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1120 // Lower llvm.expect to metadata before attempting transforms.
1121 // Compare/branch metadata may alter the behavior of passes like
1122 // SimplifyCFG.
1124 EarlyFPM.addPass(SimplifyCFGPass());
1126 EarlyFPM.addPass(EarlyCSEPass());
1127 if (Level == OptimizationLevel::O3)
1128 EarlyFPM.addPass(CallSiteSplittingPass());
1130 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1131 }
1132
1133 if (LoadSampleProfile) {
1134 // Annotate sample profile right after early FPM to ensure freshness of
1135 // the debug info.
1136 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1137 PGOOpt->ProfileRemappingFile, Phase));
1138 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1139 // RequireAnalysisPass for PSI before subsequent non-module passes.
1141 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1142 // for the profile annotation to be accurate in the LTO backend.
1143 if (!isLTOPreLink(Phase))
1144 // We perform early indirect call promotion here, before globalopt.
1145 // This is important for the ThinLTO backend phase because otherwise
1146 // imported available_externally functions look unreferenced and are
1147 // removed.
1148 MPM.addPass(
1149 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1150 }
1151
1152 // Try to perform OpenMP specific optimizations on the module. This is a
1153 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1155
1157 MPM.addPass(AttributorPass());
1158
1159 // Lower type metadata and the type.test intrinsic in the ThinLTO
1160 // post link pipeline after ICP. This is to enable usage of the type
1161 // tests in ICP sequences.
1163 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1165
1167
1168 // Interprocedural constant propagation now that basic cleanup has occurred
1169 // and prior to optimizing globals.
1170 // FIXME: This position in the pipeline hasn't been carefully considered in
1171 // years, it should be re-analyzed.
1172 MPM.addPass(IPSCCPPass(
1173 IPSCCPOptions(/*AllowFuncSpec=*/
1174 Level != OptimizationLevel::Os &&
1175 Level != OptimizationLevel::Oz &&
1176 !isLTOPreLink(Phase))));
1177
1178 // Attach metadata to indirect call sites indicating the set of functions
1179 // they may target at run-time. This should follow IPSCCP.
1181
1182 // Optimize globals to try and fold them into constants.
1183 MPM.addPass(GlobalOptPass());
1184
1185 // Create a small function pass pipeline to cleanup after all the global
1186 // optimizations.
1187 FunctionPassManager GlobalCleanupPM;
1188 // FIXME: Should this instead by a run of SROA?
1189 GlobalCleanupPM.addPass(PromotePass());
1190 GlobalCleanupPM.addPass(InstCombinePass());
1191 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1192 GlobalCleanupPM.addPass(
1193 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1194 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1196
1197 // We already asserted this happens in non-FullLTOPostLink earlier.
1198 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1199 const bool IsPGOPreLink = PGOOpt && IsPreLink;
1200 const bool IsPGOInstrGen =
1201 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1202 const bool IsPGOInstrUse =
1203 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1204 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1205 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1206 // enable ctx profiling from the frontend.
1208 "Enabling both instrumented PGO and contextual instrumentation is not "
1209 "supported.");
1210 // Enable contextual profiling instrumentation.
1211 const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1213 const bool IsCtxProfUse =
1215
1216 assert(
1218 "--instrument-cold-function-only-path is provided but "
1219 "--pgo-instrument-cold-function-only is not enabled");
1220 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1221 IsPGOPreLink &&
1223
1224 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1225 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1226 addPreInlinerPasses(MPM, Level, Phase);
1227
1228 // Add all the requested passes for instrumentation PGO, if requested.
1229 if (IsPGOInstrGen || IsPGOInstrUse) {
1230 addPGOInstrPasses(MPM, Level,
1231 /*RunProfileGen=*/IsPGOInstrGen,
1232 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1233 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1234 PGOOpt->FS);
1235 } else if (IsCtxProfGen || IsCtxProfUse) {
1237 // In pre-link, we just want the instrumented IR. We use the contextual
1238 // profile in the post-thinlink phase.
1239 // The instrumentation will be removed in post-thinlink after IPO.
1240 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1241 // mechanism for GUIDs.
1242 MPM.addPass(AssignGUIDPass());
1243 if (IsCtxProfUse)
1244 return MPM;
1245 addPostPGOLoopRotation(MPM, Level);
1247 } else if (IsColdFuncOnlyInstrGen) {
1248 addPGOInstrPasses(
1249 MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1250 /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
1251 /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
1252 }
1253
1254 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1255 MPM.addPass(PGOIndirectCallPromotion(false, false));
1256
1257 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1258 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1260
1261 if (IsMemprofUse)
1262 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1263
1264 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1265 PGOOpt->Action == PGOOptions::SampleUse))
1266 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1267
1268 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1269
1272 else
1273 MPM.addPass(buildInlinerPipeline(Level, Phase));
1274
1275 // Remove any dead arguments exposed by cleanups, constant folding globals,
1276 // and argument promotion.
1278
1280 MPM.addPass(CoroCleanupPass());
1281
1282 // Optimize globals now that functions are fully simplified.
1283 MPM.addPass(GlobalOptPass());
1284 MPM.addPass(GlobalDCEPass());
1285
1286 return MPM;
1287}
1288
1289/// TODO: Should LTO cause any differences to this set of passes?
/// Append the late vectorization / unrolling cleanup sequence to \p FPM.
///
/// Grounded in the pass additions visible below: loop-unrolling cleanup
/// (the full-LTO flavor runs first when \p IsFullLTO), InstCombine cleanups,
/// optional extra vectorizer cleanup (EarlyCSE / LICM / SimpleLoopUnswitch /
/// SimplifyCFG / InstCombine) gated on ExtraVectorizerPasses, an aggressive
/// SimplifyCFG reshaping, SLP vectorization when PTO.SLPVectorization is set,
/// and a final InstCombine + LICM run.
///
/// NOTE(review): several statements in this copy are truncated (bare line
/// numbers and continuation lines whose opening calls are missing) — verify
/// against the upstream file before editing.
1290void PassBuilder::addVectorPasses(OptimizationLevel Level,
1291                                  FunctionPassManager &FPM, bool IsFullLTO) {
1294
1296  if (IsFullLTO) {
1297    // The vectorizer may have significantly shortened a loop body; unroll
1298    // again. Unroll small loops to hide loop backedge latency and saturate any
1299    // parallel execution resources of an out-of-order processor. We also then
1300    // need to clean up redundancies and loop invariant code.
1301    // FIXME: It would be really good to use a loop-integrated instruction
1302    // combiner for cleanup here so that the unrolling and LICM can be pipelined
1303    // across the loop nests.
1304    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1307        LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1309        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1312    // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1313    // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1314    // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1315    // NOTE: we are very late in the pipeline, and we don't have any LICM
1316    // or SimplifyCFG passes scheduled after us, that would cleanup
1317    // the CFG mess this may create if allowed to modify CFG, so forbid that.
1319  }
1320
1321  if (!IsFullLTO) {
1322    // Eliminate loads by forwarding stores from the previous iteration to loads
1323    // of the current iteration.
1325  }
1326  // Cleanup after the loop optimization passes.
1327  FPM.addPass(InstCombinePass());
1328
1329  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1331    // At higher optimization levels, try to clean up any runtime overlap and
1332    // alignment checks inserted by the vectorizer. We want to track correlated
1333    // runtime checks for two inner loops in the same outer loop, fold any
1334    // common computations, hoist loop-invariant aspects out of any outer loop,
1335    // and unswitch the runtime checks if possible. Once hoisted, we may have
1336    // dead (or speculatable) control flows or more combining opportunities.
1337    ExtraPasses.addPass(EarlyCSEPass());
1339    ExtraPasses.addPass(InstCombinePass());
1340    LoopPassManager LPM;
1342                         /*AllowSpeculation=*/true));
1343    LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1345    ExtraPasses.addPass(
1346        createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1347                                        /*UseBlockFrequencyInfo=*/true));
1348    ExtraPasses.addPass(
1349        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1350    ExtraPasses.addPass(InstCombinePass());
1351    FPM.addPass(std::move(ExtraPasses));
1352  }
1353
1354  // Now that we've formed fast to execute loop structures, we do further
1355  // optimizations. These are run afterward as they might block doing complex
1356  // analyses and transforms such as what are needed for loop vectorization.
1357
1358  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1359  // GVN, loop transforms, and others have already run, so it's now better to
1360  // convert to more optimized IR using more aggressive simplify CFG options.
1361  // The extra sinking transform can create larger basic blocks, so do this
1362  // before SLP vectorization.
1364                                  .forwardSwitchCondToPhi(true)
1365                                  .convertSwitchRangeToICmp(true)
1366                                  .convertSwitchToLookupTable(true)
1367                                  .needCanonicalLoops(false)
1368                                  .hoistCommonInsts(true)
1369                                  .sinkCommonInsts(true)));
1370
1371  if (IsFullLTO) {
1372    FPM.addPass(SCCPPass());
1373    FPM.addPass(InstCombinePass());
1374    FPM.addPass(BDCEPass());
1375  }
1376
1377  // Optimize parallel scalar instruction chains into SIMD instructions.
1378  if (PTO.SLPVectorization) {
1380    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1381      FPM.addPass(EarlyCSEPass());
1382    }
1383  }
1384  // Enhance/cleanup vector code.
1386
1387  if (!IsFullLTO) {
1388    FPM.addPass(InstCombinePass());
1389    // Unroll small loops to hide loop backedge latency and saturate any
1390    // parallel execution resources of an out-of-order processor. We also then
1391    // need to clean up redundancies and loop invariant code.
1392    // FIXME: It would be really good to use a loop-integrated instruction
1393    // combiner for cleanup here so that the unrolling and LICM can be pipelined
1394    // across the loop nests.
1395    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1396    if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1398          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1399    }
1401        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1404    // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1405    // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1406    // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1407    // NOTE: we are very late in the pipeline, and we don't have any LICM
1408    // or SimplifyCFG passes scheduled after us, that would cleanup
1409    // the CFG mess this may create if allowed to modify CFG, so forbid that.
1411  }
1412
1414  FPM.addPass(InstCombinePass());
1415
1416  // This is needed for two reasons:
1417  //   1. It works around problems that instcombine introduces, such as sinking
1418  //      expensive FP divides into loops containing multiplications using the
1419  //      divide result.
1420  //   2. It helps to clean up some loop-invariant code created by the loop
1421  //      unroll pass when IsFullLTO=false.
1424                           /*AllowSpeculation=*/true),
1425      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1426
1427  // Now that we've vectorized and unrolled loops, we may have more refined
1428  // alignment information, try to re-derive it here.
1430}
1431
// (review) Body of what appears to be
// PassBuilder::buildModuleOptimizationPipeline — the signature line is not
// visible in this copy; confirm against upstream. Based on the visible code it
// runs the post-simplification module optimizations: optional CS-PGO
// instrumentation/use (outside LTO pre-link), the OptimizerEarly EP callbacks,
// a function-level OptimizePM (Float2Int, optional matrix lowering, optional
// CHR at -O3, loop rotation/deletion, LoopDistribute, InjectTLIMappings, the
// vectorization sequence via addVectorPasses, LoopSink, InstSimplify,
// DivRemPairs, TailCallElim, final SimplifyCFG), then hot/cold splitting,
// IR outlining, GlobalDCE, optional function merging, and (outside LTO
// pre-link) RelLookupTableConverter.
//
// NOTE(review): several statements in this copy are truncated (bare line
// numbers and dangling continuations) — verify against the upstream file
// before editing.
1434                                             ThinOrFullLTOPhase LTOPhase) {
1435  const bool LTOPreLink = isLTOPreLink(LTOPhase);
1437
1438  // Run partial inlining pass to partially inline functions that have
1439  // large bodies.
1442
1443  // Remove avail extern fns and globals definitions since we aren't compiling
1444  // an object file for later LTO. For LTO we want to preserve these so they
1445  // are eligible for inlining at link-time. Note if they are unreferenced they
1446  // will be removed by GlobalDCE later, so this only impacts referenced
1447  // available externally globals. Eventually they will be suppressed during
1448  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1449  // may make globals referenced by available external functions dead and saves
1450  // running remaining passes on the eliminated functions. These should be
1451  // preserved during prelinking for link-time inlining decisions.
1452  if (!LTOPreLink)
1454
1457
1458  // Do RPO function attribute inference across the module to forward-propagate
1459  // attributes where applicable.
1460  // FIXME: Is this really an optimization rather than a canonicalization?
1462
1463  // Do a post inline PGO instrumentation and use pass. This is a context
1464  // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1465  // cross-module inline has not been done yet. The context sensitive
1466  // instrumentation is after all the inlines are done.
1467  if (!LTOPreLink && PGOOpt) {
1468    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1469      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1470                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1471                        PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1472                        PGOOpt->FS);
1473    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1474      addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1475                        /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1476                        PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1477                        PGOOpt->FS);
1478  }
1479
1480  // Re-compute GlobalsAA here prior to function passes. This is particularly
1481  // useful as the above will have inlined, DCE'ed, and function-attr
1482  // propagated everything. We should at this point have a reasonably minimal
1483  // and richly annotated call graph. By computing aliasing and mod/ref
1484  // information for all local globals here, the late loop passes and notably
1485  // the vectorizer will be able to use them to help recognize vectorizable
1486  // memory operations.
1489
1490  invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1491
1492  FunctionPassManager OptimizePM;
1493  // Scheduling LoopVersioningLICM when inlining is over, because after that
1494  // we may see more accurate aliasing. Reason to run this late is that too
1495  // early versioning may prevent further inlining due to increase of code
1496  // size. Other optimizations which run later might get benefit of no-alias
1497  // assumption in clone loop.
1499    OptimizePM.addPass(
1501    // LoopVersioningLICM pass might increase new LICM opportunities.
1504                             /*AllowSpeculation=*/true),
1505        /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1506  }
1507
1508  OptimizePM.addPass(Float2IntPass());
1510
1511  if (EnableMatrix) {
1512    OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1513    OptimizePM.addPass(EarlyCSEPass());
1514  }
1515
1516  // CHR pass should only be applied with the profile information.
1517  // The check is to check the profile summary information in CHR.
1518  if (EnableCHR && Level == OptimizationLevel::O3)
1519    OptimizePM.addPass(ControlHeightReductionPass());
1520
1521  // FIXME: We need to run some loop optimizations to re-rotate loops after
1522  // simplifycfg and others undo their rotation.
1523
1524  // Optimize the loop execution. These passes operate on entire loop nests
1525  // rather than on each loop in an inside-out manner, and so they are actually
1526  // function passes.
1527
1528  invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1529
1530  LoopPassManager LPM;
1531  // First rotate loops that may have been un-rotated by prior passes.
1532  // Disable header duplication at -Oz.
1534                           Level != OptimizationLevel::Oz,
1535                           LTOPreLink));
1536  // Some loops may have become dead by now. Try to delete them.
1537  // FIXME: see discussion in https://reviews.llvm.org/D112851,
1538  //        this may need to be revisited once we run GVN before loop deletion
1539  //        in the simplification pipeline.
1542      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1543
1544  // Distribute loops to allow partial vectorization. I.e. isolate dependences
1545  // into separate loop that would otherwise inhibit vectorization. This is
1546  // currently only performed for loops marked with the metadata
1547  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1548  OptimizePM.addPass(LoopDistributePass());
1549
1550  // Populates the VFABI attribute with the scalar-to-vector mappings
1551  // from the TargetLibraryInfo.
1552  OptimizePM.addPass(InjectTLIMappings());
1553
1554  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1555
1556  invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1557
1558  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1559  // canonicalization pass that enables other optimizations. As a result,
1560  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1561  // result too early.
1562  OptimizePM.addPass(LoopSinkPass());
1563
1564  // And finally clean up LCSSA form before generating code.
1565  OptimizePM.addPass(InstSimplifyPass());
1566
1567  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1568  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1569  // flattening of blocks.
1570  OptimizePM.addPass(DivRemPairsPass());
1571
1572  // Try to annotate calls that were created during optimization.
1573  OptimizePM.addPass(TailCallElimPass());
1574
1575  // LoopSink (and other loop passes since the last simplifyCFG) might have
1576  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1577  OptimizePM.addPass(
1579          .convertSwitchRangeToICmp(true)
1580          .speculateUnpredictables(true)
1581          .hoistLoadsStoresWithCondFaulting(true)));
1582
1583  // Add the core optimizing pipeline.
1584  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1586
1587  invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1588
1589  // Split out cold code. Splitting is done late to avoid hiding context from
1590  // other optimizations and inadvertently regressing performance. The tradeoff
1591  // is that this has a higher code size cost than splitting early.
1592  if (EnableHotColdSplit && !LTOPreLink)
1594
1595  // Search the code for similar regions of code. If enough similar regions can
1596  // be found where extracting the regions into their own function will decrease
1597  // the size of the program, we extract the regions, and deduplicate the
1598  // structurally similar regions.
1599  if (EnableIROutliner)
1600    MPM.addPass(IROutlinerPass());
1601
1602  // Now we need to do some global optimization transforms.
1603  // FIXME: It would seem like these should come first in the optimization
1604  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1605  // ordering here.
1606  MPM.addPass(GlobalDCEPass());
1608
1609  // Merge functions if requested. It has a better chance to merge functions
1610  // after ConstantMerge folded jump tables.
1611  if (PTO.MergeFunctions)
1613
1614  if (PTO.CallGraphProfile && !LTOPreLink)
1617
1618  // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1619  if (!LTOPreLink)
1621
1622  return MPM;
1623}
1624
// (review) Body of what appears to be
// PassBuilder::buildPerModuleDefaultPipeline — the signature line is not
// visible in this copy; confirm against upstream. Visible behavior: falls back
// to buildO0DefaultPipeline at -O0, runs the simplification and optimization
// pipelines (several addPass lines are truncated in this copy, leaving only
// their comments), updates pseudo probes for SampleUse, emits annotation
// remarks, and appends the required LTO pre-link passes when \p Phase is an
// LTO pre-link phase.
1628  if (Level == OptimizationLevel::O0)
1629    return buildO0DefaultPipeline(Level, Phase);
1630
1632
1633  // Convert @llvm.global.annotations to !annotation metadata.
1635
1636  // Force any function attributes we want the rest of the pipeline to observe.
1638
1639  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1641
1642  // Apply module pipeline start EP callback.
1644
1645  // Add the core simplification pipeline.
1647
1648  // Now add the optimization pipeline.
1650
1651  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1652      PGOOpt->Action == PGOOptions::SampleUse)
1654
1655  // Emit annotation remarks.
1657
1658  if (isLTOPreLink(Phase))
1659    addRequiredLTOPreLinkPasses(MPM);
1660  return MPM;
1661}
1662
// (review) Tail of what appears to be PassBuilder::buildFatLTODefaultPipeline
// — the start of the signature is not visible here; confirm against upstream.
// Visible behavior: embeds bitcode (EmbedBitcodePass) after running a pre-link
// pipeline selected by \p ThinLTO (those addPass lines are truncated in this
// copy), drops type tests from the object-code path when CFI is in use, then
// either runs the ThinLTO post-link pipeline (ThinLTO + SampleUse) or the
// regular module optimization pipeline.
1665                                                    bool EmitSummary) {
1667  if (ThinLTO)
1669  else
1671  MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1672
1673  // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1674  // object code, only in the bitcode section, so drop it before we run
1675  // module optimization and generate machine code. If llvm.type.test() isn't in
1676  // the IR, this won't do anything.
1677  MPM.addPass(
1679
1680  // Use the ThinLTO post-link pipeline with sample profiling
1681  if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1682    MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1683  else {
1684    // otherwise, just use module optimization
1685    MPM.addPass(
1687    // Emit annotation remarks.
1689  }
1690  return MPM;
1691}
1692
// (review) Body of what appears to be
// PassBuilder::buildThinLTOPreLinkDefaultPipeline — signature line not visible
// in this copy; confirm against upstream. Visible behavior: O0 fallback,
// simplification-only pipeline (no unrolling/vectorization before the thin
// link), an early exit for contextual-profile instrumentation builds
// (UseCtxProfile), pseudo-probe update for SampleUse, the Optimizer EP
// callbacks for in-process ThinLTO, annotation remarks, and the required LTO
// pre-link passes. Several addPass lines are truncated in this copy.
1695  if (Level == OptimizationLevel::O0)
1697
1699
1700  // Convert @llvm.global.annotations to !annotation metadata.
1702
1703  // Force any function attributes we want the rest of the pipeline to observe.
1705
1706  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1708
1709  // Apply module pipeline start EP callback.
1711
1712  // If we are planning to perform ThinLTO later, we don't bloat the code with
1713  // unrolling/vectorization/... now. Just simplify the module as much as we
1714  // can.
1717  // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1718  // thinlto use the contextual info to perform imports; then use the contextual
1719  // profile in the post-thinlink phase.
1720  if (!UseCtxProfile.empty()) {
1721    addRequiredLTOPreLinkPasses(MPM);
1722    return MPM;
1723  }
1724
1725  // Run partial inlining pass to partially inline functions that have
1726  // large bodies.
1727  // FIXME: It isn't clear whether this is really the right place to run this
1728  // in ThinLTO. Because there is another canonicalization and simplification
1729  // phase that will run after the thin link, running this here ends up with
1730  // less information than will be available later and it may grow functions in
1731  // ways that aren't beneficial.
1734
1735  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1736      PGOOpt->Action == PGOOptions::SampleUse)
1738
1739  // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1740  // optimization is going to be done in PostLink stage, but clang can't add
1741  // callbacks there in case of in-process ThinLTO called by linker.
1746
1747  // Emit annotation remarks.
1749
1750  addRequiredLTOPreLinkPasses(MPM);
1751
1752  return MPM;
1753}
1754
// (review) PassBuilder::buildThinLTODefaultPipeline (first signature line not
// visible in this copy; confirm against upstream). Visible behavior: when an
// import summary is present, applies memprof context disambiguation and the
// WPD/LowerTypeTests import passes early; at -O0 only cleans up leftover type
// tests and runs GlobalDCE; otherwise runs either the contextual-profile flow
// (UseCtxProfile) or the core simplification pipeline, then the optimization
// pipeline and annotation remarks. Several addPass lines are truncated in
// this copy.
1756    OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1758
1759  if (ImportSummary) {
1760    // For ThinLTO we must apply the context disambiguation decisions early, to
1761    // ensure we can correctly match the callsites to summary data.
1764        ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1765
1766    // These passes import type identifier resolutions for whole-program
1767    // devirtualization and CFI. They must run early because other passes may
1768    // disturb the specific instruction patterns that these passes look for,
1769    // creating dependencies on resolutions that may not appear in the summary.
1770    //
1771    // For example, GVN may transform the pattern assume(type.test) appearing in
1772    // two basic blocks into assume(phi(type.test, type.test)), which would
1773    // transform a dependency on a WPD resolution into a dependency on a type
1774    // identifier resolution for CFI.
1775    //
1776    // Also, WPD has access to more precise information than ICP and can
1777    // devirtualize more effectively, so it should operate on the IR first.
1778    //
1779    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1780    // metadata and intrinsics.
1781    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1782    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1783  }
1784
1785  if (Level == OptimizationLevel::O0) {
1786    // Run a second time to clean up any type tests left behind by WPD for use
1787    // in ICP.
1788    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1790    // Drop available_externally and unreferenced globals. This is necessary
1791    // with ThinLTO in order to avoid leaving undefined references to dead
1792    // globals in the object file.
1794    MPM.addPass(GlobalDCEPass());
1795    return MPM;
1796  }
1797  if (!UseCtxProfile.empty()) {
1798    MPM.addPass(
1800  } else {
1801    // Add the core simplification pipeline.
1804  }
1805  // Now add the optimization pipeline.
1808
1809  // Emit annotation remarks.
1811
1812  return MPM;
1813}
1814
// (review) Tail of what appears to be
// PassBuilder::buildLTOPreLinkDefaultPipeline — its signature is not visible
// in this copy, and the argument list of the call below is truncated; confirm
// against upstream. It simply delegates to buildPerModuleDefaultPipeline.
1817  // FIXME: We should use a customized pre-link pipeline!
1818  return buildPerModuleDefaultPipeline(Level,
1820}
1821
1824 ModuleSummaryIndex *ExportSummary) {
1826
1828
1829 // Create a function that performs CFI checks for cross-DSO calls with targets
1830 // in the current module.
1831 MPM.addPass(CrossDSOCFIPass());
1832
1833 if (Level == OptimizationLevel::O0) {
1834 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1835 // metadata and intrinsics.
1836 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1837 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1838 // Run a second time to clean up any type tests left behind by WPD for use
1839 // in ICP.
1840 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1842
1844
1845 // Emit annotation remarks.
1847
1848 return MPM;
1849 }
1850
1851 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1852 // Load sample profile before running the LTO optimization pipeline.
1853 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1854 PGOOpt->ProfileRemappingFile,
1856 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1857 // RequireAnalysisPass for PSI before subsequent non-module passes.
1859 }
1860
1861 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1863
1864 // Remove unused virtual tables to improve the quality of code generated by
1865 // whole-program devirtualization and bitset lowering.
1866 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1867
1868 // Do basic inference of function attributes from known properties of system
1869 // libraries and other oracles.
1871
1872 if (Level.getSpeedupLevel() > 1) {
1875
1876 // Indirect call promotion. This should promote all the targets that are
1877 // left by the earlier promotion pass that promotes intra-module targets.
1878 // This two-step promotion is to save the compile time. For LTO, it should
1879 // produce the same result as if we only do promotion here.
1881 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1882
1883 // Promoting by-reference arguments to by-value exposes more constants to
1884 // IPSCCP.
1885 CGSCCPassManager CGPM;
1888 CGPM.addPass(
1891
1892 // Propagate constants at call sites into the functions they call. This
1893 // opens opportunities for globalopt (and inlining) by substituting function
1894 // pointers passed as arguments to direct uses of functions.
1895 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1896 Level != OptimizationLevel::Os &&
1897 Level != OptimizationLevel::Oz)));
1898
1899 // Attach metadata to indirect call sites indicating the set of functions
1900 // they may target at run-time. This should follow IPSCCP.
1902 }
1903
1904 // Do RPO function attribute inference across the module to forward-propagate
1905 // attributes where applicable.
1906 // FIXME: Is this really an optimization rather than a canonicalization?
1908
1909 // Use in-range annotations on GEP indices to split globals where beneficial.
1910 MPM.addPass(GlobalSplitPass());
1911
1912 // Run whole program optimization of virtual call when the list of callees
1913 // is fixed.
1914 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1915
1916 // Stop here at -O1.
1917 if (Level == OptimizationLevel::O1) {
1918 // The LowerTypeTestsPass needs to run to lower type metadata and the
1919 // type.test intrinsics. The pass does nothing if CFI is disabled.
1920 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1921 // Run a second time to clean up any type tests left behind by WPD for use
1922 // in ICP (which is performed earlier than this in the regular LTO
1923 // pipeline).
1924 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1926
1928
1929 // Emit annotation remarks.
1931
1932 return MPM;
1933 }
1934
1935 // Optimize globals to try and fold them into constants.
1936 MPM.addPass(GlobalOptPass());
1937
1938 // Promote any localized globals to SSA registers.
1940
1941 // Linking modules together can lead to duplicate global constant, only
1942 // keep one copy of each constant.
1944
1945 // Remove unused arguments from functions.
1947
1948 // Reduce the code after globalopt and ipsccp. Both can open up significant
1949 // simplification opportunities, and both can propagate functions through
1950 // function pointers. When this happens, we often have to resolve varargs
1951 // calls, etc, so let instcombine do this.
1952 FunctionPassManager PeepholeFPM;
1953 PeepholeFPM.addPass(InstCombinePass());
1954 if (Level.getSpeedupLevel() > 1)
1955 PeepholeFPM.addPass(AggressiveInstCombinePass());
1956 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1957
1958 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1960
1961 // Lower variadic functions for supported targets prior to inlining.
1963
1964 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1965 // generally clean up exception handling overhead. It isn't clear this is
1966 // valuable as the inliner doesn't currently care whether it is inlining an
1967 // invoke or a call.
1968 // Run the inliner now.
1969 if (EnableModuleInliner) {
1973 } else {
1976 /* MandatoryFirst */ true,
1979 }
1980
1981 // Perform context disambiguation after inlining, since that would reduce the
1982 // amount of additional cloning required to distinguish the allocation
1983 // contexts.
1986 /*Summary=*/nullptr,
1987 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1988
1989 // Optimize globals again after we ran the inliner.
1990 MPM.addPass(GlobalOptPass());
1991
1992 // Run the OpenMPOpt pass again after global optimizations.
1994
1995 // Garbage collect dead functions.
1996 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1997
1998 // If we didn't decide to inline a function, check to see if we can
1999 // transform it to pass arguments by value instead of by reference.
2001
2003 // The IPO Passes may leave cruft around. Clean up after them.
2004 FPM.addPass(InstCombinePass());
2005 invokePeepholeEPCallbacks(FPM, Level);
2006
2009
2011
2012 // Do a post inline PGO instrumentation and use pass. This is a context
2013 // sensitive PGO pass.
2014 if (PGOOpt) {
2015 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2016 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2017 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2018 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
2019 PGOOpt->FS);
2020 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2021 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2022 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2023 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
2024 PGOOpt->FS);
2025 }
2026
2027 // Break up allocas
2029
2030 // LTO provides additional opportunities for tailcall elimination due to
2031 // link-time inlining, and visibility of nocapture attribute.
2033
2034 // Run a few AA driver optimizations here and now to cleanup the code.
2035 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2037
2038 MPM.addPass(
2040
2041 // Require the GlobalsAA analysis for the module so we can query it within
2042 // MainFPM.
2045 // Invalidate AAManager so it can be recreated and pick up the newly
2046 // available GlobalsAA.
2047 MPM.addPass(
2049 }
2050
2051 FunctionPassManager MainFPM;
2054 /*AllowSpeculation=*/true),
2055 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2056
2057 if (RunNewGVN)
2058 MainFPM.addPass(NewGVNPass());
2059 else
2060 MainFPM.addPass(GVNPass());
2061
2062 // Remove dead memcpy()'s.
2063 MainFPM.addPass(MemCpyOptPass());
2064
2065 // Nuke dead stores.
2066 MainFPM.addPass(DSEPass());
2067 MainFPM.addPass(MoveAutoInitPass());
2069
2070 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2071
2072 LoopPassManager LPM;
2073 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2074 LPM.addPass(LoopFlattenPass());
2077 // FIXME: Add loop interchange.
2078
2079 // Unroll small loops and perform peeling.
2080 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2081 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2083 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2084 // *All* loop passes must preserve it, in order to be able to use it.
2086 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2087
2088 MainFPM.addPass(LoopDistributePass());
2089
2090 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2091
2092 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2093
2094 // Run the OpenMPOpt CGSCC pass again late.
2097
2098 invokePeepholeEPCallbacks(MainFPM, Level);
2099 MainFPM.addPass(JumpThreadingPass());
2100 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2102
2103 // Lower type metadata and the type.test intrinsic. This pass supports
2104 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2105 // to be run at link time if CFI is enabled. This pass does nothing if
2106 // CFI is disabled.
2107 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2108 // Run a second time to clean up any type tests left behind by WPD for use
2109 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2110 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2112
2113 // Enable splitting late in the FullLTO post-link pipeline.
2116
2117 // Add late LTO optimization passes.
2118 FunctionPassManager LateFPM;
2119
2120 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2121 // canonicalization pass that enables other optimizations. As a result,
2122 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2123 // result too early.
2124 LateFPM.addPass(LoopSinkPass());
2125
2126 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2127 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2128 // flattening of blocks.
2129 LateFPM.addPass(DivRemPairsPass());
2130
2131 // Delete basic blocks, which optimization passes may have killed.
2133 .convertSwitchRangeToICmp(true)
2134 .hoistCommonInsts(true)
2135 .speculateUnpredictables(true)));
2136 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2137
2138 // Drop bodies of available eternally objects to improve GlobalDCE.
2140
2141 // Now that we have optimized the program, discard unreachable functions.
2142 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2143
2144 if (PTO.MergeFunctions)
2146
2148
2149 if (PTO.CallGraphProfile)
2150 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2151
2153
2154 // Emit annotation remarks.
2156
2157 return MPM;
2158}
2159
2163 assert(Level == OptimizationLevel::O0 &&
2164 "buildO0DefaultPipeline should only be used with O0");
2165
2167
2168 // Perform pseudo probe instrumentation in O0 mode. This is for the
2169 // consistency between different build modes. For example, a LTO build can be
2170 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2171 // the postlink will require pseudo probe instrumentation in the prelink.
2172 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2174
2175 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2176 PGOOpt->Action == PGOOptions::IRUse))
2178 MPM,
2179 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2180 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2181 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2182
2183 // Instrument function entry and exit before all inlining.
2185 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2186
2188
2189 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2191
2192 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2193 // Explicitly disable sample loader inlining and use flattened profile in O0
2194 // pipeline.
2195 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2196 PGOOpt->ProfileRemappingFile,
2197 ThinOrFullLTOPhase::None, nullptr,
2198 /*DisableSampleProfileInlining=*/true,
2199 /*UseFlattenedProfile=*/true));
2200 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2201 // RequireAnalysisPass for PSI before subsequent non-module passes.
2203 }
2204
2206
2207 // Build a minimal pipeline based on the semantics required by LLVM,
2208 // which is just that always inlining occurs. Further, disable generating
2209 // lifetime intrinsics to avoid enabling further optimizations during
2210 // code generation.
2212 /*InsertLifetimeIntrinsics=*/false));
2213
2214 if (PTO.MergeFunctions)
2216
2217 if (EnableMatrix)
2218 MPM.addPass(
2220
2221 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2222 CGSCCPassManager CGPM;
2224 if (!CGPM.isEmpty())
2226 }
2227 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2228 LoopPassManager LPM;
2230 if (!LPM.isEmpty()) {
2232 createFunctionToLoopPassAdaptor(std::move(LPM))));
2233 }
2234 }
2235 if (!LoopOptimizerEndEPCallbacks.empty()) {
2236 LoopPassManager LPM;
2238 if (!LPM.isEmpty()) {
2240 createFunctionToLoopPassAdaptor(std::move(LPM))));
2241 }
2242 }
2243 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2246 if (!FPM.isEmpty())
2247 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2248 }
2249
2251
2252 if (!VectorizerStartEPCallbacks.empty()) {
2255 if (!FPM.isEmpty())
2256 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2257 }
2258
2259 if (!VectorizerEndEPCallbacks.empty()) {
2262 if (!FPM.isEmpty())
2263 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2264 }
2265
2267
2269
2270 if (isLTOPreLink(Phase))
2271 addRequiredLTOPreLinkPasses(MPM);
2272
2274
2275 return MPM;
2276}
2277
2279 AAManager AA;
2280
2281 // The order in which these are registered determines their priority when
2282 // being queried.
2283
2284 // First we register the basic alias analysis that provides the majority of
2285 // per-function local AA logic. This is a stateless, on-demand local set of
2286 // AA techniques.
2288
2289 // Next we query fast, specialized alias analyses that wrap IR-embedded
2290 // information about aliasing.
2293
2294 // Add support for querying global aliasing information when available.
2295 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2296 // analysis, all that the `AAManager` can do is query for any *cached*
2297 // results from `GlobalsAA` through a readonly proxy.
2300
2301 // Add target-specific alias analyses.
2302 if (TM)
2304
2305 return AA;
2306}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer, simple instructions.
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging purposes.
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are unreachable.
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition: LVOptions.cpp:25
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes (profile-gen, profile-use).
Define option tunables for PGO.
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
cl::opt< std::string > UseCtxProfile
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableOrderFileInstrumentation("enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
This file implements relative lookup table converter that converts lookup tables to relative lookup tables.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
void registerModuleAnalysis()
Register a specific AA result.
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra function passes if the ShouldRunExtraPasses marker analysis is p...
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
void addPass(PassT &&Pass)
The core GVN pass object.
Definition: GVN.h:124
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:48
Pass to outline similar regions.
Definition: IROutliner.h:444
Run instruction simplification across each instruction in the function.
The instrumentation pass for recording function order.
Instrumentation based profiling lowering pass.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:79
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Definition: LoopRotation.h:24
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Merge identical functions.
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
Module pass, wrapping the inliner pass.
Definition: Inliner.h:62
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:78
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
OpenMP optimizations pass.
Definition: OpenMPOpt.h:42
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static const OptimizationLevel O0
Disable as many optimizations as possible.
static const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Add PGOInstrumenation passes for O0 only.
void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
Definition: PassManager.h:195
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:217
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:77
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:58
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition: PassBuilder.h:91
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:81
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:88
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:69
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrive clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:73
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:50
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:61
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition: PassBuilder.h:65
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:54
Reassociate commutative expressions.
Definition: Reassociate.h:85
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:73
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:29
The sample profiler data loader pass.
Definition: SampleProfile.h:39
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
Interfaces for registering analysis passes, producing common pass manager configurations,...
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
cl::opt< bool > EnableKnowledgeRetention
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:852
@ MODULE
Definition: Attributor.h:6489
@ CGSCC
Definition: Attributor.h:6490
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:76
@ FullLTOPreLink
Full LTO prelink phase.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
@ None
No LTO/ThinLTO behavior needed.
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:399
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:406
A set of parameters to control various transforms performed by IPSCCP pass.
Definition: SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:58
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:205
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:222
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:207
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition: InlineCost.h:235
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:210
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:905
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:49
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:878