LLVM 22.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
150
151using namespace llvm;
152
154 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
155 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
157 "Heuristics-based inliner version"),
159 "Use development mode (runtime-loadable model)"),
161 "Use release mode (AOT-compiled model)")));
162
163/// Flag to enable inline deferral during PGO.
164static cl::opt<bool>
165 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
167 cl::desc("Enable inline deferral during PGO"));
168
/// Hidden flag, off by default, that enables the module inliner.
169static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
170 cl::init(false), cl::Hidden,
171 cl::desc("Enable module inliner"));
172
174 "mandatory-inlining-first", cl::init(false), cl::Hidden,
175 cl::desc("Perform mandatory inlinings module-wide, before performing "
176 "inlining"));
177
179 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
180 cl::desc("Eagerly invalidate more analyses in default pipelines"));
181
183 "enable-merge-functions", cl::init(false), cl::Hidden,
184 cl::desc("Enable function merging as part of the optimization pipeline"));
185
187 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
188 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
189
191 "enable-global-analyses", cl::init(true), cl::Hidden,
192 cl::desc("Enable inter-procedural analyses"));
193
/// Hidden flag, off by default, that requests running the partial inlining
/// pass.
194static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
195 cl::init(false), cl::Hidden,
196 cl::desc("Run Partial inlining pass"));
197
199 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
200 cl::desc("Run cleanup optimization passes after vectorization"));
201
/// Hidden flag, off by default. When set, NewGVNPass is scheduled in place of
/// GVNPass at the redundancy-elimination step of the function pipeline.
202static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
203 cl::desc("Run the NewGVN pass"));
204
/// Hidden flag, off by default, that enables the LoopInterchange pass.
205static cl::opt<bool>
206 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
207 cl::desc("Enable the LoopInterchange Pass"));
208
/// Hidden flag, off by default, that enables the unroll-and-jam pass.
209static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
210 cl::init(false), cl::Hidden,
211 cl::desc("Enable Unroll And Jam Pass"));
212
213static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
215 cl::desc("Enable the LoopFlatten Pass"));
216
217// Experimentally allow loop header duplication. This should allow for better
218// optimization at Oz, since loop-idiom recognition can then recognize things
219// like memcpy. If this ends up being useful for many targets, we should drop
220// this flag and make a code generation option that can be controlled
221// independent of the opt level and exposed through the frontend.
223 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
224 cl::desc("Enable loop header duplication at any optimization level"));
225
/// Hidden flag, off by default, that enables DFA jump threading.
226static cl::opt<bool>
227 EnableDFAJumpThreading("enable-dfa-jump-thread",
228 cl::desc("Enable DFA jump threading"),
229 cl::init(false), cl::Hidden);
230
/// Flag that enables the hot-cold splitting pass. No explicit initial value is
/// given in this declaration.
231static cl::opt<bool>
232 EnableHotColdSplit("hot-cold-split",
233 cl::desc("Enable hot-cold splitting pass"));
234
235static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
237 cl::desc("Enable ir outliner pass"));
238
/// Hidden flag, off by default, that disables the pre-instrumentation inliner.
239static cl::opt<bool>
240 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
241 cl::desc("Disable pre-instrumentation inliner"));
242
244 "preinline-threshold", cl::Hidden, cl::init(75),
245 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
246 "(default = 75)"));
247
/// Flag that enables the GVN hoisting pass. When set, GVNHoistPass is added to
/// the function simplification pipeline.
248static cl::opt<bool>
249 EnableGVNHoist("enable-gvn-hoist",
250 cl::desc("Enable the GVN hoisting pass (default = off)"));
251
/// Flag that enables the GVN sinking pass. When set, GVNSinkPass is added to
/// the function simplification pipeline, followed by a SimplifyCFGPass run.
252static cl::opt<bool>
253 EnableGVNSink("enable-gvn-sink",
254 cl::desc("Enable the GVN sinking pass (default = off)"));
255
257 "enable-jump-table-to-switch",
258 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
259
// Hidden flag, on by default, that enables control height reduction (CHR).
// NOTE(review): the SampleFDO remark below does not obviously describe a CHR
// switch; confirm it is attached to the right option.
260// This option is used in simplifying testing SampleFDO optimizations for
261// profile loading.
262static cl::opt<bool>
263 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
264 cl::desc("Enable control height reduction optimization (CHR)"));
265
267 "flattened-profile-used", cl::init(false), cl::Hidden,
268 cl::desc("Indicate the sample profile being used is flattened, i.e., "
269 "no inline hierarchy exists in the profile"));
270
/// Hidden flag, off by default, that enables lowering of the matrix
/// intrinsics.
271static cl::opt<bool>
272 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
273 cl::desc("Enable lowering of the matrix intrinsics"));
274
276 "enable-constraint-elimination", cl::init(true), cl::Hidden,
277 cl::desc(
278 "Enable pass to eliminate conditions based on linear constraints"));
279
281 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
282 cl::desc("Enable the attributor inter-procedural deduction pass"),
284 "enable all attributor runs"),
286 "enable module-wide attributor runs"),
288 "enable call graph SCC attributor runs"),
289 clEnumValN(AttributorRunOption::NONE, "none",
290 "disable attributor runs")));
291
293 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
294 cl::desc("Enable profile instrumentation sampling (default = off)"));
296 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
297 cl::desc("Enable the experimental Loop Versioning LICM pass"));
298
300 "instrument-cold-function-only-path", cl::init(""),
301 cl::desc("File path for cold function only instrumentation(requires use "
302 "with --pgo-instrument-cold-function-only)"),
303 cl::Hidden);
304
307
308namespace llvm {
310} // namespace llvm
311
328
329namespace llvm {
331} // namespace llvm
332
334 OptimizationLevel Level) {
335 for (auto &C : PeepholeEPCallbacks)
336 C(FPM, Level);
337}
340 for (auto &C : LateLoopOptimizationsEPCallbacks)
341 C(LPM, Level);
342}
344 OptimizationLevel Level) {
345 for (auto &C : LoopOptimizerEndEPCallbacks)
346 C(LPM, Level);
347}
350 for (auto &C : ScalarOptimizerLateEPCallbacks)
351 C(FPM, Level);
352}
354 OptimizationLevel Level) {
355 for (auto &C : CGSCCOptimizerLateEPCallbacks)
356 C(CGPM, Level);
357}
359 OptimizationLevel Level) {
360 for (auto &C : VectorizerStartEPCallbacks)
361 C(FPM, Level);
362}
364 OptimizationLevel Level) {
365 for (auto &C : VectorizerEndEPCallbacks)
366 C(FPM, Level);
367}
369 OptimizationLevel Level,
371 for (auto &C : OptimizerEarlyEPCallbacks)
372 C(MPM, Level, Phase);
373}
375 OptimizationLevel Level,
377 for (auto &C : OptimizerLastEPCallbacks)
378 C(MPM, Level, Phase);
379}
382 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
383 C(MPM, Level);
384}
387 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
388 C(MPM, Level);
389}
391 OptimizationLevel Level) {
392 for (auto &C : PipelineStartEPCallbacks)
393 C(MPM, Level);
394}
397 for (auto &C : PipelineEarlySimplificationEPCallbacks)
398 C(MPM, Level, Phase);
399}
400
401// Helper to add AnnotationRemarksPass.
405
406// Helper to check if the current compilation phase is preparing for LTO
411
412// Helper to check if the current compilation phase is LTO backend
417
418// Helper to conditionally wrap the Coro passes.
420 // TODO: Skip passes according to Phase.
421 ModulePassManager CoroPM;
422 CoroPM.addPass(CoroEarlyPass());
423 CGSCCPassManager CGPM;
424 CGPM.addPass(CoroSplitPass());
425 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
426 CoroPM.addPass(CoroCleanupPass());
427 CoroPM.addPass(GlobalDCEPass());
428 return CoroConditionalWrapper(std::move(CoroPM));
429}
430
431// TODO: Investigate the cost/benefit of tail call elimination on debugging.
433PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
435
437
439 FPM.addPass(CountVisitsPass());
440
441 // Form SSA out of local memory accesses after breaking apart aggregates into
442 // scalars.
443 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
444
445 // Catch trivial redundancies
446 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
447
448 // Hoisting of scalars and load expressions.
449 FPM.addPass(
450 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
451 FPM.addPass(InstCombinePass());
452
453 FPM.addPass(LibCallsShrinkWrapPass());
454
455 invokePeepholeEPCallbacks(FPM, Level);
456
457 FPM.addPass(
458 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
459
460 // Form canonically associated expression trees, and simplify the trees using
461 // basic mathematical properties. For example, this will form (nearly)
462 // minimal multiplication trees.
463 FPM.addPass(ReassociatePass());
464
465 // Add the primary loop simplification pipeline.
466 // FIXME: Currently this is split into two loop pass pipelines because we run
467 // some function passes in between them. These can and should be removed
468 // and/or replaced by scheduling the loop pass equivalents in the correct
469 // positions. But those equivalent passes aren't powerful enough yet.
470 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
471 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
472 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
473 // `LoopInstSimplify`.
474 LoopPassManager LPM1, LPM2;
475
476 // Simplify the loop body. We do this initially to clean up after other loop
477 // passes run, either when iterating on a loop or on inner loops with
478 // implications on the outer loop.
479 LPM1.addPass(LoopInstSimplifyPass());
480 LPM1.addPass(LoopSimplifyCFGPass());
481
482 // Try to remove as much code from the loop header as possible,
483 // to reduce amount of IR that will have to be duplicated. However,
484 // do not perform speculative hoisting the first time as LICM
485 // will destroy metadata that may not need to be destroyed if run
486 // after loop rotation.
487 // TODO: Investigate promotion cap for O1.
488 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
489 /*AllowSpeculation=*/false));
490
491 LPM1.addPass(LoopRotatePass(/*EnableHeaderDuplication=*/true,
493 // TODO: Investigate promotion cap for O1.
494 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
495 /*AllowSpeculation=*/true));
496 LPM1.addPass(SimpleLoopUnswitchPass());
498 LPM1.addPass(LoopFlattenPass());
499
500 LPM2.addPass(LoopIdiomRecognizePass());
501 LPM2.addPass(IndVarSimplifyPass());
502
504
505 LPM2.addPass(LoopDeletionPass());
506
507 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
508 // because it changes the IR and makes profile annotation in the backend compile
509 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
510 // attributes so we need to make sure and allow the full unroll pass to pay
511 // attention to it.
512 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
513 PGOOpt->Action != PGOOptions::SampleUse)
514 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
515 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
516 PTO.ForgetAllSCEVInLoopUnroll));
517
519
520 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
521 /*UseMemorySSA=*/true,
522 /*UseBlockFrequencyInfo=*/true));
523 FPM.addPass(
524 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
525 FPM.addPass(InstCombinePass());
526 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
527 // *All* loop passes must preserve it, in order to be able to use it.
528 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
529 /*UseMemorySSA=*/false,
530 /*UseBlockFrequencyInfo=*/false));
531
532 // Delete small array after loop unroll.
533 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
534
535 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
536 FPM.addPass(MemCpyOptPass());
537
538 // Sparse conditional constant propagation.
539 // FIXME: It isn't clear why we do this *after* loop passes rather than
540 // before...
541 FPM.addPass(SCCPPass());
542
543 // Delete dead bit computations (instcombine runs after to fold away the dead
544 // computations, and then ADCE will run later to exploit any new DCE
545 // opportunities that creates).
546 FPM.addPass(BDCEPass());
547
548 // Run instcombine after redundancy and dead bit elimination to exploit
549 // opportunities opened up by them.
550 FPM.addPass(InstCombinePass());
551 invokePeepholeEPCallbacks(FPM, Level);
552
553 FPM.addPass(CoroElidePass());
554
556
557 // Finally, do an expensive DCE pass to catch all the dead code exposed by
558 // the simplifications and basic cleanup after all the simplifications.
559 // TODO: Investigate if this is too expensive.
560 FPM.addPass(ADCEPass());
561 FPM.addPass(
562 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
563 FPM.addPass(InstCombinePass());
564 invokePeepholeEPCallbacks(FPM, Level);
565
566 return FPM;
567}
568
572 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
573
574 // The O1 pipeline has a separate pipeline creation function to simplify
575 // construction readability.
576 if (Level.getSpeedupLevel() == 1)
577 return buildO1FunctionSimplificationPipeline(Level, Phase);
578
580
583
584 // Form SSA out of local memory accesses after breaking apart aggregates into
585 // scalars.
587
588 // Catch trivial redundancies
589 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
592
593 // Hoisting of scalars and load expressions.
594 if (EnableGVNHoist)
595 FPM.addPass(GVNHoistPass());
596
597 // Global value numbering based sinking.
598 if (EnableGVNSink) {
599 FPM.addPass(GVNSinkPass());
600 FPM.addPass(
601 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
602 }
603
604 // Speculative execution if the target has divergent branches; otherwise nop.
605 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
606
607 // Optimize based on known information about branches, and cleanup afterward.
610
611 // Jump table to switch conversion.
614
615 FPM.addPass(
616 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
619
620 if (!Level.isOptimizingForSize())
622
623 invokePeepholeEPCallbacks(FPM, Level);
624
625 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
626 // using the size value profile. Don't perform this when optimizing for size.
627 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
628 !Level.isOptimizingForSize())
630
631 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
632 isInstrumentedPGOUse()));
633 FPM.addPass(
634 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
635
636 // Form canonically associated expression trees, and simplify the trees using
637 // basic mathematical properties. For example, this will form (nearly)
638 // minimal multiplication trees.
640
643
644 // Add the primary loop simplification pipeline.
645 // FIXME: Currently this is split into two loop pass pipelines because we run
646 // some function passes in between them. These can and should be removed
647 // and/or replaced by scheduling the loop pass equivalents in the correct
648 // positions. But those equivalent passes aren't powerful enough yet.
649 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
650 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
651 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
652 // `LoopInstSimplify`.
653 LoopPassManager LPM1, LPM2;
654
655 // Simplify the loop body. We do this initially to clean up after other loop
656 // passes run, either when iterating on a loop or on inner loops with
657 // implications on the outer loop.
658 LPM1.addPass(LoopInstSimplifyPass());
659 LPM1.addPass(LoopSimplifyCFGPass());
660
661 // Try to remove as much code from the loop header as possible,
662 // to reduce amount of IR that will have to be duplicated. However,
663 // do not perform speculative hoisting the first time as LICM
664 // will destroy metadata that may not need to be destroyed if run
665 // after loop rotation.
666 // TODO: Investigate promotion cap for O1.
667 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
668 /*AllowSpeculation=*/false));
669
670 // Disable header duplication in loop rotation at -Oz.
672 Level != OptimizationLevel::Oz,
674 // TODO: Investigate promotion cap for O1.
675 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
676 /*AllowSpeculation=*/true));
677 LPM1.addPass(
678 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
680 LPM1.addPass(LoopFlattenPass());
681
682 LPM2.addPass(LoopIdiomRecognizePass());
683 LPM2.addPass(IndVarSimplifyPass());
684
685 {
687 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
689 LPM2.addPass(std::move(ExtraPasses));
690 }
691
693
694 LPM2.addPass(LoopDeletionPass());
695
696 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
697 // because it changes the IR and makes profile annotation in the backend compile
698 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
699 // attributes so we need to make sure and allow the full unroll pass to pay
700 // attention to it.
701 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
702 PGOOpt->Action != PGOOptions::SampleUse)
703 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
704 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
705 PTO.ForgetAllSCEVInLoopUnroll));
706
708
709 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
710 /*UseMemorySSA=*/true,
711 /*UseBlockFrequencyInfo=*/true));
712 FPM.addPass(
713 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
715 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
716 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
717 // *All* loop passes must preserve it, in order to be able to use it.
718 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
719 /*UseMemorySSA=*/false,
720 /*UseBlockFrequencyInfo=*/false));
721
722 // Delete small array after loop unroll.
724
725 // Try vectorization/scalarization transforms that are both improvements
726 // themselves and can allow further folds with GVN and InstCombine.
727 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
728
729 // Eliminate redundancies.
731 if (RunNewGVN)
732 FPM.addPass(NewGVNPass());
733 else
734 FPM.addPass(GVNPass());
735
736 // Sparse conditional constant propagation.
737 // FIXME: It isn't clear why we do this *after* loop passes rather than
738 // before...
739 FPM.addPass(SCCPPass());
740
741 // Delete dead bit computations (instcombine runs after to fold away the dead
742 // computations, and then ADCE will run later to exploit any new DCE
743 // opportunities that creates).
744 FPM.addPass(BDCEPass());
745
746 // Run instcombine after redundancy and dead bit elimination to exploit
747 // opportunities opened up by them.
749 invokePeepholeEPCallbacks(FPM, Level);
750
751 // Re-consider control flow based optimizations after redundancy elimination,
752 // redo DCE, etc.
755
758
759 // Finally, do an expensive DCE pass to catch all the dead code exposed by
760 // the simplifications and basic cleanup after all the simplifications.
761 // TODO: Investigate if this is too expensive.
762 FPM.addPass(ADCEPass());
763
764 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
765 FPM.addPass(MemCpyOptPass());
766
767 FPM.addPass(DSEPass());
769
771 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
772 /*AllowSpeculation=*/true),
773 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
774
775 FPM.addPass(CoroElidePass());
776
778
780 .convertSwitchRangeToICmp(true)
781 .hoistCommonInsts(true)
782 .sinkCommonInsts(true)));
784 invokePeepholeEPCallbacks(FPM, Level);
785
786 return FPM;
787}
788
789void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
792}
793
/// Adds the pre-instrumentation inlining stage to \p MPM.
///
/// As visible here: a small function cleanup pipeline (SROA, EarlyCSE,
/// SimplifyCFG, InstCombine, plus the peephole extension-point callbacks) is
/// nested into MIWP's CGSCC pipeline, MIWP is added to \p MPM, and GlobalDCE
/// then runs so that dead code does not get instrumented.
///
/// \param MPM      Module pass manager that receives the passes.
/// \param Level    Optimization level; must not be O0 (asserted).
/// \param LTOPhase LTO phase of the current compilation.
794void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
795 OptimizationLevel Level,
796 ThinOrFullLTOPhase LTOPhase) {
797 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
799 return;
800 InlineParams IP;
801
803
804 // FIXME: The hint threshold has the same value used by the regular inliner
805 // when not optimizing for size. This should probably be lowered after
806 // performance testing.
807 // FIXME: This comment is cargo-culted from the old pass manager; revisit.
808 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
810 IP, /* MandatoryFirst */ true,
812 CGSCCPassManager &CGPipeline = MIWP.getPM();
813
815 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
816 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
817 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
818 true))); // Merge & remove basic blocks.
819 FPM.addPass(InstCombinePass()); // Combine silly sequences.
820 invokePeepholeEPCallbacks(FPM, Level);
821
822 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
823 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
824
825 MPM.addPass(std::move(MIWP));
826
827 // Delete anything that is now dead to make sure that we don't instrument
828 // dead code. Instrumentation can end up keeping dead code around and
829 // dramatically increase code size.
830 MPM.addPass(GlobalDCEPass());
831}
832
/// Sets up loop rotation for use after PGO instrumentation.
///
/// The LoopRotatePass is constructed with its first argument true when
/// EnableLoopHeaderDuplication is set or \p Level is not Oz, and it is wrapped
/// with MemorySSA and BlockFrequencyInfo both turned off.
///
/// \param MPM   Module pass manager passed in by the caller.
/// \param Level Optimization level; only compared against Oz here.
833void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
834 OptimizationLevel Level) {
836 // Header duplication stays on unless optimizing at -Oz, where it is only
836 // kept when EnableLoopHeaderDuplication is set.
839 LoopRotatePass(EnableLoopHeaderDuplication ||
840 Level != OptimizationLevel::Oz),
841 /*UseMemorySSA=*/false,
842 /*UseBlockFrequencyInfo=*/false),
843 PTO.EagerlyInvalidateAnalyses));
844 }
845}
846
/// Adds either the PGO profile-use pass or the PGO instrumentation passes to
/// \p MPM. Must not be called at O0 (asserted).
///
/// When \p RunProfileGen is false, \p ProfileFile must be non-empty (asserted);
/// the function adds PGOInstrumentationUse, requires ProfileSummaryAnalysis
/// once, and returns. Otherwise it adds PGOInstrumentationGen, the post-PGO
/// loop rotation, and InstrProfilingLoweringPass.
///
/// \param MPM                  Module pass manager that receives the passes.
/// \param Level                Optimization level; must not be O0.
/// \param RunProfileGen        True to instrument, false to consume a profile.
/// \param IsCS                 Forwarded to the use and lowering passes;
///                             selects CSFDO instrumentation and sets
///                             UseBFIInPromotion.
/// \param AtomicCounterUpdate  Copied into InstrProfOptions::Atomic.
/// \param ProfileFile          Profile to read in use mode; in generation mode
///                             it becomes InstrProfileOutput when non-empty.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
847void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
848 OptimizationLevel Level, bool RunProfileGen,
849 bool IsCS, bool AtomicCounterUpdate,
850 std::string ProfileFile,
851 std::string ProfileRemappingFile,
853 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
854
855 if (!RunProfileGen) {
856 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
857 MPM.addPass(
858 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
859 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
860 // RequireAnalysisPass for PSI before subsequent non-module passes.
861 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
862 return;
863 }
864
865 // Perform PGO instrumentation.
866 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
868
869 addPostPGOLoopRotation(MPM, Level);
870 // Add the profile lowering pass.
871 InstrProfOptions Options;
872 if (!ProfileFile.empty())
873 Options.InstrProfileOutput = ProfileFile;
874 // Do counter promotion at Level greater than O0.
875 Options.DoCounterPromotion = true;
876 Options.UseBFIInPromotion = IsCS;
877 if (EnableSampledInstr) {
878 Options.Sampling = true;
879 // With sampling, there is little benefit to enabling counter promotion.
880 // But note that sampling does work with counter promotion.
881 Options.DoCounterPromotion = false;
882 }
883 Options.Atomic = AtomicCounterUpdate;
884 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
885}
886
888 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
889 bool AtomicCounterUpdate, std::string ProfileFile,
890 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
891 if (!RunProfileGen) {
892 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
893 MPM.addPass(
894 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
895 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
896 // RequireAnalysisPass for PSI before subsequent non-module passes.
898 return;
899 }
900
901 // Perform PGO instrumentation.
904 // Add the profile lowering pass.
906 if (!ProfileFile.empty())
907 Options.InstrProfileOutput = ProfileFile;
908 // Do not do counter promotion at O0.
909 Options.DoCounterPromotion = false;
910 Options.UseBFIInPromotion = IsCS;
911 Options.Atomic = AtomicCounterUpdate;
913}
914
916 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
917}
918
922 InlineParams IP;
923 if (PTO.InlinerThreshold == -1)
924 IP = getInlineParamsFromOptLevel(Level);
925 else
926 IP = getInlineParams(PTO.InlinerThreshold);
927 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
928 // set hot-caller threshold to 0 to disable hot
929 // callsite inline (as much as possible [1]) because it makes
930 // profile annotation in the backend inaccurate.
931 //
932 // [1] Note the cost of a function could be below zero due to erased
933 // prologue / epilogue.
934 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
936
937 if (PGOOpt)
939
943
944 // Require the GlobalsAA analysis for the module so we can query it within
945 // the CGSCC pipeline.
947 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
948 // Invalidate AAManager so it can be recreated and pick up the newly
949 // available GlobalsAA.
950 MIWP.addModulePass(
952 }
953
954 // Require the ProfileSummaryAnalysis for the module so we can query it within
955 // the inliner pass.
957
958 // Now begin the main postorder CGSCC pipeline.
959 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
960 // manager and trying to emulate its precise behavior. Much of this doesn't
961 // make a lot of sense and we should revisit the core CGSCC structure.
962 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
963
964 // Note: historically, the PruneEH pass was run first to deduce nounwind and
965 // generally clean up exception handling overhead. It isn't clear this is
966 // valuable as the inliner doesn't currently care whether it is inlining an
967 // invoke or a call.
968
970 MainCGPipeline.addPass(AttributorCGSCCPass());
971
972 // Deduce function attributes. We do another run of this after the function
973 // simplification pipeline, so this only needs to run when it could affect the
974 // function simplification pipeline, which is only the case with recursive
975 // functions.
976 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
977
978 // When at O3 add argument promotion to the pass pipeline.
979 // FIXME: It isn't at all clear why this should be limited to O3.
980 if (Level == OptimizationLevel::O3)
981 MainCGPipeline.addPass(ArgumentPromotionPass());
982
983 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
984 // there are no OpenMP runtime calls present in the module.
985 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
986 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
987
988 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
989
990 // Add the core function simplification pipeline nested inside the
991 // CGSCC walk.
994 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
995
996 // Finally, deduce any function attributes based on the fully simplified
997 // function.
998 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
999
1000 // Mark that the function is fully simplified and that it shouldn't be
1001 // simplified again if we somehow revisit it due to CGSCC mutations unless
1002 // it's been modified since.
1005
1007 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1008 MainCGPipeline.addPass(CoroAnnotationElidePass());
1009 }
1010
1011 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1012 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1014
1015 return MIWP;
1016}
1017
1022
1024 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1025 // set hot-caller threshold to 0 to disable hot
1026 // callsite inline (as much as possible [1]) because it makes
1027 // profile annotation in the backend inaccurate.
1028 //
1029 // [1] Note the cost of a function could be below zero due to erased
1030 // prologue / epilogue.
1031 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1032 IP.HotCallSiteThreshold = 0;
1033
1034 if (PGOOpt)
1036
1037 // The inline deferral logic is used to avoid losing some
1038 // inlining chance in future. It is helpful in SCC inliner, in which
1039 // inlining is processed in bottom-up order.
1040 // While in module inliner, the inlining order is a priority-based order
1041 // by default. The inline deferral is unnecessary there. So we disable the
1042 // inline deferral logic in module inliner.
1043 IP.EnableDeferral = false;
1044
1047 MPM.addPass(GlobalOptPass());
1048 MPM.addPass(GlobalDCEPass());
1049 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1050 }
1051
1054 PTO.EagerlyInvalidateAnalyses));
1055
1059 MPM.addPass(
1061 }
1062
1063 return MPM;
1064}
1065
1069 assert(Level != OptimizationLevel::O0 &&
1070 "Should not be used for O0 pipeline");
1071
1073 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1074
1076
1077 // Place pseudo probe instrumentation as the first pass of the pipeline to
1078 // minimize the impact of optimization changes.
1079 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1082
1083 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1084
1085 // In ThinLTO mode, when flattened profile is used, all the available
1086 // profile information will be annotated in PreLink phase so there is
1087 // no need to load the profile again in PostLink.
1088 bool LoadSampleProfile =
1089 HasSampleProfile &&
1091
1092 // During the ThinLTO backend phase we perform early indirect call promotion
1093 // here, before globalopt. Otherwise imported available_externally functions
1094 // look unreferenced and are removed. If we are going to load the sample
1095 // profile then defer until later.
1096 // TODO: See if we can move later and consolidate with the location where
1097 // we perform ICP when we are loading a sample profile.
1098 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1099 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1100 // determine whether the new direct calls are annotated with prof metadata.
1101 // Ideally this should be determined from whether the IR is annotated with
1102 // sample profile, and not whether a sample profile was provided on the
1103 // command line. E.g. for flattened profiles where we will not be reloading
1104 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1105 // provide the sample profile file.
1106 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1107 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1108
1109 // Create an early function pass manager to cleanup the output of the
1110 // frontend. Not necessary with LTO post link pipelines since the pre link
1111 // pipeline already cleaned up the frontend output.
1113 // Do basic inference of function attributes from known properties of system
1114 // libraries and other oracles.
1116 MPM.addPass(CoroEarlyPass());
1117
1118 FunctionPassManager EarlyFPM;
1119 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1120 // Lower llvm.expect to metadata before attempting transforms.
1121 // Compare/branch metadata may alter the behavior of passes like
1122 // SimplifyCFG.
1124 EarlyFPM.addPass(SimplifyCFGPass());
1126 EarlyFPM.addPass(EarlyCSEPass());
1127 if (Level == OptimizationLevel::O3)
1128 EarlyFPM.addPass(CallSiteSplittingPass());
1130 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1131 }
1132
1133 if (LoadSampleProfile) {
1134 // Annotate sample profile right after early FPM to ensure freshness of
1135 // the debug info.
1136 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1137 PGOOpt->ProfileRemappingFile, Phase));
1138 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1139 // RequireAnalysisPass for PSI before subsequent non-module passes.
1141 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1142 // for the profile annotation to be accurate in the LTO backend.
1143 if (!isLTOPreLink(Phase))
1144 // We perform early indirect call promotion here, before globalopt.
1145 // This is important for the ThinLTO backend phase because otherwise
1146 // imported available_externally functions look unreferenced and are
1147 // removed.
1148 MPM.addPass(
1149 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1150 }
1151
1152 // Try to perform OpenMP specific optimizations on the module. This is a
1153 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1155
1157 MPM.addPass(AttributorPass());
1158
1159 // Lower type metadata and the type.test intrinsic in the ThinLTO
1160 // post link pipeline after ICP. This is to enable usage of the type
1161 // tests in ICP sequences.
1163 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1165
1167
1168 // Interprocedural constant propagation now that basic cleanup has occurred
1169 // and prior to optimizing globals.
1170 // FIXME: This position in the pipeline hasn't been carefully considered in
1171 // years, it should be re-analyzed.
1172 MPM.addPass(IPSCCPPass(
1173 IPSCCPOptions(/*AllowFuncSpec=*/
1174 Level != OptimizationLevel::Os &&
1175 Level != OptimizationLevel::Oz &&
1176 !isLTOPreLink(Phase))));
1177
1178 // Attach metadata to indirect call sites indicating the set of functions
1179 // they may target at run-time. This should follow IPSCCP.
1181
1182 // Optimize globals to try and fold them into constants.
1183 MPM.addPass(GlobalOptPass());
1184
1185 // Create a small function pass pipeline to cleanup after all the global
1186 // optimizations.
1187 FunctionPassManager GlobalCleanupPM;
1188 // FIXME: Should this instead be a run of SROA?
1189 GlobalCleanupPM.addPass(PromotePass());
1190 GlobalCleanupPM.addPass(InstCombinePass());
1191 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1192 GlobalCleanupPM.addPass(
1193 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1194 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1195 PTO.EagerlyInvalidateAnalyses));
1196
1197 // We already asserted this happens in non-FullLTOPostLink earlier.
1198 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1199 // Enable contextual profiling instrumentation.
1200 const bool IsCtxProfGen =
1202 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1203 const bool IsPGOInstrGen =
1204 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1205 const bool IsPGOInstrUse =
1206 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1207 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1208 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1209 // enable ctx profiling from the frontend.
1211 "Enabling both instrumented PGO and contextual instrumentation is not "
1212 "supported.");
1213 const bool IsCtxProfUse =
1215
1216 assert(
1218 "--instrument-cold-function-only-path is provided but "
1219 "--pgo-instrument-cold-function-only is not enabled");
1220 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1221 IsPGOPreLink &&
1223
1224 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1225 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1226 addPreInlinerPasses(MPM, Level, Phase);
1227
1228 // Add all the requested passes for instrumentation PGO, if requested.
1229 if (IsPGOInstrGen || IsPGOInstrUse) {
1230 addPGOInstrPasses(MPM, Level,
1231 /*RunProfileGen=*/IsPGOInstrGen,
1232 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1233 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1234 PGOOpt->FS);
1235 } else if (IsCtxProfGen || IsCtxProfUse) {
1237 // In pre-link, we just want the instrumented IR. We use the contextual
1238 // profile in the post-thinlink phase.
1239 // The instrumentation will be removed in post-thinlink after IPO.
1240 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1241 // mechanism for GUIDs.
1242 MPM.addPass(AssignGUIDPass());
1243 if (IsCtxProfUse) {
1244 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1245 return MPM;
1246 }
1247 // Block further inlining in the instrumented ctxprof case. This avoids
1248 // confusingly collecting profiles for the same GUID corresponding to
1249 // different variants of the function. We could do like PGO and identify
1250 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1251 // thinlto to happen before performing any further optimizations, it's
1252 // unnecessary to collect profiles for non-prevailing copies.
1254 addPostPGOLoopRotation(MPM, Level);
1256 } else if (IsColdFuncOnlyInstrGen) {
1257 addPGOInstrPasses(
1258 MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1259 /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
1260 /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
1261 }
1262
1263 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1264 MPM.addPass(PGOIndirectCallPromotion(false, false));
1265
1266 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1267 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1269
1270 if (IsMemprofUse)
1271 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1272
1273 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1274 PGOOpt->Action == PGOOptions::SampleUse))
1275 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1276
1277 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1278
1281 else
1282 MPM.addPass(buildInlinerPipeline(Level, Phase));
1283
1284 // Remove any dead arguments exposed by cleanups, constant folding globals,
1285 // and argument promotion.
1287
1290
1292 MPM.addPass(CoroCleanupPass());
1293
1294 // Optimize globals now that functions are fully simplified.
1295 MPM.addPass(GlobalOptPass());
1296 MPM.addPass(GlobalDCEPass());
1297
1298 return MPM;
1299}
1300
/// Appends the late, vectorization-related function passes to \p FPM.
///
/// \param Level     Optimization level. Its speedup level is forwarded to the
///                  unrolling passes and, together with ExtraVectorizerPasses,
///                  gates the extra post-vectorization cleanup passes.
/// \param FPM       Function pass manager that receives the passes.
/// \param IsFullLTO Selects between two schedules. When true, the unrolling
///                  block is emitted at the top of the function and
///                  SCCP + InstCombine + BDCE are added before SLP. When false,
///                  InstCombine, unrolling, WarnMissedTransformations and
///                  CFG-preserving SROA are emitted after VectorCombine.
///
/// NOTE(review): this listing skips several source lines (the embedded line
/// numbering has gaps, e.g. 1304-1305, 1317-1318, 1336), so some pass
/// registrations are not visible here; consult the full file before editing.
1301/// TODO: Should LTO cause any differences to this set of passes?
1302void PassBuilder::addVectorPasses(OptimizationLevel Level,
1303 FunctionPassManager &FPM, bool IsFullLTO) {
1306
1308 if (IsFullLTO) {
1309 // The vectorizer may have significantly shortened a loop body; unroll
1310 // again. Unroll small loops to hide loop backedge latency and saturate any
1311 // parallel execution resources of an out-of-order processor. We also then
1312 // need to clean up redundancies and loop invariant code.
1313 // FIXME: It would be really good to use a loop-integrated instruction
1314 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1315 // across the loop nests.
1316 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1319 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1321 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1324 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1325 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1326 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1327 // NOTE: we are very late in the pipeline, and we don't have any LICM
1328 // or SimplifyCFG passes scheduled after us, that would cleanup
1329 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
1331 }
1332
1333 if (!IsFullLTO) {
1334 // Eliminate loads by forwarding stores from the previous iteration to loads
1335 // of the current iteration.
1337 }
1338 // Cleanup after the loop optimization passes.
1339 FPM.addPass(InstCombinePass());
1340
1341 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1342 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1343 // At higher optimization levels, try to clean up any runtime overlap and
1344 // alignment checks inserted by the vectorizer. We want to track correlated
1345 // runtime checks for two inner loops in the same outer loop, fold any
1346 // common computations, hoist loop-invariant aspects out of any outer loop,
1347 // and unswitch the runtime checks if possible. Once hoisted, we may have
1348 // dead (or speculatable) control flows or more combining opportunities.
1349 ExtraPasses.addPass(EarlyCSEPass());
1350 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1351 ExtraPasses.addPass(InstCombinePass());
1352 LoopPassManager LPM;
1353 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1354 /*AllowSpeculation=*/true));
1355 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1357 ExtraPasses.addPass(
1358 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1359 /*UseBlockFrequencyInfo=*/true));
1360 ExtraPasses.addPass(
1361 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1362 ExtraPasses.addPass(InstCombinePass());
1363 FPM.addPass(std::move(ExtraPasses));
1364 }
1365
1366 // Now that we've formed fast to execute loop structures, we do further
1367 // optimizations. These are run afterward as they might block doing complex
1368 // analyses and transforms such as what are needed for loop vectorization.
1369
1370 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1371 // GVN, loop transforms, and others have already run, so it's now better to
1372 // convert to more optimized IR using more aggressive simplify CFG options.
1373 // The extra sinking transform can create larger basic blocks, so do this
1374 // before SLP vectorization.
1375 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1376 .forwardSwitchCondToPhi(true)
1377 .convertSwitchRangeToICmp(true)
1378 .convertSwitchToLookupTable(true)
1379 .needCanonicalLoops(false)
1380 .hoistCommonInsts(true)
1381 .sinkCommonInsts(true)));
1382
1383 if (IsFullLTO) {
1384 FPM.addPass(SCCPPass());
1385 FPM.addPass(InstCombinePass());
1386 FPM.addPass(BDCEPass());
1387 }
1388
1389 // Optimize parallel scalar instruction chains into SIMD instructions.
1390 if (PTO.SLPVectorization) {
1391 FPM.addPass(SLPVectorizerPass());
// Extra EarlyCSE after SLP, under the same gating as the ExtraPasses block
// above (speedup level > 1 and ExtraVectorizerPasses set).
1392 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1393 FPM.addPass(EarlyCSEPass());
1394 }
1395 }
1396 // Enhance/cleanup vector code.
1397 FPM.addPass(VectorCombinePass());
1398
1399 if (!IsFullLTO) {
1400 FPM.addPass(InstCombinePass());
1401 // Unroll small loops to hide loop backedge latency and saturate any
1402 // parallel execution resources of an out-of-order processor. We also then
1403 // need to clean up redundancies and loop invariant code.
1404 // FIXME: It would be really good to use a loop-integrated instruction
1405 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1406 // across the loop nests.
1407 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1408 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1410 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1411 }
1412 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1413 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1414 PTO.ForgetAllSCEVInLoopUnroll)));
1415 FPM.addPass(WarnMissedTransformationsPass());
1416 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1417 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1418 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1419 // NOTE: we are very late in the pipeline, and we don't have any LICM
1420 // or SimplifyCFG passes scheduled after us, that would cleanup
1421 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
1422 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1423 }
1424
1425 FPM.addPass(InferAlignmentPass());
1426 FPM.addPass(InstCombinePass());
1427
1428 // This is needed for two reasons:
1429 // 1. It works around problems that instcombine introduces, such as sinking
1430 // expensive FP divides into loops containing multiplications using the
1431 // divide result.
1432 // 2. It helps to clean up some loop-invariant code created by the loop
1433 // unroll pass when IsFullLTO=false.
1435 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1436 /*AllowSpeculation=*/true),
1437 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1438
1439 // Now that we've vectorized and unrolled loops, we may have more refined
1440 // alignment information, try to re-derive it here.
1441 FPM.addPass(AlignmentFromAssumptionsPass());
1442}
1443
1446 ThinOrFullLTOPhase LTOPhase) {
1447 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1449
1450 // Run partial inlining pass to partially inline functions that have
1451 // large bodies.
1454
1455 // Remove avail extern fns and globals definitions since we aren't compiling
1456 // an object file for later LTO. For LTO we want to preserve these so they
1457 // are eligible for inlining at link-time. Note if they are unreferenced they
1458 // will be removed by GlobalDCE later, so this only impacts referenced
1459 // available externally globals. Eventually they will be suppressed during
1460 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1461 // may make globals referenced by available external functions dead and saves
1462 // running remaining passes on the eliminated functions. These should be
1463 // preserved during prelinking for link-time inlining decisions.
1464 if (!LTOPreLink)
1466
1467 // Do RPO function attribute inference across the module to forward-propagate
1468 // attributes where applicable.
1469 // FIXME: Is this really an optimization rather than a canonicalization?
1471
1472 // Do a post inline PGO instrumentation and use pass. This is a context
1473 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1474 // cross-module inline has not been done yet. The context sensitive
1475 // instrumentation is after all the inlines are done.
1476 if (!LTOPreLink && PGOOpt) {
1477 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1478 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1479 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1480 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1481 PGOOpt->FS);
1482 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1483 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1484 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1485 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1486 PGOOpt->FS);
1487 }
1488
1489 // Re-compute GlobalsAA here prior to function passes. This is particularly
1490 // useful as the above will have inlined, DCE'ed, and function-attr
1491 // propagated everything. We should at this point have a reasonably minimal
1492 // and richly annotated call graph. By computing aliasing and mod/ref
1493 // information for all local globals here, the late loop passes and notably
1494 // the vectorizer will be able to use them to help recognize vectorizable
1495 // memory operations.
1498
1499 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1500
1501 FunctionPassManager OptimizePM;
1502
1503 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1504 // additional uses of the affected value may be introduced through inlining
1505 // and CSE.
1506 if (!isLTOPreLink(LTOPhase))
1507 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1508
1509 // Scheduling LoopVersioningLICM when inlining is over, because after that
1510 // we may see more accurate aliasing. Reason to run this late is that too
1511 // early versioning may prevent further inlining due to increase of code
1512 // size. Other optimizations which run later might benefit from the no-alias
1513 // assumption in the cloned loop.
1515 OptimizePM.addPass(
1517 // LoopVersioningLICM pass might increase new LICM opportunities.
1519 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1520 /*AllowSpeculation=*/true),
1521 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1522 }
1523
1524 OptimizePM.addPass(Float2IntPass());
1526
1527 if (EnableMatrix) {
1528 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1529 OptimizePM.addPass(EarlyCSEPass());
1530 }
1531
1532 // CHR pass should only be applied with the profile information.
1533 // The check is to check the profile summary information in CHR.
1534 if (EnableCHR && Level == OptimizationLevel::O3)
1535 OptimizePM.addPass(ControlHeightReductionPass());
1536
1537 // FIXME: We need to run some loop optimizations to re-rotate loops after
1538 // simplifycfg and others undo their rotation.
1539
1540 // Optimize the loop execution. These passes operate on entire loop nests
1541 // rather than on each loop in an inside-out manner, and so they are actually
1542 // function passes.
1543
1544 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1545
1546 LoopPassManager LPM;
1547 // First rotate loops that may have been un-rotated by prior passes.
1548 // Disable header duplication at -Oz.
1550 Level != OptimizationLevel::Oz,
1551 LTOPreLink));
1552 // Some loops may have become dead by now. Try to delete them.
1553 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1554 // this may need to be revisited once we run GVN before loop deletion
1555 // in the simplification pipeline.
1556 LPM.addPass(LoopDeletionPass());
1557
1558 if (PTO.LoopInterchange)
1559 LPM.addPass(LoopInterchangePass());
1560
1562 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1563
1564 // FIXME: This may not be the right place in the pipeline.
1565 // We need to have the data to support the right place.
1566 if (PTO.LoopFusion)
1567 OptimizePM.addPass(LoopFusePass());
1568
1569 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1570 // into separate loop that would otherwise inhibit vectorization. This is
1571 // currently only performed for loops marked with the metadata
1572 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1573 OptimizePM.addPass(LoopDistributePass());
1574
1575 // Populates the VFABI attribute with the scalar-to-vector mappings
1576 // from the TargetLibraryInfo.
1577 OptimizePM.addPass(InjectTLIMappings());
1578
1579 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1580
1581 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1582
1583 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1584 // canonicalization pass that enables other optimizations. As a result,
1585 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1586 // result too early.
1587 OptimizePM.addPass(LoopSinkPass());
1588
1589 // And finally clean up LCSSA form before generating code.
1590 OptimizePM.addPass(InstSimplifyPass());
1591
1592 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1593 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1594 // flattening of blocks.
1595 OptimizePM.addPass(DivRemPairsPass());
1596
1597 // Try to annotate calls that were created during optimization.
1598 OptimizePM.addPass(
1599 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1600
1601 // LoopSink (and other loop passes since the last simplifyCFG) might have
1602 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1603 OptimizePM.addPass(
1605 .convertSwitchRangeToICmp(true)
1606 .speculateUnpredictables(true)
1607 .hoistLoadsStoresWithCondFaulting(true)));
1608
1609 // Add the core optimizing pipeline.
1610 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1611 PTO.EagerlyInvalidateAnalyses));
1612
1613 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1614
1615 // Split out cold code. Splitting is done late to avoid hiding context from
1616 // other optimizations and inadvertently regressing performance. The tradeoff
1617 // is that this has a higher code size cost than splitting early.
1618 if (EnableHotColdSplit && !LTOPreLink)
1620
1621 // Search the code for similar regions of code. If enough similar regions can
1622 // be found where extracting the regions into their own function will decrease
1623 // the size of the program, we extract the regions, and deduplicate the
1624 // structurally similar regions.
1625 if (EnableIROutliner)
1626 MPM.addPass(IROutlinerPass());
1627
1628 // Now we need to do some global optimization transforms.
1629 // FIXME: It would seem like these should come first in the optimization
1630 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1631 // ordering here.
1632 MPM.addPass(GlobalDCEPass());
1634
1635 // Merge functions if requested. It has a better chance to merge functions
1636 // after ConstantMerge folded jump tables.
1637 if (PTO.MergeFunctions)
1639
1640 if (PTO.CallGraphProfile && !LTOPreLink)
1641 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1642
1643 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1644 if (!LTOPreLink)
1646
1647 return MPM;
1648}
1649
1653 if (Level == OptimizationLevel::O0)
1654 return buildO0DefaultPipeline(Level, Phase);
1655
1657
1658 // Convert @llvm.global.annotations to !annotation metadata.
1660
1661 // Force any function attributes we want the rest of the pipeline to observe.
1663
1664 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1666
1667 // Apply module pipeline start EP callback.
1669
1670 // Add the core simplification pipeline.
1672
1673 // Now add the optimization pipeline.
1675
1676 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1677 PGOOpt->Action == PGOOptions::SampleUse)
1679
1680 // Emit annotation remarks.
1682
1683 if (isLTOPreLink(Phase))
1684 addRequiredLTOPreLinkPasses(MPM);
1685 return MPM;
1686}
1687
1690 bool EmitSummary) {
1692 if (ThinLTO)
1694 else
1696 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1697
1698 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1699 // like removing CFI/WPD related instructions. Note, we reuse
1700 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1701 // in FatLtoCleanup.
1702 MPM.addPass(FatLtoCleanup());
1703
1704 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1705 // object code, only in the bitcode section, so drop it before we run
1706 // module optimization and generate machine code. If llvm.type.test() isn't in
1707 // the IR, this won't do anything.
1708 MPM.addPass(
1710
1711 // Use the ThinLTO post-link pipeline with sample profiling
1712 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1713 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1714 else {
1715 // ModuleSimplification does not run the coroutine passes for
1716 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1717 // builds, otherwise they will miscompile.
1718 if (ThinLTO) {
1719 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1720 // consideration.
1721 CGSCCPassManager CGPM;
1725 MPM.addPass(CoroCleanupPass());
1726 }
1727
1728 // otherwise, just use module optimization
1729 MPM.addPass(
1731 // Emit annotation remarks.
1733 }
1734 return MPM;
1735}
1736
1739 if (Level == OptimizationLevel::O0)
1741
1743
1744 // Convert @llvm.global.annotations to !annotation metadata.
1746
1747 // Force any function attributes we want the rest of the pipeline to observe.
1749
1750 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1752
1753 // Apply module pipeline start EP callback.
1755
1756 // If we are planning to perform ThinLTO later, we don't bloat the code with
1757 // unrolling/vectorization/... now. Just simplify the module as much as we
1758 // can.
1761 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1762 // thinlto use the contextual info to perform imports; then use the contextual
1763 // profile in the post-thinlink phase.
1764 if (!UseCtxProfile.empty()) {
1765 addRequiredLTOPreLinkPasses(MPM);
1766 return MPM;
1767 }
1768
1769 // Run partial inlining pass to partially inline functions that have
1770 // large bodies.
1771 // FIXME: It isn't clear whether this is really the right place to run this
1772 // in ThinLTO. Because there is another canonicalization and simplification
1773 // phase that will run after the thin link, running this here ends up with
1774 // less information than will be available later and it may grow functions in
1775 // ways that aren't beneficial.
1778
1779 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1780 PGOOpt->Action == PGOOptions::SampleUse)
1782
1783 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1784 // optimization is going to be done in PostLink stage, but clang can't add
1785 // callbacks there in case of in-process ThinLTO called by linker.
1790
1791 // Emit annotation remarks.
1793
1794 addRequiredLTOPreLinkPasses(MPM);
1795
1796 return MPM;
1797}
1798
1800 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1802
1803 if (ImportSummary) {
1804 // For ThinLTO we must apply the context disambiguation decisions early, to
1805 // ensure we can correctly match the callsites to summary data.
1808 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1809
1810 // These passes import type identifier resolutions for whole-program
1811 // devirtualization and CFI. They must run early because other passes may
1812 // disturb the specific instruction patterns that these passes look for,
1813 // creating dependencies on resolutions that may not appear in the summary.
1814 //
1815 // For example, GVN may transform the pattern assume(type.test) appearing in
1816 // two basic blocks into assume(phi(type.test, type.test)), which would
1817 // transform a dependency on a WPD resolution into a dependency on a type
1818 // identifier resolution for CFI.
1819 //
1820 // Also, WPD has access to more precise information than ICP and can
1821 // devirtualize more effectively, so it should operate on the IR first.
1822 //
1823 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1824 // metadata and intrinsics.
1825 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1826 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1827 }
1828
1829 if (Level == OptimizationLevel::O0) {
1830 // Run a second time to clean up any type tests left behind by WPD for use
1831 // in ICP.
1832 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1834 // Drop available_externally and unreferenced globals. This is necessary
1835 // with ThinLTO in order to avoid leaving undefined references to dead
1836 // globals in the object file.
1838 MPM.addPass(GlobalDCEPass());
1839 return MPM;
1840 }
1841 if (!UseCtxProfile.empty()) {
1842 MPM.addPass(
1844 } else {
1845 // Add the core simplification pipeline.
1848 }
1849 // Now add the optimization pipeline.
1852
1853 // Emit annotation remarks.
1855
1856 return MPM;
1857}
1858
1861 // FIXME: We should use a customized pre-link pipeline!
1862 return buildPerModuleDefaultPipeline(Level,
1864}
1865
1868 ModuleSummaryIndex *ExportSummary) {
1870
1872
1873 // Create a function that performs CFI checks for cross-DSO calls with targets
1874 // in the current module.
1875 MPM.addPass(CrossDSOCFIPass());
1876
1877 if (Level == OptimizationLevel::O0) {
1878 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1879 // metadata and intrinsics.
1880 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1881 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1882 // Run a second time to clean up any type tests left behind by WPD for use
1883 // in ICP.
1884 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1886
1888
1890
1891 // Emit annotation remarks.
1893
1894 return MPM;
1895 }
1896
1897 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1898 // Load sample profile before running the LTO optimization pipeline.
1899 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1900 PGOOpt->ProfileRemappingFile,
1902 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1903 // RequireAnalysisPass for PSI before subsequent non-module passes.
1905 }
1906
1907 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1909
1910 // Remove unused virtual tables to improve the quality of code generated by
1911 // whole-program devirtualization and bitset lowering.
1912 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1913
1914 // Do basic inference of function attributes from known properties of system
1915 // libraries and other oracles.
1917
1918 if (Level.getSpeedupLevel() > 1) {
1920 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
1921
1922 // Indirect call promotion. This should promote all the targets that are
1923 // left by the earlier promotion pass that promotes intra-module targets.
1924 // This two-step promotion is to save the compile time. For LTO, it should
1925 // produce the same result as if we only do promotion here.
1927 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1928
1929 // Promoting by-reference arguments to by-value exposes more constants to
1930 // IPSCCP.
1931 CGSCCPassManager CGPM;
1934 CGPM.addPass(
1937
1938 // Propagate constants at call sites into the functions they call. This
1939 // opens opportunities for globalopt (and inlining) by substituting function
1940 // pointers passed as arguments to direct uses of functions.
1941 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1942 Level != OptimizationLevel::Os &&
1943 Level != OptimizationLevel::Oz)));
1944
1945 // Attach metadata to indirect call sites indicating the set of functions
1946 // they may target at run-time. This should follow IPSCCP.
1948 }
1949
1950 // Do RPO function attribute inference across the module to forward-propagate
1951 // attributes where applicable.
1952 // FIXME: Is this really an optimization rather than a canonicalization?
1954
1955 // Use in-range annotations on GEP indices to split globals where beneficial.
1956 MPM.addPass(GlobalSplitPass());
1957
1958 // Run whole program optimization of virtual call when the list of callees
1959 // is fixed.
1960 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1961
1962 // Stop here at -O1.
1963 if (Level == OptimizationLevel::O1) {
1964 // The LowerTypeTestsPass needs to run to lower type metadata and the
1965 // type.test intrinsics. The pass does nothing if CFI is disabled.
1966 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1967 // Run a second time to clean up any type tests left behind by WPD for use
1968 // in ICP (which is performed earlier than this in the regular LTO
1969 // pipeline).
1970 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1972
1974
1976
1977 // Emit annotation remarks.
1979
1980 return MPM;
1981 }
1982
1983 // TODO: Skip to match buildCoroWrapper.
1984 MPM.addPass(CoroEarlyPass());
1985
1986 // Optimize globals to try and fold them into constants.
1987 MPM.addPass(GlobalOptPass());
1988
1989 // Promote any localized globals to SSA registers.
1991
1992 // Linking modules together can lead to duplicate global constants; only
1993 // keep one copy of each constant.
1995
1996 // Remove unused arguments from functions.
1998
1999 // Reduce the code after globalopt and ipsccp. Both can open up significant
2000 // simplification opportunities, and both can propagate functions through
2001 // function pointers. When this happens, we often have to resolve varargs
2002 // calls, etc, so let instcombine do this.
2003 FunctionPassManager PeepholeFPM;
2004 PeepholeFPM.addPass(InstCombinePass());
2005 if (Level.getSpeedupLevel() > 1)
2006 PeepholeFPM.addPass(AggressiveInstCombinePass());
2007 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2008
2009 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2010 PTO.EagerlyInvalidateAnalyses));
2011
2012 // Lower variadic functions for supported targets prior to inlining.
2014
2015 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2016 // generally clean up exception handling overhead. It isn't clear this is
2017 // valuable as the inliner doesn't currently care whether it is inlining an
2018 // invoke or a call.
2019 // Run the inliner now.
2020 if (EnableModuleInliner) {
2024 } else {
2027 /* MandatoryFirst */ true,
2030 }
2031
2032 // Perform context disambiguation after inlining, since that would reduce the
2033 // amount of additional cloning required to distinguish the allocation
2034 // contexts.
2037 /*Summary=*/nullptr,
2038 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2039
2040 // Optimize globals again after we ran the inliner.
2041 MPM.addPass(GlobalOptPass());
2042
2043 // Run the OpenMPOpt pass again after global optimizations.
2045
2046 // Garbage collect dead functions.
2047 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2048
2049 // If we didn't decide to inline a function, check to see if we can
2050 // transform it to pass arguments by value instead of by reference.
2051 CGSCCPassManager CGPM;
2056
2058 // The IPO Passes may leave cruft around. Clean up after them.
2059 FPM.addPass(InstCombinePass());
2060 invokePeepholeEPCallbacks(FPM, Level);
2061
2064
2066
2067 // Do a post inline PGO instrumentation and use pass. This is a context
2068 // sensitive PGO pass.
2069 if (PGOOpt) {
2070 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2071 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2072 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2073 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
2074 PGOOpt->FS);
2075 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2076 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2077 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2078 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
2079 PGOOpt->FS);
2080 }
2081
2082 // Break up allocas
2084
2085 // LTO provides additional opportunities for tailcall elimination due to
2086 // link-time inlining, and visibility of nocapture attribute.
2087 FPM.addPass(
2088 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2089
2090 // Run a few AA driver optimizations here and now to cleanup the code.
2091 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2092 PTO.EagerlyInvalidateAnalyses));
2093
2094 MPM.addPass(
2096
2097 // Require the GlobalsAA analysis for the module so we can query it within
2098 // MainFPM.
2101 // Invalidate AAManager so it can be recreated and pick up the newly
2102 // available GlobalsAA.
2103 MPM.addPass(
2105 }
2106
2107 FunctionPassManager MainFPM;
2109 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2110 /*AllowSpeculation=*/true),
2111 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2112
2113 if (RunNewGVN)
2114 MainFPM.addPass(NewGVNPass());
2115 else
2116 MainFPM.addPass(GVNPass());
2117
2118 // Remove dead memcpy()'s.
2119 MainFPM.addPass(MemCpyOptPass());
2120
2121 // Nuke dead stores.
2122 MainFPM.addPass(DSEPass());
2123 MainFPM.addPass(MoveAutoInitPass());
2125
2126 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2127
2128 LoopPassManager LPM;
2129 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2130 LPM.addPass(LoopFlattenPass());
2131 LPM.addPass(IndVarSimplifyPass());
2132 LPM.addPass(LoopDeletionPass());
2133 // FIXME: Add loop interchange.
2134
2135 // Unroll small loops and perform peeling.
2136 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2137 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2138 PTO.ForgetAllSCEVInLoopUnroll));
2139 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2140 // *All* loop passes must preserve it, in order to be able to use it.
2142 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2143
2144 MainFPM.addPass(LoopDistributePass());
2145
2146 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2147
2148 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2149
2150 // Run the OpenMPOpt CGSCC pass again late.
2153
2154 invokePeepholeEPCallbacks(MainFPM, Level);
2155 MainFPM.addPass(JumpThreadingPass());
2156 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2157 PTO.EagerlyInvalidateAnalyses));
2158
2159 // Lower type metadata and the type.test intrinsic. This pass supports
2160 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2161 // to be run at link time if CFI is enabled. This pass does nothing if
2162 // CFI is disabled.
2163 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2164 // Run a second time to clean up any type tests left behind by WPD for use
2165 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2166 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2168
2169 // Enable splitting late in the FullLTO post-link pipeline.
2172
2173 // Add late LTO optimization passes.
2174 FunctionPassManager LateFPM;
2175
2176 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2177 // canonicalization pass that enables other optimizations. As a result,
2178 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2179 // result too early.
2180 LateFPM.addPass(LoopSinkPass());
2181
2182 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2183 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2184 // flattening of blocks.
2185 LateFPM.addPass(DivRemPairsPass());
2186
2187 // Delete basic blocks, which optimization passes may have killed.
2189 .convertSwitchRangeToICmp(true)
2190 .hoistCommonInsts(true)
2191 .speculateUnpredictables(true)));
2192 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2193
2194 // Drop bodies of available externally objects to improve GlobalDCE.
2196
2197 // Now that we have optimized the program, discard unreachable functions.
2198 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2199
2200 if (PTO.MergeFunctions)
2202
2204
2205 if (PTO.CallGraphProfile)
2206 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2207
2208 MPM.addPass(CoroCleanupPass());
2209
2211
2212 // Emit annotation remarks.
2214
2215 return MPM;
2216}
2217
2221 assert(Level == OptimizationLevel::O0 &&
2222 "buildO0DefaultPipeline should only be used with O0");
2223
2225
2226 // Perform pseudo probe instrumentation in O0 mode. This is for the
2227 // consistency between different build modes. For example, a LTO build can be
2228 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2229 // the postlink will require pseudo probe instrumentation in the prelink.
2230 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2232
2233 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2234 PGOOpt->Action == PGOOptions::IRUse))
2236 MPM,
2237 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2238 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2239 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2240
2241 // Instrument function entry and exit before all inlining.
2243 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2244
2246
2247 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2249
2250 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2251 // Explicitly disable sample loader inlining and use flattened profile in O0
2252 // pipeline.
2253 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2254 PGOOpt->ProfileRemappingFile,
2255 ThinOrFullLTOPhase::None, nullptr,
2256 /*DisableSampleProfileInlining=*/true,
2257 /*UseFlattenedProfile=*/true));
2258 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2259 // RequireAnalysisPass for PSI before subsequent non-module passes.
2261 }
2262
2264
2265 // Build a minimal pipeline based on the semantics required by LLVM,
2266 // which is just that always inlining occurs. Further, disable generating
2267 // lifetime intrinsics to avoid enabling further optimizations during
2268 // code generation.
2270 /*InsertLifetimeIntrinsics=*/false));
2271
2272 if (PTO.MergeFunctions)
2274
2275 if (EnableMatrix)
2276 MPM.addPass(
2278
2279 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2280 CGSCCPassManager CGPM;
2282 if (!CGPM.isEmpty())
2284 }
2285 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2286 LoopPassManager LPM;
2288 if (!LPM.isEmpty()) {
2290 createFunctionToLoopPassAdaptor(std::move(LPM))));
2291 }
2292 }
2293 if (!LoopOptimizerEndEPCallbacks.empty()) {
2294 LoopPassManager LPM;
2296 if (!LPM.isEmpty()) {
2298 createFunctionToLoopPassAdaptor(std::move(LPM))));
2299 }
2300 }
2301 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2304 if (!FPM.isEmpty())
2305 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2306 }
2307
2309
2310 if (!VectorizerStartEPCallbacks.empty()) {
2313 if (!FPM.isEmpty())
2314 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2315 }
2316
2317 if (!VectorizerEndEPCallbacks.empty()) {
2320 if (!FPM.isEmpty())
2321 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2322 }
2323
2325
2327
2328 if (isLTOPreLink(Phase))
2329 addRequiredLTOPreLinkPasses(MPM);
2330
2332
2333 return MPM;
2334}
2335
2337 AAManager AA;
2338
2339 // The order in which these are registered determines their priority when
2340 // being queried.
2341
2342 // Add any target-specific alias analyses that should be run early.
2343 if (TM)
2344 TM->registerEarlyDefaultAliasAnalyses(AA);
2345
2346 // First we register the basic alias analysis that provides the majority of
2347 // per-function local AA logic. This is a stateless, on-demand local set of
2348 // AA techniques.
2349 AA.registerFunctionAnalysis<BasicAA>();
2350
2351 // Next we query fast, specialized alias analyses that wrap IR-embedded
2352 // information about aliasing.
2353 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2354 AA.registerFunctionAnalysis<TypeBasedAA>();
2355
2356 // Add support for querying global aliasing information when available.
2357 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2358 // analysis, all that the `AAManager` can do is query for any *cached*
2359 // results from `GlobalsAA` through a readonly proxy.
2361 AA.registerModuleAnalysis<GlobalsAA>();
2362
2363 // Add target-specific alias analyses.
2364 if (TM)
2365 TM->registerDefaultAliasAnalyses(AA);
2366
2367 return AA;
2368}
2369
/// Reports whether this pipeline is being built with an instrumentation-based
/// profile in use. The result is true when either:
///  - PGOOpt is set and its Action is PGOOptions::IRUse, or
///  - the UseCtxProfile option ("use-ctx-profile") holds a non-empty string,
///    i.e. a contextual profile file was specified.
2370bool PassBuilder::isInstrumentedPGOUse() const {
2371 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2372 !UseCtxProfile.empty();
2373}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a.
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
The core GVN pass object.
Definition GVN.h:126
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:444
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Add PGOInstrumentation passes for O0 only.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:81
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:59
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:95
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:69
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:85
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:92
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:73
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:77
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:51
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:62
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:66
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:55
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:417
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:424
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:237
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.