PassBuilderPipelines.cpp
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
148
149using namespace llvm;
150
152 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
153 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
154 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
155 "Heuristics-based inliner version"),
156 clEnumValN(InliningAdvisorMode::Development, "development",
157 "Use development mode (runtime-loadable model)"),
158 clEnumValN(InliningAdvisorMode::Release, "release",
159 "Use release mode (AOT-compiled model)")));
160
161/// Flag to enable inline deferral during PGO.
162static cl::opt<bool>
163 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
165 cl::desc("Enable inline deferral during PGO"));
166
167static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
168 cl::init(false), cl::Hidden,
169 cl::desc("Enable module inliner"));
170
172 "mandatory-inlining-first", cl::init(false), cl::Hidden,
173 cl::desc("Perform mandatory inlinings module-wide, before performing "
174 "inlining"));
175
177 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
178 cl::desc("Eagerly invalidate more analyses in default pipelines"));
179
181 "enable-merge-functions", cl::init(false), cl::Hidden,
182 cl::desc("Enable function merging as part of the optimization pipeline"));
183
185 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
186 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
187
189 "enable-global-analyses", cl::init(true), cl::Hidden,
190 cl::desc("Enable inter-procedural analyses"));
191
192static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
193 cl::init(false), cl::Hidden,
194 cl::desc("Run Partial inlining pass"));
195
197 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
198 cl::desc("Run cleanup optimization passes after vectorization"));
199
200static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
201 cl::desc("Run the NewGVN pass"));
202
203static cl::opt<bool>
204 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
205 cl::desc("Enable the LoopInterchange Pass"));
206
207static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
208 cl::init(false), cl::Hidden,
209 cl::desc("Enable Unroll And Jam Pass"));
210
211static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
213 cl::desc("Enable the LoopFlatten Pass"));
214
215// Experimentally allow loop header duplication. This should allow for better
216// optimization at Oz, since loop-idiom recognition can then recognize things
217// like memcpy. If this ends up being useful for many targets, we should drop
218// this flag and make a code generation option that can be controlled
219 // independently of the opt level and exposed through the frontend.
221 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
222 cl::desc("Enable loop header duplication at any optimization level"));
223
224static cl::opt<bool>
225 EnableDFAJumpThreading("enable-dfa-jump-thread",
226 cl::desc("Enable DFA jump threading"),
227 cl::init(false), cl::Hidden);
228
229static cl::opt<bool>
230 EnableHotColdSplit("hot-cold-split",
231 cl::desc("Enable hot-cold splitting pass"));
232
233static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
235 cl::desc("Enable ir outliner pass"));
236
237static cl::opt<bool>
238 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
239 cl::desc("Disable pre-instrumentation inliner"));
240
242 "preinline-threshold", cl::Hidden, cl::init(75),
243 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244 "(default = 75)"));
245
246static cl::opt<bool>
247 EnableGVNHoist("enable-gvn-hoist",
248 cl::desc("Enable the GVN hoisting pass (default = off)"));
249
250static cl::opt<bool>
251 EnableGVNSink("enable-gvn-sink",
252 cl::desc("Enable the GVN sinking pass (default = off)"));
253
255 "enable-jump-table-to-switch",
256 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257
258// This option is used to simplify testing of SampleFDO optimizations for
259// profile loading.
260static cl::opt<bool>
261 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
262 cl::desc("Enable control height reduction optimization (CHR)"));
263
265 "flattened-profile-used", cl::init(false), cl::Hidden,
266 cl::desc("Indicate the sample profile being used is flattened, i.e., "
267 "no inline hierarchy exists in the profile"));
268
269static cl::opt<bool>
270 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
271 cl::desc("Enable lowering of the matrix intrinsics"));
272
274 "enable-constraint-elimination", cl::init(true), cl::Hidden,
275 cl::desc(
276 "Enable pass to eliminate conditions based on linear constraints"));
277
279 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
280 cl::desc("Enable the attributor inter-procedural deduction pass"),
281 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
282 "enable all attributor runs"),
283 clEnumValN(AttributorRunOption::MODULE, "module",
284 "enable module-wide attributor runs"),
285 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
286 "enable call graph SCC attributor runs"),
287 clEnumValN(AttributorRunOption::NONE, "none",
288 "disable attributor runs")));
289
291 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
292 cl::desc("Enable profile instrumentation sampling (default = off)"));
294 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
295 cl::desc("Enable the experimental Loop Versioning LICM pass"));
296
298 "instrument-cold-function-only-path", cl::init(""),
299 cl::desc("File path for cold function only instrumentation (requires use "
300 "with --pgo-instrument-cold-function-only)"),
301 cl::Hidden);
302
305
306namespace llvm {
308} // namespace llvm
309
311 LoopInterleaving = true;
312 LoopVectorization = true;
313 SLPVectorization = false;
314 LoopUnrolling = true;
319 CallGraphProfile = true;
320 UnifiedLTO = false;
322 InlinerThreshold = -1;
324}
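These defaults can also be overridden programmatically by embedders, without relying on the command-line flags above. A minimal sketch (not part of this file; the chosen field values are purely illustrative) of constructing a PassBuilder with customized tuning options:

#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

static void buildWithCustomTuning(TargetMachine *TM) {
  PipelineTuningOptions PTO;
  PTO.LoopUnrolling = false;   // e.g. keep loops intact for later analysis
  PTO.SLPVectorization = true; // opt into SLP vectorization
  PTO.InlinerThreshold = 225;  // override the -1 "use opt-level default"
  PassBuilder PB(TM, PTO);     // TM may be null for target-independent use
  // ... register analyses and build a pipeline as usual ...
}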
325
326namespace llvm {
328} // namespace llvm
329
331 OptimizationLevel Level) {
332 for (auto &C : PeepholeEPCallbacks)
333 C(FPM, Level);
334}
337 for (auto &C : LateLoopOptimizationsEPCallbacks)
338 C(LPM, Level);
339}
341 OptimizationLevel Level) {
342 for (auto &C : LoopOptimizerEndEPCallbacks)
343 C(LPM, Level);
344}
347 for (auto &C : ScalarOptimizerLateEPCallbacks)
348 C(FPM, Level);
349}
351 OptimizationLevel Level) {
352 for (auto &C : CGSCCOptimizerLateEPCallbacks)
353 C(CGPM, Level);
354}
356 OptimizationLevel Level) {
357 for (auto &C : VectorizerStartEPCallbacks)
358 C(FPM, Level);
359}
361 OptimizationLevel Level) {
362 for (auto &C : VectorizerEndEPCallbacks)
363 C(FPM, Level);
364}
366 OptimizationLevel Level,
368 for (auto &C : OptimizerEarlyEPCallbacks)
369 C(MPM, Level, Phase);
370}
372 OptimizationLevel Level,
374 for (auto &C : OptimizerLastEPCallbacks)
375 C(MPM, Level, Phase);
376}
379 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
380 C(MPM, Level);
381}
384 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
385 C(MPM, Level);
386}
388 OptimizationLevel Level) {
389 for (auto &C : PipelineStartEPCallbacks)
390 C(MPM, Level);
391}
394 for (auto &C : PipelineEarlySimplificationEPCallbacks)
395 C(MPM, Level, Phase);
396}
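The callback lists iterated by the helpers above are populated through PassBuilder's register*EPCallback methods. A small sketch of how a frontend or plugin typically hooks these extension points (the particular passes added here are illustrative, not prescribed by this file):

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

using namespace llvm;

void registerMyExtensions(PassBuilder &PB) {
  // Runs before the rest of the default module pipeline.
  PB.registerPipelineStartEPCallback(
      [](ModulePassManager &MPM, OptimizationLevel Level) {
        MPM.addPass(GlobalDCEPass());
      });
  // Runs at each peephole extension point inside the function pipelines.
  PB.registerPeepholeEPCallback(
      [](FunctionPassManager &FPM, OptimizationLevel Level) {
        if (Level != OptimizationLevel::O0)
          FPM.addPass(InstCombinePass());
      });
}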
397
398// Helper to add AnnotationRemarksPass.
401}
402
403// Helper to check if the current compilation phase is preparing for LTO
407}
408
409// Helper to check if the current compilation phase is LTO backend
413}
414
415// Helper to conditionally wrap the Coro passes.
417 // TODO: Skip passes according to Phase.
418 ModulePassManager CoroPM;
419 CoroPM.addPass(CoroEarlyPass());
420 CGSCCPassManager CGPM;
421 CGPM.addPass(CoroSplitPass());
422 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
423 CoroPM.addPass(CoroCleanupPass());
424 CoroPM.addPass(GlobalDCEPass());
425 return CoroConditionalWrapper(std::move(CoroPM));
426}
427
428// TODO: Investigate the cost/benefit of tail call elimination on debugging.
430PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
432
434
437
438 // Form SSA out of local memory accesses after breaking apart aggregates into
439 // scalars.
441
442 // Catch trivial redundancies
443 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
444
445 // Hoisting of scalars and load expressions.
446 FPM.addPass(
447 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
449
451
452 invokePeepholeEPCallbacks(FPM, Level);
453
454 FPM.addPass(
455 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
456
457 // Form canonically associated expression trees, and simplify the trees using
458 // basic mathematical properties. For example, this will form (nearly)
459 // minimal multiplication trees.
461
462 // Add the primary loop simplification pipeline.
463 // FIXME: Currently this is split into two loop pass pipelines because we run
464 // some function passes in between them. These can and should be removed
465 // and/or replaced by scheduling the loop pass equivalents in the correct
466 // positions. But those equivalent passes aren't powerful enough yet.
467 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
468 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
469 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
470 // `LoopInstSimplify`.
471 LoopPassManager LPM1, LPM2;
472
473 // Simplify the loop body. We do this initially to clean up after other loop
474 // passes run, either when iterating on a loop or on inner loops with
475 // implications on the outer loop.
478
479 // Try to remove as much code from the loop header as possible,
480 // to reduce the amount of IR that will have to be duplicated. However,
481 // do not perform speculative hoisting the first time as LICM
482 // will destroy metadata that may not need to be destroyed if run
483 // after loop rotation.
484 // TODO: Investigate promotion cap for O1.
486 /*AllowSpeculation=*/false));
487
488 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
490 // TODO: Investigate promotion cap for O1.
492 /*AllowSpeculation=*/true));
495 LPM1.addPass(LoopFlattenPass());
496
499
501
503
504 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
505 // because it changes the IR and makes profile annotation in the backend compile
506 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
507 // attributes, so we need to make sure to allow the full unroll pass to pay
508 // attention to it.
509 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
510 PGOOpt->Action != PGOOptions::SampleUse)
511 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
512 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
514
516
517 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
518 /*UseMemorySSA=*/true,
519 /*UseBlockFrequencyInfo=*/true));
520 FPM.addPass(
521 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
523 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
524 // *All* loop passes must preserve it, in order to be able to use it.
525 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
526 /*UseMemorySSA=*/false,
527 /*UseBlockFrequencyInfo=*/false));
528
529 // Delete small arrays after loop unrolling.
531
532 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
533 FPM.addPass(MemCpyOptPass());
534
535 // Sparse conditional constant propagation.
536 // FIXME: It isn't clear why we do this *after* loop passes rather than
537 // before...
538 FPM.addPass(SCCPPass());
539
540 // Delete dead bit computations (instcombine runs after to fold away the dead
541 // computations, and then ADCE will run later to exploit any new DCE
542 // opportunities that creates).
543 FPM.addPass(BDCEPass());
544
545 // Run instcombine after redundancy and dead bit elimination to exploit
546 // opportunities opened up by them.
548 invokePeepholeEPCallbacks(FPM, Level);
549
550 FPM.addPass(CoroElidePass());
551
553
554 // Finally, do an expensive DCE pass to catch all the dead code exposed by
555 // the simplifications and basic cleanup after all the simplifications.
556 // TODO: Investigate if this is too expensive.
557 FPM.addPass(ADCEPass());
558 FPM.addPass(
559 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
561 invokePeepholeEPCallbacks(FPM, Level);
562
563 return FPM;
564}
565
569 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
570
571 // The O1 pipeline has a separate pipeline creation function to simplify
572 // construction readability.
573 if (Level.getSpeedupLevel() == 1)
574 return buildO1FunctionSimplificationPipeline(Level, Phase);
575
577
580
581 // Form SSA out of local memory accesses after breaking apart aggregates into
582 // scalars.
584
585 // Catch trivial redundancies
586 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
589
590 // Hoisting of scalars and load expressions.
591 if (EnableGVNHoist)
592 FPM.addPass(GVNHoistPass());
593
594 // Global value numbering based sinking.
595 if (EnableGVNSink) {
596 FPM.addPass(GVNSinkPass());
597 FPM.addPass(
598 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
599 }
600
601 // Speculative execution if the target has divergent branches; otherwise nop.
602 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
603
604 // Optimize based on known information about branches, and cleanup afterward.
607
608 // Jump table to switch conversion.
611
612 FPM.addPass(
613 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
616
617 if (!Level.isOptimizingForSize())
619
620 invokePeepholeEPCallbacks(FPM, Level);
621
622 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
623 // using the size value profile. Don't perform this when optimizing for size.
624 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
625 !Level.isOptimizingForSize())
627
628 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
629 isInstrumentedPGOUse()));
630 FPM.addPass(
631 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
632
633 // Form canonically associated expression trees, and simplify the trees using
634 // basic mathematical properties. For example, this will form (nearly)
635 // minimal multiplication trees.
637
640
641 // Add the primary loop simplification pipeline.
642 // FIXME: Currently this is split into two loop pass pipelines because we run
643 // some function passes in between them. These can and should be removed
644 // and/or replaced by scheduling the loop pass equivalents in the correct
645 // positions. But those equivalent passes aren't powerful enough yet.
646 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
647 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
648 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
649 // `LoopInstSimplify`.
650 LoopPassManager LPM1, LPM2;
651
652 // Simplify the loop body. We do this initially to clean up after other loop
653 // passes run, either when iterating on a loop or on inner loops with
654 // implications on the outer loop.
657
658 // Try to remove as much code from the loop header as possible,
659 // to reduce the amount of IR that will have to be duplicated. However,
660 // do not perform speculative hoisting the first time as LICM
661 // will destroy metadata that may not need to be destroyed if run
662 // after loop rotation.
663 // TODO: Investigate promotion cap for O1.
665 /*AllowSpeculation=*/false));
666
667 // Disable header duplication in loop rotation at -Oz.
669 Level != OptimizationLevel::Oz,
671 // TODO: Investigate promotion cap for O1.
673 /*AllowSpeculation=*/true));
674 LPM1.addPass(
675 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
677 LPM1.addPass(LoopFlattenPass());
678
681
682 {
684 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
686 LPM2.addPass(std::move(ExtraPasses));
687 }
688
690
692
693 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
694 // because it changes the IR and makes profile annotation in the backend compile
695 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
696 // attributes, so we need to make sure to allow the full unroll pass to pay
697 // attention to it.
698 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
699 PGOOpt->Action != PGOOptions::SampleUse)
700 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
701 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
703
705
706 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
707 /*UseMemorySSA=*/true,
708 /*UseBlockFrequencyInfo=*/true));
709 FPM.addPass(
710 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
712 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
713 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
714 // *All* loop passes must preserve it, in order to be able to use it.
715 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
716 /*UseMemorySSA=*/false,
717 /*UseBlockFrequencyInfo=*/false));
718
719 // Delete small arrays after loop unrolling.
721
722 // Try vectorization/scalarization transforms that are both improvements
723 // themselves and can allow further folds with GVN and InstCombine.
724 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
725
726 // Eliminate redundancies.
728 if (RunNewGVN)
729 FPM.addPass(NewGVNPass());
730 else
731 FPM.addPass(GVNPass());
732
733 // Sparse conditional constant propagation.
734 // FIXME: It isn't clear why we do this *after* loop passes rather than
735 // before...
736 FPM.addPass(SCCPPass());
737
738 // Delete dead bit computations (instcombine runs after to fold away the dead
739 // computations, and then ADCE will run later to exploit any new DCE
740 // opportunities that creates).
741 FPM.addPass(BDCEPass());
742
743 // Run instcombine after redundancy and dead bit elimination to exploit
744 // opportunities opened up by them.
746 invokePeepholeEPCallbacks(FPM, Level);
747
748 // Re-consider control flow based optimizations after redundancy elimination,
749 // redo DCE, etc.
752
755
756 // Finally, do an expensive DCE pass to catch all the dead code exposed by
757 // the simplifications and basic cleanup after all the simplifications.
758 // TODO: Investigate if this is too expensive.
759 FPM.addPass(ADCEPass());
760
761 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
762 FPM.addPass(MemCpyOptPass());
763
764 FPM.addPass(DSEPass());
766
769 /*AllowSpeculation=*/true),
770 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
771
772 FPM.addPass(CoroElidePass());
773
775
777 .convertSwitchRangeToICmp(true)
778 .hoistCommonInsts(true)
779 .sinkCommonInsts(true)));
781 invokePeepholeEPCallbacks(FPM, Level);
782
783 return FPM;
784}
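To see exactly which passes a given configuration produced, the assembled pipeline can be printed in its textual form. A sketch of doing so (not code from this file; it uses an identity name mapping rather than the pretty names provided by PassInstrumentationCallbacks):

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void dumpSimplificationPipeline() {
  PassBuilder PB;
  FunctionPassManager FPM = PB.buildFunctionSimplificationPipeline(
      OptimizationLevel::O2, ThinOrFullLTOPhase::None);
  // Print the textual form of the pipeline that was just assembled.
  FPM.printPipeline(outs(), [](StringRef Name) { return Name; });
  outs() << "\n";
}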
785
786void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
789}
790
791void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
792 OptimizationLevel Level,
793 ThinOrFullLTOPhase LTOPhase) {
794 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
796 return;
797 InlineParams IP;
798
800
801 // FIXME: The hint threshold has the same value used by the regular inliner
802 // when not optimizing for size. This should probably be lowered after
803 // performance testing.
804 // FIXME: this comment is cargo-culted from the old pass manager; revisit.
805 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
807 IP, /* MandatoryFirst */ true,
809 CGSCCPassManager &CGPipeline = MIWP.getPM();
810
813 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
814 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
815 true))); // Merge & remove basic blocks.
816 FPM.addPass(InstCombinePass()); // Combine silly sequences.
817 invokePeepholeEPCallbacks(FPM, Level);
818
819 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
820 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
821
822 MPM.addPass(std::move(MIWP));
823
824 // Delete anything that is now dead to make sure that we don't instrument
825 // dead code. Instrumentation can end up keeping dead code around and
826 // dramatically increase code size.
827 MPM.addPass(GlobalDCEPass());
828}
829
830void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
831 OptimizationLevel Level) {
833 // Disable header duplication in loop rotation at -Oz.
837 Level != OptimizationLevel::Oz),
838 /*UseMemorySSA=*/false,
839 /*UseBlockFrequencyInfo=*/false),
841 }
842}
843
844void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
845 OptimizationLevel Level, bool RunProfileGen,
846 bool IsCS, bool AtomicCounterUpdate,
847 std::string ProfileFile,
848 std::string ProfileRemappingFile,
850 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
851
852 if (!RunProfileGen) {
853 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
854 MPM.addPass(
855 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
856 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
857 // RequireAnalysisPass for PSI before subsequent non-module passes.
859 return;
860 }
861
862 // Perform PGO instrumentation.
865
866 addPostPGOLoopRotation(MPM, Level);
867 // Add the profile lowering pass.
869 if (!ProfileFile.empty())
870 Options.InstrProfileOutput = ProfileFile;
871 // Do counter promotion at Level greater than O0.
872 Options.DoCounterPromotion = true;
873 Options.UseBFIInPromotion = IsCS;
874 if (EnableSampledInstr) {
875 Options.Sampling = true;
876 // With sampling, there is little benefit to enabling counter promotion.
877 // But note that sampling does work with counter promotion.
878 Options.DoCounterPromotion = false;
879 }
880 Options.Atomic = AtomicCounterUpdate;
882}
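For reference, the core of this instrumentation sequence can also be requested through the textual pipeline syntax. A sketch assuming the standard pass names from the pass registry ("pgo-instr-gen" inserts the profiling intrinsics, "instrprof" lowers them to counter updates); it omits the pre-inliner and post-PGO loop rotation that addPGOInstrPasses also schedules:

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"

using namespace llvm;

void buildIRLevelPGOGenPipeline(ModulePassManager &MPM) {
  PassBuilder PB;
  // Roughly the RunProfileGen path above, expressed by pass name.
  cantFail(PB.parsePassPipeline(MPM, "pgo-instr-gen,instrprof"));
}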
883
885 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
886 bool AtomicCounterUpdate, std::string ProfileFile,
887 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
888 if (!RunProfileGen) {
889 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
890 MPM.addPass(
891 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
892 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
893 // RequireAnalysisPass for PSI before subsequent non-module passes.
895 return;
896 }
897
898 // Perform PGO instrumentation.
901 // Add the profile lowering pass.
903 if (!ProfileFile.empty())
904 Options.InstrProfileOutput = ProfileFile;
905 // Do not do counter promotion at O0.
906 Options.DoCounterPromotion = false;
907 Options.UseBFIInPromotion = IsCS;
908 Options.Atomic = AtomicCounterUpdate;
910}
911
913 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
914}
915
919 InlineParams IP;
920 if (PTO.InlinerThreshold == -1)
921 IP = getInlineParamsFromOptLevel(Level);
922 else
924 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
925 // set hot-caller threshold to 0 to disable hot
926 // callsite inline (as much as possible [1]) because it makes
927 // profile annotation in the backend inaccurate.
928 //
929 // [1] Note the cost of a function could be below zero due to erased
930 // prologue / epilogue.
931 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
933
934 if (PGOOpt)
936
940
941 // Require the GlobalsAA analysis for the module so we can query it within
942 // the CGSCC pipeline.
945 // Invalidate AAManager so it can be recreated and pick up the newly
946 // available GlobalsAA.
947 MIWP.addModulePass(
949 }
950
951 // Require the ProfileSummaryAnalysis for the module so we can query it within
952 // the inliner pass.
954
955 // Now begin the main postorder CGSCC pipeline.
956 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
957 // manager and trying to emulate its precise behavior. Much of this doesn't
958 // make a lot of sense and we should revisit the core CGSCC structure.
959 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
960
961 // Note: historically, the PruneEH pass was run first to deduce nounwind and
962 // generally clean up exception handling overhead. It isn't clear this is
963 // valuable as the inliner doesn't currently care whether it is inlining an
964 // invoke or a call.
965
967 MainCGPipeline.addPass(AttributorCGSCCPass());
968
969 // Deduce function attributes. We do another run of this after the function
970 // simplification pipeline, so this only needs to run when it could affect the
971 // function simplification pipeline, which is only the case with recursive
972 // functions.
973 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
974
975 // When at O3 add argument promotion to the pass pipeline.
976 // FIXME: It isn't at all clear why this should be limited to O3.
977 if (Level == OptimizationLevel::O3)
978 MainCGPipeline.addPass(ArgumentPromotionPass());
979
980 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
981 // there are no OpenMP runtime calls present in the module.
982 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
983 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
984
985 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
986
987 // Add the core function simplification pipeline nested inside the
988 // CGSCC walk.
991 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
992
993 // Finally, deduce any function attributes based on the fully simplified
994 // function.
995 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
996
997 // Mark that the function is fully simplified and that it shouldn't be
998 // simplified again if we somehow revisit it due to CGSCC mutations unless
999 // it's been modified since.
1002
1004 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1005 MainCGPipeline.addPass(CoroAnnotationElidePass());
1006 }
1007
1008 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1009 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1011
1012 return MIWP;
1013}
1014
1019
1021 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1022 // set hot-caller threshold to 0 to disable hot
1023 // callsite inline (as much as possible [1]) because it makes
1024 // profile annotation in the backend inaccurate.
1025 //
1026 // [1] Note the cost of a function could be below zero due to erased
1027 // prologue / epilogue.
1028 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1029 IP.HotCallSiteThreshold = 0;
1030
1031 if (PGOOpt)
1033
1034 // The inline deferral logic is used to avoid losing some
1035 // inlining opportunities in the future. It is helpful in the SCC inliner,
1036 // where inlining is processed in bottom-up order.
1037 // In the module inliner, the inlining order is priority-based
1038 // by default, so inline deferral is unnecessary there. We therefore
1039 // disable the inline deferral logic in the module inliner.
1040 IP.EnableDeferral = false;
1041
1044 MPM.addPass(GlobalOptPass());
1045 MPM.addPass(GlobalDCEPass());
1046 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1047 }
1048
1052
1056 MPM.addPass(
1058 }
1059
1060 return MPM;
1061}
1062
1066 assert(Level != OptimizationLevel::O0 &&
1067 "Should not be used for O0 pipeline");
1068
1070 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1071
1073
1074 // Place pseudo probe instrumentation as the first pass of the pipeline to
1075 // minimize the impact of optimization changes.
1076 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1079
1080 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1081
1082 // In ThinLTO mode, when flattened profile is used, all the available
1083 // profile information will be annotated in PreLink phase so there is
1084 // no need to load the profile again in PostLink.
1085 bool LoadSampleProfile =
1086 HasSampleProfile &&
1088
1089 // During the ThinLTO backend phase we perform early indirect call promotion
1090 // here, before globalopt. Otherwise imported available_externally functions
1091 // look unreferenced and are removed. If we are going to load the sample
1092 // profile then defer until later.
1093 // TODO: See if we can move later and consolidate with the location where
1094 // we perform ICP when we are loading a sample profile.
1095 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1096 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1097 // determine whether the new direct calls are annotated with prof metadata.
1098 // Ideally this should be determined from whether the IR is annotated with
1099 // sample profile, and not whether a sample profile was provided on the
1100 // command line. E.g. for flattened profiles where we will not be reloading
1101 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1102 // provide the sample profile file.
1103 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1104 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1105
1106 // Create an early function pass manager to cleanup the output of the
1107 // frontend. Not necessary with LTO post link pipelines since the pre link
1108 // pipeline already cleaned up the frontend output.
1110 // Do basic inference of function attributes from known properties of system
1111 // libraries and other oracles.
1113 MPM.addPass(CoroEarlyPass());
1114
1115 FunctionPassManager EarlyFPM;
1116 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1117 // Lower llvm.expect to metadata before attempting transforms.
1118 // Compare/branch metadata may alter the behavior of passes like
1119 // SimplifyCFG.
1121 EarlyFPM.addPass(SimplifyCFGPass());
1123 EarlyFPM.addPass(EarlyCSEPass());
1124 if (Level == OptimizationLevel::O3)
1125 EarlyFPM.addPass(CallSiteSplittingPass());
1127 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1128 }
1129
1130 if (LoadSampleProfile) {
1131 // Annotate sample profile right after early FPM to ensure freshness of
1132 // the debug info.
1133 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1134 PGOOpt->ProfileRemappingFile, Phase));
1135 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1136 // RequireAnalysisPass for PSI before subsequent non-module passes.
1138 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1139 // for the profile annotation to be accurate in the LTO backend.
1140 if (!isLTOPreLink(Phase))
1141 // We perform early indirect call promotion here, before globalopt.
1142 // This is important for the ThinLTO backend phase because otherwise
1143 // imported available_externally functions look unreferenced and are
1144 // removed.
1145 MPM.addPass(
1146 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1147 }
1148
1149 // Try to perform OpenMP specific optimizations on the module. This is a
1150 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1152
1154 MPM.addPass(AttributorPass());
1155
1156 // Lower type metadata and the type.test intrinsic in the ThinLTO
1157 // post link pipeline after ICP. This is to enable usage of the type
1158 // tests in ICP sequences.
1160 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1162
1164
1165 // Interprocedural constant propagation now that basic cleanup has occurred
1166 // and prior to optimizing globals.
1167 // FIXME: This position in the pipeline hasn't been carefully considered in
1168 // years, it should be re-analyzed.
1169 MPM.addPass(IPSCCPPass(
1170 IPSCCPOptions(/*AllowFuncSpec=*/
1171 Level != OptimizationLevel::Os &&
1172 Level != OptimizationLevel::Oz &&
1173 !isLTOPreLink(Phase))));
1174
1175 // Attach metadata to indirect call sites indicating the set of functions
1176 // they may target at run-time. This should follow IPSCCP.
1178
1179 // Optimize globals to try and fold them into constants.
1180 MPM.addPass(GlobalOptPass());
1181
1182 // Create a small function pass pipeline to cleanup after all the global
1183 // optimizations.
1184 FunctionPassManager GlobalCleanupPM;
1185 // FIXME: Should this instead be a run of SROA?
1186 GlobalCleanupPM.addPass(PromotePass());
1187 GlobalCleanupPM.addPass(InstCombinePass());
1188 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1189 GlobalCleanupPM.addPass(
1190 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1191 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1193
1194 // We already asserted this happens in non-FullLTOPostLink earlier.
1195 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1196 // Enable contextual profiling instrumentation.
1197 const bool IsCtxProfGen =
1199 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1200 const bool IsPGOInstrGen =
1201 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1202 const bool IsPGOInstrUse =
1203 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1204 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1205 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1206 // enable ctx profiling from the frontend.
1208 "Enabling both instrumented PGO and contextual instrumentation is not "
1209 "supported.");
1210 const bool IsCtxProfUse =
1212
1213 assert(
1215 "--instrument-cold-function-only-path is provided but "
1216 "--pgo-instrument-cold-function-only is not enabled");
1217 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1218 IsPGOPreLink &&
1220
1221 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1222 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1223 addPreInlinerPasses(MPM, Level, Phase);
1224
1225 // Add all the requested passes for instrumentation PGO, if requested.
1226 if (IsPGOInstrGen || IsPGOInstrUse) {
1227 addPGOInstrPasses(MPM, Level,
1228 /*RunProfileGen=*/IsPGOInstrGen,
1229 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1230 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1231 PGOOpt->FS);
1232 } else if (IsCtxProfGen || IsCtxProfUse) {
1234 // In pre-link, we just want the instrumented IR. We use the contextual
1235 // profile in the post-thinlink phase.
1236 // The instrumentation will be removed in post-thinlink after IPO.
1237 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1238 // mechanism for GUIDs.
1239 MPM.addPass(AssignGUIDPass());
1240 if (IsCtxProfUse) {
1241 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1242 return MPM;
1243 }
1244 // Block further inlining in the instrumented ctxprof case. This avoids
1245 // confusingly collecting profiles for the same GUID corresponding to
1246 // different variants of the function. We could do like PGO and identify
1247 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1248 // thinlto to happen before performing any further optimizations, it's
1249 // unnecessary to collect profiles for non-prevailing copies.
1251 addPostPGOLoopRotation(MPM, Level);
1253 } else if (IsColdFuncOnlyInstrGen) {
1254 addPGOInstrPasses(
1255 MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1256 /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
1257 /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
1258 }
1259
1260 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1261 MPM.addPass(PGOIndirectCallPromotion(false, false));
1262
1263 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1264 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1266
1267 if (IsMemprofUse)
1268 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1269
1270 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1271 PGOOpt->Action == PGOOptions::SampleUse))
1272 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1273
1274 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1275
1278 else
1279 MPM.addPass(buildInlinerPipeline(Level, Phase));
1280
1281 // Remove any dead arguments exposed by cleanups, constant folding globals,
1282 // and argument promotion.
1284
1287
1289 MPM.addPass(CoroCleanupPass());
1290
1291 // Optimize globals now that functions are fully simplified.
1292 MPM.addPass(GlobalOptPass());
1293 MPM.addPass(GlobalDCEPass());
1294
1295 return MPM;
1296}
1297
1298/// TODO: Should LTO cause any differences to this set of passes?
1299void PassBuilder::addVectorPasses(OptimizationLevel Level,
1300 FunctionPassManager &FPM, bool IsFullLTO) {
1303
1305 if (IsFullLTO) {
1306 // The vectorizer may have significantly shortened a loop body; unroll
1307 // again. Unroll small loops to hide loop backedge latency and saturate any
1308 // parallel execution resources of an out-of-order processor. We also then
1309 // need to clean up redundancies and loop invariant code.
1310 // FIXME: It would be really good to use a loop-integrated instruction
1311 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1312 // across the loop nests.
1313 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1316 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1318 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1321 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1322 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1323 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1324 // NOTE: we are very late in the pipeline, and we don't have any LICM
1325 // or SimplifyCFG passes scheduled after us that would clean up
1326 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1328 }
1329
1330 if (!IsFullLTO) {
1331 // Eliminate loads by forwarding stores from the previous iteration to loads
1332 // of the current iteration.
1334 }
1335 // Cleanup after the loop optimization passes.
1336 FPM.addPass(InstCombinePass());
1337
1338 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1340 // At higher optimization levels, try to clean up any runtime overlap and
1341 // alignment checks inserted by the vectorizer. We want to track correlated
1342 // runtime checks for two inner loops in the same outer loop, fold any
1343 // common computations, hoist loop-invariant aspects out of any outer loop,
1344 // and unswitch the runtime checks if possible. Once hoisted, we may have
1345 // dead (or speculatable) control flows or more combining opportunities.
1346 ExtraPasses.addPass(EarlyCSEPass());
1348 ExtraPasses.addPass(InstCombinePass());
1349 LoopPassManager LPM;
1351 /*AllowSpeculation=*/true));
1352 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1354 ExtraPasses.addPass(
1355 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1356 /*UseBlockFrequencyInfo=*/true));
1357 ExtraPasses.addPass(
1358 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1359 ExtraPasses.addPass(InstCombinePass());
1360 FPM.addPass(std::move(ExtraPasses));
1361 }
1362
1363 // Now that we've formed fast-to-execute loop structures, we do further
1364 // optimizations. These are run afterward as they might block doing complex
1365 // analyses and transforms such as those needed for loop vectorization.
1366
1367 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1368 // GVN, loop transforms, and others have already run, so it's now better to
1369 // convert to more optimized IR using more aggressive simplify CFG options.
1370 // The extra sinking transform can create larger basic blocks, so do this
1371 // before SLP vectorization.
1373 .forwardSwitchCondToPhi(true)
1374 .convertSwitchRangeToICmp(true)
1375 .convertSwitchToLookupTable(true)
1376 .needCanonicalLoops(false)
1377 .hoistCommonInsts(true)
1378 .sinkCommonInsts(true)));
1379
1380 if (IsFullLTO) {
1381 FPM.addPass(SCCPPass());
1382 FPM.addPass(InstCombinePass());
1383 FPM.addPass(BDCEPass());
1384 }
1385
1386 // Optimize parallel scalar instruction chains into SIMD instructions.
1387 if (PTO.SLPVectorization) {
1389 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1390 FPM.addPass(EarlyCSEPass());
1391 }
1392 }
1393 // Enhance/cleanup vector code.
1395
1396 if (!IsFullLTO) {
1397 FPM.addPass(InstCombinePass());
1398 // Unroll small loops to hide loop backedge latency and saturate any
1399 // parallel execution resources of an out-of-order processor. We also then
1400 // need to clean up redundancies and loop invariant code.
1401 // FIXME: It would be really good to use a loop-integrated instruction
1402 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1403 // across the loop nests.
1404 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1405 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1407 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1408 }
1410 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1413 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1414 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1415 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1416 // NOTE: we are very late in the pipeline, and we don't have any LICM
1417 // or SimplifyCFG passes scheduled after us that would clean up
1418 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1420 }
1421
1423 FPM.addPass(InstCombinePass());
1424
1425 // This is needed for two reasons:
1426 // 1. It works around problems that instcombine introduces, such as sinking
1427 // expensive FP divides into loops containing multiplications using the
1428 // divide result.
1429 // 2. It helps to clean up some loop-invariant code created by the loop
1430 // unroll pass when IsFullLTO=false.
1433 /*AllowSpeculation=*/true),
1434 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1435
1436 // Now that we've vectorized and unrolled loops, we may have more refined
1437 // alignment information, try to re-derive it here.
1439}
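The vectorization-related passes scheduled above are individually addressable by name, which is convenient when bisecting this part of the pipeline. A sketch (not from this file; the pass list is an illustrative slice, not the full sequence addVectorPasses builds):

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"

using namespace llvm;

void buildVectorizeSlicePipeline(ModulePassManager &MPM) {
  PassBuilder PB;
  cantFail(PB.parsePassPipeline(
      MPM,
      "function(loop-vectorize,instcombine,slp-vectorizer,vector-combine)"));
}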
1440
1443 ThinOrFullLTOPhase LTOPhase) {
1444 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1446
1447 // Run partial inlining pass to partially inline functions that have
1448 // large bodies.
1451
1452 // Remove avail extern fns and globals definitions since we aren't compiling
1453 // an object file for later LTO. For LTO we want to preserve these so they
1454 // are eligible for inlining at link-time. Note if they are unreferenced they
1455 // will be removed by GlobalDCE later, so this only impacts referenced
1456 // available externally globals. Eventually they will be suppressed during
1457 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1458 // may make globals referenced by available external functions dead and saves
1459 // running remaining passes on the eliminated functions. These should be
1460 // preserved during prelinking for link-time inlining decisions.
1461 if (!LTOPreLink)
1463
1464 // Do RPO function attribute inference across the module to forward-propagate
1465 // attributes where applicable.
1466 // FIXME: Is this really an optimization rather than a canonicalization?
1468
1469 // Do a post inline PGO instrumentation and use pass. This is a context
1470 // sensitive PGO pass. We don't want to do this in the LTOPreLink phase, as
1471 // cross-module inlining has not been done yet. The context sensitive
1472 // instrumentation is after all the inlines are done.
1473 if (!LTOPreLink && PGOOpt) {
1474 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1475 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1476 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1477 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1478 PGOOpt->FS);
1479 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1480 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1481 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1482 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1483 PGOOpt->FS);
1484 }
1485
1486 // Re-compute GlobalsAA here prior to function passes. This is particularly
1487 // useful as the above will have inlined, DCE'ed, and function-attr
1488 // propagated everything. We should at this point have a reasonably minimal
1489 // and richly annotated call graph. By computing aliasing and mod/ref
1490 // information for all local globals here, the late loop passes and notably
1491 // the vectorizer will be able to use them to help recognize vectorizable
1492 // memory operations.
1495
1496 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1497
1498 FunctionPassManager OptimizePM;
1499 // Schedule LoopVersioningLICM once inlining is over, because after that
1500 // we may see more accurate aliasing. The reason to run this late is that
1501 // versioning too early may prevent further inlining due to the increase in
1502 // code size. Other optimizations which run later may benefit from the
1503 // no-alias assumption in the cloned loop.
1505 OptimizePM.addPass(
1507 // LoopVersioningLICM pass might increase new LICM opportunities.
1510 /*AllowSpeculation=*/true),
1511 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1512 }
1513
1514 OptimizePM.addPass(Float2IntPass());
1516
1517 if (EnableMatrix) {
1518 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1519 OptimizePM.addPass(EarlyCSEPass());
1520 }
1521
1522 // The CHR pass should only be applied when profile information is available;
1523 // the check of the profile summary information is done within CHR.
1524 if (EnableCHR && Level == OptimizationLevel::O3)
1525 OptimizePM.addPass(ControlHeightReductionPass());
1526
1527 // FIXME: We need to run some loop optimizations to re-rotate loops after
1528 // simplifycfg and others undo their rotation.
1529
1530 // Optimize the loop execution. These passes operate on entire loop nests
1531 // rather than on each loop in an inside-out manner, and so they are actually
1532 // function passes.
1533
1534 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1535
1536 LoopPassManager LPM;
1537 // First rotate loops that may have been un-rotated by prior passes.
1538 // Disable header duplication at -Oz.
1540 Level != OptimizationLevel::Oz,
1541 LTOPreLink));
1542 // Some loops may have become dead by now. Try to delete them.
1543 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1544 // this may need to be revisited once we run GVN before loop deletion
1545 // in the simplification pipeline.
1547
1548 if (PTO.LoopInterchange)
1550
1552 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1553
1554 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1555 // into separate loop that would otherwise inhibit vectorization. This is
1556 // currently only performed for loops marked with the metadata
1557 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1558 OptimizePM.addPass(LoopDistributePass());
1559
1560 // Populates the VFABI attribute with the scalar-to-vector mappings
1561 // from the TargetLibraryInfo.
1562 OptimizePM.addPass(InjectTLIMappings());
1563
1564 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1565
1566 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1567
1568 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1569 // canonicalization pass that enables other optimizations. As a result,
1570 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1571 // result too early.
1572 OptimizePM.addPass(LoopSinkPass());
1573
1574 // And finally clean up LCSSA form before generating code.
1575 OptimizePM.addPass(InstSimplifyPass());
1576
1577 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1578 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1579 // flattening of blocks.
1580 OptimizePM.addPass(DivRemPairsPass());
1581
1582 // Try to annotate calls that were created during optimization.
1583 OptimizePM.addPass(
1584 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1585
1586 // LoopSink (and other loop passes since the last simplifyCFG) might have
1587 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1588 OptimizePM.addPass(
1590 .convertSwitchRangeToICmp(true)
1591 .speculateUnpredictables(true)
1592 .hoistLoadsStoresWithCondFaulting(true)));
1593
1594 // Add the core optimizing pipeline.
1595 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1597
1598 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1599
1600 // Split out cold code. Splitting is done late to avoid hiding context from
1601 // other optimizations and inadvertently regressing performance. The tradeoff
1602 // is that this has a higher code size cost than splitting early.
1603 if (EnableHotColdSplit && !LTOPreLink)
1605
1606 // Search the code for similar regions. If enough similar regions can
1607 // be found where extracting the regions into their own function will decrease
1608 // the size of the program, we extract the regions and deduplicate the
1609 // structurally similar regions.
1610 if (EnableIROutliner)
1611 MPM.addPass(IROutlinerPass());
1612
1613 // Now we need to do some global optimization transforms.
1614 // FIXME: It would seem like these should come first in the optimization
1615 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1616 // ordering here.
1617 MPM.addPass(GlobalDCEPass());
1619
1620 // Merge functions if requested. It has a better chance to merge functions
1621 // after ConstantMerge folded jump tables.
1622 if (PTO.MergeFunctions)
1624
1625 if (PTO.CallGraphProfile && !LTOPreLink)
1626 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1627
1628 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1629 if (!LTOPreLink)
1631
1632 return MPM;
1633}
1634
1638 if (Level == OptimizationLevel::O0)
1639 return buildO0DefaultPipeline(Level, Phase);
1640
1642
1643 // Convert @llvm.global.annotations to !annotation metadata.
1645
1646 // Force any function attributes we want the rest of the pipeline to observe.
1648
1649 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1651
1652 // Apply module pipeline start EP callback.
1654
1655 // Add the core simplification pipeline.
1657
1658 // Now add the optimization pipeline.
1660
1661 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1662 PGOOpt->Action == PGOOptions::SampleUse)
1664
1665 // Emit annotation remarks.
1667
1668 if (isLTOPreLink(Phase))
1669 addRequiredLTOPreLinkPasses(MPM);
1670 return MPM;
1671}
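Putting it together, a typical embedder sets up the analysis managers, asks for the default pipeline built above, and runs it over a module. A minimal sketch of that flow (standard new-pass-manager boilerplate, not code from this file):

#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

void optimizeModule(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM = PB.buildPerModuleDefaultPipeline(
      OptimizationLevel::O2, ThinOrFullLTOPhase::None);
  MPM.run(M, MAM);
}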
1672
1675 bool EmitSummary) {
1677 if (ThinLTO)
1679 else
1681 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1682
1683 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1684 // like removing CFI/WPD related instructions. Note, we reuse
1685 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1686 // in FatLtoCleanup.
1687 MPM.addPass(FatLtoCleanup());
1688
1689 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1690 // object code, only in the bitcode section, so drop it before we run
1691 // module optimization and generate machine code. If llvm.type.test() isn't in
1692 // the IR, this won't do anything.
1693 MPM.addPass(
1695
1696 // Use the ThinLTO post-link pipeline with sample profiling
1697 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1698 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1699 else {
1700 // ModuleSimplification does not run the coroutine passes for
1701 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1702 // builds, otherwise they will miscompile.
1703 if (ThinLTO) {
1704 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1705 // consideration.
1706 CGSCCPassManager CGPM;
1710 MPM.addPass(CoroCleanupPass());
1711 }
1712
1713 // otherwise, just use module optimization
1714 MPM.addPass(
1716 // Emit annotation remarks.
1718 }
1719 return MPM;
1720}
1721
1724 if (Level == OptimizationLevel::O0)
1726
1728
1729 // Convert @llvm.global.annotations to !annotation metadata.
1731
1732 // Force any function attributes we want the rest of the pipeline to observe.
1734
1735 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1737
1738 // Apply module pipeline start EP callback.
1740
1741 // If we are planning to perform ThinLTO later, we don't bloat the code with
1742 // unrolling/vectorization/... now. Just simplify the module as much as we
1743 // can.
1746 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1747 // thinlto use the contextual info to perform imports; then use the contextual
1748 // profile in the post-thinlink phase.
1749 if (!UseCtxProfile.empty()) {
1750 addRequiredLTOPreLinkPasses(MPM);
1751 return MPM;
1752 }
1753
1754 // Run partial inlining pass to partially inline functions that have
1755 // large bodies.
1756 // FIXME: It isn't clear whether this is really the right place to run this
1757 // in ThinLTO. Because there is another canonicalization and simplification
1758 // phase that will run after the thin link, running this here ends up with
1759 // less information than will be available later and it may grow functions in
1760 // ways that aren't beneficial.
1763
1764 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1765 PGOOpt->Action == PGOOptions::SampleUse)
1767
1768 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1769 // optimization is going to be done in PostLink stage, but clang can't add
1770 // callbacks there in case of in-process ThinLTO called by linker.
1775
1776 // Emit annotation remarks.
1778
1779 addRequiredLTOPreLinkPasses(MPM);
1780
1781 return MPM;
1782}
1783
1785 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1787
1788 if (ImportSummary) {
1789 // For ThinLTO we must apply the context disambiguation decisions early, to
1790 // ensure we can correctly match the callsites to summary data.
1793 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1794
1795 // These passes import type identifier resolutions for whole-program
1796 // devirtualization and CFI. They must run early because other passes may
1797 // disturb the specific instruction patterns that these passes look for,
1798 // creating dependencies on resolutions that may not appear in the summary.
1799 //
1800 // For example, GVN may transform the pattern assume(type.test) appearing in
1801 // two basic blocks into assume(phi(type.test, type.test)), which would
1802 // transform a dependency on a WPD resolution into a dependency on a type
1803 // identifier resolution for CFI.
1804 //
1805 // Also, WPD has access to more precise information than ICP and can
1806 // devirtualize more effectively, so it should operate on the IR first.
1807 //
1808 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1809 // metadata and intrinsics.
1810 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1811 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1812 }
1813
1814 if (Level == OptimizationLevel::O0) {
1815 // Run a second time to clean up any type tests left behind by WPD for use
1816 // in ICP.
1817 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1819 // Drop available_externally and unreferenced globals. This is necessary
1820 // with ThinLTO in order to avoid leaving undefined references to dead
1821 // globals in the object file.
1823 MPM.addPass(GlobalDCEPass());
1824 return MPM;
1825 }
1826 if (!UseCtxProfile.empty()) {
1827 MPM.addPass(
1829 } else {
1830 // Add the core simplification pipeline.
1833 }
1834 // Now add the optimization pipeline.
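// A hedged sketch of the elided call: the optimization half of the pipeline
// for the ThinLTO post-link phase:
MPM.addPass(buildModuleOptimizationPipeline(
    Level, ThinOrFullLTOPhase::ThinLTOPostLink));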
1837
1838 // Emit annotation remarks.
1840
1841 return MPM;
1842}
1843
1844 ModulePassManager
1845 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1846 // FIXME: We should use a customized pre-link pipeline!
1847 return buildPerModuleDefaultPipeline(Level,
1848 ThinOrFullLTOPhase::FullLTOPreLink);
1849}
1850
1851 ModulePassManager
1852 PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1853 ModuleSummaryIndex *ExportSummary) {
1855
1857
1858 // Create a function that performs CFI checks for cross-DSO calls with targets
1859 // in the current module.
1860 MPM.addPass(CrossDSOCFIPass());
1861
1862 if (Level == OptimizationLevel::O0) {
1863 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1864 // metadata and intrinsics.
1865 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1866 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1867 // Run a second time to clean up any type tests left behind by WPD for use
1868 // in ICP.
1869 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1871
1873
1875
1876 // Emit annotation remarks.
1878
1879 return MPM;
1880 }
1881
1882 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1883 // Load sample profile before running the LTO optimization pipeline.
1884 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1885 PGOOpt->ProfileRemappingFile,
1887 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1888 // RequireAnalysisPass for PSI before subsequent non-module passes.
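// A hedged sketch, assuming the usual require-analysis idiom for caching PSI:
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());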
1890 }
1891
1892 // Try to run OpenMP optimizations; this is a quick no-op if no OpenMP metadata is present.
1894
1895 // Remove unused virtual tables to improve the quality of code generated by
1896 // whole-program devirtualization and bitset lowering.
1897 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1898
1899 // Do basic inference of function attributes from known properties of system
1900 // libraries and other oracles.
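// Presumably the attribute-inference pass, as the comment suggests:
MPM.addPass(InferFunctionAttrsPass());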
1902
1903 if (Level.getSpeedupLevel() > 1) {
1906
1907 // Indirect call promotion. This should promote all the targets that are
1908 // left by the earlier promotion pass that promotes intra-module targets.
1909 // This two-step promotion saves compile time. For LTO, it should
1910 // produce the same result as if we only did promotion here.
1912 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1913
1914 // Promoting by-reference arguments to by-value exposes more constants to
1915 // IPSCCP.
1916 CGSCCPassManager CGPM;
1919 CGPM.addPass(
1922
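// A hedged guess at the elided CGSCC block: argument promotion run over the
// call graph, then the SCC pipeline added to the module pipeline:
CGPM.addPass(ArgumentPromotionPass());
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));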
1923 // Propagate constants at call sites into the functions they call. This
1924 // opens opportunities for globalopt (and inlining) by turning function
1925 // pointers passed as arguments into direct uses of functions.
1926 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1927 Level != OptimizationLevel::Os &&
1928 Level != OptimizationLevel::Oz)));
1929
1930 // Attach metadata to indirect call sites indicating the set of functions
1931 // they may target at run-time. This should follow IPSCCP.
1933 }
1934
1935 // Do RPO function attribute inference across the module to forward-propagate
1936 // attributes where applicable.
1937 // FIXME: Is this really an optimization rather than a canonicalization?
1939
1940 // Use in-range annotations on GEP indices to split globals where beneficial.
1941 MPM.addPass(GlobalSplitPass());
1942
1943 // Run whole-program optimization of virtual calls when the list of callees
1944 // is fixed.
1945 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1946
1947 // Stop here at -O1.
1948 if (Level == OptimizationLevel::O1) {
1949 // The LowerTypeTestsPass needs to run to lower type metadata and the
1950 // type.test intrinsics. The pass does nothing if CFI is disabled.
1951 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1952 // Run a second time to clean up any type tests left behind by WPD for use
1953 // in ICP (which is performed earlier than this in the regular LTO
1954 // pipeline).
1955 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1957
1959
1961
1962 // Emit annotation remarks.
1964
1965 return MPM;
1966 }
1967
1968 // TODO: Skip to match buildCoroWrapper.
1969 MPM.addPass(CoroEarlyPass());
1970
1971 // Optimize globals to try and fold them into constants.
1972 MPM.addPass(GlobalOptPass());
1973
1974 // Promote any localized globals to SSA registers.
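// Presumably mem2reg over each function, wrapped for the module pipeline:
MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));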
1976
1977 // Linking modules together can lead to duplicate global constants; only
1978 // keep one copy of each constant.
1980
1981 // Remove unused arguments from functions.
1983
1984 // Reduce the code after globalopt and ipsccp. Both can open up significant
1985 // simplification opportunities, and both can propagate functions through
1986 // function pointers. When this happens, we often have to resolve varargs
1987 // calls, etc, so let instcombine do this.
1988 FunctionPassManager PeepholeFPM;
1989 PeepholeFPM.addPass(InstCombinePass());
1990 if (Level.getSpeedupLevel() > 1)
1991 PeepholeFPM.addPass(AggressiveInstCombinePass());
1992 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1993
1994 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1996
1997 // Lower variadic functions for supported targets prior to inlining.
1999
2000 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2001 // generally clean up exception handling overhead. It isn't clear this is
2002 // valuable as the inliner doesn't currently care whether it is inlining an
2003 // invoke or a call.
2004 // Run the inliner now.
2005 if (EnableModuleInliner) {
2009 } else {
2012 /* MandatoryFirst */ true,
2015 }
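// Hedged sketch of the two configurations selected above; the wrapper's extra
// arguments (inline context, advisor mode) are omitted here and may differ in
// this revision:
if (EnableModuleInliner)
  MPM.addPass(buildModuleInlinerPipeline(Level,
                                         ThinOrFullLTOPhase::FullLTOPostLink));
else
  MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level),
                                       /*MandatoryFirst=*/true));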
2016
2017 // Perform context disambiguation after inlining, since that would reduce the
2018 // amount of additional cloning required to distinguish the allocation
2019 // contexts.
2022 /*Summary=*/nullptr,
2023 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2024
2025 // Optimize globals again after we ran the inliner.
2026 MPM.addPass(GlobalOptPass());
2027
2028 // Run the OpenMPOpt pass again after global optimizations.
2030
2031 // Garbage collect dead functions.
2032 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2033
2034 // If we didn't decide to inline a function, check to see if we can
2035 // transform it to pass arguments by value instead of by reference.
2036 CGSCCPassManager CGPM;
2041
2043 // The IPO Passes may leave cruft around. Clean up after them.
2044 FPM.addPass(InstCombinePass());
2045 invokePeepholeEPCallbacks(FPM, Level);
2046
2049
2051
2052 // Do a post-inline PGO instrumentation and use pass. This is a
2053 // context-sensitive PGO pass.
2054 if (PGOOpt) {
2055 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2056 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2057 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2058 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
2059 PGOOpt->FS);
2060 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2061 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2062 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2063 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
2064 PGOOpt->FS);
2065 }
2066
2067 // Break up allocas
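// Presumably SROA, per the comment; the CFG-modifying variant shown here is
// an assumption:
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));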
2069
2070 // LTO provides additional opportunities for tailcall elimination due to
2071 // link-time inlining and visibility of the nocapture attribute.
2072 FPM.addPass(
2073 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2074
2075 // Run a few AA-driven optimizations here and now to clean up the code.
2076 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2078
2079 MPM.addPass(
2081
2082 // Require the GlobalsAA analysis for the module so we can query it within
2083 // MainFPM.
2086 // Invalidate AAManager so it can be recreated and pick up the newly
2087 // available GlobalsAA.
2088 MPM.addPass(
2090 }
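// Hedged reading of this guarded block: require GlobalsAA at module scope and
// then invalidate the per-function AAManager so it picks up the new result:
if (EnableGlobalAnalyses) {
  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
  MPM.addPass(
      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
}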
2091
2092 FunctionPassManager MainFPM;
2095 /*AllowSpeculation=*/true),
2096 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2097
2098 if (RunNewGVN)
2099 MainFPM.addPass(NewGVNPass());
2100 else
2101 MainFPM.addPass(GVNPass());
2102
2103 // Remove dead memcpy()'s.
2104 MainFPM.addPass(MemCpyOptPass());
2105
2106 // Nuke dead stores.
2107 MainFPM.addPass(DSEPass());
2108 MainFPM.addPass(MoveAutoInitPass());
2110
2111 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2112
2113 LoopPassManager LPM;
2114 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2115 LPM.addPass(LoopFlattenPass());
2118 // FIXME: Add loop interchange.
2119
2120 // Unroll small loops and perform peeling.
2121 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2122 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2124 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2125 // *All* loop passes must preserve it, in order to be able to use it.
2127 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2128
2129 MainFPM.addPass(LoopDistributePass());
2130
2131 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2132
2133 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2134
2135 // Run the OpenMPOpt CGSCC pass again late.
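// Presumably the CGSCC flavour of OpenMPOpt, wrapped for the module pipeline:
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
    OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));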
2138
2139 invokePeepholeEPCallbacks(MainFPM, Level);
2140 MainFPM.addPass(JumpThreadingPass());
2141 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2143
2144 // Lower type metadata and the type.test intrinsic. This pass supports
2145 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2146 // to be run at link time if CFI is enabled. This pass does nothing if
2147 // CFI is disabled.
2148 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2149 // Run a second time to clean up any type tests left behind by WPD for use
2150 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2151 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2153
2154 // Enable splitting late in the FullLTO post-link pipeline.
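// "Splitting" here refers to hot/cold splitting; a hedged sketch of the
// elided guard:
if (EnableHotColdSplit)
  MPM.addPass(HotColdSplittingPass());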
2157
2158 // Add late LTO optimization passes.
2159 FunctionPassManager LateFPM;
2160
2161 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2162 // canonicalization pass that enables other optimizations. As a result,
2163 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM's
2164 // results too early.
2165 LateFPM.addPass(LoopSinkPass());
2166
2167 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2168 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2169 // flattening of blocks.
2170 LateFPM.addPass(DivRemPairsPass());
2171
2172 // Delete basic blocks, which optimization passes may have killed.
2174 .convertSwitchRangeToICmp(true)
2175 .hoistCommonInsts(true)
2176 .speculateUnpredictables(true)));
2177 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2178
2179 // Drop bodies of available_externally objects to improve GlobalDCE.
2181
2182 // Now that we have optimized the program, discard unreachable functions.
2183 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2184
2185 if (PTO.MergeFunctions)
2187
2189
2190 if (PTO.CallGraphProfile)
2191 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2192
2193 MPM.addPass(CoroCleanupPass());
2194
2196
2197 // Emit annotation remarks.
2199
2200 return MPM;
2201}
2202
2203 ModulePassManager
2204 PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2205 ThinOrFullLTOPhase Phase) {
2206 assert(Level == OptimizationLevel::O0 &&
2207 "buildO0DefaultPipeline should only be used with O0");
2208
2210
2211 // Perform pseudo probe instrumentation in O0 mode. This is for
2212 // consistency between different build modes. For example, an LTO build can be
2213 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2214 // the postlink will require pseudo probe instrumentation in the prelink.
2215 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2217
2218 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2219 PGOOpt->Action == PGOOptions::IRUse))
2221 MPM,
2222 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2223 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2224 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2225
2226 // Instrument function entry and exit before all inlining.
2228 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2229
2231
2232 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2234
2235 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2236 // Explicitly disable sample loader inlining and use the flattened profile in
2237 // the O0 pipeline.
2238 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2239 PGOOpt->ProfileRemappingFile,
2240 ThinOrFullLTOPhase::None, nullptr,
2241 /*DisableSampleProfileInlining=*/true,
2242 /*UseFlattenedProfile=*/true));
2243 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2244 // RequireAnalysisPass for PSI before subsequent non-module passes.
2246 }
2247
2249
2250 // Build a minimal pipeline based on the semantics required by LLVM,
2251 // which is just that always_inline inlining occurs. Further, disable
2252 // generating lifetime intrinsics to avoid enabling further optimizations
2253 // during code generation.
2254 MPM.addPass(AlwaysInlinerPass(
2255 /*InsertLifetimeIntrinsics=*/false));
2256
2257 if (PTO.MergeFunctions)
2259
2260 if (EnableMatrix)
2261 MPM.addPass(
2263
2264 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2265 CGSCCPassManager CGPM;
2267 if (!CGPM.isEmpty())
2269 }
2270 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2271 LoopPassManager LPM;
2273 if (!LPM.isEmpty()) {
2275 createFunctionToLoopPassAdaptor(std::move(LPM))));
2276 }
2277 }
2278 if (!LoopOptimizerEndEPCallbacks.empty()) {
2279 LoopPassManager LPM;
2281 if (!LPM.isEmpty()) {
2283 createFunctionToLoopPassAdaptor(std::move(LPM))));
2284 }
2285 }
2286 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2289 if (!FPM.isEmpty())
2290 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2291 }
2292
2294
2295 if (!VectorizerStartEPCallbacks.empty()) {
2298 if (!FPM.isEmpty())
2299 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2300 }
2301
2302 if (!VectorizerEndEPCallbacks.empty()) {
2305 if (!FPM.isEmpty())
2306 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2307 }
2308
2310
2312
2313 if (isLTOPreLink(Phase))
2314 addRequiredLTOPreLinkPasses(MPM);
2315
2317
2318 return MPM;
2319}
2320
2321 AAManager PassBuilder::buildDefaultAAPipeline() {
2322 AAManager AA;
2323
2324 // The order in which these are registered determines their priority when
2325 // being queried.
2326
2327 // Add any target-specific alias analyses that should be run early.
2328 if (TM)
2330
2331 // First we register the basic alias analysis that provides the majority of
2332 // per-function local AA logic. This is a stateless, on-demand local set of
2333 // AA techniques.
2335
2336 // Next we query fast, specialized alias analyses that wrap IR-embedded
2337 // information about aliasing.
2340
2341 // Add support for querying global aliasing information when available.
2342 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2343 // analysis, all that the `AAManager` can do is query for any *cached*
2344 // results from `GlobalsAA` through a readonly proxy.
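// A hedged sketch of the registrations described above (the local AA stack,
// then cached module-level GlobalsAA when inter-procedural analyses are on):
AA.registerFunctionAnalysis<BasicAA>();
AA.registerFunctionAnalysis<ScopedNoAliasAA>();
AA.registerFunctionAnalysis<TypeBasedAA>();
if (EnableGlobalAnalyses)
  AA.registerModuleAnalysis<GlobalsAA>();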
2347
2348 // Add target-specific alias analyses.
2349 if (TM)
2351
2352 return AA;
2353}
2354
2355bool PassBuilder::isInstrumentedPGOUse() const {
2356 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2357 !UseCtxProfile.empty();
2358}