LLVM 21.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
148
149using namespace llvm;
150
152 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
153 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
154 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
155 "Heuristics-based inliner version"),
156 clEnumValN(InliningAdvisorMode::Development, "development",
157 "Use development mode (runtime-loadable model)"),
158 clEnumValN(InliningAdvisorMode::Release, "release",
159 "Use release mode (AOT-compiled model)")));
160
161/// Flag to enable inline deferral during PGO.
162static cl::opt<bool>
163 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
165 cl::desc("Enable inline deferral during PGO"));
166
167static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
168 cl::init(false), cl::Hidden,
169 cl::desc("Enable module inliner"));
170
172 "mandatory-inlining-first", cl::init(false), cl::Hidden,
173 cl::desc("Perform mandatory inlinings module-wide, before performing "
174 "inlining"));
175
177 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
178 cl::desc("Eagerly invalidate more analyses in default pipelines"));
179
181 "enable-merge-functions", cl::init(false), cl::Hidden,
182 cl::desc("Enable function merging as part of the optimization pipeline"));
183
185 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
186 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
187
189 "enable-global-analyses", cl::init(true), cl::Hidden,
190 cl::desc("Enable inter-procedural analyses"));
191
192static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
193 cl::init(false), cl::Hidden,
194 cl::desc("Run Partial inlining pass"));
195
197 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
198 cl::desc("Run cleanup optimization passes after vectorization"));
199
200static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
201 cl::desc("Run the NewGVN pass"));
202
204 "enable-loopinterchange", cl::init(false), cl::Hidden,
205 cl::desc("Enable the experimental LoopInterchange Pass"));
206
207static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
208 cl::init(false), cl::Hidden,
209 cl::desc("Enable Unroll And Jam Pass"));
210
211static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
213 cl::desc("Enable the LoopFlatten Pass"));
214
215// Experimentally allow loop header duplication. This should allow for better
216// optimization at Oz, since loop-idiom recognition can then recognize things
217// like memcpy. If this ends up being useful for many targets, we should drop
218// this flag and make a code generation option that can be controlled
219// independent of the opt level and exposed through the frontend.
221 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
222 cl::desc("Enable loop header duplication at any optimization level"));
223
224static cl::opt<bool>
225 EnableDFAJumpThreading("enable-dfa-jump-thread",
226 cl::desc("Enable DFA jump threading"),
227 cl::init(false), cl::Hidden);
228
229static cl::opt<bool>
230 EnableHotColdSplit("hot-cold-split",
231 cl::desc("Enable hot-cold splitting pass"));
232
233static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
235 cl::desc("Enable ir outliner pass"));
236
237static cl::opt<bool>
238 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
239 cl::desc("Disable pre-instrumentation inliner"));
240
242 "preinline-threshold", cl::Hidden, cl::init(75),
243 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244 "(default = 75)"));
245
246static cl::opt<bool>
247 EnableGVNHoist("enable-gvn-hoist",
248 cl::desc("Enable the GVN hoisting pass (default = off)"));
249
250static cl::opt<bool>
251 EnableGVNSink("enable-gvn-sink",
252 cl::desc("Enable the GVN sinking pass (default = off)"));
253
255 "enable-jump-table-to-switch",
256 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257
258// This option is used in simplifying testing SampleFDO optimizations for
259// profile loading.
260static cl::opt<bool>
261 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
262 cl::desc("Enable control height reduction optimization (CHR)"));
263
265 "flattened-profile-used", cl::init(false), cl::Hidden,
266 cl::desc("Indicate the sample profile being used is flattened, i.e., "
267 "no inline hierarchy exists in the profile"));
268
270 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
271 cl::desc("Enable order file instrumentation (default = off)"));
272
273static cl::opt<bool>
274 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
275 cl::desc("Enable lowering of the matrix intrinsics"));
276
278 "enable-constraint-elimination", cl::init(true), cl::Hidden,
279 cl::desc(
280 "Enable pass to eliminate conditions based on linear constraints"));
281
283 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
284 cl::desc("Enable the attributor inter-procedural deduction pass"),
285 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
286 "enable all attributor runs"),
287 clEnumValN(AttributorRunOption::MODULE, "module",
288 "enable module-wide attributor runs"),
289 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
290 "enable call graph SCC attributor runs"),
291 clEnumValN(AttributorRunOption::NONE, "none",
292 "disable attributor runs")));
293
295 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
296 cl::desc("Enable profile instrumentation sampling (default = off)"));
298 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
299 cl::desc("Enable the experimental Loop Versioning LICM pass"));
300
302 "instrument-cold-function-only-path", cl::init(""),
303 cl::desc("File path for cold function only instrumentation(requires use "
304 "with --pgo-instrument-cold-function-only)"),
305 cl::Hidden);
306
309
310namespace llvm {
312} // namespace llvm
313
315 LoopInterleaving = true;
316 LoopVectorization = true;
317 SLPVectorization = false;
318 LoopUnrolling = true;
322 CallGraphProfile = true;
323 UnifiedLTO = false;
325 InlinerThreshold = -1;
327}
328
329namespace llvm {
331} // namespace llvm
332
334 OptimizationLevel Level) {
335 for (auto &C : PeepholeEPCallbacks)
336 C(FPM, Level);
337}
340 for (auto &C : LateLoopOptimizationsEPCallbacks)
341 C(LPM, Level);
342}
344 OptimizationLevel Level) {
345 for (auto &C : LoopOptimizerEndEPCallbacks)
346 C(LPM, Level);
347}
350 for (auto &C : ScalarOptimizerLateEPCallbacks)
351 C(FPM, Level);
352}
354 OptimizationLevel Level) {
355 for (auto &C : CGSCCOptimizerLateEPCallbacks)
356 C(CGPM, Level);
357}
359 OptimizationLevel Level) {
360 for (auto &C : VectorizerStartEPCallbacks)
361 C(FPM, Level);
362}
364 OptimizationLevel Level) {
365 for (auto &C : VectorizerEndEPCallbacks)
366 C(FPM, Level);
367}
369 OptimizationLevel Level,
371 for (auto &C : OptimizerEarlyEPCallbacks)
372 C(MPM, Level, Phase);
373}
375 OptimizationLevel Level,
377 for (auto &C : OptimizerLastEPCallbacks)
378 C(MPM, Level, Phase);
379}
382 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
383 C(MPM, Level);
384}
387 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
388 C(MPM, Level);
389}
391 OptimizationLevel Level) {
392 for (auto &C : PipelineStartEPCallbacks)
393 C(MPM, Level);
394}
397 for (auto &C : PipelineEarlySimplificationEPCallbacks)
398 C(MPM, Level, Phase);
399}
400
401// Helper to add AnnotationRemarksPass.
404}
405
406// Helper to check if the current compilation phase is preparing for LTO
410}
411
412// TODO: Investigate the cost/benefit of tail call elimination on debugging.
414PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
416
418
421
422 // Form SSA out of local memory accesses after breaking apart aggregates into
423 // scalars.
425
426 // Catch trivial redundancies
427 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
428
429 // Hoisting of scalars and load expressions.
430 FPM.addPass(
431 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
433
435
436 invokePeepholeEPCallbacks(FPM, Level);
437
438 FPM.addPass(
439 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
440
441 // Form canonically associated expression trees, and simplify the trees using
442 // basic mathematical properties. For example, this will form (nearly)
443 // minimal multiplication trees.
445
446 // Add the primary loop simplification pipeline.
447 // FIXME: Currently this is split into two loop pass pipelines because we run
448 // some function passes in between them. These can and should be removed
449 // and/or replaced by scheduling the loop pass equivalents in the correct
450 // positions. But those equivalent passes aren't powerful enough yet.
451 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
452 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
453 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
454 // `LoopInstSimplify`.
455 LoopPassManager LPM1, LPM2;
456
457 // Simplify the loop body. We do this initially to clean up after other loop
458 // passes run, either when iterating on a loop or on inner loops with
459 // implications on the outer loop.
462
463 // Try to remove as much code from the loop header as possible,
464 // to reduce amount of IR that will have to be duplicated. However,
465 // do not perform speculative hoisting the first time as LICM
466 // will destroy metadata that may not need to be destroyed if run
467 // after loop rotation.
468 // TODO: Investigate promotion cap for O1.
470 /*AllowSpeculation=*/false));
471
472 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
474 // TODO: Investigate promotion cap for O1.
476 /*AllowSpeculation=*/true));
479 LPM1.addPass(LoopFlattenPass());
480
483
485
487
490
491 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
492 // because it changes IR to makes profile annotation in back compile
493 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
494 // attributes so we need to make sure and allow the full unroll pass to pay
495 // attention to it.
496 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
497 PGOOpt->Action != PGOOptions::SampleUse)
498 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
499 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
501
503
504 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
505 /*UseMemorySSA=*/true,
506 /*UseBlockFrequencyInfo=*/true));
507 FPM.addPass(
508 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
510 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
511 // *All* loop passes must preserve it, in order to be able to use it.
512 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
513 /*UseMemorySSA=*/false,
514 /*UseBlockFrequencyInfo=*/false));
515
516 // Delete small array after loop unroll.
518
519 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
520 FPM.addPass(MemCpyOptPass());
521
522 // Sparse conditional constant propagation.
523 // FIXME: It isn't clear why we do this *after* loop passes rather than
524 // before...
525 FPM.addPass(SCCPPass());
526
527 // Delete dead bit computations (instcombine runs after to fold away the dead
528 // computations, and then ADCE will run later to exploit any new DCE
529 // opportunities that creates).
530 FPM.addPass(BDCEPass());
531
532 // Run instcombine after redundancy and dead bit elimination to exploit
533 // opportunities opened up by them.
535 invokePeepholeEPCallbacks(FPM, Level);
536
537 FPM.addPass(CoroElidePass());
538
540
541 // Finally, do an expensive DCE pass to catch all the dead code exposed by
542 // the simplifications and basic cleanup after all the simplifications.
543 // TODO: Investigate if this is too expensive.
544 FPM.addPass(ADCEPass());
545 FPM.addPass(
546 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
548 invokePeepholeEPCallbacks(FPM, Level);
549
550 return FPM;
551}
552
556 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
557
558 // The O1 pipeline has a separate pipeline creation function to simplify
559 // construction readability.
560 if (Level.getSpeedupLevel() == 1)
561 return buildO1FunctionSimplificationPipeline(Level, Phase);
562
564
567
568 // Form SSA out of local memory accesses after breaking apart aggregates into
569 // scalars.
571
572 // Catch trivial redundancies
573 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
576
577 // Hoisting of scalars and load expressions.
578 if (EnableGVNHoist)
579 FPM.addPass(GVNHoistPass());
580
581 // Global value numbering based sinking.
582 if (EnableGVNSink) {
583 FPM.addPass(GVNSinkPass());
584 FPM.addPass(
585 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
586 }
587
588 // Speculative execution if the target has divergent branches; otherwise nop.
589 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
590
591 // Optimize based on known information about branches, and cleanup afterward.
594
595 // Jump table to switch conversion.
598
599 FPM.addPass(
600 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
603
604 if (!Level.isOptimizingForSize())
606
607 invokePeepholeEPCallbacks(FPM, Level);
608
609 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
610 // using the size value profile. Don't perform this when optimizing for size.
611 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
612 !Level.isOptimizingForSize())
614
616 FPM.addPass(
617 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
618
619 // Form canonically associated expression trees, and simplify the trees using
620 // basic mathematical properties. For example, this will form (nearly)
621 // minimal multiplication trees.
623
626
627 // Add the primary loop simplification pipeline.
628 // FIXME: Currently this is split into two loop pass pipelines because we run
629 // some function passes in between them. These can and should be removed
630 // and/or replaced by scheduling the loop pass equivalents in the correct
631 // positions. But those equivalent passes aren't powerful enough yet.
632 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
633 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
634 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
635 // `LoopInstSimplify`.
636 LoopPassManager LPM1, LPM2;
637
638 // Simplify the loop body. We do this initially to clean up after other loop
639 // passes run, either when iterating on a loop or on inner loops with
640 // implications on the outer loop.
643
644 // Try to remove as much code from the loop header as possible,
645 // to reduce amount of IR that will have to be duplicated. However,
646 // do not perform speculative hoisting the first time as LICM
647 // will destroy metadata that may not need to be destroyed if run
648 // after loop rotation.
649 // TODO: Investigate promotion cap for O1.
651 /*AllowSpeculation=*/false));
652
653 // Disable header duplication in loop rotation at -Oz.
655 Level != OptimizationLevel::Oz,
657 // TODO: Investigate promotion cap for O1.
659 /*AllowSpeculation=*/true));
660 LPM1.addPass(
661 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
663 LPM1.addPass(LoopFlattenPass());
664
667
668 {
670 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
672 LPM2.addPass(std::move(ExtraPasses));
673 }
674
676
678
681
682 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
683 // because it changes IR to makes profile annotation in back compile
684 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
685 // attributes so we need to make sure and allow the full unroll pass to pay
686 // attention to it.
687 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
688 PGOOpt->Action != PGOOptions::SampleUse)
689 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
690 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
692
694
695 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
696 /*UseMemorySSA=*/true,
697 /*UseBlockFrequencyInfo=*/true));
698 FPM.addPass(
699 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
701 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
702 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
703 // *All* loop passes must preserve it, in order to be able to use it.
704 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
705 /*UseMemorySSA=*/false,
706 /*UseBlockFrequencyInfo=*/false));
707
708 // Delete small array after loop unroll.
710
711 // Try vectorization/scalarization transforms that are both improvements
712 // themselves and can allow further folds with GVN and InstCombine.
713 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
714
715 // Eliminate redundancies.
717 if (RunNewGVN)
718 FPM.addPass(NewGVNPass());
719 else
720 FPM.addPass(GVNPass());
721
722 // Sparse conditional constant propagation.
723 // FIXME: It isn't clear why we do this *after* loop passes rather than
724 // before...
725 FPM.addPass(SCCPPass());
726
727 // Delete dead bit computations (instcombine runs after to fold away the dead
728 // computations, and then ADCE will run later to exploit any new DCE
729 // opportunities that creates).
730 FPM.addPass(BDCEPass());
731
732 // Run instcombine after redundancy and dead bit elimination to exploit
733 // opportunities opened up by them.
735 invokePeepholeEPCallbacks(FPM, Level);
736
737 // Re-consider control flow based optimizations after redundancy elimination,
738 // redo DCE, etc.
741
744
745 // Finally, do an expensive DCE pass to catch all the dead code exposed by
746 // the simplifications and basic cleanup after all the simplifications.
747 // TODO: Investigate if this is too expensive.
748 FPM.addPass(ADCEPass());
749
750 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
751 FPM.addPass(MemCpyOptPass());
752
753 FPM.addPass(DSEPass());
755
758 /*AllowSpeculation=*/true),
759 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
760
761 FPM.addPass(CoroElidePass());
762
764
766 .convertSwitchRangeToICmp(true)
767 .hoistCommonInsts(true)
768 .sinkCommonInsts(true)));
770 invokePeepholeEPCallbacks(FPM, Level);
771
772 return FPM;
773}
774
775void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
778}
779
780void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
781 OptimizationLevel Level,
782 ThinOrFullLTOPhase LTOPhase) {
783 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
785 return;
786 InlineParams IP;
787
789
790 // FIXME: The hint threshold has the same value used by the regular inliner
791 // when not optimzing for size. This should probably be lowered after
792 // performance testing.
793 // FIXME: this comment is cargo culted from the old pass manager, revisit).
794 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
796 IP, /* MandatoryFirst */ true,
798 CGSCCPassManager &CGPipeline = MIWP.getPM();
799
802 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
803 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
804 true))); // Merge & remove basic blocks.
805 FPM.addPass(InstCombinePass()); // Combine silly sequences.
806 invokePeepholeEPCallbacks(FPM, Level);
807
808 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
809 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
810
811 MPM.addPass(std::move(MIWP));
812
813 // Delete anything that is now dead to make sure that we don't instrument
814 // dead code. Instrumentation can end up keeping dead code around and
815 // dramatically increase code size.
816 MPM.addPass(GlobalDCEPass());
817}
818
819void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
820 OptimizationLevel Level) {
822 // Disable header duplication in loop rotation at -Oz.
826 Level != OptimizationLevel::Oz),
827 /*UseMemorySSA=*/false,
828 /*UseBlockFrequencyInfo=*/false),
830 }
831}
832
833void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
834 OptimizationLevel Level, bool RunProfileGen,
835 bool IsCS, bool AtomicCounterUpdate,
836 std::string ProfileFile,
837 std::string ProfileRemappingFile,
839 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
840
841 if (!RunProfileGen) {
842 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
843 MPM.addPass(
844 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
845 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
846 // RequireAnalysisPass for PSI before subsequent non-module passes.
848 return;
849 }
850
851 // Perform PGO instrumentation.
854
855 addPostPGOLoopRotation(MPM, Level);
856 // Add the profile lowering pass.
858 if (!ProfileFile.empty())
859 Options.InstrProfileOutput = ProfileFile;
860 // Do counter promotion at Level greater than O0.
861 Options.DoCounterPromotion = true;
862 Options.UseBFIInPromotion = IsCS;
863 if (EnableSampledInstr) {
864 Options.Sampling = true;
865 // With sampling, there is little beneifit to enable counter promotion.
866 // But note that sampling does work with counter promotion.
867 Options.DoCounterPromotion = false;
868 }
869 Options.Atomic = AtomicCounterUpdate;
871}
872
874 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
875 bool AtomicCounterUpdate, std::string ProfileFile,
876 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
877 if (!RunProfileGen) {
878 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
879 MPM.addPass(
880 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
881 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
882 // RequireAnalysisPass for PSI before subsequent non-module passes.
884 return;
885 }
886
887 // Perform PGO instrumentation.
890 // Add the profile lowering pass.
892 if (!ProfileFile.empty())
893 Options.InstrProfileOutput = ProfileFile;
894 // Do not do counter promotion at O0.
895 Options.DoCounterPromotion = false;
896 Options.UseBFIInPromotion = IsCS;
897 Options.Atomic = AtomicCounterUpdate;
899}
900
902 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
903}
904
908 InlineParams IP;
909 if (PTO.InlinerThreshold == -1)
910 IP = getInlineParamsFromOptLevel(Level);
911 else
913 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
914 // disable hot callsite inline (as much as possible [1]) because it makes
915 // profile annotation in the backend inaccurate.
916 //
917 // [1] Note the cost of a function could be below zero due to erased
918 // prologue / epilogue.
919 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
920 PGOOpt->Action == PGOOptions::SampleUse)
922
923 if (PGOOpt)
925
929
930 // Require the GlobalsAA analysis for the module so we can query it within
931 // the CGSCC pipeline.
934 // Invalidate AAManager so it can be recreated and pick up the newly
935 // available GlobalsAA.
936 MIWP.addModulePass(
938 }
939
940 // Require the ProfileSummaryAnalysis for the module so we can query it within
941 // the inliner pass.
943
944 // Now begin the main postorder CGSCC pipeline.
945 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
946 // manager and trying to emulate its precise behavior. Much of this doesn't
947 // make a lot of sense and we should revisit the core CGSCC structure.
948 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
949
950 // Note: historically, the PruneEH pass was run first to deduce nounwind and
951 // generally clean up exception handling overhead. It isn't clear this is
952 // valuable as the inliner doesn't currently care whether it is inlining an
953 // invoke or a call.
954
956 MainCGPipeline.addPass(AttributorCGSCCPass());
957
958 // Deduce function attributes. We do another run of this after the function
959 // simplification pipeline, so this only needs to run when it could affect the
960 // function simplification pipeline, which is only the case with recursive
961 // functions.
962 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
963
964 // When at O3 add argument promotion to the pass pipeline.
965 // FIXME: It isn't at all clear why this should be limited to O3.
966 if (Level == OptimizationLevel::O3)
967 MainCGPipeline.addPass(ArgumentPromotionPass());
968
969 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
970 // there are no OpenMP runtime calls present in the module.
971 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
972 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
973
974 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
975
976 // Add the core function simplification pipeline nested inside the
977 // CGSCC walk.
980 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
981
982 // Finally, deduce any function attributes based on the fully simplified
983 // function.
984 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
985
986 // Mark that the function is fully simplified and that it shouldn't be
987 // simplified again if we somehow revisit it due to CGSCC mutations unless
988 // it's been modified since.
991
993 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
994 MainCGPipeline.addPass(CoroAnnotationElidePass());
995 }
996
997 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
998 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1000
1001 return MIWP;
1002}
1003
1008
1010 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
1011 // disable hot callsite inline (as much as possible [1]) because it makes
1012 // profile annotation in the backend inaccurate.
1013 //
1014 // [1] Note the cost of a function could be below zero due to erased
1015 // prologue / epilogue.
1016 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
1017 PGOOpt->Action == PGOOptions::SampleUse)
1018 IP.HotCallSiteThreshold = 0;
1019
1020 if (PGOOpt)
1022
1023 // The inline deferral logic is used to avoid losing some
1024 // inlining chance in future. It is helpful in SCC inliner, in which
1025 // inlining is processed in bottom-up order.
1026 // While in module inliner, the inlining order is a priority-based order
1027 // by default. The inline deferral is unnecessary there. So we disable the
1028 // inline deferral logic in module inliner.
1029 IP.EnableDeferral = false;
1030
1033 MPM.addPass(GlobalOptPass());
1034 MPM.addPass(GlobalDCEPass());
1036 }
1037
1041
1045 MPM.addPass(
1047 }
1048
1049 return MPM;
1050}
1051
1055 assert(Level != OptimizationLevel::O0 &&
1056 "Should not be used for O0 pipeline");
1057
1059 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1060
1062
1063 // Place pseudo probe instrumentation as the first pass of the pipeline to
1064 // minimize the impact of optimization changes.
1065 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1068
1069 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1070
1071 // In ThinLTO mode, when flattened profile is used, all the available
1072 // profile information will be annotated in PreLink phase so there is
1073 // no need to load the profile again in PostLink.
1074 bool LoadSampleProfile =
1075 HasSampleProfile &&
1077
1078 // During the ThinLTO backend phase we perform early indirect call promotion
1079 // here, before globalopt. Otherwise imported available_externally functions
1080 // look unreferenced and are removed. If we are going to load the sample
1081 // profile then defer until later.
1082 // TODO: See if we can move later and consolidate with the location where
1083 // we perform ICP when we are loading a sample profile.
1084 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1085 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1086 // determine whether the new direct calls are annotated with prof metadata.
1087 // Ideally this should be determined from whether the IR is annotated with
1088 // sample profile, and not whether the a sample profile was provided on the
1089 // command line. E.g. for flattened profiles where we will not be reloading
1090 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1091 // provide the sample profile file.
1092 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1093 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1094
1095 // Create an early function pass manager to cleanup the output of the
1096 // frontend. Not necessary with LTO post link pipelines since the pre link
1097 // pipeline already cleaned up the frontend output.
1099 // Do basic inference of function attributes from known properties of system
1100 // libraries and other oracles.
1102 MPM.addPass(CoroEarlyPass());
1103
1104 FunctionPassManager EarlyFPM;
1105 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1106 // Lower llvm.expect to metadata before attempting transforms.
1107 // Compare/branch metadata may alter the behavior of passes like
1108 // SimplifyCFG.
1110 EarlyFPM.addPass(SimplifyCFGPass());
1112 EarlyFPM.addPass(EarlyCSEPass());
1113 if (Level == OptimizationLevel::O3)
1114 EarlyFPM.addPass(CallSiteSplittingPass());
1116 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1117 }
1118
1119 if (LoadSampleProfile) {
1120 // Annotate sample profile right after early FPM to ensure freshness of
1121 // the debug info.
1122 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1123 PGOOpt->ProfileRemappingFile, Phase));
1124 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1125 // RequireAnalysisPass for PSI before subsequent non-module passes.
1127 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1128 // for the profile annotation to be accurate in the LTO backend.
1129 if (!isLTOPreLink(Phase))
1130 // We perform early indirect call promotion here, before globalopt.
1131 // This is important for the ThinLTO backend phase because otherwise
1132 // imported available_externally functions look unreferenced and are
1133 // removed.
1134 MPM.addPass(
1135 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1136 }
1137
1138 // Try to perform OpenMP specific optimizations on the module. This is a
1139 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1140 MPM.addPass(OpenMPOptPass());
1141
1143 MPM.addPass(AttributorPass());
1144
1145 // Lower type metadata and the type.test intrinsic in the ThinLTO
1146 // post link pipeline after ICP. This is to enable usage of the type
1147 // tests in ICP sequences.
1149 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1151
1153
1154 // Interprocedural constant propagation now that basic cleanup has occurred
1155 // and prior to optimizing globals.
1156 // FIXME: This position in the pipeline hasn't been carefully considered in
1157 // years, it should be re-analyzed.
1158 MPM.addPass(IPSCCPPass(
1159 IPSCCPOptions(/*AllowFuncSpec=*/
1160 Level != OptimizationLevel::Os &&
1161 Level != OptimizationLevel::Oz &&
1162 !isLTOPreLink(Phase))));
1163
1164 // Attach metadata to indirect call sites indicating the set of functions
1165 // they may target at run-time. This should follow IPSCCP.
1167
1168 // Optimize globals to try and fold them into constants.
1169 MPM.addPass(GlobalOptPass());
1170
1171 // Create a small function pass pipeline to cleanup after all the global
1172 // optimizations.
1173 FunctionPassManager GlobalCleanupPM;
1174 // FIXME: Should this instead by a run of SROA?
1175 GlobalCleanupPM.addPass(PromotePass());
1176 GlobalCleanupPM.addPass(InstCombinePass());
1177 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1178 GlobalCleanupPM.addPass(
1179 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1180 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1182
1183 // We already asserted this happens in non-FullLTOPostLink earlier.
1184 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1185 const bool IsPGOPreLink = PGOOpt && IsPreLink;
1186 const bool IsPGOInstrGen =
1187 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1188 const bool IsPGOInstrUse =
1189 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1190 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1191 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1192 // enable ctx profiling from the frontend.
1194 "Enabling both instrumented PGO and contextual instrumentation is not "
1195 "supported.");
1196 // Enable contextual profiling instrumentation.
1197 const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1199 const bool IsCtxProfUse =
1201
1202 assert(
1204 "--instrument-cold-function-only-path is provided but "
1205 "--pgo-instrument-cold-function-only is not enabled");
1206 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1207 IsPGOPreLink &&
1209
1210 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1211 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1212 addPreInlinerPasses(MPM, Level, Phase);
1213
1214 // Add all the requested passes for instrumentation PGO, if requested.
1215 if (IsPGOInstrGen || IsPGOInstrUse) {
1216 addPGOInstrPasses(MPM, Level,
1217 /*RunProfileGen=*/IsPGOInstrGen,
1218 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1219 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1220 PGOOpt->FS);
1221 } else if (IsCtxProfGen || IsCtxProfUse) {
1223 // In pre-link, we just want the instrumented IR. We use the contextual
1224 // profile in the post-thinlink phase.
1225 // The instrumentation will be removed in post-thinlink after IPO.
1226 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1227 // mechanism for GUIDs.
1228 MPM.addPass(AssignGUIDPass());
1229 if (IsCtxProfUse)
1230 return MPM;
1231 addPostPGOLoopRotation(MPM, Level);
1233 } else if (IsColdFuncOnlyInstrGen) {
1234 addPGOInstrPasses(
1235 MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1236 /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
1237 /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
1238 }
1239
1240 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1241 MPM.addPass(PGOIndirectCallPromotion(false, false));
1242
1243 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1244 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1246
1247 if (IsMemprofUse)
1248 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1249
1250 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1251 PGOOpt->Action == PGOOptions::SampleUse))
1252 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1253
1254 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1255
1258 else
1259 MPM.addPass(buildInlinerPipeline(Level, Phase));
1260
1261 // Remove any dead arguments exposed by cleanups, constant folding globals,
1262 // and argument promotion.
1264
1266 MPM.addPass(CoroCleanupPass());
1267
1268 // Optimize globals now that functions are fully simplified.
1269 MPM.addPass(GlobalOptPass());
1270 MPM.addPass(GlobalDCEPass());
1271
1272 return MPM;
1273}
1274
1275/// TODO: Should LTO cause any differences to this set of passes?
// Shared tail of the vectorization pipeline: late runtime unrolling, cleanup,
// and SLP vectorization, used by both the per-module optimization pipeline
// (IsFullLTO=false) and the full-LTO post-link pipeline (IsFullLTO=true);
// the two modes differ mainly in where the unroll/SROA sequence is placed.
// NOTE(review): the embedded listing numbers skip values (e.g. 1277 -> 1280);
// lines were dropped during extraction, so several addPass(...) statements
// below appear truncated — consult upstream before editing.
1276void PassBuilder::addVectorPasses(OptimizationLevel Level,
1277 FunctionPassManager &FPM, bool IsFullLTO) {
1280
1282 if (IsFullLTO) {
1283 // The vectorizer may have significantly shortened a loop body; unroll
1284 // again. Unroll small loops to hide loop backedge latency and saturate any
1285 // parallel execution resources of an out-of-order processor. We also then
1286 // need to clean up redundancies and loop invariant code.
1287 // FIXME: It would be really good to use a loop-integrated instruction
1288 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1289 // across the loop nests.
1290 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1293 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1295 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1298 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1299 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1300 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1301 // NOTE: we are very late in the pipeline, and we don't have any LICM
1302 // or SimplifyCFG passes scheduled after us, that would cleanup
1303 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1305 }
1306
1307 if (!IsFullLTO) {
1308 // Eliminate loads by forwarding stores from the previous iteration to loads
1309 // of the current iteration.
1311 }
1312 // Cleanup after the loop optimization passes.
1313 FPM.addPass(InstCombinePass());
1314
1315 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1317 // At higher optimization levels, try to clean up any runtime overlap and
1318 // alignment checks inserted by the vectorizer. We want to track correlated
1319 // runtime checks for two inner loops in the same outer loop, fold any
1320 // common computations, hoist loop-invariant aspects out of any outer loop,
1321 // and unswitch the runtime checks if possible. Once hoisted, we may have
1322 // dead (or speculatable) control flows or more combining opportunities.
1323 ExtraPasses.addPass(EarlyCSEPass());
1325 ExtraPasses.addPass(InstCombinePass());
1326 LoopPassManager LPM;
1328 /*AllowSpeculation=*/true));
1329 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1331 ExtraPasses.addPass(
1332 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1333 /*UseBlockFrequencyInfo=*/true));
1334 ExtraPasses.addPass(
1335 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1336 ExtraPasses.addPass(InstCombinePass());
1337 FPM.addPass(std::move(ExtraPasses));
1338 }
1339
1340 // Now that we've formed fast to execute loop structures, we do further
1341 // optimizations. These are run afterward as they might block doing complex
1342 // analyses and transforms such as what are needed for loop vectorization.
1343
1344 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1345 // GVN, loop transforms, and others have already run, so it's now better to
1346 // convert to more optimized IR using more aggressive simplify CFG options.
1347 // The extra sinking transform can create larger basic blocks, so do this
1348 // before SLP vectorization.
1350 .forwardSwitchCondToPhi(true)
1351 .convertSwitchRangeToICmp(true)
1352 .convertSwitchToLookupTable(true)
1353 .needCanonicalLoops(false)
1354 .hoistCommonInsts(true)
1355 .sinkCommonInsts(true)));
1356
1357 if (IsFullLTO) {
1358 FPM.addPass(SCCPPass());
1359 FPM.addPass(InstCombinePass());
1360 FPM.addPass(BDCEPass());
1361 }
1362
1363 // Optimize parallel scalar instruction chains into SIMD instructions.
1364 if (PTO.SLPVectorization) {
1366 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1367 FPM.addPass(EarlyCSEPass());
1368 }
1369 }
1370 // Enhance/cleanup vector code.
1372
1373 if (!IsFullLTO) {
1374 FPM.addPass(InstCombinePass());
1375 // Unroll small loops to hide loop backedge latency and saturate any
1376 // parallel execution resources of an out-of-order processor. We also then
1377 // need to clean up redundancies and loop invariant code.
1378 // FIXME: It would be really good to use a loop-integrated instruction
1379 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1380 // across the loop nests.
1381 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1382 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1384 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1385 }
1387 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1390 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1391 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1392 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1393 // NOTE: we are very late in the pipeline, and we don't have any LICM
1394 // or SimplifyCFG passes scheduled after us, that would cleanup
1395 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1397 }
1398
  // Final peephole cleanup after unrolling.
1400 FPM.addPass(InstCombinePass());
1401
1402 // This is needed for two reasons:
1403 // 1. It works around problems that instcombine introduces, such as sinking
1404 // expensive FP divides into loops containing multiplications using the
1405 // divide result.
1406 // 2. It helps to clean up some loop-invariant code created by the loop
1407 // unroll pass when IsFullLTO=false.
1410 /*AllowSpeculation=*/true),
1411 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1412
1413 // Now that we've vectorized and unrolled loops, we may have more refined
1414 // alignment information, try to re-derive it here.
1416}
1417
// Module-level optimization portion of the default pipelines: CS-PGO
// instrumentation/use, a function-level OptimizePM sequence (loop rotation,
// loop deletion, distribution, vectorization via addVectorPasses, late CFG
// cleanup), then module passes such as hot/cold splitting and GlobalDCE.
// NOTE(review): the leading line(s) of this function's signature were lost
// during extraction — the visible text starts at the trailing parameter —
// and the listing numbers skip values where statements were dropped.
1420 ThinOrFullLTOPhase LTOPhase) {
1421 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1423
1424 // Run partial inlining pass to partially inline functions that have
1425 // large bodies.
1428
1429 // Remove avail extern fns and globals definitions since we aren't compiling
1430 // an object file for later LTO. For LTO we want to preserve these so they
1431 // are eligible for inlining at link-time. Note if they are unreferenced they
1432 // will be removed by GlobalDCE later, so this only impacts referenced
1433 // available externally globals. Eventually they will be suppressed during
1434 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1435 // may make globals referenced by available external functions dead and saves
1436 // running remaining passes on the eliminated functions. These should be
1437 // preserved during prelinking for link-time inlining decisions.
1438 if (!LTOPreLink)
1440
1443
1444 // Do RPO function attribute inference across the module to forward-propagate
1445 // attributes where applicable.
1446 // FIXME: Is this really an optimization rather than a canonicalization?
1448
1449 // Do a post inline PGO instrumentation and use pass. This is a context
1450 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1451 // cross-module inline has not been done yet. The context sensitive
1452 // instrumentation is after all the inlines are done.
1453 if (!LTOPreLink && PGOOpt) {
1454 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1455 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1456 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1457 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1458 PGOOpt->FS);
1459 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1460 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1461 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1462 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1463 PGOOpt->FS);
1464 }
1465
1466 // Re-compute GlobalsAA here prior to function passes. This is particularly
1467 // useful as the above will have inlined, DCE'ed, and function-attr
1468 // propagated everything. We should at this point have a reasonably minimal
1469 // and richly annotated call graph. By computing aliasing and mod/ref
1470 // information for all local globals here, the late loop passes and notably
1471 // the vectorizer will be able to use them to help recognize vectorizable
1472 // memory operations.
1475
1476 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1477
1478 FunctionPassManager OptimizePM;
1479 // Scheduling LoopVersioningLICM when inlining is over, because after that
1480 // we may see more accurate aliasing. Reason to run this late is that too
1481 // early versioning may prevent further inlining due to increase of code
1482 // size. Other optimizations which run later might get benefit of no-alias
1483 // assumption in clone loop.
1485 OptimizePM.addPass(
1487 // LoopVersioningLICM pass might increase new LICM opportunities.
1490 /*AllowSpeculation=*/true),
1491 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1492 }
1493
1494 OptimizePM.addPass(Float2IntPass());
1496
1497 if (EnableMatrix) {
1498 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1499 OptimizePM.addPass(EarlyCSEPass());
1500 }
1501
1502 // CHR pass should only be applied with the profile information.
1503 // The check is to check the profile summary information in CHR.
1504 if (EnableCHR && Level == OptimizationLevel::O3)
1505 OptimizePM.addPass(ControlHeightReductionPass());
1506
1507 // FIXME: We need to run some loop optimizations to re-rotate loops after
1508 // simplifycfg and others undo their rotation.
1509
1510 // Optimize the loop execution. These passes operate on entire loop nests
1511 // rather than on each loop in an inside-out manner, and so they are actually
1512 // function passes.
1513
1514 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1515
1516 LoopPassManager LPM;
1517 // First rotate loops that may have been un-rotated by prior passes.
1518 // Disable header duplication at -Oz.
1520 Level != OptimizationLevel::Oz,
1521 LTOPreLink));
1522 // Some loops may have become dead by now. Try to delete them.
1523 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1524 // this may need to be revisited once we run GVN before loop deletion
1525 // in the simplification pipeline.
1528 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1529
1530 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1531 // into separate loop that would otherwise inhibit vectorization. This is
1532 // currently only performed for loops marked with the metadata
1533 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1534 OptimizePM.addPass(LoopDistributePass());
1535
1536 // Populates the VFABI attribute with the scalar-to-vector mappings
1537 // from the TargetLibraryInfo.
1538 OptimizePM.addPass(InjectTLIMappings());
1539
1540 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1541
1542 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1543
1544 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1545 // canonicalization pass that enables other optimizations. As a result,
1546 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1547 // result too early.
1548 OptimizePM.addPass(LoopSinkPass());
1549
1550 // And finally clean up LCSSA form before generating code.
1551 OptimizePM.addPass(InstSimplifyPass());
1552
1553 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1554 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1555 // flattening of blocks.
1556 OptimizePM.addPass(DivRemPairsPass());
1557
1558 // Try to annotate calls that were created during optimization.
1559 OptimizePM.addPass(TailCallElimPass());
1560
1561 // LoopSink (and other loop passes since the last simplifyCFG) might have
1562 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1563 OptimizePM.addPass(
1565 .convertSwitchRangeToICmp(true)
1566 .speculateUnpredictables(true)
1567 .hoistLoadsStoresWithCondFaulting(true)));
1568
1569 // Add the core optimizing pipeline.
1570 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1572
1573 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1574
1575 // Split out cold code. Splitting is done late to avoid hiding context from
1576 // other optimizations and inadvertently regressing performance. The tradeoff
1577 // is that this has a higher code size cost than splitting early.
1578 if (EnableHotColdSplit && !LTOPreLink)
1580
1581 // Search the code for similar regions of code. If enough similar regions can
1582 // be found where extracting the regions into their own function will decrease
1583 // the size of the program, we extract the regions, and deduplicate the
1584 // structurally similar regions.
1585 if (EnableIROutliner)
1586 MPM.addPass(IROutlinerPass());
1587
1588 // Now we need to do some global optimization transforms.
1589 // FIXME: It would seem like these should come first in the optimization
1590 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1591 // ordering here.
1592 MPM.addPass(GlobalDCEPass());
1594
1595 // Merge functions if requested. It has a better chance to merge functions
1596 // after ConstantMerge folded jump tables.
1597 if (PTO.MergeFunctions)
1599
1600 if (PTO.CallGraphProfile && !LTOPreLink)
1603
1604 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1605 if (!LTOPreLink)
1607
1608 return MPM;
1609}
1610
// Default per-module (-O[1-3]) pipeline: module simplification followed by
// module optimization; -O0 is delegated to buildO0DefaultPipeline, and the
// required LTO pre-link passes are appended when feeding an LTO link step.
// NOTE(review): listing numbers skip values; statements on the dropped lines
// (e.g. the pass additions under several comments below) are not visible here.
1614 if (Level == OptimizationLevel::O0)
1615 return buildO0DefaultPipeline(Level, Phase);
1616
1618
1619 // Convert @llvm.global.annotations to !annotation metadata.
1621
1622 // Force any function attributes we want the rest of the pipeline to observe.
1624
1625 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1627
1628 // Apply module pipeline start EP callback.
1630
1631 // Add the core simplification pipeline.
1633
1634 // Now add the optimization pipeline.
1636
1637 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1638 PGOOpt->Action == PGOOptions::SampleUse)
1640
1641 // Emit annotation remarks.
1643
1644 if (isLTOPreLink(Phase))
1645 addRequiredLTOPreLinkPasses(MPM);
1646 return MPM;
1647}
1648
// FatLTO pipeline: embeds (Thin)LTO bitcode into the object file via
// EmbedBitcodePass while still running regular module optimization, so the
// resulting object works both with and without LTO at link time.
// NOTE(review): the leading signature line(s) were dropped by extraction,
// and listing numbers skip values where statements are missing.
1651 bool EmitSummary) {
1653 if (ThinLTO)
1655 else
1657 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1658
1659 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1660 // object code, only in the bitcode section, so drop it before we run
1661 // module optimization and generate machine code. If llvm.type.test() isn't in
1662 // the IR, this won't do anything.
1663 MPM.addPass(
1665
1666 // Use the ThinLTO post-link pipeline with sample profiling
1667 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1668 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1669 else {
1670 // otherwise, just use module optimization
1671 MPM.addPass(
1673 // Emit annotation remarks.
1675 }
1676 return MPM;
1677}
1678
// ThinLTO pre-link (compile-step) pipeline: runs module simplification only —
// unrolling/vectorization are deliberately deferred to the post-link backend,
// which has cross-module information. Ends by adding the passes required
// before the thin link (bitcode/summary preparation).
// NOTE(review): the leading signature line(s) were dropped by extraction,
// and listing numbers skip values where statements are missing.
1681 if (Level == OptimizationLevel::O0)
1683
1685
1686 // Convert @llvm.global.annotations to !annotation metadata.
1688
1689 // Force any function attributes we want the rest of the pipeline to observe.
1691
1692 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1694
1695 // Apply module pipeline start EP callback.
1697
1698 // If we are planning to perform ThinLTO later, we don't bloat the code with
1699 // unrolling/vectorization/... now. Just simplify the module as much as we
1700 // can.
1703 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1704 // thinlto use the contextual info to perform imports; then use the contextual
1705 // profile in the post-thinlink phase.
1706 if (!UseCtxProfile.empty()) {
1707 addRequiredLTOPreLinkPasses(MPM);
1708 return MPM;
1709 }
1710
1711 // Run partial inlining pass to partially inline functions that have
1712 // large bodies.
1713 // FIXME: It isn't clear whether this is really the right place to run this
1714 // in ThinLTO. Because there is another canonicalization and simplification
1715 // phase that will run after the thin link, running this here ends up with
1716 // less information than will be available later and it may grow functions in
1717 // ways that aren't beneficial.
1720
1721 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1722 PGOOpt->Action == PGOOptions::SampleUse)
1724
1725 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1726 // optimization is going to be done in PostLink stage, but clang can't add
1727 // callbacks there in case of in-process ThinLTO called by linker.
1732
1733 // Emit annotation remarks.
1735
1736 addRequiredLTOPreLinkPasses(MPM);
1737
1738 return MPM;
1739}
1740
// ThinLTO post-link (backend) pipeline. When an import summary is present,
// memprof context disambiguation and WPD/LowerTypeTests run first, because
// later passes (e.g. GVN) can disturb the instruction patterns those
// resolutions were computed against. At -O0 only the mandatory lowering and
// GlobalDCE run; otherwise the full simplification + optimization pipelines.
// NOTE(review): listing numbers skip values; statements on the dropped lines
// are not visible here.
1742 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1744
1745 if (ImportSummary) {
1746 // For ThinLTO we must apply the context disambiguation decisions early, to
1747 // ensure we can correctly match the callsites to summary data.
1750 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1751
1752 // These passes import type identifier resolutions for whole-program
1753 // devirtualization and CFI. They must run early because other passes may
1754 // disturb the specific instruction patterns that these passes look for,
1755 // creating dependencies on resolutions that may not appear in the summary.
1756 //
1757 // For example, GVN may transform the pattern assume(type.test) appearing in
1758 // two basic blocks into assume(phi(type.test, type.test)), which would
1759 // transform a dependency on a WPD resolution into a dependency on a type
1760 // identifier resolution for CFI.
1761 //
1762 // Also, WPD has access to more precise information than ICP and can
1763 // devirtualize more effectively, so it should operate on the IR first.
1764 //
1765 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1766 // metadata and intrinsics.
1767 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1768 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1769 }
1770
1771 if (Level == OptimizationLevel::O0) {
1772 // Run a second time to clean up any type tests left behind by WPD for use
1773 // in ICP.
1774 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1776 // Drop available_externally and unreferenced globals. This is necessary
1777 // with ThinLTO in order to avoid leaving undefined references to dead
1778 // globals in the object file.
1780 MPM.addPass(GlobalDCEPass());
1781 return MPM;
1782 }
1783 if (!UseCtxProfile.empty()) {
1784 MPM.addPass(
1786 } else {
1787 // Add the core simplification pipeline.
1790 }
1791 // Now add the optimization pipeline.
1794
1795 // Emit annotation remarks.
1797
1798 return MPM;
1799}
1800
// Full-LTO pre-link currently just reuses the per-module default pipeline.
// NOTE(review): this function's signature line was dropped by extraction;
// only the body fragment below is visible.
1803 // FIXME: We should use a customized pre-link pipeline!
1804 return buildPerModuleDefaultPipeline(Level,
1806}
1807
1810 ModuleSummaryIndex *ExportSummary) {
1812
1814
1815 // Create a function that performs CFI checks for cross-DSO calls with targets
1816 // in the current module.
1817 MPM.addPass(CrossDSOCFIPass());
1818
1819 if (Level == OptimizationLevel::O0) {
1820 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1821 // metadata and intrinsics.
1822 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1823 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1824 // Run a second time to clean up any type tests left behind by WPD for use
1825 // in ICP.
1826 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1828
1830
1831 // Emit annotation remarks.
1833
1834 return MPM;
1835 }
1836
1837 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1838 // Load sample profile before running the LTO optimization pipeline.
1839 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1840 PGOOpt->ProfileRemappingFile,
1842 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1843 // RequireAnalysisPass for PSI before subsequent non-module passes.
1845 }
1846
1847 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1849
1850 // Remove unused virtual tables to improve the quality of code generated by
1851 // whole-program devirtualization and bitset lowering.
1852 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1853
1854 // Do basic inference of function attributes from known properties of system
1855 // libraries and other oracles.
1857
1858 if (Level.getSpeedupLevel() > 1) {
1861
1862 // Indirect call promotion. This should promote all the targets that are
1863 // left by the earlier promotion pass that promotes intra-module targets.
1864 // This two-step promotion is to save the compile time. For LTO, it should
1865 // produce the same result as if we only do promotion here.
1867 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1868
1869 // Promoting by-reference arguments to by-value exposes more constants to
1870 // IPSCCP.
1871 CGSCCPassManager CGPM;
1874 CGPM.addPass(
1877
1878 // Propagate constants at call sites into the functions they call. This
1879 // opens opportunities for globalopt (and inlining) by substituting function
1880 // pointers passed as arguments to direct uses of functions.
1881 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1882 Level != OptimizationLevel::Os &&
1883 Level != OptimizationLevel::Oz)));
1884
1885 // Attach metadata to indirect call sites indicating the set of functions
1886 // they may target at run-time. This should follow IPSCCP.
1888 }
1889
1890 // Do RPO function attribute inference across the module to forward-propagate
1891 // attributes where applicable.
1892 // FIXME: Is this really an optimization rather than a canonicalization?
1894
1895 // Use in-range annotations on GEP indices to split globals where beneficial.
1896 MPM.addPass(GlobalSplitPass());
1897
1898 // Run whole program optimization of virtual call when the list of callees
1899 // is fixed.
1900 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1901
1902 // Stop here at -O1.
1903 if (Level == OptimizationLevel::O1) {
1904 // The LowerTypeTestsPass needs to run to lower type metadata and the
1905 // type.test intrinsics. The pass does nothing if CFI is disabled.
1906 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1907 // Run a second time to clean up any type tests left behind by WPD for use
1908 // in ICP (which is performed earlier than this in the regular LTO
1909 // pipeline).
1910 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1912
1914
1915 // Emit annotation remarks.
1917
1918 return MPM;
1919 }
1920
1921 // Optimize globals to try and fold them into constants.
1922 MPM.addPass(GlobalOptPass());
1923
1924 // Promote any localized globals to SSA registers.
1926
1927 // Linking modules together can lead to duplicate global constant, only
1928 // keep one copy of each constant.
1930
1931 // Remove unused arguments from functions.
1933
1934 // Reduce the code after globalopt and ipsccp. Both can open up significant
1935 // simplification opportunities, and both can propagate functions through
1936 // function pointers. When this happens, we often have to resolve varargs
1937 // calls, etc, so let instcombine do this.
1938 FunctionPassManager PeepholeFPM;
1939 PeepholeFPM.addPass(InstCombinePass());
1940 if (Level.getSpeedupLevel() > 1)
1941 PeepholeFPM.addPass(AggressiveInstCombinePass());
1942 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1943
1944 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1946
1947 // Lower variadic functions for supported targets prior to inlining.
1949
1950 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1951 // generally clean up exception handling overhead. It isn't clear this is
1952 // valuable as the inliner doesn't currently care whether it is inlining an
1953 // invoke or a call.
1954 // Run the inliner now.
1955 if (EnableModuleInliner) {
1959 } else {
1962 /* MandatoryFirst */ true,
1965 }
1966
1967 // Perform context disambiguation after inlining, since that would reduce the
1968 // amount of additional cloning required to distinguish the allocation
1969 // contexts.
1972 /*Summary=*/nullptr,
1973 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1974
1975 // Optimize globals again after we ran the inliner.
1976 MPM.addPass(GlobalOptPass());
1977
1978 // Run the OpenMPOpt pass again after global optimizations.
1980
1981 // Garbage collect dead functions.
1982 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1983
1984 // If we didn't decide to inline a function, check to see if we can
1985 // transform it to pass arguments by value instead of by reference.
1987
1989 // The IPO Passes may leave cruft around. Clean up after them.
1990 FPM.addPass(InstCombinePass());
1991 invokePeepholeEPCallbacks(FPM, Level);
1992
1995
1997
1998 // Do a post inline PGO instrumentation and use pass. This is a context
1999 // sensitive PGO pass.
2000 if (PGOOpt) {
2001 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2002 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2003 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2004 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
2005 PGOOpt->FS);
2006 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2007 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2008 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2009 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
2010 PGOOpt->FS);
2011 }
2012
2013 // Break up allocas
2015
2016 // LTO provides additional opportunities for tailcall elimination due to
2017 // link-time inlining, and visibility of nocapture attribute.
2019
2020 // Run a few AA driver optimizations here and now to cleanup the code.
2021 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2023
2024 MPM.addPass(
2026
2027 // Require the GlobalsAA analysis for the module so we can query it within
2028 // MainFPM.
2031 // Invalidate AAManager so it can be recreated and pick up the newly
2032 // available GlobalsAA.
2033 MPM.addPass(
2035 }
2036
2037 FunctionPassManager MainFPM;
2040 /*AllowSpeculation=*/true),
2041 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2042
2043 if (RunNewGVN)
2044 MainFPM.addPass(NewGVNPass());
2045 else
2046 MainFPM.addPass(GVNPass());
2047
2048 // Remove dead memcpy()'s.
2049 MainFPM.addPass(MemCpyOptPass());
2050
2051 // Nuke dead stores.
2052 MainFPM.addPass(DSEPass());
2053 MainFPM.addPass(MoveAutoInitPass());
2055
2056 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2057
2058 LoopPassManager LPM;
2059 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2060 LPM.addPass(LoopFlattenPass());
2063 // FIXME: Add loop interchange.
2064
2065 // Unroll small loops and perform peeling.
2066 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2067 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2069 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2070 // *All* loop passes must preserve it, in order to be able to use it.
2072 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2073
2074 MainFPM.addPass(LoopDistributePass());
2075
2076 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2077
2078 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2079
2080 // Run the OpenMPOpt CGSCC pass again late.
2083
2084 invokePeepholeEPCallbacks(MainFPM, Level);
2085 MainFPM.addPass(JumpThreadingPass());
2086 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2088
2089 // Lower type metadata and the type.test intrinsic. This pass supports
2090 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2091 // to be run at link time if CFI is enabled. This pass does nothing if
2092 // CFI is disabled.
2093 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2094 // Run a second time to clean up any type tests left behind by WPD for use
2095 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2096 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2098
2099 // Enable splitting late in the FullLTO post-link pipeline.
2102
2103 // Add late LTO optimization passes.
2104 FunctionPassManager LateFPM;
2105
2106 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2107 // canonicalization pass that enables other optimizations. As a result,
2108 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2109 // result too early.
2110 LateFPM.addPass(LoopSinkPass());
2111
2112 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2113 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2114 // flattening of blocks.
2115 LateFPM.addPass(DivRemPairsPass());
2116
2117 // Delete basic blocks, which optimization passes may have killed.
2119 .convertSwitchRangeToICmp(true)
2120 .hoistCommonInsts(true)
2121 .speculateUnpredictables(true)));
2122 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2123
2124 // Drop bodies of available externally objects to improve GlobalDCE.
2126
2127 // Now that we have optimized the program, discard unreachable functions.
2128 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2129
2130 if (PTO.MergeFunctions)
2132
2134
2135 if (PTO.CallGraphProfile)
2136 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2137
2139
2140 // Emit annotation remarks.
2142
2143 return MPM;
2144}
2145
2149 assert(Level == OptimizationLevel::O0 &&
2150 "buildO0DefaultPipeline should only be used with O0");
2151
2153
2154 // Perform pseudo probe instrumentation in O0 mode. This is for the
2155 // consistency between different build modes. For example, a LTO build can be
2156 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2157 // the postlink will require pseudo probe instrumentation in the prelink.
2158 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2160
2161 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2162 PGOOpt->Action == PGOOptions::IRUse))
2164 MPM,
2165 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2166 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2167 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2168
2169 // Instrument function entry and exit before all inlining.
2171 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2172
2174
2175 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2177
2178 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2179 // Explicitly disable sample loader inlining and use flattened profile in O0
2180 // pipeline.
2181 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2182 PGOOpt->ProfileRemappingFile,
2183 ThinOrFullLTOPhase::None, nullptr,
2184 /*DisableSampleProfileInlining=*/true,
2185 /*UseFlattenedProfile=*/true));
2186 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2187 // RequireAnalysisPass for PSI before subsequent non-module passes.
2189 }
2190
2192
2193 // Build a minimal pipeline based on the semantics required by LLVM,
2194 // which is just that always inlining occurs. Further, disable generating
2195 // lifetime intrinsics to avoid enabling further optimizations during
2196 // code generation.
2198 /*InsertLifetimeIntrinsics=*/false));
2199
2200 if (PTO.MergeFunctions)
2202
2203 if (EnableMatrix)
2204 MPM.addPass(
2206
2207 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2208 CGSCCPassManager CGPM;
2210 if (!CGPM.isEmpty())
2212 }
2213 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2214 LoopPassManager LPM;
2216 if (!LPM.isEmpty()) {
2218 createFunctionToLoopPassAdaptor(std::move(LPM))));
2219 }
2220 }
2221 if (!LoopOptimizerEndEPCallbacks.empty()) {
2222 LoopPassManager LPM;
2224 if (!LPM.isEmpty()) {
2226 createFunctionToLoopPassAdaptor(std::move(LPM))));
2227 }
2228 }
2229 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2232 if (!FPM.isEmpty())
2233 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2234 }
2235
2237
2238 if (!VectorizerStartEPCallbacks.empty()) {
2241 if (!FPM.isEmpty())
2242 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2243 }
2244
2245 if (!VectorizerEndEPCallbacks.empty()) {
2248 if (!FPM.isEmpty())
2249 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2250 }
2251
2252 ModulePassManager CoroPM;
2253 CoroPM.addPass(CoroEarlyPass());
2254 CGSCCPassManager CGPM;
2255 CGPM.addPass(CoroSplitPass());
2256 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2257 CoroPM.addPass(CoroCleanupPass());
2258 CoroPM.addPass(GlobalDCEPass());
2259 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2260
2262
2263 if (isLTOPreLink(Phase))
2264 addRequiredLTOPreLinkPasses(MPM);
2265
2267
2268 return MPM;
2269}
2270
2272 AAManager AA;
2273
2274 // The order in which these are registered determines their priority when
2275 // being queried.
2276
2277 // First we register the basic alias analysis that provides the majority of
2278 // per-function local AA logic. This is a stateless, on-demand local set of
2279 // AA techniques.
2281
2282 // Next we query fast, specialized alias analyses that wrap IR-embedded
2283 // information about aliasing.
2286
2287 // Add support for querying global aliasing information when available.
2288 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2289 // analysis, all that the `AAManager` can do is query for any *cached*
2290 // results from `GlobalsAA` through a readonly proxy.
2293
2294 // Add target-specific alias analyses.
2295 if (TM)
2297
2298 return AA;
2299}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition: LVOptions.cpp:25
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
cl::opt< std::string > UseCtxProfile
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the experimental LoopInterchange Pass"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableOrderFileInstrumentation("enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
void registerModuleAnalysis()
Register a specific AA result.
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra function passes if the ShouldRunExtraPasses marker analysis is p...
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
void addPass(PassT &&Pass)
The core GVN pass object.
Definition: GVN.h:124
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:48
Pass to outline similar regions.
Definition: IROutliner.h:444
Run instruction simplification across each instruction in the function.
The instrumentation pass for recording function order.
Instrumentation based profiling lowering pass.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:79
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Definition: LoopRotation.h:24
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Merge identical functions.
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
Module pass, wrapping the inliner pass.
Definition: Inliner.h:62
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:78
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
OpenMP optimizations pass.
Definition: OpenMPOpt.h:42
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static const OptimizationLevel O0
Disable as many optimizations as possible.
static const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Add PGOInstrumenation passes for O0 only.
void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
Definition: PassManager.h:195
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:217
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:73
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:58
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition: PassBuilder.h:87
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:77
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:84
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:65
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:69
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:50
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:61
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:54
Reassociate commutative expressions.
Definition: Reassociate.h:85
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:73
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:29
The sample profiler data loader pass.
Definition: SampleProfile.h:39
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
Interfaces for registering analysis passes, producing common pass manager configurations,...
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
cl::opt< bool > EnableKnowledgeRetention
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:852
@ MODULE
Definition: Attributor.h:6489
@ CGSCC
Definition: Attributor.h:6490
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:76
@ FullLTOPreLink
Full LTO prelink phase.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
@ None
No LTO/ThinLTO behavior needed.
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assume without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:399
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:406
A set of parameters to control various transforms performed by IPSCCP pass.
Definition: SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:58
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:205
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:222
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:207
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition: InlineCost.h:235
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:210
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:905
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:49
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:878