33#include "llvm/Config/llvm-config.h"
55#define DEBUG_TYPE "inline-cost"
57STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
61 cl::desc(
"Default amount of inlining to perform"));
70 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
71 "during inline cost calculation"));
75 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
79 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
83 cl::desc(
"Threshold for inlining functions with inline hint"));
88 cl::desc(
"Threshold for inlining cold callsites"));
92 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
99 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
106 cl::desc(
"A multiplier on top of cycle savings to decide whether the "
107 "savings won't justify the cost"));
111 cl::desc(
"The maximum size of a callee that get's "
112 "inlined without sufficient cycle savings"));
119 cl::desc(
"Threshold for inlining functions with cold attribute"));
123 cl::desc(
"Threshold for hot callsites "));
127 cl::desc(
"Threshold for locally hot callsites "));
131 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
132 "entry frequency, for a callsite to be cold in the absence of "
133 "profile information."));
137 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
138 "entry frequency, for a callsite to be hot in the absence of "
139 "profile information."));
143 cl::desc(
"Cost of a single instruction when inlining"));
147 cl::desc(
"Cost of a single inline asm instruction when inlining"));
151 cl::desc(
"Cost of load/store instruction when inlining"));
155 cl::desc(
"Call penalty that is applied per callsite when inlining"));
159 cl::init(std::numeric_limits<size_t>::max()),
160 cl::desc(
"Do not inline functions with a stack size "
161 "that exceeds the specified limit"));
166 cl::desc(
"Do not inline recursive functions with a stack "
167 "size that exceeds the specified limit"));
171 cl::desc(
"Compute the full inline cost of a call site even when the cost "
172 "exceeds the threshold."));
176 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
181 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
185 cl::desc(
"Inline all viable calls, even if they exceed the inlining "
205namespace InlineConstants {
213class InlineCostCallAnalyzer;
217struct InstructionCostDetail {
220 int ThresholdBefore = 0;
221 int ThresholdAfter = 0;
223 int getThresholdDelta()
const {
return ThresholdAfter - ThresholdBefore; }
225 int getCostDelta()
const {
return CostAfter - CostBefore; }
227 bool hasThresholdChanged()
const {
return ThresholdAfter != ThresholdBefore; }
232 InlineCostCallAnalyzer *
const ICCA;
235 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
248class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
253 virtual ~CallAnalyzer() =
default;
288 virtual void onBlockStart(
const BasicBlock *BB) {}
291 virtual void onBlockAnalyzed(
const BasicBlock *BB) {}
294 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
297 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
/// Polled during the analysis walk; returning true aborts the walk early.
/// The base implementation never stops — subclasses override this
/// (e.g. to bail once a cost threshold is exceeded).
virtual bool shouldStop() { return false; }
316 virtual void onDisableSROA(
AllocaInst *Arg) {}
319 virtual void onDisableLoadElimination() {}
/// Hook fired before a call site is analyzed.  Returning false skips the
/// rest of the analysis for \p Call; the base implementation always
/// continues.
virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }
326 virtual void onCallPenalty() {}
329 virtual void onMemAccess(){};
333 virtual void onLoadEliminationOpportunity() {}
337 virtual void onCallArgumentSetup(
const CallBase &Call) {}
340 virtual void onLoadRelativeIntrinsic() {}
/// Hook fired when a switch is expected to lower to a jump table with
/// \p JumpTableSize entries.  Base implementation returns true —
/// presumably "accounted for"; confirm the exact semantics at the caller.
virtual bool onJumpTable(unsigned JumpTableSize) { return true; }
/// Hook fired when a switch is expected to lower as \p NumCaseCluster
/// case clusters.  Base implementation returns true — presumably
/// "accounted for"; confirm the exact semantics at the caller.
virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }
356 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
357 bool DefaultDestUnreachable) {}
361 virtual void onMissedSimplification() {}
364 virtual void onInlineAsm(
const InlineAsm &Arg) {}
367 virtual void onInitializeSROAArg(
AllocaInst *Arg) {}
370 virtual void onAggregateSROAUse(
AllocaInst *V) {}
372 bool handleSROA(
Value *V,
bool DoNotDisable) {
374 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
376 onAggregateSROAUse(SROAArg);
379 disableSROAForArg(SROAArg);
384 bool IsCallerRecursive =
false;
385 bool IsRecursiveCall =
false;
386 bool ExposesReturnsTwice =
false;
387 bool HasDynamicAlloca =
false;
388 bool ContainsNoDuplicateCall =
false;
389 bool HasReturn =
false;
390 bool HasIndirectBr =
false;
391 bool HasUninlineableIntrinsic =
false;
392 bool InitsVargArgs =
false;
396 unsigned NumInstructions = 0;
397 unsigned NumInlineAsmInstructions = 0;
398 unsigned NumVectorInstructions = 0;
430 bool EnableLoadElimination =
true;
433 bool AllowRecursiveCall =
false;
438 auto It = SROAArgValues.
find(V);
439 if (It == SROAArgValues.
end() || EnabledSROAAllocas.
count(It->second) == 0)
446 template <
typename T>
T *getDirectOrSimplifiedValue(
Value *V)
const {
447 if (
auto *Direct = dyn_cast<T>(V))
449 return getSimplifiedValue<T>(V);
453 bool isAllocaDerivedArg(
Value *V);
455 void disableSROA(
Value *V);
457 void disableLoadElimination();
462 bool simplifyCmpInstForRecCall(
CmpInst &Cmp);
464 bool simplifyIntrinsicCallIsConstant(
CallBase &CB);
465 bool simplifyIntrinsicCallObjectSize(
CallBase &CB);
478 bool isKnownNonNullInCallee(
Value *V);
481 bool allowSizeGrowth(
CallBase &Call);
536 :
TTI(
TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
537 GetTLI(GetTLI), PSI(PSI),
F(
Callee),
DL(
F.getDataLayout()), ORE(ORE),
538 CandidateCall(
Call), GetEphValuesCache(GetEphValuesCache) {}
543 Value *getSimplifiedValueUnchecked(
Value *V)
const {
544 return SimplifiedValues.
lookup(V);
549 template <
typename T>
T *getSimplifiedValue(
Value *V)
const {
556 if constexpr (std::is_base_of_v<Constant, T>)
557 return dyn_cast<T>(SimpleV);
560 if (
auto *
I = dyn_cast<Instruction>(SimpleV)) {
561 if (
I->getFunction() != &
F)
563 }
else if (
auto *Arg = dyn_cast<Argument>(SimpleV)) {
564 if (Arg->getParent() != &
F)
566 }
else if (!isa<Constant>(SimpleV))
568 return dyn_cast<T>(SimpleV);
573 unsigned NumConstantArgs = 0;
574 unsigned NumConstantOffsetPtrArgs = 0;
575 unsigned NumAllocaArgs = 0;
576 unsigned NumConstantPtrCmps = 0;
577 unsigned NumConstantPtrDiffs = 0;
578 unsigned NumInstructionsSimplified = 0;
// Expected number of compare instructions when a switch with
// \p NumCaseCluster case clusters is lowered to a compare cascade:
// evaluates to 3*N/2 - 1.  The multiplication is done in 64 bits
// (static_cast before '*') so large cluster counts cannot overflow int.
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
604class InlineCostCallAnalyzer final :
public CallAnalyzer {
605 const bool ComputeFullInlineCost;
606 int LoadEliminationCost = 0;
611 int SingleBBBonus = 0;
626 int StaticBonusApplied = 0;
629 const bool BoostIndirectCalls;
632 const bool IgnoreThreshold;
635 const bool CostBenefitAnalysisEnabled;
646 int CostAtBBStart = 0;
653 bool DecidedByCostThreshold =
false;
656 bool DecidedByCostBenefit =
false;
661 bool SingleBB =
true;
663 unsigned SROACostSavings = 0;
664 unsigned SROACostSavingsLost = 0;
680 std::optional<int> getHotCallSiteThreshold(
CallBase &Call,
/// Fold \p Inc into the running Cost, saturating both the increment and
/// the sum to [INT_MIN, INT_MAX] so the 64-bit intermediate cannot wrap
/// when stored back into Cost.
void addCost(int64_t Inc) {
  Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
  Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);
689 void onDisableSROA(
AllocaInst *Arg)
override {
690 auto CostIt = SROAArgCosts.
find(Arg);
691 if (CostIt == SROAArgCosts.
end())
693 addCost(CostIt->second);
694 SROACostSavings -= CostIt->second;
695 SROACostSavingsLost += CostIt->second;
696 SROAArgCosts.
erase(CostIt);
699 void onDisableLoadElimination()
override {
700 addCost(LoadEliminationCost);
701 LoadEliminationCost = 0;
704 bool onCallBaseVisitStart(
CallBase &Call)
override {
705 if (std::optional<int> AttrCallThresholdBonus =
707 Threshold += *AttrCallThresholdBonus;
709 if (std::optional<int> AttrCallCost =
711 addCost(*AttrCallCost);
719 void onCallPenalty()
override { addCost(
CallPenalty); }
723 void onCallArgumentSetup(
const CallBase &Call)
override {
728 void onLoadRelativeIntrinsic()
override {
733 bool IsIndirectCall)
override {
742 if (IsIndirectCall && BoostIndirectCalls) {
743 auto IndirectCallParams = Params;
748 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
749 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
751 if (CA.analyze().isSuccess()) {
754 Cost -= std::max(0, CA.getThreshold() - CA.getCost());
762 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
763 bool DefaultDestUnreachable)
override {
770 if (!DefaultDestUnreachable)
779 if (NumCaseCluster <= 3) {
783 addCost((NumCaseCluster - DefaultDestUnreachable) * 2 *
InstrCost);
787 int64_t ExpectedNumberOfCompare =
788 getExpectedNumberOfCompare(NumCaseCluster);
789 int64_t SwitchCost = ExpectedNumberOfCompare * 2 *
InstrCost;
797 void onInlineAsm(
const InlineAsm &Arg)
override {
802 int SectionLevel = 0;
803 int InlineAsmInstrCount = 0;
807 size_t hashPos = Trimmed.
find(
'#');
809 Trimmed = Trimmed.
substr(0, hashPos);
828 if (SectionLevel == 0)
829 ++InlineAsmInstrCount;
831 NumInlineAsmInstructions += InlineAsmInstrCount;
835 void onMissedSimplification()
override { addCost(
InstrCost); }
837 void onInitializeSROAArg(
AllocaInst *Arg)
override {
839 "Should not initialize SROA costs for null value.");
841 SROACostSavings += SROAArgCost;
842 SROAArgCosts[Arg] = SROAArgCost;
845 void onAggregateSROAUse(
AllocaInst *SROAArg)
override {
846 auto CostIt = SROAArgCosts.
find(SROAArg);
848 "expected this argument to have a cost");
853 void onBlockStart(
const BasicBlock *BB)
override { CostAtBBStart =
Cost; }
855 void onBlockAnalyzed(
const BasicBlock *BB)
override {
856 if (CostBenefitAnalysisEnabled) {
859 assert(GetBFI &&
"GetBFI must be available");
861 assert(BFI &&
"BFI must be available");
864 ColdSize +=
Cost - CostAtBBStart;
872 if (SingleBB && TI->getNumSuccessors() > 1) {
874 Threshold -= SingleBBBonus;
879 void onInstructionAnalysisStart(
const Instruction *
I)
override {
884 auto &CostDetail = InstructionCostDetailMap[
I];
885 CostDetail.CostBefore =
Cost;
886 CostDetail.ThresholdBefore = Threshold;
889 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
894 auto &CostDetail = InstructionCostDetailMap[
I];
895 CostDetail.CostAfter =
Cost;
896 CostDetail.ThresholdAfter = Threshold;
899 bool isCostBenefitAnalysisEnabled() {
900 if (!PSI || !PSI->hasProfileSummary())
912 if (!PSI->hasInstrumentationProfile())
917 if (!
Caller->getEntryCount())
925 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
929 auto EntryCount =
F.getEntryCount();
930 if (!EntryCount || !EntryCount->getCount())
941 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const {
948 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const {
954 void OverrideCycleSavingsAndSizeForTesting(
APInt &CycleSavings,
int &
Size) {
956 CandidateCall,
"inline-cycle-savings-for-test")) {
957 CycleSavings = *AttrCycleSavings;
961 CandidateCall,
"inline-runtime-cost-for-test")) {
962 Size = *AttrRuntimeCost;
969 std::optional<bool> costBenefitAnalysis() {
970 if (!CostBenefitAnalysisEnabled)
994 APInt CycleSavings(128, 0);
997 APInt CurrentSavings(128, 0);
1001 if (BI->isConditional() &&
1002 getSimplifiedValue<ConstantInt>(BI->getCondition())) {
1005 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(&
I)) {
1006 if (getSimplifiedValue<ConstantInt>(
SI->getCondition()))
1008 }
else if (
Value *V = dyn_cast<Value>(&
I)) {
1010 if (SimplifiedValues.
count(V)) {
1018 CycleSavings += CurrentSavings;
1022 auto EntryProfileCount =
F.getEntryCount();
1023 assert(EntryProfileCount && EntryProfileCount->getCount());
1024 auto EntryCount = EntryProfileCount->getCount();
1025 CycleSavings += EntryCount / 2;
1026 CycleSavings = CycleSavings.
udiv(EntryCount);
1029 auto *CallerBB = CandidateCall.
getParent();
1043 OverrideCycleSavingsAndSizeForTesting(CycleSavings,
Size);
1067 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
1070 APInt UpperBoundCycleSavings = CycleSavings;
1071 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
1072 if (UpperBoundCycleSavings.
uge(Threshold))
1075 APInt LowerBoundCycleSavings = CycleSavings;
1076 LowerBoundCycleSavings *=
1077 getInliningCostBenefitAnalysisProfitableMultiplier();
1078 if (LowerBoundCycleSavings.
ult(Threshold))
1082 return std::nullopt;
1092 if (
Caller->hasMinSize()) {
1096 for (
Loop *L : LI) {
1098 if (DeadBlocks.
count(
L->getHeader()))
1108 if (NumVectorInstructions <= NumInstructions / 10)
1109 Threshold -= VectorBonus;
1110 else if (NumVectorInstructions <= NumInstructions / 2)
1111 Threshold -= VectorBonus / 2;
1113 if (std::optional<int> AttrCost =
1120 Cost *= *AttrCostMult;
1122 if (std::optional<int> AttrThreshold =
1124 Threshold = *AttrThreshold;
1126 if (
auto Result = costBenefitAnalysis()) {
1127 DecidedByCostBenefit =
true;
1134 if (IgnoreThreshold)
1137 DecidedByCostThreshold =
true;
1138 return Cost < std::max(1, Threshold)
1143 bool shouldStop()
override {
1144 if (IgnoreThreshold || ComputeFullInlineCost)
1148 if (
Cost < Threshold)
1150 DecidedByCostThreshold =
true;
1154 void onLoadEliminationOpportunity()
override {
1169 assert(NumInstructions == 0);
1170 assert(NumVectorInstructions == 0);
1173 updateThreshold(CandidateCall,
F);
1179 assert(SingleBBBonus >= 0);
1180 assert(VectorBonus >= 0);
1185 Threshold += (SingleBBBonus + VectorBonus);
1199 if (
Cost >= Threshold && !ComputeFullInlineCost)
1206 InlineCostCallAnalyzer(
1214 bool IgnoreThreshold =
false,
1217 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1218 ORE, GetEphValuesCache),
1220 Params.ComputeFullInlineCost || ORE ||
1221 isCostBenefitAnalysisEnabled()),
1223 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1224 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1230 InlineCostAnnotationWriter Writer;
1238 std::optional<InstructionCostDetail> getCostDetails(
const Instruction *
I) {
1239 auto It = InstructionCostDetailMap.
find(
I);
1240 if (It != InstructionCostDetailMap.
end())
1242 return std::nullopt;
1245 virtual ~InlineCostCallAnalyzer() =
default;
1246 int getThreshold()
const {
return Threshold; }
1248 int getStaticBonusApplied()
const {
return StaticBonusApplied; }
1249 std::optional<CostBenefitPair> getCostBenefitPair() {
return CostBenefit; }
1250 bool wasDecidedByCostBenefit()
const {
return DecidedByCostBenefit; }
1251 bool wasDecidedByCostThreshold()
const {
return DecidedByCostThreshold; }
1255static bool isSoleCallToLocalFunction(
const CallBase &CB,
1257 return Callee.hasLocalLinkage() &&
Callee.hasOneLiveUse() &&
1261class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1268 static constexpr int JTCostMultiplier = 2;
1269 static constexpr int CaseClusterCostMultiplier = 2;
1270 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1271 static constexpr int SwitchCostMultiplier = 2;
1275 unsigned SROACostSavingOpportunities = 0;
1276 int VectorBonus = 0;
1277 int SingleBBBonus = 0;
1283 Cost[
static_cast<size_t>(Feature)] += Delta;
1287 Cost[
static_cast<size_t>(Feature)] =
Value;
1290 void onDisableSROA(
AllocaInst *Arg)
override {
1291 auto CostIt = SROACosts.
find(Arg);
1292 if (CostIt == SROACosts.
end())
1295 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1296 SROACostSavingOpportunities -= CostIt->second;
1297 SROACosts.
erase(CostIt);
1300 void onDisableLoadElimination()
override {
1301 set(InlineCostFeatureIndex::load_elimination, 1);
1304 void onCallPenalty()
override {
1305 increment(InlineCostFeatureIndex::call_penalty,
CallPenalty);
1308 void onCallArgumentSetup(
const CallBase &Call)
override {
1309 increment(InlineCostFeatureIndex::call_argument_setup,
1313 void onLoadRelativeIntrinsic()
override {
1314 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 *
InstrCost);
1318 bool IsIndirectCall)
override {
1319 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1322 if (IsIndirectCall) {
1336 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
1337 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
1339 if (CA.analyze().isSuccess()) {
1340 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1342 increment(InlineCostFeatureIndex::nested_inlines, 1);
1349 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
1350 bool DefaultDestUnreachable)
override {
1351 if (JumpTableSize) {
1352 if (!DefaultDestUnreachable)
1353 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1354 SwitchDefaultDestCostMultiplier *
InstrCost);
1355 int64_t JTCost =
static_cast<int64_t
>(JumpTableSize) *
InstrCost +
1357 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1361 if (NumCaseCluster <= 3) {
1362 increment(InlineCostFeatureIndex::case_cluster_penalty,
1363 (NumCaseCluster - DefaultDestUnreachable) *
1368 int64_t ExpectedNumberOfCompare =
1369 getExpectedNumberOfCompare(NumCaseCluster);
1371 int64_t SwitchCost =
1372 ExpectedNumberOfCompare * SwitchCostMultiplier *
InstrCost;
1373 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1376 void onMissedSimplification()
override {
1377 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1381 void onInitializeSROAArg(
AllocaInst *Arg)
override {
1383 SROACosts[Arg] = SROAArgCost;
1384 SROACostSavingOpportunities += SROAArgCost;
1387 void onAggregateSROAUse(
AllocaInst *Arg)
override {
1389 SROACostSavingOpportunities +=
InstrCost;
1392 void onBlockAnalyzed(
const BasicBlock *BB)
override {
1394 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1395 Threshold -= SingleBBBonus;
1400 if (
Caller->hasMinSize()) {
1403 for (
Loop *L : LI) {
1405 if (DeadBlocks.
count(
L->getHeader()))
1407 increment(InlineCostFeatureIndex::num_loops,
1411 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.
size());
1412 set(InlineCostFeatureIndex::simplified_instructions,
1413 NumInstructionsSimplified);
1414 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1415 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1416 NumConstantOffsetPtrArgs);
1417 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1419 if (NumVectorInstructions <= NumInstructions / 10)
1420 Threshold -= VectorBonus;
1421 else if (NumVectorInstructions <= NumInstructions / 2)
1422 Threshold -= VectorBonus / 2;
1424 set(InlineCostFeatureIndex::threshold, Threshold);
1429 bool shouldStop()
override {
return false; }
1431 void onLoadEliminationOpportunity()
override {
1432 increment(InlineCostFeatureIndex::load_elimination, 1);
1436 increment(InlineCostFeatureIndex::callsite_cost,
1439 set(InlineCostFeatureIndex::cold_cc_penalty,
1442 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1443 isSoleCallToLocalFunction(CandidateCall,
F));
1448 int SingleBBBonusPercent = 50;
1452 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1453 VectorBonus = Threshold * VectorBonusPercent / 100;
1454 Threshold += (SingleBBBonus + VectorBonus);
1460 InlineCostFeaturesAnalyzer(
1467 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI,
// Returns true if \p V is tracked in SROAArgValues — i.e. it is (derived
// from) a value backed by an SROA-candidate alloca.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
// Give up on SROA savings for the alloca \p SROAArg: notify the subclass
// hook, remove the alloca from the enabled set, and conservatively turn
// off load elimination as well.
void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
1486void InlineCostAnnotationWriter::emitInstructionAnnot(
1491 std::optional<InstructionCostDetail>
Record = ICCA->getCostDetails(
I);
1493 OS <<
"; No analysis for the instruction";
1495 OS <<
"; cost before = " <<
Record->CostBefore
1496 <<
", cost after = " <<
Record->CostAfter
1497 <<
", threshold before = " <<
Record->ThresholdBefore
1498 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1499 OS <<
"cost delta = " <<
Record->getCostDelta();
1500 if (
Record->hasThresholdChanged())
1501 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1503 auto *
V = ICCA->getSimplifiedValueUnchecked(
const_cast<Instruction *
>(
I));
1505 OS <<
", simplified to ";
1507 if (
auto *VI = dyn_cast<Instruction>(V)) {
1508 if (
VI->getFunction() !=
I->getFunction())
1509 OS <<
" (caller instruction)";
1510 }
else if (
auto *VArg = dyn_cast<Argument>(V)) {
1511 if (VArg->getParent() !=
I->getFunction())
1512 OS <<
" (caller argument)";
// If \p V maps back to an SROA-candidate alloca, disable SROA savings for
// that alloca; otherwise do nothing.
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
// Turn off the load-elimination bonus.  Idempotent: the subclass hook
// fires only on the first enabled -> disabled transition.
void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
1537 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1541 GTI != GTE; ++GTI) {
1543 getDirectOrSimplifiedValue<ConstantInt>(GTI.getOperand());
1550 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1569 for (
const Use &
Op :
GEP.indices())
1570 if (
Constant *SimpleOp = getSimplifiedValue<Constant>(
Op))
1580 disableSROA(
I.getOperand(0));
1584 if (
I.isArrayAllocation()) {
1585 Constant *
Size = getSimplifiedValue<Constant>(
I.getArraySize());
1586 if (
auto *AllocSize = dyn_cast_or_null<ConstantInt>(
Size)) {
1595 Type *Ty =
I.getAllocatedType();
1597 AllocSize->getLimitedValue(),
1598 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
1600 HasDynamicAlloca =
true;
1606 if (
I.isStaticAlloca()) {
1607 Type *Ty =
I.getAllocatedType();
1608 AllocatedSize =
SaturatingAdd(
DL.getTypeAllocSize(Ty).getKnownMinValue(),
1616 if (!
I.isStaticAlloca())
1617 HasDynamicAlloca =
true;
1622bool CallAnalyzer::visitPHI(
PHINode &
I) {
1634 bool CheckSROA =
I.getType()->isPointerTy();
1638 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1639 Value *FirstV =
nullptr;
1641 for (
unsigned i = 0, e =
I.getNumIncomingValues(); i != e; ++i) {
1644 if (DeadBlocks.
count(Pred))
1648 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1649 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1652 Value *
V =
I.getIncomingValue(i);
1657 Constant *
C = getDirectOrSimplifiedValue<Constant>(V);
1659 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1660 if (!
C && CheckSROA)
1661 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1663 if (!
C && !BaseAndOffset.first)
1680 if (FirstBaseAndOffset == BaseAndOffset)
1694 FirstBaseAndOffset = BaseAndOffset;
1699 SimplifiedValues[&
I] = FirstC;
1704 if (FirstBaseAndOffset.first) {
1705 ConstantOffsetPtrs[&
I] = FirstBaseAndOffset;
1707 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1708 SROAArgValues[&
I] = SROAArg;
1720 std::pair<Value *, APInt> BaseAndOffset =
1721 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1722 if (!BaseAndOffset.first)
1727 if (!accumulateGEPOffset(cast<GEPOperator>(
I), BaseAndOffset.second))
1731 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1737 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1741 for (
const Use &
Op :
GEP.indices())
1742 if (!getDirectOrSimplifiedValue<Constant>(
Op))
1751 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1753 SROAArgValues[&
I] = SROAArg;
1761 disableSROAForArg(SROAArg);
1762 return isGEPFree(
I);
1768bool CallAnalyzer::simplifyCmpInstForRecCall(
CmpInst &Cmp) {
1770 if (!isa<Argument>(
Cmp.getOperand(0)) || !isa<Constant>(
Cmp.getOperand(1)))
1772 auto *CmpOp =
Cmp.getOperand(0);
1777 auto *CallBB = CandidateCall.
getParent();
1778 auto *Predecessor = CallBB->getSinglePredecessor();
1782 auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
1783 if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
1788 bool ArgFound =
false;
1789 Value *FuncArg =
nullptr, *CallArg =
nullptr;
1790 for (
unsigned ArgNum = 0;
1791 ArgNum <
F.arg_size() && ArgNum < CandidateCall.
arg_size(); ArgNum++) {
1792 FuncArg =
F.getArg(ArgNum);
1794 if (FuncArg == CmpOp && CallArg != CmpOp) {
1806 CC.Invert = (CallBB != Br->getSuccessor(0));
1808 CC.AffectedValues.insert(FuncArg);
1810 cast<CmpInst>(&Cmp), {CallArg,
Cmp.getOperand(1)}, SQ);
1811 if (
auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
1814 if ((ConstVal->isOne() && CC.Invert) ||
1815 (ConstVal->isZero() && !CC.Invert)) {
1816 SimplifiedValues[&
Cmp] = ConstVal;
1824bool CallAnalyzer::simplifyInstruction(
Instruction &
I) {
1827 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
1835 SimplifiedValues[&
I] =
C;
1848bool CallAnalyzer::simplifyIntrinsicCallIsConstant(
CallBase &CB) {
1850 auto *
C = getDirectOrSimplifiedValue<Constant>(Arg);
1853 SimplifiedValues[&CB] = ConstantInt::get(RT,
C ? 1 : 0);
1857bool CallAnalyzer::simplifyIntrinsicCallObjectSize(
CallBase &CB) {
1865 Constant *
C = dyn_cast_or_null<Constant>(V);
1867 SimplifiedValues[&CB] =
C;
1877 std::pair<Value *, APInt> BaseAndOffset =
1878 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1880 if (BaseAndOffset.first)
1881 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1884 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1885 SROAArgValues[&
I] = SROAArg;
1898 unsigned IntegerSize =
I.getType()->getScalarSizeInBits();
1899 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1900 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1901 std::pair<Value *, APInt> BaseAndOffset =
1902 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1903 if (BaseAndOffset.first)
1904 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1914 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1915 SROAArgValues[&
I] = SROAArg;
1929 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1930 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1931 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1932 if (BaseAndOffset.first)
1933 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1937 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1938 SROAArgValues[&
I] = SROAArg;
1944bool CallAnalyzer::visitCastInst(
CastInst &
I) {
1951 disableSROA(
I.getOperand(0));
1956 switch (
I.getOpcode()) {
1957 case Instruction::FPTrunc:
1958 case Instruction::FPExt:
1959 case Instruction::UIToFP:
1960 case Instruction::SIToFP:
1961 case Instruction::FPToUI:
1962 case Instruction::FPToSI:
1978bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1984 if (
Argument *
A = dyn_cast<Argument>(V))
1985 if (paramHasAttr(
A, Attribute::NonNull))
1991 if (isAllocaDerivedArg(V))
2000bool CallAnalyzer::allowSizeGrowth(
CallBase &Call) {
2017 if (isa<UnreachableInst>(
II->getNormalDest()->getTerminator()))
2019 }
else if (isa<UnreachableInst>(
Call.getParent()->getTerminator()))
2025bool InlineCostCallAnalyzer::isColdCallSite(
CallBase &Call,
2029 if (PSI && PSI->hasProfileSummary())
2030 return PSI->isColdCallSite(Call, CallerBFI);
2041 auto CallSiteBB =
Call.getParent();
2042 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
2043 auto CallerEntryFreq =
2045 return CallSiteFreq < CallerEntryFreq * ColdProb;
2049InlineCostCallAnalyzer::getHotCallSiteThreshold(
CallBase &Call,
2054 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
2060 return std::nullopt;
2070 if (Limit && CallSiteFreq >= *Limit)
2074 return std::nullopt;
2077void InlineCostCallAnalyzer::updateThreshold(
CallBase &Call,
Function &Callee) {
2079 if (!allowSizeGrowth(Call)) {
2087 auto MinIfValid = [](
int A, std::optional<int>
B) {
2088 return B ? std::min(
A, *
B) :
A;
2092 auto MaxIfValid = [](
int A, std::optional<int>
B) {
2093 return B ? std::max(
A, *
B) :
A;
2108 int SingleBBBonusPercent = 50;
2113 auto DisallowAllBonuses = [&]() {
2114 SingleBBBonusPercent = 0;
2115 VectorBonusPercent = 0;
2116 LastCallToStaticBonus = 0;
2121 if (
Caller->hasMinSize()) {
2127 SingleBBBonusPercent = 0;
2128 VectorBonusPercent = 0;
2129 }
else if (
Caller->hasOptSize())
2134 if (!
Caller->hasMinSize()) {
2135 if (
Callee.hasFnAttribute(Attribute::InlineHint))
2160 DisallowAllBonuses();
2165 if (PSI->isFunctionEntryHot(&Callee)) {
2171 }
else if (PSI->isFunctionEntryCold(&Callee)) {
2177 DisallowAllBonuses();
2189 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2190 VectorBonus = Threshold * VectorBonusPercent / 100;
2195 if (isSoleCallToLocalFunction(Call,
F)) {
2196 Cost -= LastCallToStaticBonus;
2197 StaticBonusApplied = LastCallToStaticBonus;
2201bool CallAnalyzer::visitCmpInst(
CmpInst &
I) {
2208 if (simplifyCmpInstForRecCall(
I))
2211 if (
I.getOpcode() == Instruction::FCmp)
2216 Value *LHSBase, *RHSBase;
2217 APInt LHSOffset, RHSOffset;
2218 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2220 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2221 if (RHSBase && LHSBase == RHSBase) {
2227 ++NumConstantPtrCmps;
2232 auto isImplicitNullCheckCmp = [](
const CmpInst &
I) {
2233 for (
auto *
User :
I.users())
2234 if (
auto *Instr = dyn_cast<Instruction>(
User))
2235 if (!
Instr->getMetadata(LLVMContext::MD_make_implicit))
2242 if (
I.isEquality() && isa<ConstantPointerNull>(
I.getOperand(1))) {
2243 if (isKnownNonNullInCallee(
I.getOperand(0))) {
2251 if (isImplicitNullCheckCmp(
I))
2254 return handleSROA(
I.getOperand(0), isa<ConstantPointerNull>(
I.getOperand(1)));
2261 Value *LHSBase, *RHSBase;
2262 APInt LHSOffset, RHSOffset;
2263 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2265 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2266 if (RHSBase && LHSBase == RHSBase) {
2272 SimplifiedValues[&
I] =
C;
2273 ++NumConstantPtrDiffs;
2281 return Base::visitSub(
I);
2286 Constant *CLHS = getDirectOrSimplifiedValue<Constant>(LHS);
2287 Constant *CRHS = getDirectOrSimplifiedValue<Constant>(RHS);
2289 Value *SimpleV =
nullptr;
2290 if (
auto FI = dyn_cast<FPMathOperator>(&
I))
2291 SimpleV =
simplifyBinOp(
I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2292 FI->getFastMathFlags(),
DL);
2297 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2298 SimplifiedValues[&
I] =
C;
2311 if (
I.getType()->isFloatingPointTy() &&
2321 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
2324 COp ? COp :
Op, cast<FPMathOperator>(
I).getFastMathFlags(),
DL);
2326 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2327 SimplifiedValues[&
I] =
C;
2338bool CallAnalyzer::visitLoad(
LoadInst &
I) {
2339 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2345 if (EnableLoadElimination &&
2346 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2347 onLoadEliminationOpportunity();
2356 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2367 disableLoadElimination();
2374 Value *
Op =
I.getAggregateOperand();
2378 if (
Value *SimpleOp = getSimplifiedValueUnchecked(
Op)) {
2382 SimplifiedValues[&
I] = SimpleV;
2388 return Base::visitExtractValue(
I);
2397 return Base::visitInsertValue(
I);
2418 Constant *
C = getDirectOrSimplifiedValue<Constant>(
I);
2425 SimplifiedValues[&
Call] =
C;
2439 case LibFunc_memcpy_chk:
2440 case LibFunc_memmove_chk:
2441 case LibFunc_mempcpy_chk:
2442 case LibFunc_memset_chk: {
2449 auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(
Call.getOperand(2));
2451 getDirectOrSimplifiedValue<ConstantInt>(
Call.getOperand(3));
2452 if (LenOp && ObjSizeOp &&
2453 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2465bool CallAnalyzer::visitCallBase(
CallBase &Call) {
2466 if (!onCallBaseVisitStart(Call))
2469 if (
Call.hasFnAttr(Attribute::ReturnsTwice) &&
2470 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2472 ExposesReturnsTwice =
true;
2475 if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2476 ContainsNoDuplicateCall =
true;
2478 if (
InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(
Call.getCalledOperand()))
2479 onInlineAsm(*InlineAsmOp);
2482 bool IsIndirectCall = !
F;
2483 if (IsIndirectCall) {
2487 F = getSimplifiedValue<Function>(Callee);
2488 if (!
F ||
F->getFunctionType() !=
Call.getFunctionType()) {
2489 onCallArgumentSetup(Call);
2491 if (!
Call.onlyReadsMemory())
2492 disableLoadElimination();
2493 return Base::visitCallBase(Call);
2497 assert(
F &&
"Expected a call to a known function");
2500 if (simplifyCallSite(
F, Call))
2506 switch (
II->getIntrinsicID()) {
2509 disableLoadElimination();
2510 return Base::visitCallBase(Call);
2512 case Intrinsic::load_relative:
2513 onLoadRelativeIntrinsic();
2516 case Intrinsic::memset:
2517 case Intrinsic::memcpy:
2518 case Intrinsic::memmove:
2519 disableLoadElimination();
2522 case Intrinsic::icall_branch_funnel:
2523 case Intrinsic::localescape:
2524 HasUninlineableIntrinsic =
true;
2526 case Intrinsic::vastart:
2527 InitsVargArgs =
true;
2529 case Intrinsic::launder_invariant_group:
2530 case Intrinsic::strip_invariant_group:
2531 if (
auto *SROAArg = getSROAArgForValueOrNull(
II->getOperand(0)))
2532 SROAArgValues[
II] = SROAArg;
2534 case Intrinsic::is_constant:
2535 return simplifyIntrinsicCallIsConstant(Call);
2536 case Intrinsic::objectsize:
2537 return simplifyIntrinsicCallObjectSize(Call);
2541 if (
F ==
Call.getFunction()) {
2544 IsRecursiveCall =
true;
2545 if (!AllowRecursiveCall)
2549 if (isLoweredToCall(
F, Call)) {
2550 onLoweredCall(
F, Call, IsIndirectCall);
2553 if (!(
Call.onlyReadsMemory() || (IsIndirectCall &&
F->onlyReadsMemory())))
2554 disableLoadElimination();
2555 return Base::visitCallBase(Call);
2558bool CallAnalyzer::visitReturnInst(
ReturnInst &RI) {
2560 bool Free = !HasReturn;
2565bool CallAnalyzer::visitBranchInst(
BranchInst &BI) {
2571 getDirectOrSimplifiedValue<ConstantInt>(BI.
getCondition()) ||
2575bool CallAnalyzer::visitSelectInst(
SelectInst &SI) {
2576 bool CheckSROA =
SI.getType()->isPointerTy();
2580 Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal);
2581 Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal);
2582 Constant *CondC = getSimplifiedValue<Constant>(
SI.getCondition());
2586 if (TrueC == FalseC && TrueC) {
2587 SimplifiedValues[&
SI] = TrueC;
2592 return Base::visitSelectInst(SI);
2594 std::pair<Value *, APInt> TrueBaseAndOffset =
2595 ConstantOffsetPtrs.
lookup(TrueVal);
2596 std::pair<Value *, APInt> FalseBaseAndOffset =
2597 ConstantOffsetPtrs.
lookup(FalseVal);
2598 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2599 ConstantOffsetPtrs[&
SI] = TrueBaseAndOffset;
2601 if (
auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2602 SROAArgValues[&
SI] = SROAArg;
2606 return Base::visitSelectInst(SI);
2617 if (TrueC && FalseC) {
2619 SimplifiedValues[&
SI] =
C;
2623 return Base::visitSelectInst(SI);
2627 if (
Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2628 SimplifiedValues[&
SI] = SelectedC;
2635 std::pair<Value *, APInt> BaseAndOffset =
2636 ConstantOffsetPtrs.
lookup(SelectedV);
2637 if (BaseAndOffset.first) {
2638 ConstantOffsetPtrs[&
SI] = BaseAndOffset;
2640 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2641 SROAArgValues[&
SI] = SROAArg;
2647bool CallAnalyzer::visitSwitchInst(
SwitchInst &SI) {
2650 if (getDirectOrSimplifiedValue<ConstantInt>(
SI.getCondition()))
2665 unsigned JumpTableSize = 0;
2667 unsigned NumCaseCluster =
2670 onFinalizeSwitch(JumpTableSize, NumCaseCluster,
SI.defaultDestUnreachable());
2683 HasIndirectBr =
true;
2687bool CallAnalyzer::visitResumeInst(
ResumeInst &RI) {
2721 for (
const Use &
Op :
I.operands())
2746 if (
I.isDebugOrPseudoInst())
2754 if (isa<ExtractElementInst>(
I) ||
I.getType()->isVectorTy())
2755 ++NumVectorInstructions;
2762 onInstructionAnalysisStart(&
I);
2764 if (Base::visit(&
I))
2765 ++NumInstructionsSimplified;
2767 onMissedSimplification();
2769 onInstructionAnalysisFinish(&
I);
2770 using namespace ore;
2773 if (IsRecursiveCall && !AllowRecursiveCall)
2775 else if (ExposesReturnsTwice)
2777 else if (HasDynamicAlloca)
2779 else if (HasIndirectBr)
2781 else if (HasUninlineableIntrinsic)
2783 else if (InitsVargArgs)
2785 if (!
IR.isSuccess()) {
2790 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2791 <<
NV(
"InlineResult",
IR.getFailureReason())
2792 <<
") and cost is not fully computed";
2807 <<
NV(
"Callee", &
F) <<
" is "
2808 <<
NV(
"InlineResult",
IR.getFailureReason())
2809 <<
". Cost is not fully computed";
2816 "Call site analysis is not favorable to inlining.");
2828ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2829 if (!
V->getType()->isPointerTy())
2832 unsigned AS =
V->getType()->getPointerAddressSpace();
2833 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2842 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP,
Offset))
2844 V =
GEP->getPointerOperand();
2845 }
else if (
GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2846 if (GA->isInterposable())
2848 V = GA->getAliasee();
2852 assert(
V->getType()->isPointerTy() &&
"Unexpected operand type!");
2853 }
while (Visited.
insert(V).second);
2855 Type *IdxPtrTy =
DL.getIndexType(
V->getType());
2856 return cast<ConstantInt>(ConstantInt::get(IdxPtrTy,
Offset));
2870 if (DeadBlocks.
count(Pred))
2872 BasicBlock *KnownSucc = KnownSuccessors[Pred];
2873 return KnownSucc && KnownSucc != Succ;
2878 return (!DeadBlocks.
count(BB) &&
2884 if (Succ == NextBB || !IsNewlyDead(Succ))
2888 while (!NewDead.
empty()) {
2890 if (DeadBlocks.
insert(Dead).second)
2909 auto Result = onAnalysisStart();
2920 if (Call &&
Call->getFunction() == Caller) {
2921 IsCallerRecursive =
true;
2931 SimplifiedValues[&FAI] = *CAI;
2932 if (isa<Constant>(*CAI))
2935 Value *PtrArg = *CAI;
2936 if (
ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2937 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2940 if (
auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2941 SROAArgValues[&FAI] = SROAArg;
2942 onInitializeSROAArg(SROAArg);
2943 EnabledSROAAllocas.
insert(SROAArg);
2948 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2949 NumAllocaArgs = SROAArgValues.
size();
2955 if (GetEphValuesCache)
2956 EphValues = &GetEphValuesCache(
F).ephValues();
2969 BBSetVector BBWorklist;
2970 BBWorklist.
insert(&
F.getEntryBlock());
2973 for (
unsigned Idx = 0;
Idx != BBWorklist.size(); ++
Idx) {
2993 if (!isa<CallBrInst>(*U))
2999 if (!
IR.isSuccess())
3006 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
3009 if (
ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(
Cond)) {
3011 BBWorklist.insert(NextBB);
3012 KnownSuccessors[BB] = NextBB;
3013 findDeadBlocks(BB, NextBB);
3017 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
3019 if (
ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(
Cond)) {
3020 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
3021 BBWorklist.insert(NextBB);
3022 KnownSuccessors[BB] = NextBB;
3023 findDeadBlocks(BB, NextBB);
3032 onBlockAnalyzed(BB);
3038 if (!isSoleCallToLocalFunction(CandidateCall,
F) && ContainsNoDuplicateCall)
3048 FinalStackSizeThreshold = *AttrMaxStackSize;
3049 if (AllocatedSize > FinalStackSizeThreshold)
3052 return finalizeAnalysis();
3056#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
3058 F.print(
OS, &Writer);
3073#undef DEBUG_PRINT_STAT
3076#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3090 auto CalleeTLI = GetTLI(*Callee);
3093 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
3101 for (
unsigned I = 0, E = Call.arg_size();
I != E; ++
I) {
3102 if (Call.isByValArgument(
I)) {
3105 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
3106 unsigned TypeSize =
DL.getTypeSizeInBits(Call.getParamByValType(
I));
3108 unsigned PointerSize =
DL.getPointerSizeInBits(AS);
3110 unsigned NumStores = (
TypeSize + PointerSize - 1) / PointerSize;
3118 NumStores = std::min(NumStores, 8U);
3131 return std::min<int64_t>(
Cost, INT_MAX);
3141 return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
3142 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
3163 InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
3164 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
true,
3166 auto R = CA.analyze();
3168 return std::nullopt;
3169 return CA.getCost();
3178 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3179 PSI, ORE, *Call.getCalledFunction(), Call);
3180 auto R = CFA.analyze();
3182 return std::nullopt;
3183 return CFA.features();
3198 if (Callee->isPresplitCoroutine())
3206 unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3207 for (
unsigned I = 0, E = Call.arg_size();
I != E; ++
I)
3208 if (Call.isByValArgument(
I)) {
3209 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
3217 if (Call.hasFnAttr(Attribute::AlwaysInline)) {
3218 if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
3222 if (IsViable.isSuccess())
3229 Function *Caller = Call.getCaller();
3234 if (Caller->hasOptNone())
3239 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3243 if (Callee->isInterposable())
3247 if (Callee->hasFnAttribute(Attribute::NoInline))
3251 if (Call.isNoInline())
3255 if (Callee->hasFnAttribute(
"loader-replaceable"))
3258 return std::nullopt;
3274 if (UserDecision->isSuccess())
3281 "Inlining forced by -inline-all-viable-calls");
3284 <<
"... (caller:" << Call.getCaller()->getName()
3287 InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3288 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3298 if (CA.wasDecidedByCostBenefit()) {
3301 CA.getCostBenefitPair());
3306 if (CA.wasDecidedByCostThreshold())
3308 CA.getStaticBonusApplied());
3317 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
3327 if (!isa<CallBrInst>(*U))
3330 for (
auto &
II : BB) {
3336 Function *Callee = Call->getCalledFunction();
3342 if (!ReturnsTwice && isa<CallInst>(Call) &&
3343 cast<CallInst>(Call)->canReturnTwice())
3347 switch (Callee->getIntrinsicID()) {
3350 case llvm::Intrinsic::icall_branch_funnel:
3354 "disallowed inlining of @llvm.icall.branch.funnel");
3355 case llvm::Intrinsic::localescape:
3359 "disallowed inlining of @llvm.localescape");
3360 case llvm::Intrinsic::vastart:
3364 "contains VarArgs initialized with va_start");
3435 unsigned SizeOptLevel) {
3438 if (SizeOptLevel == 1)
3440 if (SizeOptLevel == 2)
3478 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
3483 InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params,
TTI,
3484 GetAssumptionCache,
nullptr,
nullptr, PSI,
3487 OS <<
" Analyzing call of " << CalledFunction->
getName()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static cl::opt< int > InlineAsmInstrCost("inline-asm-instr-cost", cl::Hidden, cl::init(0), cl::desc("Cost of a single inline asm instruction when inlining"))
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< bool > InlineAllViableCalls("inline-all-viable-calls", cl::Hidden, cl::init(false), cl::desc("Inline all viable calls, even if they exceed the inlining " "threshold"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function s Machine IR
mir Rename Register Operands
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents an incoming formal argument to a Function.
virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &)
emitInstructionAnnot - This may be implemented to emit a string right before an instruction is emitte...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a no-op cast from one type to another.
static LLVM_ABI BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getEntryFreq() const
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This is the base class for all instructions that perform data casts.
This class is the base class for the comparison instructions.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
LLVM_ABI bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
A cache of ephemeral values within a function.
Type * getReturnType() const
Class to represent profile counts.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
Indirect Branch Instruction.
LLVM_ABI void collectAsmStrs(SmallVectorImpl< StringRef > &AsmStrs) const
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
This instruction inserts a struct field of array element value into an aggregate value.
Base class for instruction visitors.
RetTy visitIndirectBrInst(IndirectBrInst &I)
RetTy visitCmpInst(CmpInst &I)
RetTy visitCallBase(CallBase &I)
RetTy visitCleanupReturnInst(CleanupReturnInst &I)
RetTy visitUnreachableInst(UnreachableInst &I)
RetTy visitSwitchInst(SwitchInst &I)
void visit(Iterator Start, Iterator End)
RetTy visitReturnInst(ReturnInst &I)
RetTy visitBinaryOperator(BinaryOperator &I)
RetTy visitResumeInst(ResumeInst &I)
RetTy visitCatchReturnInst(CatchReturnInst &I)
RetTy visitCastInst(CastInst &I)
RetTy visitBranchInst(BranchInst &I)
RetTy visitSelectInst(SelectInst &I)
void visitInstruction(Instruction &I)
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This class represents a cast from an integer to a pointer.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
static constexpr size_t npos
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Analysis pass providing the TargetTransformInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
LLVM_ABI bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ C
The default llvm calling convention, compatible with C.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
LLVM_ABI int getInstrCost()
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
Function::ProfileCount ProfileCount
LLVM_ABI std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
LLVM_ABI Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
LLVM_ABI std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
LLVM_ABI std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
auto predecessors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Evaluate query assuming this condition holds.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.