LLVM 22.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/PassManager.h"
33#include "llvm/IR/Value.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/Error.h"
43#include <cassert>
44#include <cstdint>
45#include <set>
46#include <string>
47#include <unordered_map>
48#include <utility>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "pgo-icall-prom"
54
55STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
56STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
57
59
60namespace llvm {
62}
63
64// Command line option to disable indirect-call promotion with the default as
65// false. This is for debug purpose.
66static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
67 cl::desc("Disable indirect call promotion"));
68
69// Set the cutoff value for the promotion. If the value is other than 0, we
70// stop the transformation once the total number of promotions equals the cutoff
71// value.
72// For debug use only.
74 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
75 cl::desc("Max number of promotions for this compilation"));
76
77// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
78// For debug use only.
80 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
81 cl::desc("Skip Callsite up to this number for this compilation"));
82
83// ICP the candidate function even when only a declaration is present.
85 "icp-allow-decls", cl::init(false), cl::Hidden,
86 cl::desc("Promote the target candidate even when the defintion "
87 " is not available"));
88
89// ICP hot candidate functions only. When setting to false, non-cold functions
90// (warm functions) can also be promoted.
91static cl::opt<bool>
92 ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
93 cl::desc("Promote the target candidate only if it is a "
94 "hot function. Otherwise, warm functions can "
95 "also be promoted"));
96
97// If one target cannot be ICP'd, proceed with the remaining targets instead
98// of exiting the callsite.
100 "icp-allow-candidate-skip", cl::init(false), cl::Hidden,
101 cl::desc("Continue with the remaining targets instead of exiting "
102 "when failing in a candidate"));
103
104// Set if the pass is called in LTO optimization. The difference for LTO mode
105// is the pass won't prefix the source module name to the internal linkage
106// symbols.
107static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
108 cl::desc("Run indirect-call promotion in LTO "
109 "mode"));
110
111// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
112// mode is it will add prof metadatato the created direct call.
113static cl::opt<bool>
114 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
115 cl::desc("Run indirect-call promotion in SamplePGO mode"));
116
117// If the option is set to true, only call instructions will be considered for
118// transformation -- invoke instructions will be ignored.
119static cl::opt<bool>
120 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
121 cl::desc("Run indirect-call promotion for call instructions "
122 "only"));
123
124// If the option is set to true, only invoke instructions will be considered for
125// transformation -- call instructions will be ignored.
126static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
128 cl::desc("Run indirect-call promotion for "
129 "invoke instruction only"));
130
131// Dump the function level IR if the transformation happened in this
132// function. For debug use only.
133static cl::opt<bool>
134 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
135 cl::desc("Dump IR after transformation happens"));
136
137// Indirect call promotion pass will fall back to function-based comparison if
138// vtable-count / function-count is smaller than this threshold.
140 "icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden,
141 cl::desc("The percentage threshold of vtable-count / function-count for "
142 "cost-benefit analysis."));
143
144// Although comparing vtables can save a vtable load, we may need to compare
145// vtable pointer with multiple vtable address points due to class inheritance.
146// Comparing with multiple vtables inserts additional instructions on hot code
147// path, and doing so for an earlier candidate delays the comparisons for later
148// candidates. For the last candidate, only the fallback path is affected.
149// We allow multiple vtable comparison for the last function candidate and use
150// the option below to cap the number of vtables.
152 "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
153 cl::desc("The maximum number of vtable for the last candidate."));
154
156 "icp-ignored-base-types", cl::Hidden,
157 cl::desc(
158 "A list of mangled vtable type info names. Classes specified by the "
159 "type info names and their derived ones will not be vtable-ICP'ed. "
160 "Useful when the profiled types and actual types in the optimized "
161 "binary could be different due to profiling limitations. Type info "
162 "names are those string literals used in LLVM type metadata"));
163
164namespace {
165
166// The key is a vtable global variable, and the value is a map.
167// In the inner map, the key represents address point offsets and the value is a
168// constant for this address point.
169using VTableAddressPointOffsetValMap =
171
172// A struct to collect type information for a virtual call site.
173struct VirtualCallSiteInfo {
174 // The offset from the address point to virtual function in the vtable.
175 uint64_t FunctionOffset;
176 // The instruction that computes the address point of vtable.
177 Instruction *VPtr;
178 // The compatible type used in LLVM type intrinsics.
179 StringRef CompatibleTypeStr;
180};
181
182// The key is a virtual call, and value is its type information.
183using VirtualCallSiteTypeInfoMap =
185
186// The key is vtable GUID, and value is its value profile count.
187using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
188
189// Return the address point offset of the given compatible type.
190//
191// Type metadata of a vtable specifies the types that can contain a pointer to
192// this vtable, for example, `Base*` can be a pointer to an derived type
193// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
194static std::optional<uint64_t>
195getAddressPointOffset(const GlobalVariable &VTableVar,
196 StringRef CompatibleType) {
198 VTableVar.getMetadata(LLVMContext::MD_type, Types);
199
200 for (MDNode *Type : Types)
201 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
202 TypeId && TypeId->getString() == CompatibleType)
203 return cast<ConstantInt>(
204 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
205 ->getZExtValue();
206
207 return std::nullopt;
208}
209
210// Return a constant representing the vtable's address point specified by the
211// offset.
212static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
213 uint32_t AddressPointOffset) {
214 Module &M = *VTable->getParent();
215 LLVMContext &Context = M.getContext();
216 assert(AddressPointOffset <
217 M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
218 "Out-of-bound access");
219
221 Type::getInt8Ty(Context), VTable,
222 llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
223}
224
225// Return the basic block in which Use `U` is used via its `UserInst`.
226static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
227 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
228 return PN->getIncomingBlock(U);
229
230 return UserInst->getParent();
231}
232
233// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users
234// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
235// is the sole predecessor of `DestBB` and `DestBB` is dominated by
236// `Inst->getParent()`.
237static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
238 // 'BB' is used only by assert.
239 [[maybe_unused]] BasicBlock *BB = Inst->getParent();
240
241 assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
242 DestBB->getUniquePredecessor() == BB &&
243 "Guaranteed by ICP transformation");
244
245 BasicBlock *UserBB = nullptr;
246 for (Use &Use : Inst->uses()) {
247 User *User = Use.getUser();
248 // Do checked cast since IR verifier guarantees that the user of an
249 // instruction must be an instruction. See `Verifier::visitInstruction`.
250 Instruction *UserInst = cast<Instruction>(User);
251 // We can sink debug or pseudo instructions together with Inst.
252 if (UserInst->isDebugOrPseudoInst())
253 continue;
254 UserBB = getUserBasicBlock(Use, UserInst);
255 // Do not sink if Inst is used in a basic block that is not DestBB.
256 // TODO: Sink to the common dominator of all user blocks.
257 if (UserBB != DestBB)
258 return false;
259 }
260 return UserBB != nullptr;
261}
262
263// For the virtual call dispatch sequence, try to sink vtable load instructions
264// to the cold indirect call fallback.
265// FIXME: Move the sink eligibility check below to a utility function in
266// Transforms/Utils/ directory.
267static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
268 if (!isDestBBSuitableForSink(I, DestBlock))
269 return false;
270
271 // Do not move control-flow-involving, volatile loads, vaarg, alloca
272 // instructions, etc.
273 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
274 isa<AllocaInst>(I))
275 return false;
276
277 // Do not sink convergent call instructions.
278 if (const auto *C = dyn_cast<CallBase>(I))
279 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
280 return false;
281
282 // Do not move an instruction that may write to memory.
283 if (I->mayWriteToMemory())
284 return false;
285
286 // We can only sink load instructions if there is nothing between the load and
287 // the end of block that could change the value.
288 if (I->mayReadFromMemory()) {
289 // We already know that SrcBlock is the unique predecessor of DestBlock.
290 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
291 E = I->getParent()->end();
292 Scan != E; ++Scan) {
293 // Note analysis analysis can tell whether two pointers can point to the
294 // same object in memory or not thereby find further opportunities to
295 // sink.
296 if (Scan->mayWriteToMemory())
297 return false;
298 }
299 }
300
301 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
302 I->moveBefore(*DestBlock, InsertPos);
303
304 // TODO: Sink debug intrinsic users of I to 'DestBlock'.
305 // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
306 // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
307 // the core logic to do this.
308 return true;
309}
310
311// Try to sink instructions after VPtr to the indirect call fallback.
312// Return the number of sunk IR instructions.
313static int tryToSinkInstructions(BasicBlock *OriginalBB,
314 BasicBlock *IndirectCallBB) {
315 int SinkCount = 0;
316 // Do not sink across a critical edge for simplicity.
317 if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
318 return SinkCount;
319 // Sink all eligible instructions in OriginalBB in reverse order.
320 for (Instruction &I :
322 if (tryToSinkInstruction(&I, IndirectCallBB))
323 SinkCount++;
324
325 return SinkCount;
326}
327
328// Promote indirect calls to conditional direct calls, keeping track of
329// thresholds.
330class IndirectCallPromoter {
331private:
332 Function &F;
333 Module &M;
334
335 // Symtab that maps indirect call profile values to function names and
336 // defines.
337 InstrProfSymtab *const Symtab;
338
339 const bool SamplePGO;
340
341 // A map from a virtual call to its type information.
342 const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
343
344 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
345
347
348 const DenseSet<StringRef> &IgnoredBaseTypes;
349
350 // A struct that records the direct target and it's call count.
351 struct PromotionCandidate {
352 Function *const TargetFunction;
353 const uint64_t Count;
354 const uint32_t Index;
355
356 // The following fields only exists for promotion candidates with vtable
357 // information.
358 //
359 // Due to class inheritance, one virtual call candidate can come from
360 // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
361 // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
362 // points for comparison.
363 VTableGUIDCountsMap VTableGUIDAndCounts;
364 SmallVector<Constant *> AddressPoints;
365
366 PromotionCandidate(Function *F, uint64_t C, uint32_t I)
367 : TargetFunction(F), Count(C), Index(I) {}
368 };
369
370 // Check if the indirect-call call site should be promoted. Return the number
371 // of promotions. Inst is the candidate indirect call, ValueDataRef
372 // contains the array of value profile data for profiled targets,
373 // TotalCount is the total profiled count of call executions, and
374 // NumCandidates is the number of candidate entries in ValueDataRef.
375 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
376 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
377 uint64_t TotalCount, uint32_t NumCandidates);
378
379 // Promote a list of targets for one indirect-call callsite by comparing
380 // indirect callee with functions. Return true if there are IR
381 // transformations and false otherwise.
382 bool tryToPromoteWithFuncCmp(
384 uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
385 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
386
387 // Promote a list of targets for one indirect call by comparing vtables with
388 // functions. Return true if there are IR transformations and false
389 // otherwise.
390 bool tryToPromoteWithVTableCmp(
392 uint64_t TotalFuncCount, uint32_t NumCandidates,
394 VTableGUIDCountsMap &VTableGUIDCounts);
395
396 // Return true if it's profitable to compare vtables for the callsite.
397 bool isProfitableToCompareVTables(const CallBase &CB,
399
400 // Return true if the vtable corresponding to VTableGUID should be skipped
401 // for vtable-based comparison.
402 bool shouldSkipVTable(uint64_t VTableGUID);
403
404 // Given an indirect callsite and the list of function candidates, compute
405 // the following vtable information in output parameters and return vtable
406 // pointer if type profiles exist.
407 // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
408 // metadata attached on the vtable pointer.
409 // - For each function candidate, finds out the vtables from which it gets
410 // called and stores the <vtable-guid, count> in promotion candidate.
411 Instruction *computeVTableInfos(const CallBase *CB,
412 VTableGUIDCountsMap &VTableGUIDCounts,
413 std::vector<PromotionCandidate> &Candidates);
414
415 Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
416 uint64_t AddressPointOffset);
417
418 void updateFuncValueProfiles(CallBase &CB,
420 uint64_t Sum, uint32_t MaxMDCount);
421
422 void updateVPtrValueProfiles(Instruction *VPtr,
423 VTableGUIDCountsMap &VTableGUIDCounts);
424
425 bool isValidTarget(uint64_t, Function *, const CallBase &, uint64_t);
426
427public:
428 IndirectCallPromoter(
429 Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
430 const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
431 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
432 const DenseSet<StringRef> &IgnoredBaseTypes,
434 : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
435 VirtualCSInfo(VirtualCSInfo),
436 VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
437 IgnoredBaseTypes(IgnoredBaseTypes) {}
438 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
439 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
440
442};
443
444} // end anonymous namespace
445
446bool IndirectCallPromoter::isValidTarget(uint64_t Target,
447 Function *TargetFunction,
448 const CallBase &CB, uint64_t Count) {
449 // Don't promote if the symbol is not defined in the module. This avoids
450 // creating a reference to a symbol that doesn't exist in the module
451 // This can happen when we compile with a sample profile collected from
452 // one binary but used for another, which may have profiled targets that
453 // aren't used in the new binary. We might have a declaration initially in
454 // the case where the symbol is globally dead in the binary and removed by
455 // ThinLTO.
456 using namespace ore;
457 if (TargetFunction == nullptr) {
458 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
459 ORE.emit([&]() {
460 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
461 << "Cannot promote indirect call: target with md5sum "
462 << NV("target md5sum", Target)
463 << " not found (count=" << NV("Count", Count) << ")";
464 });
465 return false;
466 }
467 if (!ICPAllowDecls && TargetFunction->isDeclaration()) {
468 LLVM_DEBUG(dbgs() << " Not promote: target definition is not available\n");
469 ORE.emit([&]() {
470 return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
471 << "Do not promote indirect call: target with md5sum "
472 << NV("target md5sum", Target)
473 << " definition not available (count=" << ore::NV("Count", Count)
474 << ")";
475 });
476 return false;
477 }
478
479 const char *Reason = nullptr;
480 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
481
482 ORE.emit([&]() {
483 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
484 << "Cannot promote indirect call to "
485 << NV("TargetFunction", TargetFunction)
486 << " (count=" << NV("Count", Count) << "): " << Reason;
487 });
488 return false;
489 }
490 return true;
491}
492
493// Indirect-call promotion heuristic. The direct targets are sorted based on
494// the count. Stop at the first target that is not promoted.
495std::vector<IndirectCallPromoter::PromotionCandidate>
496IndirectCallPromoter::getPromotionCandidatesForCallSite(
497 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
498 uint64_t TotalCount, uint32_t NumCandidates) {
499 std::vector<PromotionCandidate> Ret;
500
501 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
502 << " Num_targets: " << ValueDataRef.size()
503 << " Num_candidates: " << NumCandidates << "\n");
504 NumOfPGOICallsites++;
505 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
506 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
507 return Ret;
508 }
509
510 for (uint32_t I = 0; I < NumCandidates; I++) {
511 uint64_t Count = ValueDataRef[I].Count;
512 assert(Count <= TotalCount);
513 (void)TotalCount;
514 uint64_t Target = ValueDataRef[I].Value;
515 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
516 << " Target_func: " << Target << "\n");
517
518 if (ICPInvokeOnly && isa<CallInst>(CB)) {
519 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
520 ORE.emit([&]() {
521 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
522 << " Not promote: User options";
523 });
524 break;
525 }
526 if (ICPCallOnly && isa<InvokeInst>(CB)) {
527 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
528 ORE.emit([&]() {
529 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
530 << " Not promote: User options";
531 });
532 break;
533 }
534 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
535 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
536 ORE.emit([&]() {
537 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
538 << " Not promote: Cutoff reached";
539 });
540 break;
541 }
542
543 Function *TargetFunction = Symtab->getFunction(Target);
544 if (!isValidTarget(Target, TargetFunction, CB, Count)) {
546 continue;
547 else
548 break;
549 }
550
551 Ret.push_back(PromotionCandidate(TargetFunction, Count, I));
552 TotalCount -= Count;
553 }
554 return Ret;
555}
556
557Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
558 GlobalVariable *GV, uint64_t AddressPointOffset) {
559 auto [Iter, Inserted] =
560 VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr);
561 if (Inserted)
562 Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset);
563 return Iter->second;
564}
565
566Instruction *IndirectCallPromoter::computeVTableInfos(
567 const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
568 std::vector<PromotionCandidate> &Candidates) {
570 return nullptr;
571
572 // Take the following code sequence as an example, here is how the code works
573 // @vtable1 = {[n x ptr] [... ptr @func1]}
574 // @vtable2 = {[m x ptr] [... ptr @func2]}
575 //
576 // %vptr = load ptr, ptr %d, !prof !0
577 // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
578 // tail call void @llvm.assume(i1 %0)
579 // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
580 // %1 = load ptr, ptr %vfn
581 // call void %1(ptr %d), !prof !1
582 //
583 // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
584 // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
585 //
586 // Step 1. Find out the %vptr instruction for indirect call and use its !prof
587 // to populate `GUIDCountsMap`.
588 // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
589 // make vtable definitions visible across modules.
590 // Step 3. Compute the byte offset of the virtual call, by adding vtable
591 // address point offset and function's offset relative to vtable address
592 // point. For each function candidate, this step tells us the vtable from
593 // which it comes from, and the vtable address point to compare %vptr with.
594
595 // Only virtual calls have virtual call site info.
596 auto Iter = VirtualCSInfo.find(CB);
597 if (Iter == VirtualCSInfo.end())
598 return nullptr;
599
600 LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
601 << NumOfPGOICallsites << "\n");
602
603 const auto &VirtualCallInfo = Iter->second;
604 Instruction *VPtr = VirtualCallInfo.VPtr;
605
607 for (size_t I = 0; I < Candidates.size(); I++)
608 CalleeIndexMap[Candidates[I].TargetFunction] = I;
609
610 uint64_t TotalVTableCount = 0;
611 auto VTableValueDataArray =
612 getValueProfDataFromInst(*VirtualCallInfo.VPtr, IPVK_VTableTarget,
613 MaxNumVTableAnnotations, TotalVTableCount);
614 if (VTableValueDataArray.empty())
615 return VPtr;
616
617 // Compute the functions and counts from by each vtable.
618 for (const auto &V : VTableValueDataArray) {
619 uint64_t VTableVal = V.Value;
620 GUIDCountsMap[VTableVal] = V.Count;
621 GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal);
622 if (!VTableVar) {
623 LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal
624 << "; maybe the vtable isn't imported\n");
625 continue;
626 }
627
628 std::optional<uint64_t> MaybeAddressPointOffset =
629 getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
630 if (!MaybeAddressPointOffset)
631 continue;
632
633 const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
634
635 Function *Callee = nullptr;
636 std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
637 VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
638 if (!Callee)
639 continue;
640 auto CalleeIndexIter = CalleeIndexMap.find(Callee);
641 if (CalleeIndexIter == CalleeIndexMap.end())
642 continue;
643
644 auto &Candidate = Candidates[CalleeIndexIter->second];
645 // There shouldn't be duplicate GUIDs in one !prof metadata (except
646 // duplicated zeros), so assign counters directly won't cause overwrite or
647 // counter loss.
648 Candidate.VTableGUIDAndCounts[VTableVal] = V.Count;
649 Candidate.AddressPoints.push_back(
650 getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
651 }
652
653 return VPtr;
654}
655
656// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
657// Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
658static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
659 uint64_t FalseWeight) {
660 MDBuilder MDB(Context);
661 uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
662 return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
663 scaleBranchCount(FalseWeight, Scale));
664}
665
667 uint64_t Count, uint64_t TotalCount,
668 bool AttachProfToDirectCall,
671 CB, DirectCallee,
672 createBranchWeights(CB.getContext(), Count, TotalCount - Count));
673
674 if (AttachProfToDirectCall)
675 setBranchWeights(NewInst, {static_cast<uint32_t>(Count)},
676 /*IsExpected=*/false);
677
678 using namespace ore;
679
680 if (ORE)
681 ORE->emit([&]() {
682 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
683 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
684 << " with count " << NV("Count", Count) << " out of "
685 << NV("TotalCount", TotalCount);
686 });
687 return NewInst;
688}
689
690// Promote indirect-call to conditional direct-call for one callsite.
691bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
693 uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
694 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
695 uint32_t NumPromoted = 0;
696
697 for (const auto &C : Candidates) {
698 uint64_t FuncCount = C.Count;
699 pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
700 SamplePGO, &ORE);
701 assert(TotalCount >= FuncCount);
702 TotalCount -= FuncCount;
703 NumOfPGOICallPromotion++;
704 NumPromoted++;
705
706 // Update the count and this entry will be erased later.
707 ICallProfDataRef[C.Index].Count = 0;
708 if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
709 continue;
710
711 // After a virtual call candidate gets promoted, update the vtable's counts
712 // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
713 // a vtable from which the virtual call is loaded. Compute the sum and use
714 // 128-bit APInt to improve accuracy.
715 uint64_t SumVTableCount = 0;
716 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
717 SumVTableCount += VTableCount;
718
719 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
720 APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
721 APFuncCount *= VTableCount;
722 VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
723 }
724 }
725 if (NumPromoted == 0)
726 return false;
727
728 assert(NumPromoted <= ICallProfDataRef.size() &&
729 "Number of promoted functions should not be greater than the number "
730 "of values in profile metadata");
731
732 updateFuncValueProfiles(CB, ICallProfDataRef, TotalCount, NumCandidates);
733 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
734 return true;
735}
736
737void IndirectCallPromoter::updateFuncValueProfiles(
739 uint64_t TotalCount, uint32_t MaxMDCount) {
740 // First clear the existing !prof.
741 CB.setMetadata(LLVMContext::MD_prof, nullptr);
742
743 // Sort value profiles by count in descending order.
744 llvm::stable_sort(CallVDs, [](const InstrProfValueData &LHS,
745 const InstrProfValueData &RHS) {
746 return LHS.Count > RHS.Count;
747 });
748 // Drop the <target-value, count> pair if count is zero.
750 CallVDs.begin(),
751 llvm::upper_bound(CallVDs, 0U,
752 [](uint64_t Count, const InstrProfValueData &ProfData) {
753 return ProfData.Count <= Count;
754 }));
755
756 // Annotate the remaining value profiles if counter is not zero.
757 if (TotalCount != 0)
758 annotateValueSite(M, CB, VDs, TotalCount, IPVK_IndirectCallTarget,
759 MaxMDCount);
760}
761
762void IndirectCallPromoter::updateVPtrValueProfiles(
763 Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
764 if (!EnableVTableProfileUse || VPtr == nullptr ||
765 !VPtr->getMetadata(LLVMContext::MD_prof))
766 return;
767 VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
768 std::vector<InstrProfValueData> VTableValueProfiles;
769 uint64_t TotalVTableCount = 0;
770 for (auto [GUID, Count] : VTableGUIDCounts) {
771 if (Count == 0)
772 continue;
773
774 VTableValueProfiles.push_back({GUID, Count});
775 TotalVTableCount += Count;
776 }
777 llvm::sort(VTableValueProfiles,
778 [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
779 return LHS.Count > RHS.Count;
780 });
781
782 annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
783 IPVK_VTableTarget, VTableValueProfiles.size());
784}
785
786bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
788 uint64_t TotalFuncCount, uint32_t NumCandidates,
790 VTableGUIDCountsMap &VTableGUIDCounts) {
791 SmallVector<std::pair<uint32_t, uint64_t>, 4> PromotedFuncCount;
792
793 for (const auto &Candidate : Candidates) {
794 for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
795 VTableGUIDCounts[GUID] -= Count;
796
797 // 'OriginalBB' is the basic block of indirect call. After each candidate
798 // is promoted, a new basic block is created for the indirect fallback basic
799 // block and indirect call `CB` is moved into this new BB.
800 BasicBlock *OriginalBB = CB.getParent();
802 CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
803 createBranchWeights(CB.getContext(), Candidate.Count,
804 TotalFuncCount - Candidate.Count));
805
806 int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
807
808 ORE.emit([&]() {
809 OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
810
811 const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
812 Remark << "Promote indirect call to "
813 << ore::NV("DirectCallee", Candidate.TargetFunction)
814 << " with count " << ore::NV("Count", Candidate.Count)
815 << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
816 << ore::NV("SinkCount", SinkCount)
817 << " instruction(s) and compare "
818 << ore::NV("VTable", VTableGUIDAndCounts.size())
819 << " vtable(s): {";
820
821 // Sort GUIDs so remark message is deterministic.
822 std::set<uint64_t> GUIDSet;
823 for (auto [GUID, Count] : VTableGUIDAndCounts)
824 GUIDSet.insert(GUID);
825 for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
826 if (Iter != GUIDSet.begin())
827 Remark << ", ";
828 Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter));
829 }
830
831 Remark << "}";
832
833 return Remark;
834 });
835
836 PromotedFuncCount.push_back({Candidate.Index, Candidate.Count});
837
838 assert(TotalFuncCount >= Candidate.Count &&
839 "Within one prof metadata, total count is the sum of counts from "
840 "individual <target, count> pairs");
841 // Use std::min since 'TotalFuncCount' is the saturated sum of individual
842 // counts, see
843 // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
844 TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
845 NumOfPGOICallPromotion++;
846 }
847
848 if (PromotedFuncCount.empty())
849 return false;
850
851 // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
852 // a distinct 'VPtr'.
853 // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
854 // used to load multiple virtual functions. The vtable profiles needs to be
855 // updated properly in that case (e.g, for each indirect call annotate both
856 // type profiles and function profiles in one !prof).
857 for (size_t I = 0; I < PromotedFuncCount.size(); I++) {
858 uint32_t Index = PromotedFuncCount[I].first;
859 ICallProfDataRef[Index].Count -=
860 std::max(PromotedFuncCount[I].second, ICallProfDataRef[Index].Count);
861 }
862 updateFuncValueProfiles(CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
863 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
864 return true;
865}
866
867// Traverse all the indirect-call callsite and get the value profile
868// annotation to perform indirect-call promotion.
869bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
870 bool Changed = false;
871 ICallPromotionAnalysis ICallAnalysis;
872 for (auto *CB : findIndirectCalls(F)) {
873 uint32_t NumCandidates;
874 uint64_t TotalCount;
875 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
876 CB, TotalCount, NumCandidates);
877 if (!NumCandidates)
878 continue;
879 if (PSI && PSI->hasProfileSummary()) {
880 // Don't promote cold candidates.
881 if (PSI->isColdCount(TotalCount)) {
882 LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
883 << TotalCount << "\n");
884 continue;
885 }
886 // Only pormote hot if ICPAllowHotOnly is true.
887 if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
888 LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
889 << TotalCount << "\n");
890 continue;
891 }
892 }
893
894 auto PromotionCandidates = getPromotionCandidatesForCallSite(
895 *CB, ICallProfDataRef, TotalCount, NumCandidates);
896
897 VTableGUIDCountsMap VTableGUIDCounts;
898 Instruction *VPtr =
899 computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
900
901 if (isProfitableToCompareVTables(*CB, PromotionCandidates))
902 Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
903 TotalCount, NumCandidates,
904 ICallProfDataRef, VTableGUIDCounts);
905 else
906 Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
907 TotalCount, ICallProfDataRef,
908 NumCandidates, VTableGUIDCounts);
909 }
910 return Changed;
911}
912
913// TODO: Return false if the function addressing and vtable load instructions
914// cannot sink to indirect fallback.
915bool IndirectCallPromoter::isProfitableToCompareVTables(
916 const CallBase &CB, ArrayRef<PromotionCandidate> Candidates) {
917 if (!EnableVTableProfileUse || Candidates.empty())
918 return false;
919 LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
920 << NumOfPGOICallsites << CB << "\n");
921 const size_t CandidateSize = Candidates.size();
922 for (size_t I = 0; I < CandidateSize; I++) {
923 auto &Candidate = Candidates[I];
924 auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
925
926 LLVM_DEBUG({
927 dbgs() << " Candidate " << I << " FunctionCount: " << Candidate.Count
928 << ", VTableCounts:";
929 for (const auto &[GUID, Count] : VTableGUIDAndCounts)
930 dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() << ", "
931 << Count << "}";
932 dbgs() << "\n";
933 });
934
935 uint64_t CandidateVTableCount = 0;
936
937 for (auto &[GUID, Count] : VTableGUIDAndCounts) {
938 CandidateVTableCount += Count;
939
940 if (shouldSkipVTable(GUID))
941 return false;
942 }
943
944 if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
946 dbgs() << " function count " << Candidate.Count
947 << " and its vtable sum count " << CandidateVTableCount
948 << " have discrepancies. Bail out vtable comparison.\n");
949 return false;
950 }
951
952 // 'MaxNumVTable' limits the number of vtables to make vtable comparison
953 // profitable. Comparing multiple vtables for one function candidate will
954 // insert additional instructions on the hot path, and allowing more than
955 // one vtable for non last candidates may or may not elongate the dependency
956 // chain for the subsequent candidates. Set its value to 1 for non-last
957 // candidate and allow option to override it for the last candidate.
958 int MaxNumVTable = 1;
959 if (I == CandidateSize - 1)
960 MaxNumVTable = ICPMaxNumVTableLastCandidate;
961
962 if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
963 LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
964 << Candidate.AddressPoints.size()
965 << " vtables. Bail out for vtable comparison.\n");
966 return false;
967 }
968 }
969
970 return true;
971}
972
973bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
974 if (IgnoredBaseTypes.empty())
975 return false;
976
977 auto *VTableVar = Symtab->getGlobalVariable(VTableGUID);
978
979 assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
980
982 VTableVar->getMetadata(LLVMContext::MD_type, Types);
983
984 for (auto *Type : Types)
985 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
986 if (IgnoredBaseTypes.contains(TypeId->getString())) {
987 LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
988 "out of vtable comparison.");
989 return true;
990 }
991 return false;
992}
993
994// For virtual calls in the module, collect per-callsite information which will
995// be used to associate an ICP candidate with a vtable and a specific function
996// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
997// calls in a compile-time efficient manner (by iterating its users) and more
998// importantly use the compatible type later to figure out the function byte
999// offset relative to the start of vtables.
1000static void
1002 VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
1003 // Right now only llvm.type.test is used to find out virtual call sites.
1004 // With ThinLTO and whole-program-devirtualization, llvm.type.test and
1005 // llvm.public.type.test are emitted, and llvm.public.type.test is either
1006 // refined to llvm.type.test or dropped before indirect-call-promotion pass.
1007 //
1008 // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
1009 // Find out virtual calls by looking at users of llvm.type.checked.load in
1010 // that case.
1011 Function *TypeTestFunc =
1012 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
1013 if (!TypeTestFunc || TypeTestFunc->use_empty())
1014 return;
1015
1016 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1017 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
1019 };
1020 // Iterate all type.test calls to find all indirect calls.
1021 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
1022 auto *CI = dyn_cast<CallInst>(U.getUser());
1023 if (!CI)
1024 continue;
1025 auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
1026 if (!TypeMDVal)
1027 continue;
1028 auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
1029 if (!CompatibleTypeId)
1030 continue;
1031
1032 // Find out all devirtualizable call sites given a llvm.type.test
1033 // intrinsic call.
1036 auto &DT = LookupDomTree(*CI->getFunction());
1037 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
1038
1039 for (auto &DevirtCall : DevirtCalls) {
1040 CallBase &CB = DevirtCall.CB;
1041 // Given an indirect call, try find the instruction which loads a
1042 // pointer to virtual table.
1043 Instruction *VTablePtr =
1045 if (!VTablePtr)
1046 continue;
1047 VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
1048 CompatibleTypeId->getString()};
1049 }
1050 }
1051}
1052
1053// A wrapper function that does the actual work.
1054static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
1055 bool SamplePGO, ModuleAnalysisManager &MAM) {
1056 if (DisableICP)
1057 return false;
1058 InstrProfSymtab Symtab;
1059 if (Error E = Symtab.create(M, InLTO)) {
1060 std::string SymtabFailure = toString(std::move(E));
1061 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
1062 return false;
1063 }
1064 bool Changed = false;
1065 VirtualCallSiteTypeInfoMap VirtualCSInfo;
1066
1067 DenseSet<StringRef> IgnoredBaseTypes;
1068
1070 computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
1071
1072 IgnoredBaseTypes.insert_range(ICPIgnoredBaseTypes);
1073 }
1074
1075 // VTableAddressPointOffsetVal stores the vtable address points. The vtable
1076 // address point of a given <vtable, address point offset> is static (doesn't
1077 // change after being computed once).
1078 // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
1079 // entry the first time a <vtable, offset> pair is seen, as
1080 // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
1081 // repeatedly on each function.
1082 VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
1083
1084 for (auto &F : M) {
1085 if (F.isDeclaration() || F.hasOptNone())
1086 continue;
1087
1088 auto &FAM =
1091
1092 IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
1093 VTableAddressPointOffsetVal,
1094 IgnoredBaseTypes, ORE);
1095 bool FuncChanged = CallPromoter.processFunction(PSI);
1096 if (ICPDUMPAFTER && FuncChanged) {
1097 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
1098 LLVM_DEBUG(dbgs() << "\n");
1099 }
1100 Changed |= FuncChanged;
1101 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
1102 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
1103 break;
1104 }
1105 }
1106 return Changed;
1107}
1108
1112
1113 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
1114 SamplePGO | ICPSamplePGOMode, MAM))
1115 return PreservedAnalyses::all();
1116
1117 return PreservedAnalyses::none();
1118}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the DenseMap class.
This header defines various interfaces for pass management in LLVM.
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static cl::opt< bool > ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden, cl::desc("Promote the target candidate only if it is a " "hot function. Otherwise, warm functions can " "also be promoted"))
static cl::opt< float > ICPVTablePercentageThreshold("icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden, cl::desc("The percentage threshold of vtable-count / function-count for " "cost-benefit analysis."))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static void computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, VirtualCallSiteTypeInfoMap &VirtualCSInfo)
static MDNode * createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, uint64_t FalseWeight)
static cl::opt< bool > ICPAllowCandidateSkip("icp-allow-candidate-skip", cl::init(false), cl::Hidden, cl::desc("Continue with the remaining targets instead of exiting " "when failing in a candidate"))
static cl::opt< bool > ICPAllowDecls("icp-allow-decls", cl::init(false), cl::Hidden, cl::desc("Promote the target candidate even when the defintion " " is not available"))
static cl::list< std::string > ICPIgnoredBaseTypes("icp-ignored-base-types", cl::Hidden, cl::desc("A list of mangled vtable type info names. Classes specified by the " "type info names and their derived ones will not be vtable-ICP'ed. " "Useful when the profiled types and actual types in the optimized " "binary could be different due to profiling limitations. Type info " "names are those string literals used in LLVM type metadata"))
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
#define DEBUG_TYPE
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< int > ICPMaxNumVTableLastCandidate("icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, cl::desc("The maximum number of vtable for the last candidate."))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static bool processFunction(Function &F, NVPTXTargetMachine &TM)
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:142
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:445
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1301
This is an important base class in LLVM.
Definition: Constant.h:43
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
const Function & getFunction() const
Definition: Function.h:164
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:576
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:316
MutableArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:585
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition: InstrProf.h:506
LLVM_ABI Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:428
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1718
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:38
Metadata node.
Definition: Metadata.h:1077
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
iterator begin() const
Definition: ArrayRef.h:347
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
bool use_empty() const
Definition: Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
iterator_range< use_iterator > uses()
Definition: Value.h:380
void insert_range(Range &&R)
Definition: DenseSet.h:222
const ParentTy * getParent() const
Definition: ilist_node.h:34
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
Definition: Intrinsics.cpp:762
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
LLVM_ABI CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2026
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1334
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
Definition: InstrProf.cpp:1402
const char * toString(DWARFSectionKind Kind)
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI CallBase & promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, Function *Callee, ArrayRef< Constant * > AddressPoints, MDNode *BranchWeights)
This is similar to promoteCallWithIfThenElse except that the condition to promote a virtual call is t...
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
static Instruction * tryGetVTableInstruction(CallBase *CB)