24#define DEBUG_TYPE "coro-elide"
26STATISTIC(NumOfCoroElided,
"The # of coroutine get elided.");
36class FunctionElideInfo {
38 FunctionElideInfo(
Function *
F) : ContainingFunction(
F) {
39 this->collectPostSplitCoroIds();
42 bool hasCoroIds()
const {
return !CoroIds.empty(); }
52 void collectPostSplitCoroIds();
53 friend class CoroIdElider;
60 void elideHeapAllocations(
uint64_t FrameSize,
Align FrameAlign);
61 bool lifetimeEligibleForElide()
const;
68 FunctionElideInfo &FEI;
106 if (
auto *Call = dyn_cast<CallInst>(&
I))
108 !Call->isMustTailCall())
109 Call->setTailCall(
false);
114static std::optional<std::pair<uint64_t, Align>>
126 if (!isa<AllocaInst>(&
I))
134 "coro-elide-info-output-file shouldn't be empty");
140 llvm::errs() <<
"Error opening coro-elide-info-output-file '"
142 return std::make_unique<raw_fd_ostream>(2,
false);
146void FunctionElideInfo::collectPostSplitCoroIds() {
148 if (
auto *CII = dyn_cast<CoroIdInst>(&
I))
149 if (CII->getInfo().isPostSplit())
151 if (CII->getCoroutine() != CII->getFunction())
152 CoroIds.push_back(CII);
159 if (
auto *CSI = dyn_cast<CoroSuspendInst>(&
I))
160 if (CSI->hasOneUse() && isa<SwitchInst>(CSI->use_begin()->getUser())) {
161 SwitchInst *SWI = cast<SwitchInst>(CSI->use_begin()->getUser());
163 CoroSuspendSwitches.insert(SWI);
168CoroIdElider::CoroIdElider(
CoroIdInst *CoroId, FunctionElideInfo &FEI,
171 : CoroId(CoroId), FEI(FEI), AA(AA), DT(DT), ORE(ORE) {
174 if (
auto *CB = dyn_cast<CoroBeginInst>(U))
175 CoroBegins.push_back(CB);
176 else if (
auto *CA = dyn_cast<CoroAllocInst>(U))
177 CoroAllocs.push_back(CA);
186 if (
auto *
II = dyn_cast<CoroSubFnInst>(U))
187 switch (
II->getIndex()) {
189 ResumeAddr.push_back(
II);
192 DestroyAddr[CB].push_back(
II);
202void CoroIdElider::elideHeapAllocations(
uint64_t FrameSize,
Align FrameAlign) {
214 for (
auto *CA : CoroAllocs) {
215 CA->replaceAllUsesWith(False);
216 CA->eraseFromParent();
223 const DataLayout &
DL = FEI.ContainingFunction->getDataLayout();
225 auto *Frame =
new AllocaInst(FrameTy,
DL.getAllocaAddrSpace(),
"", InsertPt);
226 Frame->setAlignment(FrameAlign);
228 new BitCastInst(Frame, PointerType::getUnqual(
C),
"vFrame", InsertPt);
230 for (
auto *CB : CoroBegins) {
231 CB->replaceAllUsesWith(FrameVoidPtr);
232 CB->eraseFromParent();
240bool CoroIdElider::canCoroBeginEscape(
242 const auto &It = DestroyAddr.find(CB);
243 assert(It != DestroyAddr.end());
246 unsigned Limit = 32 * (1 + It->second.size());
254 for (
auto *DA : It->second)
258 for (
auto *U : CB->
users()) {
260 if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U))
274 EscapingBBs.
insert(cast<Instruction>(U)->getParent());
277 bool PotentiallyEscaped =
false;
281 if (!Visited.
insert(BB).second)
287 PotentiallyEscaped |= EscapingBBs.
count(BB);
290 if (isa<ReturnInst>(BB->getTerminator()) || PotentiallyEscaped)
305 auto TI = BB->getTerminator();
309 if (isa<SwitchInst>(TI) &&
310 FEI.CoroSuspendSwitches.count(cast<SwitchInst>(TI))) {
311 Worklist.
push_back(cast<SwitchInst>(TI)->getSuccessor(1));
312 Worklist.
push_back(cast<SwitchInst>(TI)->getSuccessor(2));
316 }
while (!Worklist.
empty());
323bool CoroIdElider::lifetimeEligibleForElide()
const {
326 if (CoroAllocs.empty())
341 auto *TI =
B.getTerminator();
343 if (TI->getNumSuccessors() != 0 || isa<UnreachableInst>(TI))
350 for (
const auto *CB : CoroBegins) {
351 auto It = DestroyAddr.find(CB);
355 if (It == DestroyAddr.end())
358 const auto &CorrespondingDestroyAddrs = It->second;
362 auto DominatesTerminator = [&](
auto *TI) {
363 return llvm::any_of(CorrespondingDestroyAddrs, [&](
auto *Destroy) {
364 return DT.
dominates(Destroy, TI->getTerminator());
377 if (canCoroBeginEscape(CB, Terminators))
386bool CoroIdElider::attemptElide() {
390 assert(Resumers &&
"PostSplit coro.id Info argument must refer to an array"
391 "of coroutine subfunctions");
392 auto *ResumeAddrConstant =
397 bool EligibleForElide = lifetimeEligibleForElide();
403 for (
auto &It : DestroyAddr)
406 auto FrameSizeAndAlign =
getFrameLayout(cast<Function>(ResumeAddrConstant));
408 auto CallerFunctionName = FEI.ContainingFunction->getName();
411 if (EligibleForElide && FrameSizeAndAlign) {
412 elideHeapAllocations(FrameSizeAndAlign->first, FrameSizeAndAlign->second);
419 << FEI.ContainingFunction->getName() <<
"\n";
424 <<
"'" <<
ore::NV(
"callee", CalleeCoroutineName)
425 <<
"' elided in '" <<
ore::NV(
"caller", CallerFunctionName)
427 <<
ore::NV(
"frame_size", FrameSizeAndAlign->first) <<
", align="
428 <<
ore::NV(
"align", FrameSizeAndAlign->second.value()) <<
")";
433 <<
"'" <<
ore::NV(
"callee", CalleeCoroutineName)
434 <<
"' not elided in '"
435 <<
ore::NV(
"caller", CallerFunctionName);
437 if (FrameSizeAndAlign)
438 return Remark <<
"' (frame_size="
439 <<
ore::NV(
"frame_size", FrameSizeAndAlign->first)
441 <<
ore::NV(
"align", FrameSizeAndAlign->second.value())
444 return Remark <<
"' (frame_size=unknown, align=unknown)";
452 auto &M = *
F.getParent();
456 FunctionElideInfo FEI{&
F};
458 if (!FEI.hasCoroIds())
465 bool Changed =
false;
466 for (
auto *CII : FEI.getCoroIds()) {
467 CoroIdElider CIE(CII, FEI, AA, DT, ORE);
468 Changed |= CIE.attemptElide();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void replaceWithConstant(Constant *Value, SmallVectorImpl< CoroSubFnInst * > &Users)
static Instruction * getFirstNonAllocaInTheEntryBlock(Function *F)
static cl::opt< std::string > CoroElideInfoOutputFilename("coro-elide-info-output-file", cl::value_desc("filename"), cl::desc("File to record the coroutines got elided"), cl::Hidden)
static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA)
static std::optional< std::pair< uint64_t, Align > > getFrameLayout(Function *Resume)
static std::unique_ptr< raw_fd_ostream > getOrCreateLogFile()
static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA)
This file defines the DenseMap class.
iv Induction Variable Users
uint64_t IntrinsicInst * II
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
A private abstract base class describing the concept of an individual alias analysis implementation.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
ConstantArray - Constant Array Declarations.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
This class represents the llvm.coro.begin or llvm.coro.begin.custom.abi instructions.
This represents the llvm.coro.id instruction.
Function * getCoroutine() const
This class represents the llvm.coro.subfn.addr instruction.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
uint64_t getParamDereferenceableBytes(unsigned ArgNo) const
Extract the number of dereferenceable bytes for a parameter.
MaybeAlign getParamAlign(unsigned ArgNo) const
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
This is an important class for using LLVM in a threaded context.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getNumCases() const
Return the number of 'cases' in this switch instruction, excluding the default case.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
iterator_range< value_op_iterator > operand_values()
LLVM Value Representation.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
void replaceCoroFree(CoroIdInst *CoroId, bool Elide)
bool declaresIntrinsics(const Module &M, ArrayRef< Intrinsic::ID > List)
DiagnosticInfoOptimizationBase::Argument NV
@ OF_Append
The file should be opened in append mode.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
This struct is a compact representation of a valid (non-zero power of two) alignment.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.