LLVM 22.0.0git
IROutliner.cpp
Go to the documentation of this file.
1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
26#include "llvm/Transforms/IPO.h"
28#include <optional>
29#include <vector>
30
31#define DEBUG_TYPE "iroutliner"
32
33using namespace llvm;
34using namespace IRSimilarity;
35
36// A command flag to be used for debugging to exclude branches from similarity
37// matching and outlining.
38namespace llvm {
40
41// A command flag to be used for debugging to indirect calls from similarity
42// matching and outlining.
44
45// A command flag to be used for debugging to exclude intrinsics from similarity
46// matching and outlining.
48
49} // namespace llvm
50
51// Set to true if the user wants the ir outliner to run on linkonceodr linkage
52// functions. This is false by default because the linker can dedupe linkonceodr
53// functions. Since the outliner is confined to a single module (modulo LTO),
54// this is off by default. It should, however, be the default behavior in
55// LTO.
57 "enable-linkonceodr-ir-outlining", cl::Hidden,
58 cl::desc("Enable the IR outliner on linkonceodr functions"),
59 cl::init(false));
60
61// This is a debug option to test small pieces of code to ensure that outlining
62// works correctly.
64 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
65 cl::desc("Debug option to outline greedily, without restriction that "
66 "calculated benefit outweighs cost"));
67
68/// The OutlinableGroup holds all the overarching information for outlining
69/// a set of regions that are structurally similar to one another, such as the
70/// types of the overall function, the output blocks, the sets of stores needed
71/// and a list of the different regions. This information is used in the
72/// deduplication of extracted regions with the same structure.
74 /// The sections that could be outlined
75 std::vector<OutlinableRegion *> Regions;
76
77 /// The argument types for the function created as the overall function to
78 /// replace the extracted function for each region.
79 std::vector<Type *> ArgumentTypes;
80 /// The FunctionType for the overall function.
82 /// The Function for the collective overall function.
84
85 /// Flag for whether we should not consider this group of OutlinableRegions
86 /// for extraction.
87 bool IgnoreGroup = false;
88
89 /// The return blocks for the overall function.
91
92 /// The PHIBlocks with their corresponding return block based on the return
93 /// value as the key.
95
96 /// A set containing the different GVN store sets needed. Each array contains
97 /// a sorted list of the different values that need to be stored into output
98 /// registers.
100
101 /// Flag for whether the \ref ArgumentTypes have been defined after the
102 /// extraction of the first region.
103 bool InputTypesSet = false;
104
105 /// The number of input values in \ref ArgumentTypes. Anything after this
106 /// index in ArgumentTypes is an output argument.
107 unsigned NumAggregateInputs = 0;
108
109 /// The mapping of the canonical numbering of the values in outlined sections
110 /// to specific arguments.
112
113 /// The number of branches in the region target a basic block that is outside
114 /// of the region.
115 unsigned BranchesToOutside = 0;
116
117 /// Tracker counting backwards from the highest unsigned value possible to
118 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
119 /// -2 and -1 are assigned by the DenseMap.
120 unsigned PHINodeGVNTracker = -3;
121
123 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
126
127 /// The number of instructions that will be outlined by extracting \ref
128 /// Regions.
130 /// The number of added instructions needed for the outlining of the \ref
131 /// Regions.
133
134 /// The argument that needs to be marked with the swifterr attribute. If not
135 /// needed, there is no value.
136 std::optional<unsigned> SwiftErrorArgument;
137
138 /// For the \ref Regions, we look at every Value. If it is a constant,
139 /// we check whether it is the same in Region.
140 ///
141 /// \param [in,out] NotSame contains the global value numbers where the
142 /// constant is not always the same, and must be passed in as an argument.
144
145 /// For the regions, look at each set of GVN stores needed and account for
146 /// each combination. Add an argument to the argument types if there is
147 /// more than one combination.
148 ///
149 /// \param [in] M - The module we are outlining from.
151};
152
153/// Move the contents of \p SourceBB to before the last instruction of \p
154/// TargetBB.
155/// \param SourceBB - the BasicBlock to pull Instructions from.
156/// \param TargetBB - the BasicBlock to put Instruction into.
157static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
158 TargetBB.splice(TargetBB.end(), &SourceBB);
159}
160
161/// A function to sort the keys of \p Map, which must be a mapping of constant
162/// values to basic blocks and return it in \p SortedKeys
163///
164/// \param SortedKeys - The vector the keys will be return in and sorted.
165/// \param Map - The DenseMap containing keys to sort.
166static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
168 for (auto &VtoBB : Map)
169 SortedKeys.push_back(VtoBB.first);
170
171 // Here we expect to have either 1 value that is void (nullptr) or multiple
172 // values that are all constant integers.
173 if (SortedKeys.size() == 1) {
174 assert(!SortedKeys[0] && "Expected a single void value.");
175 return;
176 }
177
178 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
179 assert(LHS && RHS && "Expected non void values.");
180 const ConstantInt *LHSC = cast<ConstantInt>(LHS);
181 const ConstantInt *RHSC = cast<ConstantInt>(RHS);
182
183 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
184 });
185}
186
188 Value *V) {
189 std::optional<unsigned> GVN = Candidate->getGVN(V);
190 assert(GVN && "No GVN for incoming value");
191 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
192 std::optional<unsigned> FirstGVN =
193 Other.Candidate->fromCanonicalNum(*CanonNum);
194 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
195 return FoundValueOpt.value_or(nullptr);
196}
197
200 BasicBlock *BB) {
201 Instruction *FirstNonPHI = &*BB->getFirstNonPHIOrDbg();
202 assert(FirstNonPHI && "block is empty?");
203 Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
204 if (!CorrespondingVal)
205 return nullptr;
206 BasicBlock *CorrespondingBlock =
207 cast<Instruction>(CorrespondingVal)->getParent();
208 return CorrespondingBlock;
209}
210
211/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
212/// in \p Included to branch to BasicBlock \p Replace if they currently branch
213/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
214/// when PHINodes are included in outlined regions.
215///
216/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
217/// checked.
218/// \param Find - The successor block to be replaced.
219/// \param Replace - The new succesor block to branch to.
220/// \param Included - The set of blocks about to be outlined.
222 BasicBlock *Replace,
223 DenseSet<BasicBlock *> &Included) {
224 for (PHINode &PN : PHIBlock->phis()) {
225 for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
226 ++Idx) {
227 // Check if the incoming block is included in the set of blocks being
228 // outlined.
229 BasicBlock *Incoming = PN.getIncomingBlock(Idx);
230 if (!Included.contains(Incoming))
231 continue;
232
233 BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
234 assert(BI && "Not a branch instruction?");
235 // Look over the branching instructions into this block to see if we
236 // used to branch to Find in this outlined block.
237 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
238 Succ++) {
239 // If we have found the block to replace, we do so here.
240 if (BI->getSuccessor(Succ) != Find)
241 continue;
242 BI->setSuccessor(Succ, Replace);
243 }
244 }
245 }
246}
247
248
250 assert(!CandidateSplit && "Candidate already split!");
251
253
254 Instruction *EndInst = nullptr;
255 // Check whether the last instruction is a terminator, if it is, we do
256 // not split on the following instruction. We leave the block as it is. We
257 // also check that this is not the last instruction in the Module, otherwise
258 // the check for whether the current following instruction matches the
259 // previously recorded instruction will be incorrect.
260 if (!BackInst->isTerminator() ||
261 BackInst->getParent() != &BackInst->getFunction()->back()) {
262 EndInst = Candidate->end()->Inst;
263 assert(EndInst && "Expected an end instruction?");
264 }
265
266 // We check if the current instruction following the last instruction in the
267 // region is the same as the recorded instruction following the last
268 // instruction. If they do not match, there could be problems in rewriting
269 // the program after outlining, so we ignore it.
270 if (!BackInst->isTerminator() && EndInst != BackInst->getNextNode())
271 return;
272
273 Instruction *StartInst = (*Candidate->begin()).Inst;
274 assert(StartInst && "Expected a start instruction?");
275 StartBB = StartInst->getParent();
276 PrevBB = StartBB;
277
280
281 // We iterate over the instructions in the region, if we find a PHINode, we
282 // check if there are predecessors outside of the region, if there are,
283 // we ignore this region since we are unable to handle the severing of the
284 // phi node right now.
285
286 // TODO: Handle extraneous inputs for PHINodes through variable number of
287 // inputs, similar to how outputs are handled.
288 BasicBlock::iterator It = StartInst->getIterator();
289 EndBB = BackInst->getParent();
290 BasicBlock *IBlock;
291 BasicBlock *PHIPredBlock = nullptr;
292 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
293 while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
294 unsigned NumPredsOutsideRegion = 0;
295 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
296 if (!BBSet.contains(PN->getIncomingBlock(i))) {
297 PHIPredBlock = PN->getIncomingBlock(i);
298 ++NumPredsOutsideRegion;
299 continue;
300 }
301
302 // We must consider the case there the incoming block to the PHINode is
303 // the same as the final block of the OutlinableRegion. If this is the
304 // case, the branch from this block must also be outlined to be valid.
305 IBlock = PN->getIncomingBlock(i);
306 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
307 PHIPredBlock = PN->getIncomingBlock(i);
308 ++NumPredsOutsideRegion;
309 }
310 }
311
312 if (NumPredsOutsideRegion > 1)
313 return;
314
315 It++;
316 }
317
318 // If the region starts with a PHINode, but is not the initial instruction of
319 // the BasicBlock, we ignore this region for now.
320 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
321 return;
322
323 // If the region ends with a PHINode, but does not contain all of the phi node
324 // instructions of the region, we ignore it for now.
325 if (isa<PHINode>(BackInst) &&
326 BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
327 return;
328
329 // The basic block gets split like so:
330 // block: block:
331 // inst1 inst1
332 // inst2 inst2
333 // region1 br block_to_outline
334 // region2 block_to_outline:
335 // region3 -> region1
336 // region4 region2
337 // inst3 region3
338 // inst4 region4
339 // br block_after_outline
340 // block_after_outline:
341 // inst3
342 // inst4
343
344 std::string OriginalName = PrevBB->getName().str();
345
346 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
348 // If there was a PHINode with an incoming block outside the region,
349 // make sure is correctly updated in the newly split block.
350 if (PHIPredBlock)
352
353 CandidateSplit = true;
354 if (!BackInst->isTerminator()) {
355 EndBB = EndInst->getParent();
356 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
359 } else {
360 EndBB = BackInst->getParent();
361 EndsInBranch = true;
362 FollowBB = nullptr;
363 }
364
365 // Refind the basic block set.
366 BBSet.clear();
368 // For the phi nodes in the new starting basic block of the region, we
369 // reassign the targets of the basic blocks branching instructions.
371 if (FollowBB)
373}
374
376 assert(CandidateSplit && "Candidate is not split!");
377
378 // The basic block gets reattached like so:
379 // block: block:
380 // inst1 inst1
381 // inst2 inst2
382 // br block_to_outline region1
383 // block_to_outline: -> region2
384 // region1 region3
385 // region2 region4
386 // region3 inst3
387 // region4 inst4
388 // br block_after_outline
389 // block_after_outline:
390 // inst3
391 // inst4
392 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
393
394 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
395 // Make sure PHINode references to the block we are merging into are
396 // updated to be incoming blocks from the predecessor to the current block.
397
398 // NOTE: If this is updated such that the outlined block can have more than
399 // one incoming block to a PHINode, this logic will have to updated
400 // to handle multiple precessors instead.
401
402 // We only need to update this if the outlined section contains a PHINode, if
403 // it does not, then the incoming block was never changed in the first place.
404 // On the other hand, if PrevBB has no predecessors, it means that all
405 // incoming blocks to the first block are contained in the region, and there
406 // will be nothing to update.
407 Instruction *StartInst = (*Candidate->begin()).Inst;
408 if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
410 "PrevBB has more than one predecessor. Should be 0 or 1.");
411 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
413 }
415
416 // If we reattaching after outlining, we iterate over the phi nodes to
417 // the initial block, and reassign the branch instructions of the incoming
418 // blocks to the block we are remerging into.
419 if (!ExtractedFunction) {
422
424 if (!EndsInBranch)
426 }
427
429
430 BasicBlock *PlacementBB = PrevBB;
431 if (StartBB != EndBB)
432 PlacementBB = EndBB;
433 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
434 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
435 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
436 PlacementBB->getTerminator()->eraseFromParent();
437 moveBBContents(*FollowBB, *PlacementBB);
438 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
440 }
441
444
445 // Make sure to save changes back to the StartBB.
446 StartBB = PrevBB;
447 EndBB = nullptr;
448 PrevBB = nullptr;
449 FollowBB = nullptr;
450
451 CandidateSplit = false;
452}
453
454/// Find whether \p V matches the Constants previously found for the \p GVN.
455///
456/// \param V - The value to check for consistency.
457/// \param GVN - The global value number assigned to \p V.
458/// \param GVNToConstant - The mapping of global value number to Constants.
459/// \returns true if the Value matches the Constant mapped to by V and false if
460/// it \p V is a Constant but does not match.
461/// \returns std::nullopt if \p V is not a Constant.
462static std::optional<bool>
463constantMatches(Value *V, unsigned GVN,
464 DenseMap<unsigned, Constant *> &GVNToConstant) {
465 // See if we have a constants
466 Constant *CST = dyn_cast<Constant>(V);
467 if (!CST)
468 return std::nullopt;
469
470 // Holds a mapping from a global value number to a Constant.
472 bool Inserted;
473
474
475 // If we have a constant, try to make a new entry in the GVNToConstant.
476 std::tie(GVNToConstantIt, Inserted) =
477 GVNToConstant.insert(std::make_pair(GVN, CST));
478 // If it was found and is not equal, it is not the same. We do not
479 // handle this case yet, and exit early.
480 if (Inserted || (GVNToConstantIt->second == CST))
481 return true;
482
483 return false;
484}
485
487 InstructionCost Benefit = 0;
488
489 // Estimate the benefit of outlining a specific sections of the program. We
490 // delegate mostly this task to the TargetTransformInfo so that if the target
491 // has specific changes, we can have a more accurate estimate.
492
493 // However, getInstructionCost delegates the code size calculation for
494 // arithmetic instructions to getArithmeticInstrCost in
495 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
496 // code size for a division and remainder instruction to be equal to 4, and
497 // everything else to 1. This is not an accurate representation of the
498 // division instruction for targets that have a native division instruction.
499 // To be overly conservative, we only add 1 to the number of instructions for
500 // each division instruction.
501 for (IRInstructionData &ID : *Candidate) {
502 Instruction *I = ID.Inst;
503 switch (I->getOpcode()) {
504 case Instruction::FDiv:
505 case Instruction::FRem:
506 case Instruction::SDiv:
507 case Instruction::SRem:
508 case Instruction::UDiv:
509 case Instruction::URem:
510 Benefit += 1;
511 break;
512 default:
514 break;
515 }
516 }
517
518 return Benefit;
519}
520
521/// Check the \p OutputMappings structure for value \p Input, if it exists
522/// it has been used as an output for outlining, and has been renamed, and we
523/// return the new value, otherwise, we return the same value.
524///
525/// \param OutputMappings [in] - The mapping of values to their renamed value
526/// after being used as an output for an outlined region.
527/// \param Input [in] - The value to find the remapped value of, if it exists.
528/// \return The remapped value if it has been renamed, and the same value if has
529/// not.
531 Value *Input) {
533 OutputMappings.find(Input);
534 if (OutputMapping != OutputMappings.end())
535 return OutputMapping->second;
536 return Input;
537}
538
539/// Find whether \p Region matches the global value numbering to Constant
540/// mapping found so far.
541///
542/// \param Region - The OutlinableRegion we are checking for constants
543/// \param GVNToConstant - The mapping of global value number to Constants.
544/// \param NotSame - The set of global value numbers that do not have the same
545/// constant in each region.
546/// \returns true if all Constants are the same in every use of a Constant in \p
547/// Region and false if not
548static bool
550 DenseMap<unsigned, Constant *> &GVNToConstant,
551 DenseSet<unsigned> &NotSame) {
552 bool ConstantsTheSame = true;
553
554 IRSimilarityCandidate &C = *Region.Candidate;
555 for (IRInstructionData &ID : C) {
556
557 // Iterate over the operands in an instruction. If the global value number,
558 // assigned by the IRSimilarityCandidate, has been seen before, we check if
559 // the number has been found to be not the same value in each instance.
560 for (Value *V : ID.OperVals) {
561 std::optional<unsigned> GVNOpt = C.getGVN(V);
562 assert(GVNOpt && "Expected a GVN for operand?");
563 unsigned GVN = *GVNOpt;
564
565 // Check if this global value has been found to not be the same already.
566 if (NotSame.contains(GVN)) {
567 if (isa<Constant>(V))
568 ConstantsTheSame = false;
569 continue;
570 }
571
572 // If it has been the same so far, we check the value for if the
573 // associated Constant value match the previous instances of the same
574 // global value number. If the global value does not map to a Constant,
575 // it is considered to not be the same value.
576 std::optional<bool> ConstantMatches =
577 constantMatches(V, GVN, GVNToConstant);
578 if (ConstantMatches) {
579 if (*ConstantMatches)
580 continue;
581 else
582 ConstantsTheSame = false;
583 }
584
585 // While this value is a register, it might not have been previously,
586 // make sure we don't already have a constant mapped to this global value
587 // number.
588 if (GVNToConstant.contains(GVN))
589 ConstantsTheSame = false;
590
591 NotSame.insert(GVN);
592 }
593 }
594
595 return ConstantsTheSame;
596}
597
599 DenseMap<unsigned, Constant *> GVNToConstant;
600
601 for (OutlinableRegion *Region : Regions)
602 collectRegionsConstants(*Region, GVNToConstant, NotSame);
603}
604
606 for (OutlinableRegion *OS : Regions)
607 OutputGVNCombinations.insert(OS->GVNStores);
608
609 // We are adding an extracted argument to decide between which output path
610 // to use in the basic block. It is used in a switch statement and only
611 // needs to be an integer.
612 if (OutputGVNCombinations.size() > 1)
613 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
614}
615
616/// Get the subprogram if it exists for one of the outlined regions.
617///
618/// \param [in] Group - The set of regions to find a subprogram for.
619/// \returns the subprogram if it exists, or nullptr.
621 for (OutlinableRegion *OS : Group.Regions)
622 if (Function *F = OS->Call->getFunction())
623 if (DISubprogram *SP = F->getSubprogram())
624 return SP;
625
626 return nullptr;
627}
628
629Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
630 unsigned FunctionNameSuffix) {
631 assert(!Group.OutlinedFunction && "Function is already defined!");
632
633 Type *RetTy = Type::getVoidTy(M.getContext());
634 // All extracted functions _should_ have the same return type at this point
635 // since the similarity identifier ensures that all branches outside of the
636 // region occur in the same place.
637
638 // NOTE: Should we ever move to the model that uses a switch at every point
639 // needed, meaning that we could branch within the region or out, it is
640 // possible that we will need to switch to using the most general case all of
641 // the time.
642 for (OutlinableRegion *R : Group.Regions) {
643 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
644 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
645 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
646 RetTy = ExtractedFuncType;
647 }
648
650 RetTy, Group.ArgumentTypes, false);
651
652 // These functions will only be called from within the same module, so
653 // we can set an internal linkage.
656 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
657
658 // Transfer the swifterr attribute to the correct function parameter.
659 if (Group.SwiftErrorArgument)
661 Attribute::SwiftError);
662
663 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
664 Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
665
666 // If there's a DISubprogram associated with this outlined function, then
667 // emit debug info for the outlined function.
668 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
669 Function *F = Group.OutlinedFunction;
670 // We have a DISubprogram. Get its DICompileUnit.
671 DICompileUnit *CU = SP->getUnit();
672 DIBuilder DB(M, true, CU);
673 DIFile *Unit = SP->getFile();
674 Mangler Mg;
675 // Get the mangled name of the function for the linkage name.
676 std::string Dummy;
677 llvm::raw_string_ostream MangledNameStream(Dummy);
678 Mg.getNameWithPrefix(MangledNameStream, F, false);
679
680 DISubprogram *OutlinedSP = DB.createFunction(
681 Unit /* Context */, F->getName(), Dummy, Unit /* File */,
682 0 /* Line 0 is reserved for compiler-generated code. */,
683 DB.createSubroutineType(DB.getOrCreateTypeArray({})), /* void type */
684 0, /* Line 0 is reserved for compiler-generated code. */
685 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
686 /* Outlined code is optimized code by definition. */
687 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
688
689 // Attach subprogram to the function.
690 F->setSubprogram(OutlinedSP);
691 // We're done with the DIBuilder.
692 DB.finalize();
693 }
694
695 return Group.OutlinedFunction;
696}
697
698/// Move each BasicBlock in \p Old to \p New.
699///
700/// \param [in] Old - The function to move the basic blocks from.
701/// \param [in] New - The function to move the basic blocks to.
702/// \param [out] NewEnds - The return blocks of the new overall function.
703static void moveFunctionData(Function &Old, Function &New,
705 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
706 CurrBB.removeFromParent();
707 CurrBB.insertInto(&New);
708 Instruction *I = CurrBB.getTerminator();
709
710 // For each block we find a return instruction is, it is a potential exit
711 // path for the function. We keep track of each block based on the return
712 // value here.
713 if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
714 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
715
716 for (Instruction &Val : CurrBB) {
717 // Since debug-info originates from many different locations in the
718 // program, it will cause incorrect reporting from a debugger if we keep
719 // the same debug instructions. Drop non-intrinsic DbgVariableRecords
720 // here, collect intrinsics for removal later.
721 Val.dropDbgRecords();
722
723 // We must handle the scoping of called functions differently than
724 // other outlined instructions.
725 if (!isa<CallInst>(&Val)) {
726 // Remove the debug information for outlined functions.
727 Val.setDebugLoc(DebugLoc::getDropped());
728
729 // Loop info metadata may contain line locations. Update them to have no
730 // value in the new subprogram since the outlined code could be from
731 // several locations.
732 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
733 if (DISubprogram *SP = New.getSubprogram())
734 if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
735 return DILocation::get(New.getContext(), Loc->getLine(),
736 Loc->getColumn(), SP, nullptr);
737 return MD;
738 };
739 updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
740 continue;
741 }
742
743 // Edit the scope of called functions inside of outlined functions.
744 if (DISubprogram *SP = New.getSubprogram()) {
745 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
746 Val.setDebugLoc(DI);
747 }
748 }
749 }
750}
751
752/// Find the constants that will need to be lifted into arguments
753/// as they are not the same in each instance of the region.
754///
755/// \param [in] C - The IRSimilarityCandidate containing the region we are
756/// analyzing.
757/// \param [in] NotSame - The set of global value numbers that do not have a
758/// single Constant across all OutlinableRegions similar to \p C.
759/// \param [out] Inputs - The list containing the global value numbers of the
760/// arguments needed for the region of code.
762 std::vector<unsigned> &Inputs) {
764 // Iterate over the instructions, and find what constants will need to be
765 // extracted into arguments.
766 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
767 IDIt != EndIDIt; IDIt++) {
768 for (Value *V : (*IDIt).OperVals) {
769 // Since these are stored before any outlining, they will be in the
770 // global value numbering.
771 unsigned GVN = *C.getGVN(V);
772 if (isa<Constant>(V))
773 if (NotSame.contains(GVN) && Seen.insert(GVN).second)
774 Inputs.push_back(GVN);
775 }
776 }
777}
778
779/// Find the GVN for the inputs that have been found by the CodeExtractor.
780///
781/// \param [in] C - The IRSimilarityCandidate containing the region we are
782/// analyzing.
783/// \param [in] CurrentInputs - The set of inputs found by the
784/// CodeExtractor.
785/// \param [in] OutputMappings - The mapping of values that have been replaced
786/// by a new output value.
787/// \param [out] EndInputNumbers - The global value numbers for the extracted
788/// arguments.
790 SetVector<Value *> &CurrentInputs,
791 const DenseMap<Value *, Value *> &OutputMappings,
792 std::vector<unsigned> &EndInputNumbers) {
793 // Get the Global Value Number for each input. We check if the Value has been
794 // replaced by a different value at output, and use the original value before
795 // replacement.
796 for (Value *Input : CurrentInputs) {
797 assert(Input && "Have a nullptr as an input");
798 auto It = OutputMappings.find(Input);
799 if (It != OutputMappings.end())
800 Input = It->second;
801 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
802 EndInputNumbers.push_back(*C.getGVN(Input));
803 }
804}
805
806/// Find the original value for the \p ArgInput values if any one of them was
807/// replaced during a previous extraction.
808///
809/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
810/// \param [in] OutputMappings - The mapping of values that have been replaced
811/// by a new output value.
812/// \param [out] RemappedArgInputs - The remapped values according to
813/// \p OutputMappings that will be extracted.
814static void
816 const DenseMap<Value *, Value *> &OutputMappings,
817 SetVector<Value *> &RemappedArgInputs) {
818 // Get the global value number for each input that will be extracted as an
819 // argument by the code extractor, remapping if needed for reloaded values.
820 for (Value *Input : ArgInputs) {
821 auto It = OutputMappings.find(Input);
822 if (It != OutputMappings.end())
823 Input = It->second;
824 RemappedArgInputs.insert(Input);
825 }
826}
827
828/// Find the input GVNs and the output values for a region of Instructions.
829/// Using the code extractor, we collect the inputs to the extracted function.
830///
831/// The \p Region can be identified as needing to be ignored in this function.
832/// It should be checked whether it should be ignored after a call to this
833/// function.
834///
835/// \param [in,out] Region - The region of code to be analyzed.
836/// \param [out] InputGVNs - The global value numbers for the extracted
837/// arguments.
838/// \param [in] NotSame - The global value numbers in the region that do not
839/// have the same constant value in the regions structurally similar to
840/// \p Region.
841/// \param [in] OutputMappings - The mapping of values that have been replaced
842/// by a new output value after extraction.
843/// \param [out] ArgInputs - The values of the inputs to the extracted function.
844/// \param [out] Outputs - The set of values extracted by the CodeExtractor
845/// as outputs.
847 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
848 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
849 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
850 IRSimilarityCandidate &C = *Region.Candidate;
851
852 // OverallInputs are the inputs to the region found by the CodeExtractor,
853 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
854 // allocas of values whose lifetimes are contained completely within the
855 // outlined region. PremappedInputs are the arguments found by the
856 // CodeExtractor, removing conditions such as sunken allocas, but that
857 // may need to be remapped due to the extracted output values replacing
858 // the original values. We use DummyOutputs for this first run of finding
859 // inputs and outputs since the outputs could change during findAllocas,
860 // the correct set of extracted outputs will be in the final Outputs ValueSet.
861 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
862 DummyOutputs;
863
864 // Use the code extractor to get the inputs and outputs, without sunken
865 // allocas or removing llvm.assumes.
866 CodeExtractor *CE = Region.CE;
867 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
868 assert(Region.StartBB && "Region must have a start BasicBlock!");
869 Function *OrigF = Region.StartBB->getParent();
870 CodeExtractorAnalysisCache CEAC(*OrigF);
871 BasicBlock *Dummy = nullptr;
872
873 // The region may be ineligible due to VarArgs in the parent function. In this
874 // case we ignore the region.
875 if (!CE->isEligible()) {
876 Region.IgnoreRegion = true;
877 return;
878 }
879
880 // Find if any values are going to be sunk into the function when extracted
881 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
882 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
883
884 // TODO: Support regions with sunken allocas: values whose lifetimes are
885 // contained completely within the outlined region. These are not guaranteed
886 // to be the same in every region, so we must elevate them all to arguments
887 // when they appear. If these values are not equal, it means there is some
888 // Input in OverallInputs that was removed for ArgInputs.
889 if (OverallInputs.size() != PremappedInputs.size()) {
890 Region.IgnoreRegion = true;
891 return;
892 }
893
894 findConstants(C, NotSame, InputGVNs);
895
896 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
897
898 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
899 ArgInputs);
900
901 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
902 // we need to make sure they are in a deterministic order.
903 stable_sort(InputGVNs);
904}
905
906/// Look over the inputs and map each input argument to an argument in the
907/// overall function for the OutlinableRegions. This creates a way to replace
908/// the arguments of the extracted function with the arguments of the new
909/// overall function.
910///
911/// \param [in,out] Region - The region of code to be analyzed.
912/// \param [in] InputGVNs - The global value numbering of the input values
913/// collected.
914/// \param [in] ArgInputs - The values of the arguments to the extracted
915/// function.
916static void
918 std::vector<unsigned> &InputGVNs,
919 SetVector<Value *> &ArgInputs) {
920
921 IRSimilarityCandidate &C = *Region.Candidate;
922 OutlinableGroup &Group = *Region.Parent;
923
924 // This counts the argument number in the overall function.
925 unsigned TypeIndex = 0;
926
927 // This counts the argument number in the extracted function.
928 unsigned OriginalIndex = 0;
929
930 // Find the mapping of the extracted arguments to the arguments for the
931 // overall function. Since there may be extra arguments in the overall
932 // function to account for the extracted constants, we have two different
933 // counters as we find extracted arguments, and as we come across overall
934 // arguments.
935
936 // Additionally, in our first pass, for the first extracted function,
937 // we find argument locations for the canonical value numbering. This
938 // numbering overrides any discovered location for the extracted code.
939 for (unsigned InputVal : InputGVNs) {
940 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
941 assert(CanonicalNumberOpt && "Canonical number not found?");
942 unsigned CanonicalNumber = *CanonicalNumberOpt;
943
944 std::optional<Value *> InputOpt = C.fromGVN(InputVal);
945 assert(InputOpt && "Global value number not found?");
946 Value *Input = *InputOpt;
947
949 Group.CanonicalNumberToAggArg.find(CanonicalNumber);
950
951 if (!Group.InputTypesSet) {
952 Group.ArgumentTypes.push_back(Input->getType());
953 // If the input value has a swifterr attribute, make sure to mark the
954 // argument in the overall function.
955 if (Input->isSwiftError()) {
956 assert(
957 !Group.SwiftErrorArgument &&
958 "Argument already marked with swifterr for this OutlinableGroup!");
959 Group.SwiftErrorArgument = TypeIndex;
960 }
961 }
962
963 // Check if we have a constant. If we do add it to the overall argument
964 // number to Constant map for the region, and continue to the next input.
965 if (Constant *CST = dyn_cast<Constant>(Input)) {
966 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
967 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
968 else {
970 std::make_pair(CanonicalNumber, TypeIndex));
971 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
972 }
973 TypeIndex++;
974 continue;
975 }
976
977 // It is not a constant, we create the mapping from extracted argument list
978 // to the overall argument list, using the canonical location, if it exists.
979 assert(ArgInputs.count(Input) && "Input cannot be found!");
980
981 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
982 if (OriginalIndex != AggArgIt->second)
983 Region.ChangedArgOrder = true;
984 Region.ExtractedArgToAgg.insert(
985 std::make_pair(OriginalIndex, AggArgIt->second));
986 Region.AggArgToExtracted.insert(
987 std::make_pair(AggArgIt->second, OriginalIndex));
988 } else {
990 std::make_pair(CanonicalNumber, TypeIndex));
991 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
992 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
993 }
994 OriginalIndex++;
995 TypeIndex++;
996 }
997
998 // If the function type definitions for the OutlinableGroup holding the region
999 // have not been set, set the length of the inputs here. We should have the
1000 // same inputs for all of the different regions contained in the
1001 // OutlinableGroup since they are all structurally similar to one another.
1002 if (!Group.InputTypesSet) {
1003 Group.NumAggregateInputs = TypeIndex;
1004 Group.InputTypesSet = true;
1005 }
1006
1007 Region.NumExtractedInputs = OriginalIndex;
1008}
1009
1010/// Check if the \p V has any uses outside of the region other than \p PN.
1011///
1012/// \param V [in] - The value to check.
1013/// \param PHILoc [in] - The location in the PHINode of \p V.
1014/// \param PN [in] - The PHINode using \p V.
1015/// \param Exits [in] - The potential blocks we exit to from the outlined
1016/// region.
1017/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1018/// \returns true if \p V has any use soutside its region other than \p PN.
1019static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1021 DenseSet<BasicBlock *> &BlocksInRegion) {
1022 // We check to see if the value is used by the PHINode from some other
1023 // predecessor not included in the region. If it is, we make sure
1024 // to keep it as an output.
1025 if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
1026 [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1027 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
1028 !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
1029 }))
1030 return true;
1031
1032 // Check if the value is used by any other instructions outside the region.
1033 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
1034 Instruction *I = dyn_cast<Instruction>(U);
1035 if (!I)
1036 return false;
1037
1038 // If the use of the item is inside the region, we skip it. Uses
1039 // inside the region give us useful information about how the item could be
1040 // used as an output.
1041 BasicBlock *Parent = I->getParent();
1042 if (BlocksInRegion.contains(Parent))
1043 return false;
1044
1045 // If it's not a PHINode then we definitely know the use matters. This
1046 // output value will not completely combined with another item in a PHINode
1047 // as it is directly reference by another non-phi instruction
1048 if (!isa<PHINode>(I))
1049 return true;
1050
1051 // If we have a PHINode outside one of the exit locations, then it
1052 // can be considered an outside use as well. If there is a PHINode
1053 // contained in the Exit where this values use matters, it will be
1054 // caught when we analyze that PHINode.
1055 if (!Exits.contains(Parent))
1056 return true;
1057
1058 return false;
1059 });
1060}
1061
1062/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1063/// considered outputs. A PHINodes is an output when more than one incoming
1064/// value has been marked by the CodeExtractor as an output.
1065///
1066/// \param CurrentExitFromRegion [in] - The block to analyze.
1067/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1068/// region.
1069/// \param RegionBlocks [in] - The basic blocks in the region.
1070/// \param Outputs [in, out] - The existing outputs for the region, we may add
1071/// PHINodes to this as we find that they replace output values.
1072/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1073/// totally replaced by a PHINode.
1074/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1075/// in PHINodes, but have other uses, and should still be considered outputs.
1077 BasicBlock *CurrentExitFromRegion,
1078 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1079 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1080 DenseSet<Value *> &OutputsReplacedByPHINode,
1081 DenseSet<Value *> &OutputsWithNonPhiUses) {
1082 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1083 // Find all incoming values from the outlining region.
1084 SmallVector<unsigned, 2> IncomingVals;
1085 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1086 if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1087 IncomingVals.push_back(I);
1088
1089 // Do not process PHI if there are no predecessors from region.
1090 unsigned NumIncomingVals = IncomingVals.size();
1091 if (NumIncomingVals == 0)
1092 continue;
1093
1094 // If there is one predecessor, we mark it as a value that needs to be kept
1095 // as an output.
1096 if (NumIncomingVals == 1) {
1097 Value *V = PN.getIncomingValue(*IncomingVals.begin());
1098 OutputsWithNonPhiUses.insert(V);
1099 OutputsReplacedByPHINode.erase(V);
1100 continue;
1101 }
1102
1103 // This PHINode will be used as an output value, so we add it to our list.
1104 Outputs.insert(&PN);
1105
1106 // Not all of the incoming values should be ignored as other inputs and
1107 // outputs may have uses in outlined region. If they have other uses
1108 // outside of the single PHINode we should not skip over it.
1109 for (unsigned Idx : IncomingVals) {
1110 Value *V = PN.getIncomingValue(Idx);
1111 if (!isa<Constant>(V) &&
1112 outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1113 OutputsWithNonPhiUses.insert(V);
1114 OutputsReplacedByPHINode.erase(V);
1115 continue;
1116 }
1117 if (!OutputsWithNonPhiUses.contains(V))
1118 OutputsReplacedByPHINode.insert(V);
1119 }
1120 }
1121}
1122
1123// Represents the type for the unsigned number denoting the output number for
1124// phi node, along with the canonical number for the exit block.
1125using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1126// The list of canonical numbers for the incoming values to a PHINode.
1128// The pair type representing the set of canonical values being combined in the
1129// PHINode, along with the location data for the PHINode.
1130using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1131
1132/// Encode \p PND as an integer for easy lookup based on the argument location,
1133/// the parent BasicBlock canonical numbering, and the canonical numbering of
1134/// the values stored in the PHINode.
1135///
1136/// \param PND - The data to hash.
1137/// \returns The hash code of \p PND.
1139 return llvm::hash_combine(llvm::hash_value(PND.first.first),
1140 llvm::hash_value(PND.first.second),
1141 llvm::hash_combine_range(PND.second));
1142}
1143
1144/// Create a special GVN for PHINodes that will be used outside of
1145/// the region. We create a hash code based on the Canonical number of the
1146/// parent BasicBlock, the canonical numbering of the values stored in the
1147/// PHINode and the aggregate argument location. This is used to find whether
1148/// this PHINode type has been given a canonical numbering already. If not, we
1149/// assign it a value and store it for later use. The value is returned to
1150/// identify different output schemes for the set of regions.
1151///
1152/// \param Region - The region that \p PN is an output for.
1153/// \param PN - The PHINode we are analyzing.
1154/// \param Blocks - The blocks for the region we are analyzing.
1155/// \param AggArgIdx - The argument \p PN will be stored into.
1156/// \returns An optional holding the assigned canonical number, or std::nullopt
1157/// if there is some attribute of the PHINode blocking it from being used.
1158static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1159 PHINode *PN,
1161 unsigned AggArgIdx) {
1162 OutlinableGroup &Group = *Region.Parent;
1163 IRSimilarityCandidate &Cand = *Region.Candidate;
1164 BasicBlock *PHIBB = PN->getParent();
1165 CanonList PHIGVNs;
1166 Value *Incoming;
1167 BasicBlock *IncomingBlock;
1168 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1170 IncomingBlock = PN->getIncomingBlock(Idx);
1171 // If the incoming block isn't in the region, we don't have to worry about
1172 // this incoming value.
1173 if (!Blocks.contains(IncomingBlock))
1174 continue;
1175
1176 // If we cannot find a GVN, and the incoming block is included in the region
1177 // this means that the input to the PHINode is not included in the region we
1178 // are trying to analyze, meaning, that if it was outlined, we would be
1179 // adding an extra input. We ignore this case for now, and so ignore the
1180 // region.
1181 std::optional<unsigned> OGVN = Cand.getGVN(Incoming);
1182 if (!OGVN) {
1183 Region.IgnoreRegion = true;
1184 return std::nullopt;
1185 }
1186
1187 // Collect the canonical numbers of the values in the PHINode.
1188 unsigned GVN = *OGVN;
1189 OGVN = Cand.getCanonicalNum(GVN);
1190 assert(OGVN && "No GVN found for incoming value?");
1191 PHIGVNs.push_back(*OGVN);
1192
1193 // Find the incoming block and use the canonical numbering as well to define
1194 // the hash for the PHINode.
1195 OGVN = Cand.getGVN(IncomingBlock);
1196
1197 // If there is no number for the incoming block, it is because we have
1198 // split the candidate basic blocks. So we use the previous block that it
1199 // was split from to find the valid global value numbering for the PHINode.
1200 if (!OGVN) {
1201 assert(Cand.getStartBB() == IncomingBlock &&
1202 "Unknown basic block used in exit path PHINode.");
1203
1204 BasicBlock *PrevBlock = nullptr;
1205 // Iterate over the predecessors to the incoming block of the
1206 // PHINode, when we find a block that is not contained in the region
1207 // we know that this is the first block that we split from, and should
1208 // have a valid global value numbering.
1209 for (BasicBlock *Pred : predecessors(IncomingBlock))
1210 if (!Blocks.contains(Pred)) {
1211 PrevBlock = Pred;
1212 break;
1213 }
1214 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1215 OGVN = Cand.getGVN(PrevBlock);
1216 }
1217 GVN = *OGVN;
1218 OGVN = Cand.getCanonicalNum(GVN);
1219 assert(OGVN && "No GVN found for incoming block?");
1220 PHIGVNs.push_back(*OGVN);
1221 }
1222
1223 // Now that we have the GVNs for the incoming values, we are going to combine
1224 // them with the GVN of the incoming bock, and the output location of the
1225 // PHINode to generate a hash value representing this instance of the PHINode.
1228 std::optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1229 assert(BBGVN && "Could not find GVN for the incoming block!");
1230
1231 BBGVN = Cand.getCanonicalNum(*BBGVN);
1232 assert(BBGVN && "Could not find canonical number for the incoming block!");
1233 // Create a pair of the exit block canonical value, and the aggregate
1234 // argument location, connected to the canonical numbers stored in the
1235 // PHINode.
1236 PHINodeData TemporaryPair =
1237 std::make_pair(std::make_pair(*BBGVN, AggArgIdx), PHIGVNs);
1238 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1239
1240 // Look for and create a new entry in our connection between canonical
1241 // numbers for PHINodes, and the set of objects we just created.
1242 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1243 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1244 bool Inserted = false;
1245 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1246 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1247 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1248 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1249 }
1250
1251 return GVNToPHIIt->second;
1252}
1253
1254/// Create a mapping of the output arguments for the \p Region to the output
1255/// arguments of the overall outlined function.
1256///
1257/// \param [in,out] Region - The region of code to be analyzed.
1258/// \param [in] Outputs - The values found by the code extractor.
1259static void
1261 SetVector<Value *> &Outputs) {
1262 OutlinableGroup &Group = *Region.Parent;
1263 IRSimilarityCandidate &C = *Region.Candidate;
1264
1266 DenseSet<BasicBlock *> BlocksInRegion;
1267 C.getBasicBlocks(BlocksInRegion, BE);
1268
1269 // Find the exits to the region.
1271 for (BasicBlock *Block : BE)
1272 for (BasicBlock *Succ : successors(Block))
1273 if (!BlocksInRegion.contains(Succ))
1274 Exits.insert(Succ);
1275
1276 // After determining which blocks exit to PHINodes, we add these PHINodes to
1277 // the set of outputs to be processed. We also check the incoming values of
1278 // the PHINodes for whether they should no longer be considered outputs.
1279 DenseSet<Value *> OutputsReplacedByPHINode;
1280 DenseSet<Value *> OutputsWithNonPhiUses;
1281 for (BasicBlock *ExitBB : Exits)
1282 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1283 OutputsReplacedByPHINode,
1284 OutputsWithNonPhiUses);
1285
1286 // This counts the argument number in the extracted function.
1287 unsigned OriginalIndex = Region.NumExtractedInputs;
1288
1289 // This counts the argument number in the overall function.
1290 unsigned TypeIndex = Group.NumAggregateInputs;
1291 bool TypeFound;
1292 DenseSet<unsigned> AggArgsUsed;
1293
1294 // Iterate over the output types and identify if there is an aggregate pointer
1295 // type whose base type matches the current output type. If there is, we mark
1296 // that we will use this output register for this value. If not we add another
1297 // type to the overall argument type list. We also store the GVNs used for
1298 // stores to identify which values will need to be moved into an special
1299 // block that holds the stores to the output registers.
1300 for (Value *Output : Outputs) {
1301 TypeFound = false;
1302 // We can do this since it is a result value, and will have a number
1303 // that is necessarily the same. BUT if in the future, the instructions
1304 // do not have to be in same order, but are functionally the same, we will
1305 // have to use a different scheme, as one-to-one correspondence is not
1306 // guaranteed.
1307 unsigned ArgumentSize = Group.ArgumentTypes.size();
1308
1309 // If the output is combined in a PHINode, we make sure to skip over it.
1310 if (OutputsReplacedByPHINode.contains(Output))
1311 continue;
1312
1313 unsigned AggArgIdx = 0;
1314 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1315 if (!isa<PointerType>(Group.ArgumentTypes[Jdx]))
1316 continue;
1317
1318 if (!AggArgsUsed.insert(Jdx).second)
1319 continue;
1320
1321 TypeFound = true;
1322 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1323 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1324 AggArgIdx = Jdx;
1325 break;
1326 }
1327
1328 // We were unable to find an unused type in the output type set that matches
1329 // the output, so we add a pointer type to the argument types of the overall
1330 // function to handle this output and create a mapping to it.
1331 if (!TypeFound) {
1332 Group.ArgumentTypes.push_back(PointerType::get(Output->getContext(),
1333 M.getDataLayout().getAllocaAddrSpace()));
1334 // Mark the new pointer type as the last value in the aggregate argument
1335 // list.
1336 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1337 AggArgsUsed.insert(ArgTypeIdx);
1338 Region.ExtractedArgToAgg.insert(
1339 std::make_pair(OriginalIndex, ArgTypeIdx));
1340 Region.AggArgToExtracted.insert(
1341 std::make_pair(ArgTypeIdx, OriginalIndex));
1342 AggArgIdx = ArgTypeIdx;
1343 }
1344
1345 // TODO: Adapt to the extra input from the PHINode.
1346 PHINode *PN = dyn_cast<PHINode>(Output);
1347
1348 std::optional<unsigned> GVN;
1349 if (PN && !BlocksInRegion.contains(PN->getParent())) {
1350 // Values outside the region can be combined into PHINode when we
1351 // have multiple exits. We collect both of these into a list to identify
1352 // which values are being used in the PHINode. Each list identifies a
1353 // different PHINode, and a different output. We store the PHINode as it's
1354 // own canonical value. These canonical values are also dependent on the
1355 // output argument it is saved to.
1356
1357 // If two PHINodes have the same canonical values, but different aggregate
1358 // argument locations, then they will have distinct Canonical Values.
1359 GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1360 if (!GVN)
1361 return;
1362 } else {
1363 // If we do not have a PHINode we use the global value numbering for the
1364 // output value, to find the canonical number to add to the set of stored
1365 // values.
1366 GVN = C.getGVN(Output);
1367 GVN = C.getCanonicalNum(*GVN);
1368 }
1369
1370 // Each region has a potentially unique set of outputs. We save which
1371 // values are output in a list of canonical values so we can differentiate
1372 // among the different store schemes.
1373 Region.GVNStores.push_back(*GVN);
1374
1375 OriginalIndex++;
1376 TypeIndex++;
1377 }
1378
1379 // We sort the stored values to make sure that we are not affected by analysis
1380 // order when determining what combination of items were stored.
1381 stable_sort(Region.GVNStores);
1382}
1383
1384void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1385 DenseSet<unsigned> &NotSame) {
1386 std::vector<unsigned> Inputs;
1387 SetVector<Value *> ArgInputs, Outputs;
1388
1389 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1390 Outputs);
1391
1392 if (Region.IgnoreRegion)
1393 return;
1394
1395 // Map the inputs found by the CodeExtractor to the arguments found for
1396 // the overall function.
1398
1399 // Map the outputs found by the CodeExtractor to the arguments found for
1400 // the overall function.
1402}
1403
1404/// Replace the extracted function in the Region with a call to the overall
1405/// function constructed from the deduplicated similar regions, replacing and
1406/// remapping the values passed to the extracted function as arguments to the
1407/// new arguments of the overall function.
1408///
1409/// \param [in] M - The module to outline from.
1410/// \param [in] Region - The regions of extracted code to be replaced with a new
1411/// function.
1412/// \returns a call instruction with the replaced function.
1414 std::vector<Value *> NewCallArgs;
1416
1417 OutlinableGroup &Group = *Region.Parent;
1418 CallInst *Call = Region.Call;
1419 assert(Call && "Call to replace is nullptr?");
1420 Function *AggFunc = Group.OutlinedFunction;
1421 assert(AggFunc && "Function to replace with is nullptr?");
1422
1423 // If the arguments are the same size, there are not values that need to be
1424 // made into an argument, the argument ordering has not been change, or
1425 // different output registers to handle. We can simply replace the called
1426 // function in this case.
1427 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1428 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1429 << *AggFunc << " with same number of arguments\n");
1430 Call->setCalledFunction(AggFunc);
1431 return Call;
1432 }
1433
1434 // We have a different number of arguments than the new function, so
1435 // we need to use our previously mappings off extracted argument to overall
1436 // function argument, and constants to overall function argument to create the
1437 // new argument list.
1438 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1439
1440 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1441 Group.OutputGVNCombinations.size() > 1) {
1442 // If we are on the last argument, and we need to differentiate between
1443 // output blocks, add an integer to the argument list to determine
1444 // what block to take
1445 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1446 << Region.OutputBlockNum << "\n");
1447 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1448 Region.OutputBlockNum));
1449 continue;
1450 }
1451
1452 ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1453 if (ArgPair != Region.AggArgToExtracted.end()) {
1454 Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1455 // If we found the mapping from the extracted function to the overall
1456 // function, we simply add it to the argument list. We use the same
1457 // value, it just needs to honor the new order of arguments.
1458 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1459 << *ArgumentValue << "\n");
1460 NewCallArgs.push_back(ArgumentValue);
1461 continue;
1462 }
1463
1464 // If it is a constant, we simply add it to the argument list as a value.
1465 if (auto It = Region.AggArgToConstant.find(AggArgIdx);
1466 It != Region.AggArgToConstant.end()) {
1467 Constant *CST = It->second;
1468 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1469 << *CST << "\n");
1470 NewCallArgs.push_back(CST);
1471 continue;
1472 }
1473
1474 // Add a nullptr value if the argument is not found in the extracted
1475 // function. If we cannot find a value, it means it is not in use
1476 // for the region, so we should not pass anything to it.
1477 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1478 NewCallArgs.push_back(ConstantPointerNull::get(
1479 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1480 }
1481
1482 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1483 << *AggFunc << " with new set of arguments\n");
1484 // Create the new call instruction and erase the old one.
1485 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1486 Call->getIterator());
1487
1488 // It is possible that the call to the outlined function is either the first
1489 // instruction is in the new block, the last instruction, or both. If either
1490 // of these is the case, we need to make sure that we replace the instruction
1491 // in the IRInstructionData struct with the new call.
1492 CallInst *OldCall = Region.Call;
1493 if (Region.NewFront->Inst == OldCall)
1494 Region.NewFront->Inst = Call;
1495 if (Region.NewBack->Inst == OldCall)
1496 Region.NewBack->Inst = Call;
1497
1498 // Transfer any debug information.
1499 Call->setDebugLoc(Region.Call->getDebugLoc());
1500 // Since our output may determine which branch we go to, we make sure to
1501 // propagate this new call value through the module.
1502 OldCall->replaceAllUsesWith(Call);
1503
1504 // Remove the old instruction.
1505 OldCall->eraseFromParent();
1506 Region.Call = Call;
1507
1508 // Make sure that the argument in the new function has the SwiftError
1509 // argument.
1510 if (Group.SwiftErrorArgument)
1511 Call->addParamAttr(*Group.SwiftErrorArgument, Attribute::SwiftError);
1512
1513 return Call;
1514}
1515
1516/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1517/// for \p RetVal.
1518///
1519/// \param Group - The OutlinableGroup containing the information about the
1520/// overall outlined function.
1521/// \param RetVal - The return value or exit option that we are currently
1522/// evaluating.
1523/// \returns The found or newly created BasicBlock to contain the needed
1524/// PHINodes to be used as outputs.
1526 // Find if a PHIBlock exists for this return value already. If it is
1527 // the first time we are analyzing this, we will not, so we record it.
1528 auto [PhiBlockForRetVal, Inserted] = Group.PHIBlocks.try_emplace(RetVal);
1529 if (!Inserted)
1530 return PhiBlockForRetVal->second;
1531
1532 auto ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1533 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1534 "Could not find output value!");
1535 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1536
1537 // If we did not find a block, we create one, and insert it into the
1538 // overall function and record it.
1539 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1540 ReturnBB->getParent());
1541 PhiBlockForRetVal->second = PHIBlock;
1542
1543 // We find the predecessors of the return block in the newly created outlined
1544 // function in order to point them to the new PHIBlock rather than the already
1545 // existing return block.
1546 SmallVector<BranchInst *, 2> BranchesToChange;
1547 for (BasicBlock *Pred : predecessors(ReturnBB))
1548 BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
1549
1550 // Now we mark the branch instructions found, and change the references of the
1551 // return block to the newly created PHIBlock.
1552 for (BranchInst *BI : BranchesToChange)
1553 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
1554 if (BI->getSuccessor(Succ) != ReturnBB)
1555 continue;
1556 BI->setSuccessor(Succ, PHIBlock);
1557 }
1558
1559 BranchInst::Create(ReturnBB, PHIBlock);
1560
1561 return PhiBlockForRetVal->second;
1562}
1563
1564/// For the function call now representing the \p Region, find the passed value
1565/// to that call that represents Argument \p A at the call location if the
1566/// call has already been replaced with a call to the overall, aggregate
1567/// function.
1568///
1569/// \param A - The Argument to get the passed value for.
1570/// \param Region - The extracted Region corresponding to the outlined function.
1571/// \returns The Value representing \p A at the call site.
1572static Value *
1574 const OutlinableRegion &Region) {
1575 // If we don't need to adjust the argument number at all (since the call
1576 // has already been replaced by a call to the overall outlined function)
1577 // we can just get the specified argument.
1578 return Region.Call->getArgOperand(A->getArgNo());
1579}
1580
1581/// For the function call now representing the \p Region, find the passed value
1582/// to that call that represents Argument \p A at the call location if the
1583/// call has only been replaced by the call to the aggregate function.
1584///
1585/// \param A - The Argument to get the passed value for.
1586/// \param Region - The extracted Region corresponding to the outlined function.
1587/// \returns The Value representing \p A at the call site.
1588static Value *
1590 const OutlinableRegion &Region) {
1591 unsigned ArgNum = A->getArgNo();
1592
1593 // If it is a constant, we can look at our mapping from when we created
1594 // the outputs to figure out what the constant value is.
1595 if (auto It = Region.AggArgToConstant.find(ArgNum);
1596 It != Region.AggArgToConstant.end())
1597 return It->second;
1598
1599 // If it is not a constant, and we are not looking at the overall function, we
1600 // need to adjust which argument we are looking at.
1601 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1602 return Region.Call->getArgOperand(ArgNum);
1603}
1604
1605/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1606///
1607/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1608/// \param Region [in] - The OutlinableRegion containing \p PN.
1609/// \param OutputMappings [in] - The mapping of output values from outlined
1610/// region to their original values.
1611/// \param CanonNums [out] - The canonical numbering for the incoming values to
1612/// \p PN paired with their incoming block.
1613/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1614/// of \p Region rather than the overall function's call.
1617 const DenseMap<Value *, Value *> &OutputMappings,
1618 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1619 bool ReplacedWithOutlinedCall = true) {
1620 // Iterate over the incoming values.
1621 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1622 Value *IVal = PN->getIncomingValue(Idx);
1623 BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1624 // If we have an argument as incoming value, we need to grab the passed
1625 // value from the call itself.
1626 if (Argument *A = dyn_cast<Argument>(IVal)) {
1627 if (ReplacedWithOutlinedCall)
1629 else
1631 }
1632
1633 // Get the original value if it has been replaced by an output value.
1634 IVal = findOutputMapping(OutputMappings, IVal);
1635
1636 // Find and add the canonical number for the incoming value.
1637 std::optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1638 assert(GVN && "No GVN for incoming value");
1639 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1640 assert(CanonNum && "No Canonical Number for GVN");
1641 CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1642 }
1643}
1644
1645/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1646/// in order to condense the number of instructions added to the outlined
1647/// function.
1648///
1649/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1650/// \param Region [in] - The OutlinableRegion containing \p PN.
1651/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1652/// \p PN in.
1653/// \param OutputMappings [in] - The mapping of output values from outlined
1654/// region to their original values.
1655/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1656/// matched.
1657/// \return the newly found or created PHINode in \p OverallPhiBlock.
1658static PHINode*
1660 BasicBlock *OverallPhiBlock,
1661 const DenseMap<Value *, Value *> &OutputMappings,
1662 DenseSet<PHINode *> &UsedPHIs) {
1663 OutlinableGroup &Group = *Region.Parent;
1664
1665
1666 // A list of the canonical numbering assigned to each incoming value, paired
1667 // with the incoming block for the PHINode passed into this function.
1669
1670 // We have to use the extracted function since we have merged this region into
1671 // the overall function yet. We make sure to reassign the argument numbering
1672 // since it is possible that the argument ordering is different between the
1673 // functions.
1674 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1675 /* ReplacedWithOutlinedCall = */ false);
1676
1677 OutlinableRegion *FirstRegion = Group.Regions[0];
1678
1679 // A list of the canonical numbering assigned to each incoming value, paired
1680 // with the incoming block for the PHINode that we are currently comparing
1681 // the passed PHINode to.
1683
1684 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1685 // the uses of the PHINode we are searching for, with the found PHINode.
1686 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1687 // If this PHINode has already been matched to another PHINode to be merged,
1688 // we skip it.
1689 if (UsedPHIs.contains(&CurrPN))
1690 continue;
1691
1692 CurrentCanonNums.clear();
1693 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1694 /* ReplacedWithOutlinedCall = */ true);
1695
1696 // If the list of incoming values is not the same length, then they cannot
1697 // match since there is not an analogue for each incoming value.
1698 if (PNCanonNums.size() != CurrentCanonNums.size())
1699 continue;
1700
1701 bool FoundMatch = true;
1702
1703 // We compare the canonical value for each incoming value in the passed
1704 // in PHINode to one already present in the outlined region. If the
1705 // incoming values do not match, then the PHINodes do not match.
1706
1707 // We also check to make sure that the incoming block matches as well by
1708 // finding the corresponding incoming block in the combined outlined region
1709 // for the current outlined region.
1710 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1711 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1712 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1713 if (ToCompareTo.first != ToAdd.first) {
1714 FoundMatch = false;
1715 break;
1716 }
1717
1718 BasicBlock *CorrespondingBlock =
1719 Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1720 assert(CorrespondingBlock && "Found block is nullptr");
1721 if (CorrespondingBlock != ToCompareTo.second) {
1722 FoundMatch = false;
1723 break;
1724 }
1725 }
1726
1727 // If all incoming values and branches matched, then we can merge
1728 // into the found PHINode.
1729 if (FoundMatch) {
1730 UsedPHIs.insert(&CurrPN);
1731 return &CurrPN;
1732 }
1733 }
1734
1735 // If we've made it here, it means we weren't able to replace the PHINode, so
1736 // we must insert it ourselves.
1737 PHINode *NewPN = cast<PHINode>(PN.clone());
1738 NewPN->insertBefore(OverallPhiBlock->begin());
1739 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1740 Idx++) {
1741 Value *IncomingVal = NewPN->getIncomingValue(Idx);
1742 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1743
1744 // Find corresponding basic block in the overall function for the incoming
1745 // block.
1746 BasicBlock *BlockToUse =
1747 Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1748 NewPN->setIncomingBlock(Idx, BlockToUse);
1749
1750 // If we have an argument we make sure we replace using the argument from
1751 // the correct function.
1752 if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1753 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1754 NewPN->setIncomingValue(Idx, Val);
1755 continue;
1756 }
1757
1758 // Find the corresponding value in the overall function.
1759 IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1760 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1761 assert(Val && "Value is nullptr?");
1763 FirstRegion->RemappedArguments.find(Val);
1764 if (RemappedIt != FirstRegion->RemappedArguments.end())
1765 Val = RemappedIt->second;
1766 NewPN->setIncomingValue(Idx, Val);
1767 }
1768 return NewPN;
1769}
1770
1771// Within an extracted function, replace the argument uses of the extracted
1772// region with the arguments of the function for an OutlinableGroup.
1773//
1774/// \param [in] Region - The region of extracted code to be changed.
1775/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1776/// region.
1777/// \param [in] FirstFunction - A flag to indicate whether we are using this
1778/// function to define the overall outlined function for all the regions, or
1779/// if we are operating on one of the following regions.
1780static void
1783 const DenseMap<Value *, Value *> &OutputMappings,
1784 bool FirstFunction = false) {
1785 OutlinableGroup &Group = *Region.Parent;
1786 assert(Region.ExtractedFunction && "Region has no extracted function?");
1787
1788 Function *DominatingFunction = Region.ExtractedFunction;
1789 if (FirstFunction)
1790 DominatingFunction = Group.OutlinedFunction;
1791 DominatorTree DT(*DominatingFunction);
1792 DenseSet<PHINode *> UsedPHIs;
1793
1794 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1795 ArgIdx++) {
1796 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1797 "No mapping from extracted to outlined?");
1798 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1799 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1800 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1801 // The argument is an input, so we can simply replace it with the overall
1802 // argument value
1803 if (ArgIdx < Region.NumExtractedInputs) {
1804 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1805 << *Region.ExtractedFunction << " with " << *AggArg
1806 << " in function " << *Group.OutlinedFunction << "\n");
1807 Arg->replaceAllUsesWith(AggArg);
1808 Value *V = Region.Call->getArgOperand(ArgIdx);
1809 Region.RemappedArguments.insert(std::make_pair(V, AggArg));
1810 continue;
1811 }
1812
1813 // If we are replacing an output, we place the store value in its own
1814 // block inside the overall function before replacing the use of the output
1815 // in the function.
1816 assert(Arg->hasOneUse() && "Output argument can only have one use");
1817 User *InstAsUser = Arg->user_back();
1818 assert(InstAsUser && "User is nullptr!");
1819
1820 Instruction *I = cast<Instruction>(InstAsUser);
1821 BasicBlock *BB = I->getParent();
1822 SmallVector<BasicBlock *, 4> Descendants;
1823 DT.getDescendants(BB, Descendants);
1824 bool EdgeAdded = false;
1825 if (Descendants.size() == 0) {
1826 EdgeAdded = true;
1827 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1828 DT.getDescendants(BB, Descendants);
1829 }
1830
1831 // Iterate over the following blocks, looking for return instructions,
1832 // if we find one, find the corresponding output block for the return value
1833 // and move our store instruction there.
1834 for (BasicBlock *DescendBB : Descendants) {
1835 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1836 if (!RI)
1837 continue;
1838 Value *RetVal = RI->getReturnValue();
1839 auto VBBIt = OutputBBs.find(RetVal);
1840 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1841
1842 // If this is storing a PHINode, we must make sure it is included in the
1843 // overall function.
1844 StoreInst *SI = cast<StoreInst>(I);
1845
1846 Value *ValueOperand = SI->getValueOperand();
1847
1848 StoreInst *NewI = cast<StoreInst>(I->clone());
1850 BasicBlock *OutputBB = VBBIt->second;
1851 NewI->insertInto(OutputBB, OutputBB->end());
1852 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1853 << *OutputBB << "\n");
1854
1855 // If this is storing a PHINode, we must make sure it is included in the
1856 // overall function.
1857 if (!isa<PHINode>(ValueOperand) ||
1858 Region.Candidate->getGVN(ValueOperand).has_value()) {
1859 if (FirstFunction)
1860 continue;
1861 Value *CorrVal =
1862 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1863 assert(CorrVal && "Value is nullptr?");
1864 NewI->setOperand(0, CorrVal);
1865 continue;
1866 }
1867 PHINode *PN = cast<PHINode>(SI->getValueOperand());
1868 // If it has a value, it was not split by the code extractor, which
1869 // is what we are looking for.
1870 if (Region.Candidate->getGVN(PN))
1871 continue;
1872
1873 // We record the parent block for the PHINode in the Region so that
1874 // we can exclude it from checks later on.
1875 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1876
1877 // If this is the first function, we do not need to worry about mergiing
1878 // this with any other block in the overall outlined function, so we can
1879 // just continue.
1880 if (FirstFunction) {
1881 BasicBlock *PHIBlock = PN->getParent();
1882 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1883 continue;
1884 }
1885
1886 // We look for the aggregate block that contains the PHINodes leading into
1887 // this exit path. If we can't find one, we create one.
1888 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1889
1890 // For our PHINode, we find the combined canonical numbering, and
1891 // attempt to find a matching PHINode in the overall PHIBlock. If we
1892 // cannot, we copy the PHINode and move it into this new block.
1893 PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1894 OutputMappings, UsedPHIs);
1895 NewI->setOperand(0, NewPN);
1896 }
1897
1898 // If we added an edge for basic blocks without a predecessor, we remove it
1899 // here.
1900 if (EdgeAdded)
1901 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1902 I->eraseFromParent();
1903
1904 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1905 << *Region.ExtractedFunction << " with " << *AggArg
1906 << " in function " << *Group.OutlinedFunction << "\n");
1907 Arg->replaceAllUsesWith(AggArg);
1908 }
1909}
1910
1911/// Within an extracted function, replace the constants that need to be lifted
1912/// into arguments with the actual argument.
1913///
1914/// \param Region [in] - The region of extracted code to be changed.
1916 OutlinableGroup &Group = *Region.Parent;
1917 Function *OutlinedFunction = Group.OutlinedFunction;
1918 ValueToValueMapTy VMap;
1919
1920 // Iterate over the constants that need to be elevated into arguments
1921 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1922 unsigned AggArgIdx = Const.first;
1923 assert(OutlinedFunction && "Overall Function is not defined?");
1924 Constant *CST = Const.second;
1925 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1926 // Identify the argument it will be elevated to, and replace instances of
1927 // that constant in the function.
1928 VMap[CST] = Arg;
1929 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1930 << " in function " << *OutlinedFunction << " with "
1931 << *Arg << '\n');
1932 }
1933
1934 RemapFunction(*OutlinedFunction, VMap,
1936}
1937
1938/// It is possible that there is a basic block that already performs the same
1939/// stores. This returns a duplicate block, if it exists
1940///
1941/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1942/// \param OutputStoreBBs [in] The existing output blocks.
1943/// \returns an optional value with the number output block if there is a match.
1944std::optional<unsigned> findDuplicateOutputBlock(
1946 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1947
1948 bool Mismatch = false;
1949 unsigned MatchingNum = 0;
1950 // We compare the new set output blocks to the other sets of output blocks.
1951 // If they are the same number, and have identical instructions, they are
1952 // considered to be the same.
1953 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1954 Mismatch = false;
1955 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1957 OutputBBs.find(VToB.first);
1958 if (OutputBBIt == OutputBBs.end()) {
1959 Mismatch = true;
1960 break;
1961 }
1962
1963 BasicBlock *CompBB = VToB.second;
1964 BasicBlock *OutputBB = OutputBBIt->second;
1965 if (CompBB->size() - 1 != OutputBB->size()) {
1966 Mismatch = true;
1967 break;
1968 }
1969
1970 BasicBlock::iterator NIt = OutputBB->begin();
1971 for (Instruction &I : *CompBB) {
1972 if (isa<BranchInst>(&I))
1973 continue;
1974
1975 if (!I.isIdenticalTo(&(*NIt))) {
1976 Mismatch = true;
1977 break;
1978 }
1979
1980 NIt++;
1981 }
1982 }
1983
1984 if (!Mismatch)
1985 return MatchingNum;
1986
1987 MatchingNum++;
1988 }
1989
1990 return std::nullopt;
1991}
1992
1993/// Remove empty output blocks from the outlined region.
1994///
1995/// \param BlocksToPrune - Mapping of return values output blocks for the \p
1996/// Region.
1997/// \param Region - The OutlinableRegion we are analyzing.
1998static bool
2001 bool AllRemoved = true;
2002 Value *RetValueForBB;
2003 BasicBlock *NewBB;
2005 // Iterate over the output blocks created in the outlined section.
2006 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
2007 RetValueForBB = VtoBB.first;
2008 NewBB = VtoBB.second;
2009
2010 // If there are no instructions, we remove it from the module, and also
2011 // mark the value for removal from the return value to output block mapping.
2012 if (NewBB->size() == 0) {
2013 NewBB->eraseFromParent();
2014 ToRemove.push_back(RetValueForBB);
2015 continue;
2016 }
2017
2018 // Mark that we could not remove all the blocks since they were not all
2019 // empty.
2020 AllRemoved = false;
2021 }
2022
2023 // Remove the return value from the mapping.
2024 for (Value *V : ToRemove)
2025 BlocksToPrune.erase(V);
2026
2027 // Mark the region as having the no output scheme.
2028 if (AllRemoved)
2029 Region.OutputBlockNum = -1;
2030
2031 return AllRemoved;
2032}
2033
2034/// For the outlined section, move needed the StoreInsts for the output
2035/// registers into their own block. Then, determine if there is a duplicate
2036/// output block already created.
2037///
2038/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2039/// \param [in] Region - The OutlinableRegion that is being analyzed.
2040/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2041/// placed in.
2042/// \param [in] EndBBs - the final blocks of the extracted function.
2043/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2044/// been replaced by a new output value.
2045/// \param [in,out] OutputStoreBBs - The existing output blocks.
2050 const DenseMap<Value *, Value *> &OutputMappings,
2051 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2052 // If none of the output blocks have any instructions, this means that we do
2053 // not have to determine if it matches any of the other output schemes, and we
2054 // don't have to do anything else.
2055 if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
2056 return;
2057
2058 // Determine is there is a duplicate set of blocks.
2059 std::optional<unsigned> MatchingBB =
2060 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2061
2062 // If there is, we remove the new output blocks. If it does not,
2063 // we add it to our list of sets of output blocks.
2064 if (MatchingBB) {
2065 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2066 << Region.ExtractedFunction << " to " << *MatchingBB);
2067
2068 Region.OutputBlockNum = *MatchingBB;
2069 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2070 VtoBB.second->eraseFromParent();
2071 return;
2072 }
2073
2074 Region.OutputBlockNum = OutputStoreBBs.size();
2075
2076 Value *RetValueForBB;
2077 BasicBlock *NewBB;
2078 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2079 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2080 RetValueForBB = VtoBB.first;
2081 NewBB = VtoBB.second;
2083 EndBBs.find(RetValueForBB);
2084 LLVM_DEBUG(dbgs() << "Create output block for region in"
2085 << Region.ExtractedFunction << " to "
2086 << *NewBB);
2087 BranchInst::Create(VBBIt->second, NewBB);
2088 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2089 }
2090}
2091
2092/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2093/// before creating a basic block for each \p NewMap, and inserting into the new
2094/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2095///
2096/// \param OldMap [in] - The mapping to base the new mapping off of.
2097/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2098/// \param ParentFunc [in] - The function to put the new basic block in.
2099/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2100/// by an index value.
2103 Function *ParentFunc, Twine BaseName) {
2104 unsigned Idx = 0;
2105 std::vector<Value *> SortedKeys;
2106
2107 getSortedConstantKeys(SortedKeys, OldMap);
2108
2109 for (Value *RetVal : SortedKeys) {
2111 ParentFunc->getContext(),
2112 Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
2113 ParentFunc);
2114 NewMap.insert(std::make_pair(RetVal, NewBB));
2115 }
2116}
2117
2118/// Create the switch statement for outlined function to differentiate between
2119/// all the output blocks.
2120///
2121/// For the outlined section, determine if an outlined block already exists that
2122/// matches the needed stores for the extracted section.
2123/// \param [in] M - The module we are outlining from.
2124/// \param [in] OG - The group of regions to be outlined.
2125/// \param [in] EndBBs - The final blocks of the extracted function.
2126/// \param [in,out] OutputStoreBBs - The existing output blocks.
2129 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2130 // We only need the switch statement if there is more than one store
2131 // combination, or there is more than one set of output blocks. The first
2132 // will occur when we store different sets of values for two different
2133 // regions. The second will occur when we have two outputs that are combined
2134 // in a PHINode outside of the region in one outlined instance, and are used
2135 // seaparately in another. This will create the same set of OutputGVNs, but
2136 // will generate two different output schemes.
2137 if (OG.OutputGVNCombinations.size() > 1) {
2138 Function *AggFunc = OG.OutlinedFunction;
2139 // Create a final block for each different return block.
2141 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2142
2143 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2144 std::pair<Value *, BasicBlock *> &OutputBlock =
2145 *OG.EndBBs.find(RetBlockPair.first);
2146 BasicBlock *ReturnBlock = RetBlockPair.second;
2147 BasicBlock *EndBB = OutputBlock.second;
2148 Instruction *Term = EndBB->getTerminator();
2149 // Move the return value to the final block instead of the original exit
2150 // stub.
2151 Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2152 // Put the switch statement in the old end basic block for the function
2153 // with a fall through to the new return block.
2154 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2155 << OutputStoreBBs.size() << "\n");
2156 SwitchInst *SwitchI =
2157 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2158 ReturnBlock, OutputStoreBBs.size(), EndBB);
2159
2160 unsigned Idx = 0;
2161 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2163 OutputStoreBB.find(OutputBlock.first);
2164
2165 if (OSBBIt == OutputStoreBB.end())
2166 continue;
2167
2168 BasicBlock *BB = OSBBIt->second;
2169 SwitchI->addCase(
2170 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2171 Term = BB->getTerminator();
2172 Term->setSuccessor(0, ReturnBlock);
2173 Idx++;
2174 }
2175 }
2176 return;
2177 }
2178
2179 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2180
2181 // If there needs to be stores, move them from the output blocks to their
2182 // corresponding ending block. We do not check that the OutputGVNCombinations
2183 // is equal to 1 here since that could just been the case where there are 0
2184 // outputs. Instead, we check whether there is more than one set of output
2185 // blocks since this is the only case where we would have to move the
2186 // stores, and erase the extraneous blocks.
2187 if (OutputStoreBBs.size() == 1) {
2188 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2189 << *OG.OutlinedFunction << "\n");
2190 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2191 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2193 EndBBs.find(VBPair.first);
2194 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2195 BasicBlock *EndBB = EndBBIt->second;
2196 BasicBlock *OutputBB = VBPair.second;
2197 Instruction *Term = OutputBB->getTerminator();
2198 Term->eraseFromParent();
2199 Term = EndBB->getTerminator();
2200 moveBBContents(*OutputBB, *EndBB);
2201 Term->moveBefore(*EndBB, EndBB->end());
2202 OutputBB->eraseFromParent();
2203 }
2204 }
2205}
2206
2207/// Fill the new function that will serve as the replacement function for all of
2208/// the extracted regions of a certain structure from the first region in the
2209/// list of regions. Replace this first region's extracted function with the
2210/// new overall function.
2211///
2212/// \param [in] M - The module we are outlining from.
2213/// \param [in] CurrentGroup - The group of regions to be outlined.
2214/// \param [in,out] OutputStoreBBs - The output blocks for each different
2215/// set of stores needed for the different functions.
2216/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
2217/// once outlining is complete.
2218/// \param [in] OutputMappings - Extracted functions to erase from module
2219/// once outlining is complete.
2221 Module &M, OutlinableGroup &CurrentGroup,
2222 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2223 std::vector<Function *> &FuncsToRemove,
2224 const DenseMap<Value *, Value *> &OutputMappings) {
2225 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2226
2227 // Move first extracted function's instructions into new function.
2228 LLVM_DEBUG(dbgs() << "Move instructions from "
2229 << *CurrentOS->ExtractedFunction << " to instruction "
2230 << *CurrentGroup.OutlinedFunction << "\n");
2232 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2233
2234 // Transfer the attributes from the function to the new function.
2235 for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
2236 CurrentGroup.OutlinedFunction->addFnAttr(A);
2237
2238 // Create a new set of output blocks for the first extracted function.
2240 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2241 CurrentGroup.OutlinedFunction, "output_block_0");
2242 CurrentOS->OutputBlockNum = 0;
2243
2244 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2245 replaceConstants(*CurrentOS);
2246
2247 // We first identify if any output blocks are empty, if they are we remove
2248 // them. We then create a branch instruction to the basic block to the return
2249 // block for the function for each non empty output block.
2250 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2251 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2252 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2254 CurrentGroup.EndBBs.find(VToBB.first);
2255 BasicBlock *EndBB = VBBIt->second;
2256 BranchInst::Create(EndBB, VToBB.second);
2257 OutputStoreBBs.back().insert(VToBB);
2258 }
2259 }
2260
2261 // Replace the call to the extracted function with the outlined function.
2262 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2263
2264 // We only delete the extracted functions at the end since we may need to
2265 // reference instructions contained in them for mapping purposes.
2266 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2267}
2268
2269void IROutliner::deduplicateExtractedSections(
2270 Module &M, OutlinableGroup &CurrentGroup,
2271 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2272 createFunction(M, CurrentGroup, OutlinedFunctionNum);
2273
2274 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2275
2276 OutlinableRegion *CurrentOS;
2277
2278 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
2279 OutputMappings);
2280
2281 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2282 CurrentOS = CurrentGroup.Regions[Idx];
2284 *CurrentOS->ExtractedFunction);
2285
2286 // Create a set of BasicBlocks, one for each return block, to hold the
2287 // needed store instructions.
2290 CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
2291 "output_block_" + Twine(static_cast<unsigned>(Idx)));
2292 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2293 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2294 CurrentGroup.EndBBs, OutputMappings,
2295 OutputStoreBBs);
2296
2297 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2298 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2299 }
2300
2301 // Create a switch statement to handle the different output schemes.
2302 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2303
2304 OutlinedFunctionNum++;
2305}
2306
2307/// Checks that the next instruction in the InstructionDataList matches the
2308/// next instruction in the module. If they do not, there could be the
2309/// possibility that extra code has been inserted, and we must ignore it.
2310///
2311/// \param ID - The IRInstructionData to check the next instruction of.
2312/// \returns true if the InstructionDataList and actual instruction match.
2314 // We check if there is a discrepancy between the InstructionDataList
2315 // and the actual next instruction in the module. If there is, it means
2316 // that an extra instruction was added, likely by the CodeExtractor.
2317
2318 // Since we do not have any similarity data about this particular
2319 // instruction, we cannot confidently outline it, and must discard this
2320 // candidate.
2321 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2322 Instruction *NextIDLInst = NextIDIt->Inst;
2323 Instruction *NextModuleInst = nullptr;
2324 if (!ID.Inst->isTerminator())
2325 NextModuleInst = ID.Inst->getNextNode();
2326 else if (NextIDLInst != nullptr)
2327 NextModuleInst =
2328 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2329
2330 if (NextIDLInst && NextIDLInst != NextModuleInst)
2331 return false;
2332
2333 return true;
2334}
2335
2336bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2337 const OutlinableRegion &Region) {
2338 IRSimilarityCandidate *IRSC = Region.Candidate;
2339 unsigned StartIdx = IRSC->getStartIdx();
2340 unsigned EndIdx = IRSC->getEndIdx();
2341
2342 // A check to make sure that we are not about to attempt to outline something
2343 // that has already been outlined.
2344 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2345 if (Outlined.contains(Idx))
2346 return false;
2347
2348 // We check if the recorded instruction matches the actual next instruction,
2349 // if it does not, we fix it in the InstructionDataList.
2350 if (!Region.Candidate->backInstruction()->isTerminator()) {
2351 Instruction *NewEndInst =
2352 Region.Candidate->backInstruction()->getNextNode();
2353 assert(NewEndInst && "Next instruction is a nullptr?");
2354 if (Region.Candidate->end()->Inst != NewEndInst) {
2355 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2356 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2357 IRInstructionData(*NewEndInst,
2358 InstructionClassifier.visit(*NewEndInst), *IDL);
2359
2360 // Insert the first IRInstructionData of the new region after the
2361 // last IRInstructionData of the IRSimilarityCandidate.
2362 IDL->insert(Region.Candidate->end(), *NewEndIRID);
2363 }
2364 }
2365
2366 return none_of(*IRSC, [this](IRInstructionData &ID) {
2368 return true;
2369
2370 return !this->InstructionClassifier.visit(ID.Inst);
2371 });
2372}
2373
2374void IROutliner::pruneIncompatibleRegions(
2375 std::vector<IRSimilarityCandidate> &CandidateVec,
2376 OutlinableGroup &CurrentGroup) {
2377 bool PreviouslyOutlined;
2378
2379 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2380 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2381 const IRSimilarityCandidate &RHS) {
2382 return LHS.getStartIdx() < RHS.getStartIdx();
2383 });
2384
2385 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2386 // Since outlining a call and a branch instruction will be the same as only
2387 // outlinining a call instruction, we ignore it as a space saving.
2388 if (FirstCandidate.getLength() == 2) {
2389 if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2390 isa<BranchInst>(FirstCandidate.back()->Inst))
2391 return;
2392 }
2393
2394 unsigned CurrentEndIdx = 0;
2395 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2396 PreviouslyOutlined = false;
2397 unsigned StartIdx = IRSC.getStartIdx();
2398 unsigned EndIdx = IRSC.getEndIdx();
2399 const Function &FnForCurrCand = *IRSC.getFunction();
2400
2401 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2402 if (Outlined.contains(Idx)) {
2403 PreviouslyOutlined = true;
2404 break;
2405 }
2406
2407 if (PreviouslyOutlined)
2408 continue;
2409
2410 // Check over the instructions, and if the basic block has its address
2411 // taken for use somewhere else, we do not outline that block.
2412 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2413 return ID.Inst->getParent()->hasAddressTaken();
2414 });
2415
2416 if (BBHasAddressTaken)
2417 continue;
2418
2419 if (FnForCurrCand.hasOptNone())
2420 continue;
2421
2422 if (FnForCurrCand.hasFnAttribute("nooutline")) {
2423 LLVM_DEBUG({
2424 dbgs() << "... Skipping function with nooutline attribute: "
2425 << FnForCurrCand.getName() << "\n";
2426 });
2427 continue;
2428 }
2429
2430 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2431 !OutlineFromLinkODRs)
2432 continue;
2433
2434 // Greedily prune out any regions that will overlap with already chosen
2435 // regions.
2436 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2437 continue;
2438
2439 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2441 return true;
2442
2443 return !this->InstructionClassifier.visit(ID.Inst);
2444 });
2445
2446 if (BadInst)
2447 continue;
2448
2449 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2450 OutlinableRegion(IRSC, CurrentGroup);
2451 CurrentGroup.Regions.push_back(OS);
2452
2453 CurrentEndIdx = EndIdx;
2454 }
2455}
2456
2458IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2459 InstructionCost RegionBenefit = 0;
2460 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2461 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2462 // We add the number of instructions in the region to the benefit as an
2463 // estimate as to how much will be removed.
2464 RegionBenefit += Region->getBenefit(TTI);
2465 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2466 << " saved instructions to overfall benefit.\n");
2467 }
2468
2469 return RegionBenefit;
2470}
2471
2472/// For the \p OutputCanon number passed in find the value represented by this
2473/// canonical number. If it is from a PHINode, we pick the first incoming
2474/// value and return that Value instead.
2475///
2476/// \param Region - The OutlinableRegion to get the Value from.
2477/// \param OutputCanon - The canonical number to find the Value from.
2478/// \returns The Value represented by a canonical number \p OutputCanon in \p
2479/// Region.
2481 unsigned OutputCanon) {
2482 OutlinableGroup &CurrentGroup = *Region.Parent;
2483 // If the value is greater than the value in the tracker, we have a
2484 // PHINode and will instead use one of the incoming values to find the
2485 // type.
2486 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2487 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2488 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2489 "Could not find GVN set for PHINode number!");
2490 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2491 OutputCanon = *It->second.second.begin();
2492 }
2493 std::optional<unsigned> OGVN =
2494 Region.Candidate->fromCanonicalNum(OutputCanon);
2495 assert(OGVN && "Could not find GVN for Canonical Number?");
2496 std::optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2497 assert(OV && "Could not find value for GVN?");
2498 return *OV;
2499}
2500
2502IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2503 InstructionCost OverallCost = 0;
2504 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2505 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2506
2507 // Each output incurs a load after the call, so we add that to the cost.
2508 for (unsigned OutputCanon : Region->GVNStores) {
2509 Value *V = findOutputValueInRegion(*Region, OutputCanon);
2510 InstructionCost LoadCost =
2511 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2513
2514 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2515 << " instructions to cost for output of type "
2516 << *V->getType() << "\n");
2517 OverallCost += LoadCost;
2518 }
2519 }
2520
2521 return OverallCost;
2522}
2523
2524/// Find the extra instructions needed to handle any output values for the
2525/// region.
2526///
2527/// \param [in] M - The Module to outline from.
2528/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2529/// \param [in] TTI - The TargetTransformInfo used to collect information for
2530/// new instruction costs.
2531/// \returns the additional cost to handle the outputs.
2533 OutlinableGroup &CurrentGroup,
2535 InstructionCost OutputCost = 0;
2536 unsigned NumOutputBranches = 0;
2537
2538 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2539 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2540 DenseSet<BasicBlock *> CandidateBlocks;
2541 Candidate.getBasicBlocks(CandidateBlocks);
2542
2543 // Count the number of different output branches that point to blocks outside
2544 // of the region.
2545 DenseSet<BasicBlock *> FoundBlocks;
2546 for (IRInstructionData &ID : Candidate) {
2547 if (!isa<BranchInst>(ID.Inst))
2548 continue;
2549
2550 for (Value *V : ID.OperVals) {
2551 BasicBlock *BB = static_cast<BasicBlock *>(V);
2552 if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
2553 NumOutputBranches++;
2554 }
2555 }
2556
2557 CurrentGroup.BranchesToOutside = NumOutputBranches;
2558
2559 for (const ArrayRef<unsigned> &OutputUse :
2560 CurrentGroup.OutputGVNCombinations) {
2561 for (unsigned OutputCanon : OutputUse) {
2562 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2563 InstructionCost StoreCost =
2564 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2566
2567 // An instruction cost is added for each store set that needs to occur for
2568 // various output combinations inside the function, plus a branch to
2569 // return to the exit block.
2570 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2571 << " instructions to cost for output of type "
2572 << *V->getType() << "\n");
2573 OutputCost += StoreCost * NumOutputBranches;
2574 }
2575
2576 InstructionCost BranchCost =
2578 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2579 << " a branch instruction\n");
2580 OutputCost += BranchCost * NumOutputBranches;
2581 }
2582
2583 // If there is more than one output scheme, we must have a comparison and
2584 // branch for each different item in the switch statement.
2585 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2586 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2587 Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2590 InstructionCost BranchCost =
2592
2593 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2594 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2595
2596 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2597 << " instructions for each switch case for each different"
2598 << " output path in a function\n");
2599 OutputCost += TotalCost * NumOutputBranches;
2600 }
2601
2602 return OutputCost;
2603}
2604
2605void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2606 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2607 CurrentGroup.Benefit += RegionBenefit;
2608 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2609
2610 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2611 CurrentGroup.Cost += OutputReloadCost;
2612 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2613
2614 InstructionCost AverageRegionBenefit =
2615 RegionBenefit / CurrentGroup.Regions.size();
2616 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2617 unsigned NumRegions = CurrentGroup.Regions.size();
2619 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2620
2621 // We add one region to the cost once, to account for the instructions added
2622 // inside of the newly created function.
2623 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2624 << " instructions to cost for body of new function.\n");
2625 CurrentGroup.Cost += AverageRegionBenefit;
2626 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2627
2628 // For each argument, we must add an instruction for loading the argument
2629 // out of the register and into a value inside of the newly outlined function.
2630 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2631 << " instructions to cost for each argument in the new"
2632 << " function.\n");
2633 CurrentGroup.Cost +=
2634 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2635 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2636
2637 // Each argument needs to either be loaded into a register or onto the stack.
2638 // Some arguments will only be loaded into the stack once the argument
2639 // registers are filled.
2640 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2641 << " instructions to cost for each argument in the new"
2642 << " function " << NumRegions << " times for the "
2643 << "needed argument handling at the call site.\n");
2644 CurrentGroup.Cost +=
2645 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2646 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2647
2648 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2649 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2650}
2651
2652void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2653 ArrayRef<Value *> Outputs,
2654 LoadInst *LI) {
2655 // For and load instructions following the call
2656 Value *Operand = LI->getPointerOperand();
2657 std::optional<unsigned> OutputIdx;
2658 // Find if the operand it is an output register.
2659 for (unsigned ArgIdx = Region.NumExtractedInputs;
2660 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2661 if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2662 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2663 break;
2664 }
2665 }
2666
2667 // If we found an output register, place a mapping of the new value
2668 // to the original in the mapping.
2669 if (!OutputIdx)
2670 return;
2671
2672 auto It = OutputMappings.find(Outputs[*OutputIdx]);
2673 if (It == OutputMappings.end()) {
2674 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2675 << *Outputs[*OutputIdx] << "\n");
2676 OutputMappings.insert(std::make_pair(LI, Outputs[*OutputIdx]));
2677 } else {
2678 Value *Orig = It->second;
2679 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2680 << *Outputs[*OutputIdx] << "\n");
2681 OutputMappings.insert(std::make_pair(LI, Orig));
2682 }
2683}
2684
2685bool IROutliner::extractSection(OutlinableRegion &Region) {
2686 SetVector<Value *> ArgInputs, Outputs;
2687 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2688 BasicBlock *InitialStart = Region.StartBB;
2689 Function *OrigF = Region.StartBB->getParent();
2690 CodeExtractorAnalysisCache CEAC(*OrigF);
2691 Region.ExtractedFunction =
2692 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2693
2694 // If the extraction was successful, find the BasicBlock, and reassign the
2695 // OutlinableRegion blocks
2696 if (!Region.ExtractedFunction) {
2697 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2698 << "\n");
2699 Region.reattachCandidate();
2700 return false;
2701 }
2702
2703 // Get the block containing the called branch, and reassign the blocks as
2704 // necessary. If the original block still exists, it is because we ended on
2705 // a branch instruction, and so we move the contents into the block before
2706 // and assign the previous block correctly.
2707 User *InstAsUser = Region.ExtractedFunction->user_back();
2708 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2709 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2710 assert(Region.PrevBB && "PrevBB is nullptr?");
2711 if (Region.PrevBB == InitialStart) {
2712 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2713 Instruction *BI = NewPrev->getTerminator();
2714 BI->eraseFromParent();
2715 moveBBContents(*InitialStart, *NewPrev);
2716 Region.PrevBB = NewPrev;
2717 InitialStart->eraseFromParent();
2718 }
2719
2720 Region.StartBB = RewrittenBB;
2721 Region.EndBB = RewrittenBB;
2722
2723 // The sequences of outlinable regions has now changed. We must fix the
2724 // IRInstructionDataList for consistency. Although they may not be illegal
2725 // instructions, they should not be compared with anything else as they
2726 // should not be outlined in this round. So marking these as illegal is
2727 // allowed.
2728 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2729 Instruction *BeginRewritten = &*RewrittenBB->begin();
2730 Instruction *EndRewritten = &*RewrittenBB->begin();
2731 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2732 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2733 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2734 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2735
2736 // Insert the first IRInstructionData of the new region in front of the
2737 // first IRInstructionData of the IRSimilarityCandidate.
2738 IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2739 // Insert the first IRInstructionData of the new region after the
2740 // last IRInstructionData of the IRSimilarityCandidate.
2741 IDL->insert(Region.Candidate->end(), *Region.NewBack);
2742 // Remove the IRInstructionData from the IRSimilarityCandidate.
2743 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2744
2745 assert(RewrittenBB != nullptr &&
2746 "Could not find a predecessor after extraction!");
2747
2748 // Iterate over the new set of instructions to find the new call
2749 // instruction.
2750 for (Instruction &I : *RewrittenBB)
2751 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2752 if (Region.ExtractedFunction == CI->getCalledFunction())
2753 Region.Call = CI;
2754 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2755 updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2756 Region.reattachCandidate();
2757 return true;
2758}
2759
2760unsigned IROutliner::doOutline(Module &M) {
2761 // Find the possible similarity sections.
2762 InstructionClassifier.EnableBranches = !DisableBranches;
2763 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2764 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2765
2766 IRSimilarityIdentifier &Identifier = getIRSI(M);
2767 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2768
2769 // Sort them by size of extracted sections
2770 unsigned OutlinedFunctionNum = 0;
2771 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2772 // to sort them by the potential number of instructions to be outlined
2773 if (SimilarityCandidates.size() > 1)
2774 llvm::stable_sort(SimilarityCandidates,
2775 [](const std::vector<IRSimilarityCandidate> &LHS,
2776 const std::vector<IRSimilarityCandidate> &RHS) {
2777 return LHS[0].getLength() * LHS.size() >
2778 RHS[0].getLength() * RHS.size();
2779 });
2780 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2781 // each of the following for loops to avoid making an allocator.
2782 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2783
2784 DenseSet<unsigned> NotSame;
2785 std::vector<OutlinableGroup *> NegativeCostGroups;
2786 std::vector<OutlinableRegion *> OutlinedRegions;
2787 // Iterate over the possible sets of similarity.
2788 unsigned PotentialGroupIdx = 0;
2789 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2790 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2791
2792 // Remove entries that were previously outlined
2793 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2794
2795 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2796 // trying to outlined since there is no compatible similar instance of this
2797 // code.
2798 if (CurrentGroup.Regions.size() < 2)
2799 continue;
2800
2801 // Determine if there are any values that are the same constant throughout
2802 // each section in the set.
2803 NotSame.clear();
2804 CurrentGroup.findSameConstants(NotSame);
2805
2806 if (CurrentGroup.IgnoreGroup)
2807 continue;
2808
2809 // Create a CodeExtractor for each outlinable region. Identify inputs and
2810 // outputs for each section using the code extractor and create the argument
2811 // types for the Aggregate Outlining Function.
2812 OutlinedRegions.clear();
2813 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2814 // Break the outlinable region out of its parent BasicBlock into its own
2815 // BasicBlocks (see function implementation).
2816 OS->splitCandidate();
2817
2818 // There's a chance that when the region is split, extra instructions are
2819 // added to the region. This makes the region no longer viable
2820 // to be split, so we ignore it for outlining.
2821 if (!OS->CandidateSplit)
2822 continue;
2823
2825 DenseSet<BasicBlock *> BlocksInRegion;
2826 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2827 OS->CE = new (ExtractorAllocator.Allocate())
2828 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2829 false, nullptr, "outlined");
2830 findAddInputsOutputs(M, *OS, NotSame);
2831 if (!OS->IgnoreRegion)
2832 OutlinedRegions.push_back(OS);
2833
2834 // We recombine the blocks together now that we have gathered all the
2835 // needed information.
2836 OS->reattachCandidate();
2837 }
2838
2839 CurrentGroup.Regions = std::move(OutlinedRegions);
2840
2841 if (CurrentGroup.Regions.empty())
2842 continue;
2843
2844 CurrentGroup.collectGVNStoreSets(M);
2845
2846 if (CostModel)
2847 findCostBenefit(M, CurrentGroup);
2848
2849 // If we are adhering to the cost model, skip those groups where the cost
2850 // outweighs the benefits.
2851 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2853 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2854 ORE.emit([&]() {
2855 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2856 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2857 C->frontInstruction());
2858 R << "did not outline "
2859 << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2860 << " regions due to estimated increase of "
2861 << ore::NV("InstructionIncrease",
2862 CurrentGroup.Cost - CurrentGroup.Benefit)
2863 << " instructions at locations ";
2864 interleave(
2865 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2866 [&R](OutlinableRegion *Region) {
2867 R << ore::NV(
2868 "DebugLoc",
2869 Region->Candidate->frontInstruction()->getDebugLoc());
2870 },
2871 [&R]() { R << " "; });
2872 return R;
2873 });
2874 continue;
2875 }
2876
2877 NegativeCostGroups.push_back(&CurrentGroup);
2878 }
2879
2880 ExtractorAllocator.DestroyAll();
2881
2882 if (NegativeCostGroups.size() > 1)
2883 stable_sort(NegativeCostGroups,
2884 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2885 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2886 });
2887
2888 std::vector<Function *> FuncsToRemove;
2889 for (OutlinableGroup *CG : NegativeCostGroups) {
2890 OutlinableGroup &CurrentGroup = *CG;
2891
2892 OutlinedRegions.clear();
2893 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2894 // We check whether our region is compatible with what has already been
2895 // outlined, and whether we need to ignore this item.
2896 if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2897 continue;
2898 OutlinedRegions.push_back(Region);
2899 }
2900
2901 if (OutlinedRegions.size() < 2)
2902 continue;
2903
2904 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2905 // we are still outlining enough regions to make up for the added cost.
2906 CurrentGroup.Regions = std::move(OutlinedRegions);
2907 if (CostModel) {
2908 CurrentGroup.Benefit = 0;
2909 CurrentGroup.Cost = 0;
2910 findCostBenefit(M, CurrentGroup);
2911 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2912 continue;
2913 }
2914 OutlinedRegions.clear();
2915 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2916 Region->splitCandidate();
2917 if (!Region->CandidateSplit)
2918 continue;
2919 OutlinedRegions.push_back(Region);
2920 }
2921
2922 CurrentGroup.Regions = std::move(OutlinedRegions);
2923 if (CurrentGroup.Regions.size() < 2) {
2924 for (OutlinableRegion *R : CurrentGroup.Regions)
2925 R->reattachCandidate();
2926 continue;
2927 }
2928
2929 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2930 << " and benefit " << CurrentGroup.Benefit << "\n");
2931
2932 // Create functions out of all the sections, and mark them as outlined.
2933 OutlinedRegions.clear();
2934 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2936 DenseSet<BasicBlock *> BlocksInRegion;
2937 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2938 OS->CE = new (ExtractorAllocator.Allocate())
2939 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2940 false, nullptr, "outlined");
2941 bool FunctionOutlined = extractSection(*OS);
2942 if (FunctionOutlined) {
2943 unsigned StartIdx = OS->Candidate->getStartIdx();
2944 unsigned EndIdx = OS->Candidate->getEndIdx();
2945 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2946 Outlined.insert(Idx);
2947
2948 OutlinedRegions.push_back(OS);
2949 }
2950 }
2951
2952 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2953 << " with benefit " << CurrentGroup.Benefit
2954 << " and cost " << CurrentGroup.Cost << "\n");
2955
2956 CurrentGroup.Regions = std::move(OutlinedRegions);
2957
2958 if (CurrentGroup.Regions.empty())
2959 continue;
2960
2962 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2963 ORE.emit([&]() {
2964 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2965 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2966 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2967 << " regions with decrease of "
2968 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2969 << " instructions at locations ";
2970 interleave(
2971 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2972 [&R](OutlinableRegion *Region) {
2973 R << ore::NV("DebugLoc",
2974 Region->Candidate->frontInstruction()->getDebugLoc());
2975 },
2976 [&R]() { R << " "; });
2977 return R;
2978 });
2979
2980 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2981 OutlinedFunctionNum);
2982 }
2983
2984 for (Function *F : FuncsToRemove)
2985 F->eraseFromParent();
2986
2987 return OutlinedFunctionNum;
2988}
2989
2991 CostModel = !NoCostModel;
2992 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2993
2994 return doOutline(M) > 0;
2995}
2996
2998 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2999
3000 std::function<TargetTransformInfo &(Function &)> GTTI =
3001 [&FAM](Function &F) -> TargetTransformInfo & {
3003 };
3004
3005 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
3006 [&AM](Module &M) -> IRSimilarityIdentifier & {
3007 return AM.getResult<IRSimilarityAnalysis>(M);
3008 };
3009
3010 std::unique_ptr<OptimizationRemarkEmitter> ORE;
3011 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
3012 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
3013 ORE.reset(new OptimizationRemarkEmitter(&F));
3014 return *ORE;
3015 };
3016
3017 if (IROutliner(GTTI, GIRSI, GORE).run(M))
3018 return PreservedAnalyses::none();
3019 return PreservedAnalyses::all();
3020}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefAnalysis InstSet & ToRemove
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
#define DEBUG_TYPE
std::pair< ArgLocWithBBCanon, CanonList > PHINodeData
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
Definition: IROutliner.cpp:815
static std::optional< unsigned > getGVNForPHINode(OutlinableRegion &Region, PHINode *PN, DenseSet< BasicBlock * > &Blocks, unsigned AggArgIdx)
Create a special GVN for PHINodes that will be used outside of the region.
static Value * getPassedArgumentAndAdjustArgumentLocation(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
static void findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, const DenseMap< Value *, Value * > &OutputMappings, SmallVector< std::pair< unsigned, BasicBlock * > > &CanonNums, bool ReplacedWithOutlinedCall=true)
Find the canonical numbering for the incoming Values into the PHINode PN.
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
Definition: IROutliner.cpp:157
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
Definition: IROutliner.cpp:703
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
Definition: IROutliner.cpp:846
static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs, std::vector< Function * > &FuncsToRemove, const DenseMap< Value *, Value * > &OutputMappings)
Fill the new function that will serve as the replacement function for all of the extracted regions of...
static void findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
Definition: IROutliner.cpp:549
static PHINode * findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, const DenseMap< Value *, Value * > &OutputMappings, DenseSet< PHINode * > &UsedPHIs)
Find, or add PHINode PN to the combined PHINode Block OverallPHIBlock in order to condense the number...
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
static hash_code encodePHINodeData(PHINodeData &PND)
Encode PND as an integer for easy lookup based on the argument location, the parent BasicBlock canoni...
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, SmallPtrSet< BasicBlock *, 1 > &Exits, DenseSet< BasicBlock * > &BlocksInRegion)
Check if the V has any uses outside of the region other than PN.
static void analyzeExitPHIsForOutputUses(BasicBlock *CurrentExitFromRegion, SmallPtrSet< BasicBlock *, 1 > &PotentialExitsFromRegion, DenseSet< BasicBlock * > &RegionBlocks, SetVector< Value * > &Outputs, DenseSet< Value * > &OutputsReplacedByPHINode, DenseSet< Value * > &OutputsWithNonPhiUses)
Test whether CurrentExitFromRegion contains any PhiNodes that should be considered outputs.
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
Definition: IROutliner.cpp:166
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, BasicBlock *Replace, DenseSet< BasicBlock * > &Included)
Rewrite the BranchInsts in the incoming blocks to PHIBlock that are found in Included to branch to Ba...
Definition: IROutliner.cpp:221
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
Definition: IROutliner.cpp:917
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
Definition: IROutliner.cpp:789
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, const DenseMap< Value *, Value * > &OutputMappings, bool FirstFunction=false)
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
Definition: IROutliner.cpp:620
static Value * findOutputMapping(const DenseMap< Value *, Value * > OutputMappings, Value *Input)
Check the OutputMappings structure for value Input, if it exists it has been used as an output for ou...
Definition: IROutliner.cpp:530
static Value * findOutputValueInRegion(OutlinableRegion &Region, unsigned OutputCanon)
For the OutputCanon number passed in find the value represented by this canonical number.
static std::optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
Definition: IROutliner.cpp:463
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the constants that will need to be lifted into arguments as they are not the same in each instan...
Definition: IROutliner.cpp:761
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
std::pair< unsigned, unsigned > ArgLocWithBBCanon
static Value * getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
static BasicBlock * findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal)
Find or create a BasicBlock in the outlined function containing PhiBlocks for RetVal.
std::optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
This header defines various interfaces for pass management in LLVM.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
raw_pwrite_stream & OS
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM_ABI AttributeSet getFnAttrs() const
The function attributes are returned.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:646
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:354
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:459
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:555
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:475
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:437
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:235
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:131
size_t size() const
Definition: BasicBlock.h:480
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:463
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:662
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:47
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:87
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:264
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1833
This is an important base class in LLVM.
Definition: Constant.h:43
Debug location.
Subprogram description. Uses SubclassData1.
static DebugLoc getDropped()
Definition: DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
bool erase(const KeyT &Val)
Definition: DenseMap.h:319
iterator end()
Definition: DenseMap.h:87
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:168
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
Class to represent function types.
Definition: DerivedTypes.h:105
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.cpp:637
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:166
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:209
const BasicBlock & back() const
Definition: Function.h:860
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
size_t size() const
Definition: Function.h:856
bool hasOptNone() const
Do not optimize this function (-O0).
Definition: Function.h:700
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:665
size_t arg_size() const
Definition: Function.h:899
Argument * getArg(unsigned i) const
Definition: Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:521
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:60
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:199
bool run(Module &M)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:585
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:82
bool isTerminator() const
Definition: Instruction.h:315
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:510
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
An instruction for reading from memory.
Definition: Instructions.h:180
Value * getPointerOperand()
Definition: Instructions.h:259
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1565
LLVM_ABI void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition: Mangler.cpp:121
Root of the metadata hierarchy.
Definition: Metadata.h:63
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
iterator begin()
Definition: RegionInfo.h:558
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:362
iterator end()
Definition: RegionInfo.h:559
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
A vector that has set insertion semantics.
Definition: SetVector.h:59
ArrayRef< value_type > getArrayRef() const
Definition: SetVector.h:90
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:279
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:233
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
@ TCK_CodeSize
Instruction code size.
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
LLVM_ABI InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
User * user_back()
Definition: Value.h:412
LLVM_ABI bool isSwiftError() const
Return true if this value is a swifterror value.
Definition: Value.cpp:1119
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
size_type size() const
Definition: DenseSet.h:87
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:169
bool erase(const ValueT &V)
Definition: DenseSet.h:100
An opaque object representing a hash code.
Definition: Hashing.h:76
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:359
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
iterator erase(iterator I)
Remove a node by iterator; never deletes.
Definition: simple_ilist.h:202
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
Definition: simple_ilist.h:165
LLVM_ABI void mergeAttributesForOutlining(Function &Base, const Function &ToMerge)
Merges the functions attributes from ToMerge into function Base.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
hash_code hash_value(const FixedPointSemantics &Val)
Definition: APFixedPoint.h:137
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition: STLExtras.h:2212
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
Definition: IROutliner.cpp:43
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:39
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
Definition: IROutliner.cpp:47
@ Other
Any other memory.
void RemapFunction(Function &F, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the operands, metadata, arguments, and instructions of a function.
Definition: ValueMapper.h:334
auto predecessors(const MachineBasicBlock *BB)
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:595
LLVM_ABI void updateLoopMetadataDebugLocations(Instruction &I, function_ref< Metadata *(Metadata *)> Updater)
Update the debug locations contained within the MD_loop metadata attached to the instruction I,...
Definition: DebugInfo.cpp:401
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:469
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:73
std::optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
Definition: IROutliner.cpp:136
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
Definition: IROutliner.cpp:79
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
Definition: IROutliner.cpp:99
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
Definition: IROutliner.cpp:598
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
Definition: IROutliner.cpp:132
DenseMap< unsigned, std::pair< std::pair< unsigned, unsigned >, SmallVector< unsigned, 2 > > > PHINodeGVNToGVNs
Definition: IROutliner.cpp:124
unsigned PHINodeGVNTracker
Tracker counting backwards from the highest unsigned value possible to avoid conflicting with the GVN...
Definition: IROutliner.cpp:120
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
Definition: IROutliner.cpp:75
DenseMap< hash_code, unsigned > GVNsToPHINodeGVN
Definition: IROutliner.cpp:125
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
Definition: IROutliner.cpp:87
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
Definition: IROutliner.cpp:90
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Definition: IROutliner.cpp:115
Function * OutlinedFunction
The Function for the collective overall function.
Definition: IROutliner.cpp:83
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
Definition: IROutliner.cpp:103
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition: IROutliner.cpp:94
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
Definition: IROutliner.cpp:81
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
Definition: IROutliner.cpp:111
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
Definition: IROutliner.cpp:605
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
Definition: IROutliner.cpp:129
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
Definition: IROutliner.cpp:107
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This provides the utilities for hashing an Instruction to an unsigned integer.
Instruction * Inst
The source Instruction that is being wrapped.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition: IROutliner.h:123
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
Definition: IROutliner.cpp:486
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition: IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition: IROutliner.h:78
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition: IROutliner.h:130
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Definition: IROutliner.cpp:249
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
Definition: IROutliner.cpp:187
BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
Definition: IROutliner.cpp:199
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition: IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition: IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition: IROutliner.h:65
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition: IROutliner.h:93
Function * ExtractedFunction
The function for the extracted region.
Definition: IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition: IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition: IROutliner.h:140
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...
Definition: IROutliner.cpp:375