LLVM 22.0.0git
IRSimilarityIdentifier.cpp
Go to the documentation of this file.
1//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Implementation file for the IRSimilarityIdentifier for identifying
11// similarities in IR including the IRInstructionMapper.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
18#include "llvm/IR/Intrinsics.h"
19#include "llvm/IR/Operator.h"
20#include "llvm/IR/User.h"
23
24using namespace llvm;
25using namespace IRSimilarity;
26
27namespace llvm {
29 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
33
35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
37 cl::desc("disable outlining indirect calls."));
38
39static cl::opt<bool>
40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
43
45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46 cl::desc("Don't match or outline intrinsics"));
47} // namespace llvm
48
51 : Inst(&I), Legal(Legality), IDL(&IDList) {
53}
54
56 // We check for whether we have a comparison instruction. If it is, we
57 // find the "less than" version of the predicate for consistency for
58 // comparison instructions throught the program.
59 if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
61 if (Predicate != C->getPredicate())
62 RevisedPredicate = Predicate;
63 }
64
65 // Here we collect the operands and their types for determining whether
66 // the structure of the operand use matches between two different candidates.
67 for (Use &OI : Inst->operands()) {
68 if (isa<CmpInst>(Inst) && RevisedPredicate) {
69 // If we have a CmpInst where the predicate is reversed, it means the
70 // operands must be reversed as well.
71 OperVals.insert(OperVals.begin(), OI.get());
72 continue;
73 }
74
75 OperVals.push_back(OI.get());
76 }
77
78 // We capture the incoming BasicBlocks as values as well as the incoming
79 // Values in order to check for structural similarity.
80 if (PHINode *PN = dyn_cast<PHINode>(Inst))
81 llvm::append_range(OperVals, PN->blocks());
82}
83
85 : IDL(&IDList) {}
86
88 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
89 assert(isa<BranchInst>(Inst) && "Instruction must be branch");
90
91 BranchInst *BI = cast<BranchInst>(Inst);
93
94 BBNumIt = BasicBlockToInteger.find(BI->getParent());
95 assert(BBNumIt != BasicBlockToInteger.end() &&
96 "Could not find location for BasicBlock!");
97
98 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
99
100 for (Value *V : getBlockOperVals()) {
101 BasicBlock *Successor = cast<BasicBlock>(V);
102 BBNumIt = BasicBlockToInteger.find(Successor);
103 assert(BBNumIt != BasicBlockToInteger.end() &&
104 "Could not find number for BasicBlock!");
105 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
106
107 int Relative = OtherBlockNumber - CurrentBlockNumber;
109 }
110}
111
113 assert((isa<BranchInst>(Inst) ||
114 isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");
115
116 if (BranchInst *BI = dyn_cast<BranchInst>(Inst))
117 return ArrayRef<Value *>(
118 std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
119 OperVals.end()
120 );
121
122 if (PHINode *PN = dyn_cast<PHINode>(Inst))
123 return ArrayRef<Value *>(
124 std::next(OperVals.begin(), PN->getNumIncomingValues()),
125 OperVals.end()
126 );
127
128 return ArrayRef<Value *>();
129}
130
131void IRInstructionData::setCalleeName(bool MatchByName) {
132 CallInst *CI = dyn_cast<CallInst>(Inst);
133 assert(CI && "Instruction must be call");
134
135 CalleeName = "";
136 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
137 // To hash intrinsics, we use the opcode, and types like the other
138 // instructions, but also, the Intrinsic ID, and the Name of the
139 // intrinsic.
140 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
141 FunctionType *FT = II->getFunctionType();
142 // If there is an overloaded name, we have to use the complex version
143 // of getName to get the entire string.
144 if (Intrinsic::isOverloaded(IntrinsicID))
145 CalleeName =
146 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
147 // If there is not an overloaded name, we only need to use this version.
148 else
149 CalleeName = Intrinsic::getName(IntrinsicID).str();
150
151 return;
152 }
153
154 if (!CI->isIndirectCall() && MatchByName)
156}
157
159 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
160 assert(isa<PHINode>(Inst) && "Instruction must be phi node");
161
162 PHINode *PN = cast<PHINode>(Inst);
164
165 BBNumIt = BasicBlockToInteger.find(PN->getParent());
166 assert(BBNumIt != BasicBlockToInteger.end() &&
167 "Could not find location for BasicBlock!");
168
169 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
170
171 // Convert the incoming blocks of the PHINode to an integer value, based on
172 // the relative distances between the current block and the incoming block.
173 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
175 BBNumIt = BasicBlockToInteger.find(Incoming);
176 assert(BBNumIt != BasicBlockToInteger.end() &&
177 "Could not find number for BasicBlock!");
178 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
179
180 int Relative = OtherBlockNumber - CurrentBlockNumber;
182 }
183}
184
186 switch (CI->getPredicate()) {
195 return CI->getSwappedPredicate();
196 default:
197 return CI->getPredicate();
198 }
199}
200
202 assert(isa<CmpInst>(Inst) &&
203 "Can only get a predicate from a compare instruction");
204
206 return *RevisedPredicate;
207
208 return cast<CmpInst>(Inst)->getPredicate();
209}
210
212 assert(isa<CallInst>(Inst) &&
213 "Can only get a name from a call instruction");
214
215 assert(CalleeName && "CalleeName has not been set");
216
217 return *CalleeName;
218}
219
221 const IRInstructionData &B) {
222
223 if (!A.Legal || !B.Legal)
224 return false;
225
226 // Check if we are performing the same sort of operation on the same types
227 // but not on the same values.
228 if (!A.Inst->isSameOperationAs(B.Inst)) {
229 // If there is a predicate, this means that either there is a swapped
230 // predicate, or that the types are different, we want to make sure that
231 // the predicates are equivalent via swapping.
232 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
233
234 if (A.getPredicate() != B.getPredicate())
235 return false;
236
237 // If the predicates are the same via swap, make sure that the types are
238 // still the same.
239 auto ZippedTypes = zip(A.OperVals, B.OperVals);
240
241 return all_of(
242 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
243 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
244 });
245 }
246
247 return false;
248 }
249
250 // Since any GEP Instruction operands after the first operand cannot be
251 // defined by a register, we must make sure that the operands after the first
252 // are the same in the two instructions
253 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
254 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
255
256 // If the instructions do not have the same inbounds restrictions, we do
257 // not consider them the same.
258 if (GEP->isInBounds() != OtherGEP->isInBounds())
259 return false;
260
261 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
262
263 // We increment here since we do not care about the first instruction,
264 // we only care about the following operands since they must be the
265 // exact same to be considered similar.
266 return all_of(drop_begin(ZippedOperands),
267 [](std::tuple<llvm::Use &, llvm::Use &> R) {
268 return std::get<0>(R) == std::get<1>(R);
269 });
270 }
271
272 // If the instructions are functions calls, we make sure that the function
273 // name is the same. We already know that the types are since is
274 // isSameOperationAs is true.
275 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
276 if (A.getCalleeName() != B.getCalleeName())
277 return false;
278 }
279
280 if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
281 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
282 return false;
283
284 return true;
285}
286
287// TODO: This is the same as the MachineOutliner, and should be consolidated
288// into the same interface.
290 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
291 std::vector<unsigned> &IntegerMapping) {
292 BasicBlock::iterator It = BB.begin();
293
294 std::vector<unsigned> IntegerMappingForBB;
295 std::vector<IRInstructionData *> InstrListForBB;
296
297 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
298 switch (InstClassifier.visit(*It)) {
299 case InstrType::Legal:
300 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
301 break;
303 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
304 break;
306 AddedIllegalLastTime = false;
307 break;
308 }
309 }
310
312 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
313 for (IRInstructionData *ID : InstrListForBB)
314 this->IDL->push_back(*ID);
315 llvm::append_range(InstrList, InstrListForBB);
316 llvm::append_range(IntegerMapping, IntegerMappingForBB);
317}
318
319// TODO: This is the same as the MachineOutliner, and should be consolidated
320// into the same interface.
322 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
323 std::vector<IRInstructionData *> &InstrListForBB) {
324 // We added something legal, so we should unset the AddedLegalLastTime
325 // flag.
326 AddedIllegalLastTime = false;
327
328 // If we have at least two adjacent legal instructions (which may have
329 // invisible instructions in between), remember that.
331 HaveLegalRange = true;
333
334 // Get the integer for this instruction or give it the current
335 // LegalInstrNumber.
337 InstrListForBB.push_back(ID);
338
339 if (isa<BranchInst>(*It))
340 ID->setBranchSuccessors(BasicBlockToInteger);
341
342 if (isa<CallInst>(*It))
343 ID->setCalleeName(EnableMatchCallsByName);
344
345 if (isa<PHINode>(*It))
346 ID->setPHIPredecessors(BasicBlockToInteger);
347
348 // Add to the instruction list
349 bool WasInserted;
351 ResultIt;
352 std::tie(ResultIt, WasInserted) =
353 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
354 unsigned INumber = ResultIt->second;
355
356 // There was an insertion.
357 if (WasInserted)
359
360 IntegerMappingForBB.push_back(INumber);
361
362 // Make sure we don't overflow or use any integers reserved by the DenseMap.
364 "Instruction mapping overflow!");
365
367 "Tried to assign DenseMap tombstone or empty key to instruction.");
369 "Tried to assign DenseMap tombstone or empty key to instruction.");
370
371 return INumber;
372}
373
377 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
378}
379
382 return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
383}
384
387 return new (IDLAllocator->Allocate()) IRInstructionDataList();
388}
389
390// TODO: This is the same as the MachineOutliner, and should be consolidated
391// into the same interface.
393 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
394 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
395 // Can't combine an illegal instruction. Set the flag.
397
398 // Only add one illegal number per range of legal numbers.
400 return IllegalInstrNumber;
401
402 IRInstructionData *ID = nullptr;
403 if (!End)
404 ID = allocateIRInstructionData(*It, false, *IDL);
405 else
407 InstrListForBB.push_back(ID);
408
409 // Remember that we added an illegal number last time.
411 unsigned INumber = IllegalInstrNumber;
412 IntegerMappingForBB.push_back(IllegalInstrNumber--);
413
415 "Instruction mapping overflow!");
416
418 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
419
421 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
422
423 return INumber;
424}
425
426IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
427 IRInstructionData *FirstInstIt,
428 IRInstructionData *LastInstIt)
429 : StartIdx(StartIdx), Len(Len) {
430
431 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
432 assert(LastInstIt != nullptr && "Instruction is nullptr!");
433 assert(StartIdx + Len > StartIdx &&
434 "Overflow for IRSimilarityCandidate range?");
435 assert(Len - 1 == static_cast<unsigned>(std::distance(
436 iterator(FirstInstIt), iterator(LastInstIt))) &&
437 "Length of the first and last IRInstructionData do not match the "
438 "given length");
439
440 // We iterate over the given instructions, and map each unique value
441 // to a unique number in the IRSimilarityCandidate ValueToNumber and
442 // NumberToValue maps. A constant get its own value globally, the individual
443 // uses of the constants are not considered to be unique.
444 //
445 // IR: Mapping Added:
446 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
447 // %add2 = add i32 %a, %1 %add2 -> 4
448 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
449 //
450 // when replace with global values, starting from 1, would be
451 //
452 // 3 = add i32 1, 2
453 // 4 = add i32 1, 3
454 // 6 = add i32 5, 2
455 unsigned LocalValNumber = 1;
457 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
458 // Map the operand values to an unsigned integer if it does not already
459 // have an unsigned integer assigned to it.
460 for (Value *Arg : ID->OperVals)
461 if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {
462 NumberToValue.try_emplace(LocalValNumber, Arg);
463 LocalValNumber++;
464 }
465
466 // Mapping the instructions to an unsigned integer if it is not already
467 // exist in the mapping.
468 if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {
469 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
470 LocalValNumber++;
471 }
472 }
473
474 // Setting the first and last instruction data pointers for the candidate. If
475 // we got through the entire for loop without hitting an assert, we know
476 // that both of these instructions are not nullptrs.
477 FirstInst = FirstInstIt;
478 LastInst = LastInstIt;
479
480 // Add the basic blocks contained in the set into the global value numbering.
482 getBasicBlocks(BBSet);
483 for (BasicBlock *BB : BBSet) {
484 if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {
485 NumberToValue.try_emplace(LocalValNumber, BB);
486 LocalValNumber++;
487 }
488 }
489}
490
492 const IRSimilarityCandidate &B) {
493 if (A.getLength() != B.getLength())
494 return false;
495
496 auto InstrDataForBoth =
497 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
498
499 return all_of(InstrDataForBoth,
500 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
501 IRInstructionData &A = std::get<0>(R);
502 IRInstructionData &B = std::get<1>(R);
503 if (!A.Legal || !B.Legal)
504 return false;
505 return isClose(A, B);
506 });
507}
508
509/// Determine if one or more of the assigned global value numbers for the
510/// operands in \p TargetValueNumbers is in the current mapping set for operand
511/// numbers in \p SourceOperands. The set of possible corresponding global
512/// value numbers are replaced with the most recent version of compatible
513/// values.
514///
515/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
516/// value number for the source IRInstructionCandidate.
517/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
518/// IRSimilarityCandidate global value numbers to a set of possible numbers in
519/// the target.
520/// \param [in] SourceOperands - The operands in the original
521/// IRSimilarityCandidate in the current instruction.
522/// \param [in] TargetValueNumbers - The global value numbers of the operands in
523/// the corresponding Instruction in the other IRSimilarityCandidate.
524/// \returns true if there exists a possible mapping between the source
525/// Instruction operands and the target Instruction operands, and false if not.
527 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
528 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
529 ArrayRef<Value *> &SourceOperands,
530 DenseSet<unsigned> &TargetValueNumbers){
531
532 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
533
534 unsigned ArgVal;
535 bool WasInserted;
536
537 // Iterate over the operands in the source IRSimilarityCandidate to determine
538 // whether there exists an operand in the other IRSimilarityCandidate that
539 // creates a valid mapping of Value to Value between the
540 // IRSimilarityCaniddates.
541 for (Value *V : SourceOperands) {
542 ArgVal = SourceValueToNumberMapping.find(V)->second;
543
544 // Instead of finding a current mapping, we attempt to insert a set.
545 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
546 std::make_pair(ArgVal, TargetValueNumbers));
547
548 // We need to iterate over the items in other IRSimilarityCandidate's
549 // Instruction to determine whether there is a valid mapping of
550 // Value to Value.
551 DenseSet<unsigned> NewSet;
552 for (unsigned &Curr : ValueMappingIt->second)
553 // If we can find the value in the mapping, we add it to the new set.
554 if (TargetValueNumbers.contains(Curr))
555 NewSet.insert(Curr);
556
557 // If we could not find a Value, return 0.
558 if (NewSet.empty())
559 return false;
560
561 // Otherwise replace the old mapping with the newly constructed one.
562 if (NewSet.size() != ValueMappingIt->second.size())
563 ValueMappingIt->second.swap(NewSet);
564
565 // We have reached no conclusions about the mapping, and cannot remove
566 // any items from the other operands, so we move to check the next operand.
567 if (ValueMappingIt->second.size() != 1)
568 continue;
569
570 unsigned ValToRemove = *ValueMappingIt->second.begin();
571 // When there is only one item left in the mapping for and operand, remove
572 // the value from the other operands. If it results in there being no
573 // mapping, return false, it means the mapping is wrong
574 for (Value *InnerV : SourceOperands) {
575 if (V == InnerV)
576 continue;
577
578 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
579 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
580 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
581 continue;
582
583 ValueMappingIt->second.erase(ValToRemove);
584 if (ValueMappingIt->second.empty())
585 return false;
586 }
587 }
588
589 return true;
590}
591
592/// Determine if operand number \p TargetArgVal is in the current mapping set
593/// for operand number \p SourceArgVal.
594///
595/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
596/// value numbers from source IRSimilarityCandidate to target
597/// IRSimilarityCandidate.
598/// \param [in] SourceArgVal The global value number for an operand in the
599/// in the original candidate.
600/// \param [in] TargetArgVal The global value number for the corresponding
601/// operand in the other candidate.
602/// \returns True if there exists a mapping and false if not.
604 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
605 unsigned SourceArgVal, unsigned TargetArgVal) {
606 // We are given two unsigned integers representing the global values of
607 // the operands in different IRSimilarityCandidates and a current mapping
608 // between the two.
609 //
610 // Source Operand GVN: 1
611 // Target Operand GVN: 2
612 // CurrentMapping: {1: {1, 2}}
613 //
614 // Since we have mapping, and the target operand is contained in the set, we
615 // update it to:
616 // CurrentMapping: {1: {2}}
617 // and can return true. But, if the mapping was
618 // CurrentMapping: {1: {3}}
619 // we would return false.
620
621 bool WasInserted;
623
624 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
625 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
626
627 // If we created a new mapping, then we are done.
628 if (WasInserted)
629 return true;
630
631 // If there is more than one option in the mapping set, and the target value
632 // is included in the mapping set replace that set with one that only includes
633 // the target value, as it is the only valid mapping via the non commutative
634 // instruction.
635
636 DenseSet<unsigned> &TargetSet = Val->second;
637 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
638 TargetSet.clear();
639 TargetSet.insert(TargetArgVal);
640 return true;
641 }
642
643 // Return true if we can find the value in the set.
644 return TargetSet.contains(TargetArgVal);
645}
646
649 // Iterators to keep track of where we are in the operands for each
650 // Instruction.
651 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
652 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
653 unsigned OperandLength = A.OperVals.size();
654
655 // For each operand, get the value numbering and ensure it is consistent.
656 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
657 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
658 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
659
660 // Attempt to add a set with only the target value. If there is no mapping
661 // we can create it here.
662 //
663 // For an instruction like a subtraction:
664 // IRSimilarityCandidateA: IRSimilarityCandidateB:
665 // %resultA = sub %a, %b %resultB = sub %d, %e
666 //
667 // We map %a -> %d and %b -> %e.
668 //
669 // And check to see whether their mapping is consistent in
670 // checkNumberingAndReplace.
671
672 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
673 return false;
674
675 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
676 return false;
677 }
678 return true;
679}
680
683 DenseSet<unsigned> ValueNumbersA;
684 DenseSet<unsigned> ValueNumbersB;
685
686 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
687 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
688 unsigned OperandLength = A.OperVals.size();
689
690 // Find the value number sets for the operands.
691 for (unsigned Idx = 0; Idx < OperandLength;
692 Idx++, VItA++, VItB++) {
693 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
694 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
695 }
696
697 // Iterate over the operands in the first IRSimilarityCandidate and make sure
698 // there exists a possible mapping with the operands in the second
699 // IRSimilarityCandidate.
700 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
701 A.ValueNumberMapping, A.OperVals,
702 ValueNumbersB))
703 return false;
704
705 // Iterate over the operands in the second IRSimilarityCandidate and make sure
706 // there exists a possible mapping with the operands in the first
707 // IRSimilarityCandidate.
708 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
709 B.ValueNumberMapping, B.OperVals,
710 ValueNumbersA))
711 return false;
712
713 return true;
714}
715
717 const unsigned InstValA, const unsigned &InstValB,
718 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
719 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
721 bool WasInserted;
722 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
723 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
724 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
725 return false;
726 else if (ValueMappingIt->second.size() != 1) {
727 for (unsigned OtherVal : ValueMappingIt->second) {
728 if (OtherVal == InstValB)
729 continue;
730 auto OtherValIt = ValueNumberMappingA.find(OtherVal);
731 if (OtherValIt == ValueNumberMappingA.end())
732 continue;
733 OtherValIt->second.erase(InstValA);
734 }
735 ValueNumberMappingA.erase(ValueMappingIt);
736 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
737 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
738 }
739
740 return true;
741}
742
745 // Get the basic blocks the label refers to.
746 BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
747 BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
748
749 // Get the basic blocks contained in each region.
750 DenseSet<BasicBlock *> BasicBlockA;
751 DenseSet<BasicBlock *> BasicBlockB;
752 A.IRSC.getBasicBlocks(BasicBlockA);
753 B.IRSC.getBasicBlocks(BasicBlockB);
754
755 // Determine if the block is contained in the region.
756 bool AContained = BasicBlockA.contains(ABB);
757 bool BContained = BasicBlockB.contains(BBB);
758
759 // Both blocks need to be contained in the region, or both need to be outside
760 // the region.
761 if (AContained != BContained)
762 return false;
763
764 // If both are contained, then we need to make sure that the relative
765 // distance to the target blocks are the same.
766 if (AContained)
767 return A.RelativeLocation == B.RelativeLocation;
768 return true;
769}
770
772 const IRSimilarityCandidate &B) {
775 return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
776}
777
782
785 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
786 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
787 if (A.getLength() != B.getLength())
788 return false;
789
790 if (A.ValueToNumber.size() != B.ValueToNumber.size())
791 return false;
792
793 iterator ItA = A.begin();
794 iterator ItB = B.begin();
795
796 // These ValueNumber Mapping sets create a create a mapping between the values
797 // in one candidate to values in the other candidate. If we create a set with
798 // one element, and that same element maps to the original element in the
799 // candidate we have a good mapping.
800
801 // Iterate over the instructions contained in each candidate
802 unsigned SectionLength = A.getStartIdx() + A.getLength();
803 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
804 ItA++, ItB++, Loc++) {
805 // Make sure the instructions are similar to one another.
806 if (!isClose(*ItA, *ItB))
807 return false;
808
809 Instruction *IA = ItA->Inst;
810 Instruction *IB = ItB->Inst;
811
812 if (!ItA->Legal || !ItB->Legal)
813 return false;
814
815 // Get the operand sets for the instructions.
816 ArrayRef<Value *> OperValsA = ItA->OperVals;
817 ArrayRef<Value *> OperValsB = ItB->OperVals;
818
819 unsigned InstValA = A.ValueToNumber.find(IA)->second;
820 unsigned InstValB = B.ValueToNumber.find(IB)->second;
821
822 // Ensure that the mappings for the instructions exists.
823 if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
824 ValueNumberMappingB))
825 return false;
826
827 if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
828 ValueNumberMappingA))
829 return false;
830
831 // We have different paths for commutative instructions and non-commutative
832 // instructions since commutative instructions could allow multiple mappings
833 // to certain values.
834 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
835 !isa<IntrinsicInst>(IA)) {
837 {A, OperValsA, ValueNumberMappingA},
838 {B, OperValsB, ValueNumberMappingB}))
839 return false;
840 continue;
841 }
842
843 // Handle the non-commutative cases.
845 {A, OperValsA, ValueNumberMappingA},
846 {B, OperValsB, ValueNumberMappingB}))
847 return false;
848
849 // Here we check that between two corresponding instructions,
850 // when referring to a basic block in the same region, the
851 // relative locations are the same. And, that the instructions refer to
852 // basic blocks outside the region in the same corresponding locations.
853
854 // We are able to make the assumption about blocks outside of the region
855 // since the target block labels are considered values and will follow the
856 // same number matching that we defined for the other instructions in the
857 // region. So, at this point, in each location we target a specific block
858 // outside the region, we are targeting a corresponding block in each
859 // analagous location in the region we are comparing to.
860 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
861 !(isa<PHINode>(IA) && isa<PHINode>(IB)))
862 continue;
863
864 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
865 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
866 ArrayRef<Value *> ABL = ItA->getBlockOperVals();
867 ArrayRef<Value *> BBL = ItB->getBlockOperVals();
868
869 // Check to make sure that the number of operands, and branching locations
870 // between BranchInsts is the same.
871 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
872 ABL.size() != BBL.size())
873 return false;
874
875 assert(RelBlockLocsA.size() == ABL.size() &&
876 "Block information vectors not the same size.");
877 assert(RelBlockLocsB.size() == BBL.size() &&
878 "Block information vectors not the same size.");
879
880 ZippedRelativeLocationsT ZippedRelativeLocations =
881 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
882 if (any_of(ZippedRelativeLocations,
883 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
885 {A, std::get<0>(R), std::get<2>(R)},
886 {B, std::get<1>(R), std::get<3>(R)});
887 }))
888 return false;
889 }
890 return true;
891}
892
894 const IRSimilarityCandidate &B) {
895 auto DoesOverlap = [](const IRSimilarityCandidate &X,
896 const IRSimilarityCandidate &Y) {
897 // Check:
898 // XXXXXX X starts before Y ends
899 // YYYYYYY Y starts after X starts
900 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
901 };
902
903 return DoesOverlap(A, B) || DoesOverlap(B, A);
904}
905
906void IRSimilarityIdentifier::populateMapper(
907 Module &M, std::vector<IRInstructionData *> &InstrList,
908 std::vector<unsigned> &IntegerMapping) {
909
910 std::vector<IRInstructionData *> InstrListForModule;
911 std::vector<unsigned> IntegerMappingForModule;
912 // Iterate over the functions in the module to map each Instruction in each
913 // BasicBlock to an unsigned integer.
914 Mapper.initializeForBBs(M);
915
916 for (Function &F : M) {
917
918 if (F.empty())
919 continue;
920
921 for (BasicBlock &BB : F) {
922
923 // BB has potential to have similarity since it has a size greater than 2
924 // and can therefore match other regions greater than 2. Map it to a list
925 // of unsigned integers.
926 Mapper.convertToUnsignedVec(BB, InstrListForModule,
927 IntegerMappingForModule);
928 }
929
930 BasicBlock::iterator It = F.begin()->end();
931 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
932 true);
933 if (InstrListForModule.size() > 0)
934 Mapper.IDL->push_back(*InstrListForModule.back());
935 }
936
937 // Insert the InstrListForModule at the end of the overall InstrList so that
938 // we can have a long InstrList for the entire set of Modules being analyzed.
939 llvm::append_range(InstrList, InstrListForModule);
940 // Do the same as above, but for IntegerMapping.
941 llvm::append_range(IntegerMapping, IntegerMappingForModule);
942}
943
944void IRSimilarityIdentifier::populateMapper(
945 ArrayRef<std::unique_ptr<Module>> &Modules,
946 std::vector<IRInstructionData *> &InstrList,
947 std::vector<unsigned> &IntegerMapping) {
948
949 // Iterate over, and map the instructions in each module.
950 for (const std::unique_ptr<Module> &M : Modules)
951 populateMapper(*M, InstrList, IntegerMapping);
952}
953
954/// From a repeated subsequence, find all the different instances of the
955/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
956/// the IRInstructionData in subsequence.
957///
958/// \param [in] Mapper - The instruction mapper for basic correctness checks.
959/// \param [in] InstrList - The vector that holds the instruction data.
960/// \param [in] IntegerMapping - The vector that holds the mapped integers.
961/// \param [out] CandsForRepSubstring - The vector to store the generated
962/// IRSimilarityCandidates.
964 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
965 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
966 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
967
968 unsigned StringLen = RS.Length;
969 if (StringLen < 2)
970 return;
971
972 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
973 for (const unsigned &StartIdx : RS.StartIndices) {
974 unsigned EndIdx = StartIdx + StringLen - 1;
975
976 // Check that this subsequence does not contain an illegal instruction.
977 bool ContainsIllegal = false;
978 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
979 unsigned Key = IntegerMapping[CurrIdx];
980 if (Key > Mapper.IllegalInstrNumber) {
981 ContainsIllegal = true;
982 break;
983 }
984 }
985
986 // If we have an illegal instruction, we should not create an
987 // IRSimilarityCandidate for this region.
988 if (ContainsIllegal)
989 continue;
990
991 // We are getting iterators to the instructions in this region of code
992 // by advancing the start and end indices from the start of the
993 // InstrList.
994 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
995 std::advance(StartIt, StartIdx);
996 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
997 std::advance(EndIt, EndIdx);
998
999 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1000 }
1001}
1002
1004 IRSimilarityCandidate &SourceCand,
1005 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1006 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1007 assert(SourceCand.CanonNumToNumber.size() != 0 &&
1008 "Base canonical relationship is empty!");
1009 assert(SourceCand.NumberToCanonNum.size() != 0 &&
1010 "Base canonical relationship is empty!");
1011
1012 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1013 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1014
1015 DenseSet<unsigned> UsedGVNs;
1016 // Iterate over the mappings provided from this candidate to SourceCand. We
1017 // are then able to map the GVN in this candidate to the same canonical number
1018 // given to the corresponding GVN in SourceCand.
1019 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1020 unsigned SourceGVN = GVNMapping.first;
1021
1022 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1023
1024 unsigned ResultGVN;
1025 // We need special handling if we have more than one potential value. This
1026 // means that there are at least two GVNs that could correspond to this GVN.
1027 // This could lead to potential swapping later on, so we make a decision
1028 // here to ensure a one-to-one mapping.
1029 if (GVNMapping.second.size() > 1) {
1030 bool Found = false;
1031 for (unsigned Val : GVNMapping.second) {
1032 // We make sure the target value number hasn't already been reserved.
1033 if (UsedGVNs.contains(Val))
1034 continue;
1035
1036 // We make sure that the opposite mapping is still consistent.
1038 FromSourceMapping.find(Val);
1039
1040 if (!It->second.contains(SourceGVN))
1041 continue;
1042
1043 // We pick the first item that satisfies these conditions.
1044 Found = true;
1045 ResultGVN = Val;
1046 break;
1047 }
1048
1049 assert(Found && "Could not find matching value for source GVN");
1050 (void)Found;
1051
1052 } else
1053 ResultGVN = *GVNMapping.second.begin();
1054
1055 // Whatever GVN is found, we mark it as used.
1056 UsedGVNs.insert(ResultGVN);
1057
1058 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1059 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1060 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1061 }
1062
1064 getBasicBlocks(BBSet);
1065 // Find canonical numbers for the BasicBlocks in the current candidate.
1066 // This is done by finding the corresponding value for the first instruction
1067 // in the block in the current candidate, finding the matching value in the
1068 // source candidate. Then by finding the parent of this value, use the
1069 // canonical number of the block in the source candidate for the canonical
1070 // number in the current candidate.
1071 for (BasicBlock *BB : BBSet) {
1072 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1073
1074 // We can skip the BasicBlock if the canonical numbering has already been
1075 // found in a separate instruction.
1076 if (NumberToCanonNum.contains(BBGVNForCurrCand))
1077 continue;
1078
1079 // If the basic block is the starting block, then the shared instruction may
1080 // not be the first instruction in the block, it will be the first
1081 // instruction in the similarity region.
1082 Value *FirstOutlineInst = BB == getStartBB()
1084 : &*BB->instructionsWithoutDebug().begin();
1085
1086 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1087 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1088 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1089 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1090 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1091 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1092 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1093 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1094 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1095 }
1096}
1097
1099 IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1100 IRSimilarityCandidate &TargetCandLarge) {
1101 assert(!SourceCand.CanonNumToNumber.empty() &&
1102 "Canonical Relationship is non-empty");
1103 assert(!SourceCand.NumberToCanonNum.empty() &&
1104 "Canonical Relationship is non-empty");
1105
1106 assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1107 "Canonical Relationship is non-empty");
1108 assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1109 "Canonical Relationship is non-empty");
1110
1111 assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1112 "Canonical Relationship is non-empty");
1113 assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1114 "Canonical Relationship is non-empty");
1115
1116 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1117 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1118
1119 // We're going to use the larger candidates as a "bridge" to create the
1120 // canonical number for the target candidate since we have idetified two
1121 // candidates as subsequences of larger sequences, and therefore must be
1122 // structurally similar.
1123 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1124 Value *CurrVal = ValueNumPair.first;
1125 unsigned TargetCandGVN = ValueNumPair.second;
1126
1127 // Find the numbering in the large candidate that surrounds the
1128 // current candidate.
1129 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1130 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1131
1132 // Get the canonical numbering in the large target candidate.
1133 std::optional<unsigned> OTargetCandCanon =
1134 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1135 assert(OTargetCandCanon.has_value() &&
1136 "Canononical Number not found for GVN");
1137
1138 // Get the GVN in the large source candidate from the canonical numbering.
1139 std::optional<unsigned> OLargeSourceGVN =
1140 SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1141 assert(OLargeSourceGVN.has_value() &&
1142 "GVN Number not found for Canonical Number");
1143
1144 // Get the Value from the GVN in the large source candidate.
1145 std::optional<Value *> OLargeSourceV =
1146 SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1147 assert(OLargeSourceV.has_value() && "Value not found for GVN");
1148
1149 // Get the GVN number for the Value in the source candidate.
1150 std::optional<unsigned> OSourceGVN =
1151 SourceCand.getGVN(OLargeSourceV.value());
1152 assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1153
1154 // Get the canonical numbering from the GVN/
1155 std::optional<unsigned> OSourceCanon =
1156 SourceCand.getCanonicalNum(OSourceGVN.value());
1157 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1158
1159 // Insert the canonical numbering and GVN pair into their respective
1160 // mappings.
1161 CanonNumToNumber.insert(
1162 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1163 NumberToCanonNum.insert(
1164 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1165 }
1166}
1167
1169 IRSimilarityCandidate &CurrCand) {
1170 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1171 "Canonical Relationship is non-empty");
1172 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1173 "Canonical Relationship is non-empty");
1174
1175 unsigned CanonNum = 0;
1176 // Iterate over the value numbers found, the order does not matter in this
1177 // case.
1178 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1179 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1180 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1181 CanonNum++;
1182 }
1183}
1184
1185/// Look for larger IRSimilarityCandidates From the previously matched
1186/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1187/// an overlap, return a pair of structurally similar, larger
1188/// IRSimilarityCandidates.
1189///
1190/// \param [in] CandA - The first candidate we are trying to determine the
1191/// structure of.
1192/// \param [in] CandB - The second candidate we are trying to determine the
1193/// structure of.
1194/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1195/// a circuit to the IRSimilarityCandidates that include this instruction.
1196/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1197/// number representing the structural group assigned to it.
1198static std::optional<
1199 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1202 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1204 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1205 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1206 DenseSet<unsigned> IncludedGroupsA;
1207 DenseSet<unsigned> IncludedGroupsB;
1208
1209 // Find the overall similarity group numbers that fully contain the candidate,
1210 // and record the larger candidate for each group.
1211 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1212 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1213 Result;
1214
1215 unsigned CandAStart = CandA.getStartIdx();
1216 unsigned CandAEnd = CandA.getEndIdx();
1217 unsigned CandBStart = CandB.getStartIdx();
1218 unsigned CandBEnd = CandB.getEndIdx();
1219 if (IdxToCandidateIt == IndexToIncludedCand.end())
1220 return Result;
1221 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1222 if (MatchedCand->getStartIdx() > CandAStart ||
1223 (MatchedCand->getEndIdx() < CandAEnd))
1224 continue;
1225 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1226 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1227 IncludedGroupsA.insert(GroupNum);
1228 }
1229
1230 // Find the overall similarity group numbers that fully contain the next
1231 // candidate, and record the larger candidate for each group.
1232 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1233 if (IdxToCandidateIt == IndexToIncludedCand.end())
1234 return Result;
1235 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1236 if (MatchedCand->getStartIdx() > CandBStart ||
1237 MatchedCand->getEndIdx() < CandBEnd)
1238 continue;
1239 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1240 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1241 IncludedGroupsB.insert(GroupNum);
1242 }
1243
1244 // Find the intersection between the two groups, these are the groups where
1245 // the larger candidates exist.
1246 set_intersect(IncludedGroupsA, IncludedGroupsB);
1247
1248 // If there is no intersection between the sets, then we cannot determine
1249 // whether or not there is a match.
1250 if (IncludedGroupsA.empty())
1251 return Result;
1252
1253 // Create a pair that contains the larger candidates.
1254 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1255 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1256 Result = std::make_pair(ItA->second, ItB->second);
1257 return Result;
1258}
1259
1260/// From the list of IRSimilarityCandidates, perform a comparison between each
1261/// IRSimilarityCandidate to determine if there are overlapping
1262/// IRInstructionData, or if they do not have the same structure.
1263///
1264/// \param [in] CandsForRepSubstring - The vector containing the
1265/// IRSimilarityCandidates.
1266/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1267/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1268/// vector are structurally similar to one another.
1269/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1270/// a circuit to the IRSimilarityCandidates that include this instruction.
1271/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1272/// number representing the structural group assigned to it.
1274 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1275 DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1276 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1278 ) {
1279 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1280 InnerCandEndIt;
1281
1282 // IRSimilarityCandidates each have a structure for operand use. It is
1283 // possible that two instances of the same subsequences have different
1284 // structure. Each type of structure found is assigned a number. This
1285 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1286 // discovered it fits within.
1288
1289 // Find the compatibility from each candidate to the others to determine
1290 // which candidates overlap and which have the same structure by mapping
1291 // each structure to a different group.
1292 bool SameStructure;
1293 bool Inserted;
1294 unsigned CurrentGroupNum = 0;
1295 unsigned OuterGroupNum;
1299
1300 // Iterate over the candidates to determine its structural and overlapping
1301 // compatibility with other instructions
1302 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1303 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1304 for (CandIt = CandsForRepSubstring.begin(),
1305 CandEndIt = CandsForRepSubstring.end();
1306 CandIt != CandEndIt; CandIt++) {
1307
1308 // Determine if it has an assigned structural group already.
1309 // If not, we assign it one, and add it to our mapping.
1310 std::tie(CandToGroupIt, Inserted) =
1311 CandToGroup.try_emplace(&*CandIt, CurrentGroupNum);
1312 if (Inserted)
1313 ++CurrentGroupNum;
1314
1315 // Get the structural group number from the iterator.
1316 OuterGroupNum = CandToGroupIt->second;
1317
1318 // Check if we already have a list of IRSimilarityCandidates for the current
1319 // structural group. Create one if one does not exist.
1320 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1321 if (CurrentGroupPair == StructuralGroups.end()) {
1323 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1324 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1325 }
1326
1327 // Iterate over the IRSimilarityCandidates following the current
1328 // IRSimilarityCandidate in the list to determine whether the two
1329 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1330 // of IRSimilarityCandidates.
1331 for (InnerCandIt = std::next(CandIt),
1332 InnerCandEndIt = CandsForRepSubstring.end();
1333 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1334
1335 // We check if the inner item has a group already, if it does, we skip it.
1336 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1337 if (CandToGroupItInner != CandToGroup.end())
1338 continue;
1339
1340 // Check if we have found structural similarity between two candidates
1341 // that fully contains the first and second candidates.
1342 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1343 LargerPair = CheckLargerCands(
1344 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1345
1346 // If a pair was found, it means that we can assume that these smaller
1347 // substrings are also structurally similar. Use the larger candidates to
1348 // determine the canonical mapping between the two sections.
1349 if (LargerPair.has_value()) {
1350 SameStructure = true;
1351 InnerCandIt->createCanonicalRelationFrom(
1352 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1353 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1354 CurrentGroupPair->second.push_back(*InnerCandIt);
1355 continue;
1356 }
1357
1358 // Otherwise we determine if they have the same structure and add it to
1359 // vector if they match.
1360 ValueNumberMappingA.clear();
1361 ValueNumberMappingB.clear();
1363 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1364 if (!SameStructure)
1365 continue;
1366
1367 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1368 ValueNumberMappingB);
1369 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1370 CurrentGroupPair->second.push_back(*InnerCandIt);
1371 }
1372 }
1373}
1374
1375void IRSimilarityIdentifier::findCandidates(
1376 std::vector<IRInstructionData *> &InstrList,
1377 std::vector<unsigned> &IntegerMapping) {
1378 SuffixTree ST(IntegerMapping);
1379
1380 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1381 std::vector<SimilarityGroup> NewCandidateGroups;
1382
1383 DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1386
1387 // Iterate over the subsequences found by the Suffix Tree to create
1388 // IRSimilarityCandidates for each repeated subsequence and determine which
1389 // instances are structurally similar to one another.
1390
1391 // Sort the suffix tree from longest substring to shortest.
1392 std::vector<SuffixTree::RepeatedSubstring> RSes;
1393 for (SuffixTree::RepeatedSubstring &RS : ST)
1394 RSes.push_back(RS);
1395
1397 const SuffixTree::RepeatedSubstring &RHS) {
1398 return LHS.Length > RHS.Length;
1399 });
1400 for (SuffixTree::RepeatedSubstring &RS : RSes) {
1401 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1402 CandsForRepSubstring);
1403
1404 if (CandsForRepSubstring.size() < 2)
1405 continue;
1406
1407 findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1408 IndexToIncludedCand, CandToGroup);
1409 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1410 // We only add the group if it contains more than one
1411 // IRSimilarityCandidate. If there is only one, that means there is no
1412 // other repeated subsequence with the same structure.
1413 if (Group.second.size() > 1) {
1414 SimilarityCandidates->push_back(Group.second);
1415 // Iterate over each candidate in the group, and add an entry for each
1416 // instruction included with a mapping to a set of
1417 // IRSimilarityCandidates that include that instruction.
1418 for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1419 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1420 Idx <= Edx; ++Idx)
1421 IndexToIncludedCand[Idx].insert(&IRCand);
1422 // Add mapping of candidate to the overall similarity group number.
1423 CandToGroup.insert(
1424 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1425 }
1426 }
1427 }
1428
1429 CandsForRepSubstring.clear();
1430 StructuralGroups.clear();
1431 NewCandidateGroups.clear();
1432 }
1433}
1434
1436 ArrayRef<std::unique_ptr<Module>> Modules) {
1438
1439 std::vector<IRInstructionData *> InstrList;
1440 std::vector<unsigned> IntegerMapping;
1441 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1442 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1443 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1444 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1445 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1446
1447 populateMapper(Modules, InstrList, IntegerMapping);
1448 findCandidates(InstrList, IntegerMapping);
1449
1450 return *SimilarityCandidates;
1451}
1452
1455 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1456 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1457 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1458 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1459 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1460
1461 std::vector<IRInstructionData *> InstrList;
1462 std::vector<unsigned> IntegerMapping;
1463
1464 populateMapper(M, InstrList, IntegerMapping);
1465 findCandidates(InstrList, IntegerMapping);
1466
1467 return *SimilarityCandidates;
1468}
1469
1471 "ir-similarity-identifier", false, true)
1472
1474 : ModulePass(ID) {}
1475
1479 false));
1480 return false;
1481}
1482
1484 IRSI.reset();
1485 return false;
1486}
1487
1489 IRSI->findSimilarity(M);
1490 return false;
1491}
1492
1493AnalysisKey IRSimilarityAnalysis::Key;
1498 false);
1499 IRSI.findSimilarity(M);
1500 return IRSI;
1501}
1502
1506 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1507 IRSI.getSimilarity();
1508
1509 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1510 OS << CandVec.size() << " candidates of length "
1511 << CandVec.begin()->getLength() << ". Found in: \n";
1512 for (IRSimilarityCandidate &Cand : CandVec) {
1513 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1514 << ", Basic Block: ";
1515 if (Cand.front()->Inst->getParent()->getName().str() == "")
1516 OS << "(unnamed)";
1517 else
1518 OS << Cand.front()->Inst->getParent()->getName().str();
1519 OS << "\n Start Instruction: ";
1520 Cand.frontInstruction()->print(OS);
1521 OS << "\n End Instruction: ";
1522 Cand.backInstruction()->print(OS);
1523 OS << "\n";
1524 }
1525 }
1526
1527 return PreservedAnalyses::all();
1528}
1529
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
raw_pwrite_stream & OS
This file defines generic set operations that may be used on set's of different types,...
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:170
Conditional or Unconditional Branch instruction.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1348
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:666
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:683
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:705
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:690
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:706
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:691
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:829
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:767
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
bool erase(const KeyT &Val)
Definition: DenseMap.h:319
unsigned size() const
Definition: DenseMap.h:120
bool empty() const
Definition: DenseMap.h:119
iterator begin()
Definition: DenseMap.h:78
iterator end()
Definition: DenseMap.h:87
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:168
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
void swap(DenseMap &RHS)
Definition: DenseMap.h:774
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Class to represent function types.
Definition: DerivedTypes.h:105
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
LLVM_ABI void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static LLVM_ABI bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
static LLVM_ABI bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
static LLVM_ABI bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static LLVM_ABI bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static LLVM_ABI void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static LLVM_ABI bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
LLVM_ABI IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static LLVM_ABI bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static LLVM_ABI bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
LLVM_ABI SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:255
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
size_t size() const
Definition: SmallVector.h:79
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:233
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
op_range operands()
Definition: User.h:292
LLVM Value Representation.
Definition: Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:163
size_type size() const
Definition: DenseSet.h:87
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:169
const ParentTy * getParent() const
Definition: ilist_node.h:34
typename ilist_select_iterator_type< OptionsT::has_iterator_bits, OptionsT, false, false >::type iterator
Definition: simple_ilist.h:97
void push_back(reference Node)
Insert a node at the back; never copies.
Definition: simple_ilist.h:153
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
LLVM_ABI bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
LLVM_ABI StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Intrinsics.cpp:49
LLVM_ABI bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
Definition: Intrinsics.cpp:618
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:860
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:58
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
Definition: IROutliner.cpp:43
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:39
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
Definition: IROutliner.cpp:47
static cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:29
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:54
This provides the utilities for hashing an Instruction to an unsigned integer.
LLVM_ABI StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
LLVM_ABI void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
LLVM_ABI IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
LLVM_ABI ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static LLVM_ABI CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
LLVM_ABI void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
LLVM_ABI void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
LLVM_ABI void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
LLVM_ABI CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
void initializeForBBs(Function &F, unsigned &BBNumber)
Assigns values to all the basic blocks in function F starting from integer BBNumber.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
LLVM_ABI IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
LLVM_ABI IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
LLVM_ABI unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
LLVM_ABI void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
LLVM_ABI unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A repeated substring in the tree.
Definition: SuffixTree.h:50
SmallVector< unsigned > StartIndices
The start indices of each occurrence.
Definition: SuffixTree.h:55
unsigned Length
The length of the string.
Definition: SuffixTree.h:52