25using namespace IRSimilarity;
31 cl::desc(
"disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
37 cl::desc(
"disable outlining indirect calls."));
41 cl::desc(
"only allow matching call instructions if the "
42 "name and type signature match."));
46 cl::desc(
"Don't match or outline intrinsics"));
51 : Inst(&
I),
Legal(Legality), IDL(&IDList) {
61 if (Predicate !=
C->getPredicate())
89 assert(isa<BranchInst>(
Inst) &&
"Instruction must be branch");
95 assert(BBNumIt != BasicBlockToInteger.
end() &&
96 "Could not find location for BasicBlock!");
98 int CurrentBlockNumber =
static_cast<int>(BBNumIt->second);
103 assert(BBNumIt != BasicBlockToInteger.
end() &&
104 "Could not find number for BasicBlock!");
105 int OtherBlockNumber =
static_cast<int>(BBNumIt->second);
107 int Relative = OtherBlockNumber - CurrentBlockNumber;
114 isa<PHINode>(
Inst)) &&
"Instruction must be branch or PHINode");
133 assert(CI &&
"Instruction must be call");
160 assert(isa<PHINode>(
Inst) &&
"Instruction must be phi node");
166 assert(BBNumIt != BasicBlockToInteger.
end() &&
167 "Could not find location for BasicBlock!");
169 int CurrentBlockNumber =
static_cast<int>(BBNumIt->second);
176 assert(BBNumIt != BasicBlockToInteger.
end() &&
177 "Could not find number for BasicBlock!");
178 int OtherBlockNumber =
static_cast<int>(BBNumIt->second);
180 int Relative = OtherBlockNumber - CurrentBlockNumber;
203 "Can only get a predicate from a compare instruction");
208 return cast<CmpInst>(
Inst)->getPredicate();
213 "Can only get a name from a call instruction");
223 if (!
A.Legal || !
B.Legal)
228 if (!
A.Inst->isSameOperationAs(
B.Inst)) {
232 if (isa<CmpInst>(
A.Inst) && isa<CmpInst>(
B.Inst)) {
234 if (
A.getPredicate() !=
B.getPredicate())
239 auto ZippedTypes =
zip(
A.OperVals,
B.OperVals);
242 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
243 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
253 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(
A.Inst)) {
254 auto *OtherGEP = cast<GetElementPtrInst>(
B.Inst);
258 if (
GEP->isInBounds() != OtherGEP->isInBounds())
261 auto ZippedOperands =
zip(
GEP->indices(), OtherGEP->indices());
267 [](std::tuple<llvm::Use &, llvm::Use &> R) {
268 return std::get<0>(R) == std::get<1>(R);
275 if (isa<CallInst>(
A.Inst) && isa<CallInst>(
B.Inst)) {
276 if (
A.getCalleeName() !=
B.getCalleeName())
280 if (isa<BranchInst>(
A.Inst) && isa<BranchInst>(
B.Inst) &&
281 A.RelativeBlockLocations.size() !=
B.RelativeBlockLocations.size())
290 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
291 std::vector<unsigned> &IntegerMapping) {
294 std::vector<unsigned> IntegerMappingForBB;
295 std::vector<IRInstructionData *> InstrListForBB;
323 std::vector<IRInstructionData *> &InstrListForBB) {
337 InstrListForBB.push_back(
ID);
339 if (isa<BranchInst>(*It))
342 if (isa<CallInst>(*It))
345 if (isa<PHINode>(*It))
352 std::tie(ResultIt, WasInserted) =
354 unsigned INumber = ResultIt->second;
360 IntegerMappingForBB.push_back(INumber);
364 "Instruction mapping overflow!");
367 "Tried to assign DenseMap tombstone or empty key to instruction.");
369 "Tried to assign DenseMap tombstone or empty key to instruction.");
394 std::vector<IRInstructionData *> &InstrListForBB,
bool End) {
407 InstrListForBB.push_back(
ID);
415 "Instruction mapping overflow!");
418 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
421 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
429 : StartIdx(StartIdx), Len(Len) {
431 assert(FirstInstIt !=
nullptr &&
"Instruction is nullptr!");
432 assert(LastInstIt !=
nullptr &&
"Instruction is nullptr!");
433 assert(StartIdx + Len > StartIdx &&
434 "Overflow for IRSimilarityCandidate range?");
435 assert(Len - 1 ==
static_cast<unsigned>(std::distance(
437 "Length of the first and last IRInstructionData do not match the "
455 unsigned LocalValNumber = 1;
457 for (
unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++,
ID++) {
460 for (
Value *Arg :
ID->OperVals)
461 if (ValueToNumber.
try_emplace(Arg, LocalValNumber).second) {
462 NumberToValue.try_emplace(LocalValNumber, Arg);
468 if (ValueToNumber.
try_emplace(
ID->Inst, LocalValNumber).second) {
469 NumberToValue.try_emplace(LocalValNumber,
ID->Inst);
477 FirstInst = FirstInstIt;
478 LastInst = LastInstIt;
484 if (ValueToNumber.
try_emplace(BB, LocalValNumber).second) {
485 NumberToValue.try_emplace(LocalValNumber, BB);
493 if (
A.getLength() !=
B.getLength())
496 auto InstrDataForBoth =
499 return all_of(InstrDataForBoth,
500 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
503 if (!
A.Legal || !
B.Legal)
541 for (
Value *V : SourceOperands) {
542 ArgVal = SourceValueToNumberMapping.
find(V)->second;
545 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
546 std::make_pair(ArgVal, TargetValueNumbers));
552 for (
unsigned &Curr : ValueMappingIt->second)
554 if (TargetValueNumbers.
contains(Curr))
562 if (NewSet.
size() != ValueMappingIt->second.
size())
563 ValueMappingIt->second.
swap(NewSet);
567 if (ValueMappingIt->second.
size() != 1)
570 unsigned ValToRemove = *ValueMappingIt->second.
begin();
574 for (
Value *InnerV : SourceOperands) {
578 unsigned InnerVal = SourceValueToNumberMapping.
find(InnerV)->second;
579 ValueMappingIt = CurrentSrcTgtNumberMapping.
find(InnerVal);
580 if (ValueMappingIt == CurrentSrcTgtNumberMapping.
end())
583 ValueMappingIt->second.
erase(ValToRemove);
584 if (ValueMappingIt->second.
empty())
605 unsigned SourceArgVal,
unsigned TargetArgVal) {
624 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.
insert(
637 if (TargetSet.
size() > 1 && TargetSet.
contains(TargetArgVal)) {
639 TargetSet.
insert(TargetArgVal);
644 return TargetSet.
contains(TargetArgVal);
653 unsigned OperandLength =
A.OperVals.size();
656 for (
unsigned Idx = 0;
Idx < OperandLength;
Idx++, VItA++, VItB++) {
657 unsigned OperValA =
A.IRSC.ValueToNumber.find(*VItA)->second;
658 unsigned OperValB =
B.IRSC.ValueToNumber.find(*VItB)->second;
688 unsigned OperandLength =
A.OperVals.size();
691 for (
unsigned Idx = 0;
Idx < OperandLength;
692 Idx++, VItA++, VItB++) {
693 ValueNumbersA.
insert(
A.IRSC.ValueToNumber.find(*VItA)->second);
694 ValueNumbersB.
insert(
B.IRSC.ValueToNumber.find(*VItB)->second);
701 A.ValueNumberMapping,
A.OperVals,
709 B.ValueNumberMapping,
B.OperVals,
717 const unsigned InstValA,
const unsigned &InstValB,
722 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
724 if (!WasInserted && !ValueMappingIt->second.
contains(InstValB))
726 else if (ValueMappingIt->second.
size() != 1) {
727 for (
unsigned OtherVal : ValueMappingIt->second) {
728 if (OtherVal == InstValB)
730 auto OtherValIt = ValueNumberMappingA.find(OtherVal);
731 if (OtherValIt == ValueNumberMappingA.end())
733 OtherValIt->second.erase(InstValA);
735 ValueNumberMappingA.erase(ValueMappingIt);
736 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
752 A.IRSC.getBasicBlocks(BasicBlockA);
753 B.IRSC.getBasicBlocks(BasicBlockB);
756 bool AContained = BasicBlockA.
contains(ABB);
757 bool BContained = BasicBlockB.
contains(BBB);
761 if (AContained != BContained)
767 return A.RelativeLocation ==
B.RelativeLocation;
787 if (
A.getLength() !=
B.getLength())
790 if (
A.ValueToNumber.size() !=
B.ValueToNumber.size())
802 unsigned SectionLength =
A.getStartIdx() +
A.getLength();
803 for (
unsigned Loc =
A.getStartIdx(); Loc < SectionLength;
804 ItA++, ItB++, Loc++) {
812 if (!ItA->Legal || !ItB->Legal)
819 unsigned InstValA =
A.ValueToNumber.find(IA)->second;
820 unsigned InstValB =
B.ValueToNumber.find(IB)->second;
824 ValueNumberMappingB))
828 ValueNumberMappingA))
834 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
835 !isa<IntrinsicInst>(IA)) {
837 {
A, OperValsA, ValueNumberMappingA},
838 {
B, OperValsB, ValueNumberMappingB}))
845 {
A, OperValsA, ValueNumberMappingA},
846 {
B, OperValsB, ValueNumberMappingB}))
860 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
861 !(isa<PHINode>(IA) && isa<PHINode>(IB)))
871 if (RelBlockLocsA.
size() != RelBlockLocsB.
size() &&
876 "Block information vectors not the same size.");
878 "Block information vectors not the same size.");
881 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
882 if (
any_of(ZippedRelativeLocations,
883 [&
A, &
B](std::tuple<int, int, Value *, Value *> R) {
885 {
A, std::get<0>(R), std::get<2>(R)},
886 {
B, std::get<1>(R), std::get<3>(R)});
900 return X.StartIdx <=
Y.getEndIdx() &&
Y.StartIdx >=
X.StartIdx;
903 return DoesOverlap(
A,
B) || DoesOverlap(
B,
A);
906void IRSimilarityIdentifier::populateMapper(
907 Module &M, std::vector<IRInstructionData *> &InstrList,
908 std::vector<unsigned> &IntegerMapping) {
910 std::vector<IRInstructionData *> InstrListForModule;
911 std::vector<unsigned> IntegerMappingForModule;
927 IntegerMappingForModule);
933 if (InstrListForModule.size() > 0)
944void IRSimilarityIdentifier::populateMapper(
945 ArrayRef<std::unique_ptr<Module>> &Modules,
946 std::vector<IRInstructionData *> &InstrList,
947 std::vector<unsigned> &IntegerMapping) {
950 for (
const std::unique_ptr<Module> &M : Modules)
951 populateMapper(*M, InstrList, IntegerMapping);
966 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
968 unsigned StringLen = RS.
Length;
974 unsigned EndIdx = StartIdx + StringLen - 1;
977 bool ContainsIllegal =
false;
978 for (
unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
979 unsigned Key = IntegerMapping[CurrIdx];
981 ContainsIllegal =
true;
994 std::vector<IRInstructionData *>::iterator StartIt = InstrList.
begin();
995 std::advance(StartIt, StartIdx);
996 std::vector<IRInstructionData *>::iterator EndIt = InstrList.
begin();
997 std::advance(EndIt, EndIdx);
999 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1007 assert(SourceCand.CanonNumToNumber.
size() != 0 &&
1008 "Base canonical relationship is empty!");
1009 assert(SourceCand.NumberToCanonNum.
size() != 0 &&
1010 "Base canonical relationship is empty!");
1012 assert(CanonNumToNumber.
size() == 0 &&
"Canonical Relationship is non-empty");
1013 assert(NumberToCanonNum.
size() == 0 &&
"Canonical Relationship is non-empty");
1020 unsigned SourceGVN = GVNMapping.first;
1022 assert(GVNMapping.second.size() != 0 &&
"Possible GVNs is 0!");
1029 if (GVNMapping.second.size() > 1) {
1031 for (
unsigned Val : GVNMapping.second) {
1038 FromSourceMapping.find(Val);
1040 if (!It->second.
contains(SourceGVN))
1049 assert(Found &&
"Could not find matching value for source GVN");
1053 ResultGVN = *GVNMapping.second.begin();
1056 UsedGVNs.
insert(ResultGVN);
1059 CanonNumToNumber.
insert(std::make_pair(CanonNum, SourceGVN));
1060 NumberToCanonNum.
insert(std::make_pair(SourceGVN, CanonNum));
1072 unsigned BBGVNForCurrCand = ValueToNumber.
find(BB)->second;
1076 if (NumberToCanonNum.
contains(BBGVNForCurrCand))
1084 : &*BB->instructionsWithoutDebug().begin();
1086 unsigned FirstInstGVN = *
getGVN(FirstOutlineInst);
1091 unsigned SourceBBGVN = *SourceCand.
getGVN(SourceBB);
1093 CanonNumToNumber.
insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1094 NumberToCanonNum.
insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1102 "Canonical Relationship is non-empty");
1104 "Canonical Relationship is non-empty");
1106 assert(!SourceCandLarge.CanonNumToNumber.
empty() &&
1107 "Canonical Relationship is non-empty");
1108 assert(!SourceCandLarge.NumberToCanonNum.
empty() &&
1109 "Canonical Relationship is non-empty");
1111 assert(!TargetCandLarge.CanonNumToNumber.
empty() &&
1112 "Canonical Relationship is non-empty");
1113 assert(!TargetCandLarge.NumberToCanonNum.
empty() &&
1114 "Canonical Relationship is non-empty");
1116 assert(CanonNumToNumber.
empty() &&
"Canonical Relationship is non-empty");
1117 assert(NumberToCanonNum.
empty() &&
"Canonical Relationship is non-empty");
1123 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1124 Value *CurrVal = ValueNumPair.first;
1125 unsigned TargetCandGVN = ValueNumPair.second;
1129 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.
getGVN(CurrVal);
1130 assert(OLargeTargetGVN.has_value() &&
"GVN not found for Value");
1133 std::optional<unsigned> OTargetCandCanon =
1135 assert(OTargetCandCanon.has_value() &&
1136 "Canononical Number not found for GVN");
1139 std::optional<unsigned> OLargeSourceGVN =
1141 assert(OLargeSourceGVN.has_value() &&
1142 "GVN Number not found for Canonical Number");
1145 std::optional<Value *> OLargeSourceV =
1146 SourceCandLarge.
fromGVN(OLargeSourceGVN.value());
1147 assert(OLargeSourceV.has_value() &&
"Value not found for GVN");
1150 std::optional<unsigned> OSourceGVN =
1151 SourceCand.
getGVN(OLargeSourceV.value());
1152 assert(OSourceGVN.has_value() &&
"GVN Number not found for Value");
1155 std::optional<unsigned> OSourceCanon =
1157 assert(OSourceCanon.has_value() &&
"Canon Number not found for GVN");
1162 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1164 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1170 assert(CurrCand.CanonNumToNumber.
size() == 0 &&
1171 "Canonical Relationship is non-empty");
1172 assert(CurrCand.NumberToCanonNum.
size() == 0 &&
1173 "Canonical Relationship is non-empty");
1175 unsigned CanonNum = 0;
1178 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1179 CurrCand.NumberToCanonNum.
insert(std::make_pair(NumToVal.first, CanonNum));
1180 CurrCand.CanonNumToNumber.
insert(std::make_pair(CanonNum, NumToVal.first));
1198static std::optional<
1199 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1211 auto IdxToCandidateIt = IndexToIncludedCand.
find(CandA.
getStartIdx());
1212 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1219 if (IdxToCandidateIt == IndexToIncludedCand.end())
1225 unsigned GroupNum = CandToGroup.
find(MatchedCand)->second;
1226 IncludedGroupAndCandA.
insert(std::make_pair(GroupNum, MatchedCand));
1227 IncludedGroupsA.
insert(GroupNum);
1232 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1233 if (IdxToCandidateIt == IndexToIncludedCand.end())
1239 unsigned GroupNum = CandToGroup.
find(MatchedCand)->second;
1240 IncludedGroupAndCandB.
insert(std::make_pair(GroupNum, MatchedCand));
1241 IncludedGroupsB.
insert(GroupNum);
1250 if (IncludedGroupsA.
empty())
1254 auto ItA = IncludedGroupAndCandA.
find(*IncludedGroupsA.
begin());
1255 auto ItB = IncludedGroupAndCandB.
find(*IncludedGroupsA.
begin());
1256 Result = std::make_pair(ItA->second, ItB->second);
1274 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1279 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1294 unsigned CurrentGroupNum = 0;
1295 unsigned OuterGroupNum;
1304 for (CandIt = CandsForRepSubstring.begin(),
1305 CandEndIt = CandsForRepSubstring.end();
1306 CandIt != CandEndIt; CandIt++) {
1310 std::tie(CandToGroupIt, Inserted) =
1311 CandToGroup.
try_emplace(&*CandIt, CurrentGroupNum);
1316 OuterGroupNum = CandToGroupIt->second;
1320 CurrentGroupPair = StructuralGroups.
find(OuterGroupNum);
1321 if (CurrentGroupPair == StructuralGroups.
end()) {
1323 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.
insert(
1331 for (InnerCandIt = std::next(CandIt),
1332 InnerCandEndIt = CandsForRepSubstring.end();
1333 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1336 CandToGroupItInner = CandToGroup.
find(&*InnerCandIt);
1337 if (CandToGroupItInner != CandToGroup.
end())
1342 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1344 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1349 if (LargerPair.has_value()) {
1350 SameStructure =
true;
1351 InnerCandIt->createCanonicalRelationFrom(
1352 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1353 CandToGroup.
insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1354 CurrentGroupPair->second.push_back(*InnerCandIt);
1360 ValueNumberMappingA.
clear();
1361 ValueNumberMappingB.
clear();
1363 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1367 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1368 ValueNumberMappingB);
1369 CandToGroup.
insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1370 CurrentGroupPair->second.push_back(*InnerCandIt);
1375void IRSimilarityIdentifier::findCandidates(
1376 std::vector<IRInstructionData *> &InstrList,
1377 std::vector<unsigned> &IntegerMapping) {
1380 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1381 std::vector<SimilarityGroup> NewCandidateGroups;
1392 std::vector<SuffixTree::RepeatedSubstring> RSes;
1398 return LHS.Length >
RHS.Length;
1402 CandsForRepSubstring);
1404 if (CandsForRepSubstring.size() < 2)
1408 IndexToIncludedCand, CandToGroup);
1409 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1413 if (Group.second.size() > 1) {
1414 SimilarityCandidates->push_back(Group.second);
1419 for (
unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1421 IndexToIncludedCand[
Idx].
insert(&IRCand);
1424 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1429 CandsForRepSubstring.clear();
1430 StructuralGroups.clear();
1431 NewCandidateGroups.clear();
1436 ArrayRef<std::unique_ptr<Module>> Modules) {
1439 std::vector<IRInstructionData *> InstrList;
1440 std::vector<unsigned> IntegerMapping;
1447 populateMapper(Modules, InstrList, IntegerMapping);
1448 findCandidates(InstrList, IntegerMapping);
1450 return *SimilarityCandidates;
1461 std::vector<IRInstructionData *> InstrList;
1462 std::vector<unsigned> IntegerMapping;
1464 populateMapper(M, InstrList, IntegerMapping);
1465 findCandidates(InstrList, IntegerMapping);
1467 return *SimilarityCandidates;
1471 "ir-similarity-identifier",
false,
true)
1489 IRSI->findSimilarity(M);
1499 IRSI.findSimilarity(M);
1506 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1509 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1510 OS << CandVec.size() <<
" candidates of length "
1511 << CandVec.begin()->getLength() <<
". Found in: \n";
1513 OS <<
" Function: " << Cand.front()->Inst->getFunction()->getName().str()
1514 <<
", Basic Block: ";
1515 if (Cand.front()->Inst->getParent()->getName().str() ==
"")
1518 OS << Cand.front()->Inst->getParent()->getName().str();
1519 OS <<
"\n Start Instruction: ";
1520 Cand.frontInstruction()->print(
OS);
1521 OS <<
"\n End Instruction: ";
1522 Cand.backInstruction()->print(
OS);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines generic set operations that may be used on set's of different types,...
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Conditional or Unconditional Branch instruction.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ ICMP_SGE
signed greater or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Predicate getPredicate() const
Return the predicate for this instruction.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent function types.
ArrayRef< Type * > params() const
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
LLVM_ABI void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static LLVM_ABI bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
unsigned getEndIdx() const
static LLVM_ABI bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
IRInstructionDataList::iterator iterator
Instruction * frontInstruction()
static LLVM_ABI bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static LLVM_ABI bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static LLVM_ABI void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static LLVM_ABI bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
unsigned getStartIdx() const
BasicBlock * getStartBB()
LLVM_ABI IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static LLVM_ABI bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static LLVM_ABI bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
void resetSimilarityCandidates()
LLVM_ABI SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
void visit(Iterator Start, Iterator End)
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
iterator find(const_arg_type_t< ValueT > V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
const ParentTy * getParent() const
typename ilist_select_iterator_type< OptionsT::has_iterator_bits, OptionsT, false, false >::type iterator
void push_back(reference Node)
Insert a node at the back; never copies.
@ C
The default llvm calling convention, compatible with C.
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
LLVM_ABI bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
LLVM_ABI StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
LLVM_ABI bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
static cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
A special type used by analysis passes to provide an address that identifies that particular analysis...
An information struct used to provide DenseMap with the various necessary components for a given valu...
This provides the utilities for hashing an Instruction to an unsigned integer.
LLVM_ABI StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
LLVM_ABI void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
LLVM_ABI IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
LLVM_ABI ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static LLVM_ABI CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
LLVM_ABI void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
LLVM_ABI void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
LLVM_ABI void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
LLVM_ABI CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
void initializeForBBs(Function &F, unsigned &BBNumber)
Assigns values to all the basic blocks in function F starting from integer BBNumber.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
LLVM_ABI IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
IRInstructionDataList * IDL
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
LLVM_ABI IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
LLVM_ABI unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
LLVM_ABI void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
LLVM_ABI unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A repeated substring in the tree.
SmallVector< unsigned > StartIndices
The start indices of each occurrence.
unsigned Length
The length of the string.