LLVM 22.0.0git
TypeHashing.h
Go to the documentation of this file.
1//===- TypeHashing.h ---------------------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
10#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
11
12#include "llvm/ADT/ArrayRef.h"
13#include "llvm/ADT/Hashing.h"
14#include "llvm/ADT/StringRef.h"
16
20
22
23#include <type_traits>
24
25namespace llvm {
26class raw_ostream;
27namespace codeview {
28
29/// A locally hashed type represents a straightforward hash code of a serialized
30/// record. The record is simply serialized, and then the bytes are hashed by
31/// a standard algorithm. This is sufficient for the case of de-duplicating
32/// records within a single sequence of types, because if two records both have
33/// a back-reference to the same type in the same stream, they will both have
34/// the same numeric value for the TypeIndex of the back reference.
38
39 /// Given a type, compute its local hash.
41
42 /// Given a sequence of types, compute all of the local hashes.
43 template <typename Range>
44 static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
45 std::vector<LocallyHashedType> Hashes;
46 Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
47 for (const auto &R : Records)
48 Hashes.push_back(hashType(R));
49
50 return Hashes;
51 }
52
53 static std::vector<LocallyHashedType>
55 std::vector<LocallyHashedType> Hashes;
56 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
57 Hashes.push_back(hashType(Type.RecordData));
58 });
59 return Hashes;
60 }
61};
62
64 SHA1 = 0, // standard 20-byte SHA1 hash
65 SHA1_8, // last 8-bytes of standard SHA1 hash
66 BLAKE3, // truncated 8-bytes BLAKE3
67};
68
69/// A globally hashed type represents a hash value that is sufficient to
70/// uniquely identify a record across multiple type streams or type sequences.
71/// This works by, for any given record A which references B, replacing the
72/// TypeIndex that refers to B with a previously-computed global hash for B. As
73/// this is a recursive algorithm (e.g. the global hash of B also depends on the
74/// global hashes of the types that B refers to), a global hash can uniquely
75/// identify that A occurs in another stream that has a completely
76/// different graph structure. Although the hash itself is slower to compute,
77/// probing is much faster with a globally hashed type, because the hash itself
78/// is considered "as good as" the original type. Since type records can be
79/// quite large, this makes the equality comparison of the hash much faster than
80/// equality comparison of a full record.
82 GloballyHashedType() = default;
84 : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
86 assert(H.size() == 8);
87 ::memcpy(Hash.data(), H.data(), 8);
88 }
89 std::array<uint8_t, 8> Hash;
90
91 bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
92
93 friend inline bool operator==(const GloballyHashedType &L,
94 const GloballyHashedType &R) {
95 return L.Hash == R.Hash;
96 }
97
98 friend inline bool operator!=(const GloballyHashedType &L,
99 const GloballyHashedType &R) {
100 return !(L.Hash == R.Hash);
101 }
102
103 /// Given a sequence of bytes representing a record, compute a global hash for
104 /// this record. Due to the nature of global hashes incorporating the hashes
105 /// of referenced records, this function requires a list of types and ids
106 /// that RecordData might reference, indexable by TypeIndex.
108 hashType(ArrayRef<uint8_t> RecordData,
109 ArrayRef<GloballyHashedType> PreviousTypes,
110 ArrayRef<GloballyHashedType> PreviousIds);
111
112 /// Given a sequence of bytes representing a record, compute a global hash for
113 /// this record. Due to the nature of global hashes incorporating the hashes
114 /// of referenced records, this function requires a list of types and ids
115 /// that RecordData might reference, indexable by TypeIndex.
117 ArrayRef<GloballyHashedType> PreviousTypes,
118 ArrayRef<GloballyHashedType> PreviousIds) {
119 return hashType(Type.RecordData, PreviousTypes, PreviousIds);
120 }
121
122 /// Given a sequence of combined type and ID records, compute global hashes
123 /// for each of them, returning the results in a vector of hashed types.
124 template <typename Range>
125 static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
126 std::vector<GloballyHashedType> Hashes;
127 bool UnresolvedRecords = false;
128 for (const auto &R : Records) {
129 GloballyHashedType H = hashType(R, Hashes, Hashes);
130 if (H.empty())
131 UnresolvedRecords = true;
132 Hashes.push_back(H);
133 }
134
135 // In some rare cases, there might be records with forward references in the
136 // stream. Several passes might be needed to fully hash each record in the
137 // Type stream. However this occurs on very small OBJs generated by MASM,
138 // with a dozen records at most. Therefore this codepath isn't
139 // time-critical, as it isn't taken in 99% of cases.
140 while (UnresolvedRecords) {
141 UnresolvedRecords = false;
142 auto HashIt = Hashes.begin();
143 for (const auto &R : Records) {
144 if (HashIt->empty()) {
145 GloballyHashedType H = hashType(R, Hashes, Hashes);
146 if (H.empty())
147 UnresolvedRecords = true;
148 else
149 *HashIt = H;
150 }
151 ++HashIt;
152 }
153 }
154
155 return Hashes;
156 }
157
158 /// Given a sequence of combined type and ID records, compute global hashes
159 /// for each of them, returning the results in a vector of hashed types.
160 template <typename Range>
161 static std::vector<GloballyHashedType>
163 std::vector<GloballyHashedType> IdHashes;
164 for (const auto &R : Records)
165 IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
166
167 return IdHashes;
168 }
169
170 static std::vector<GloballyHashedType>
172 std::vector<GloballyHashedType> Hashes;
173 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
174 Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
175 });
176 return Hashes;
177 }
178};
179static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
180 "GloballyHashedType must be trivially copyable so that we can "
181 "reinterpret_cast arrays of hash data to arrays of "
182 "GloballyHashedType");
183} // namespace codeview
184
185template <> struct DenseMapInfo<codeview::LocallyHashedType> {
188
189 static codeview::LocallyHashedType getEmptyKey() { return Empty; }
190
191 static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
192
194 return Val.Hash;
195 }
196
199 if (LHS.Hash != RHS.Hash)
200 return false;
201 return LHS.RecordData == RHS.RecordData;
202 }
203};
204
205template <> struct DenseMapInfo<codeview::GloballyHashedType> {
208
209 static codeview::GloballyHashedType getEmptyKey() { return Empty; }
210
211 static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
212
214 return *reinterpret_cast<const unsigned *>(Val.Hash.data());
215 }
216
219 return LHS == RHS;
220 }
221};
222
223template <> struct format_provider<codeview::LocallyHashedType> {
224public:
226 llvm::raw_ostream &Stream, StringRef Style) {
227 write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
228 }
229};
230
231template <> struct format_provider<codeview::GloballyHashedType> {
232public:
234 llvm::raw_ostream &Stream, StringRef Style) {
235 for (uint8_t B : V.Hash) {
236 write_hex(Stream, B, HexPrintStyle::Upper, 2);
237 }
238 }
239};
240
241} // namespace llvm
242
243#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition: Compiler.h:213
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A class that wrap the SHA1 algorithm.
Definition: SHA1.h:27
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A 32-bit type reference.
Definition: TypeIndex.h:97
An opaque object representing a hash code.
Definition: Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
static unsigned getHashValue(codeview::GloballyHashedType Val)
Definition: TypeHashing.h:213
static LLVM_ABI codeview::GloballyHashedType Empty
Definition: TypeHashing.h:206
static codeview::GloballyHashedType getEmptyKey()
Definition: TypeHashing.h:209
static codeview::GloballyHashedType getTombstoneKey()
Definition: TypeHashing.h:211
static bool isEqual(codeview::GloballyHashedType LHS, codeview::GloballyHashedType RHS)
Definition: TypeHashing.h:217
static LLVM_ABI codeview::GloballyHashedType Tombstone
Definition: TypeHashing.h:207
static codeview::LocallyHashedType getEmptyKey()
Definition: TypeHashing.h:189
static codeview::LocallyHashedType getTombstoneKey()
Definition: TypeHashing.h:191
static LLVM_ABI codeview::LocallyHashedType Empty
Definition: TypeHashing.h:186
static unsigned getHashValue(codeview::LocallyHashedType Val)
Definition: TypeHashing.h:193
static bool isEqual(codeview::LocallyHashedType LHS, codeview::LocallyHashedType RHS)
Definition: TypeHashing.h:197
static LLVM_ABI codeview::LocallyHashedType Tombstone
Definition: TypeHashing.h:187
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:54
A globally hashed type represents a hash value that is sufficient to uniquely identify a record acros...
Definition: TypeHashing.h:81
static GloballyHashedType hashType(CVType Type, ArrayRef< GloballyHashedType > PreviousTypes, ArrayRef< GloballyHashedType > PreviousIds)
Given a sequence of bytes representing a record, compute a global hash for this record.
Definition: TypeHashing.h:116
static std::vector< GloballyHashedType > hashIds(Range &&Records, ArrayRef< GloballyHashedType > TypeHashes)
Given a sequence of combined type and ID records, compute global hashes for each of them,...
Definition: TypeHashing.h:162
static std::vector< GloballyHashedType > hashTypes(Range &&Records)
Given a sequence of combined type and ID records, compute global hashes for each of them,...
Definition: TypeHashing.h:125
static std::vector< GloballyHashedType > hashTypeCollection(TypeCollection &Types)
Definition: TypeHashing.h:171
GloballyHashedType(ArrayRef< uint8_t > H)
Definition: TypeHashing.h:85
friend bool operator==(const GloballyHashedType &L, const GloballyHashedType &R)
Definition: TypeHashing.h:93
static LLVM_ABI GloballyHashedType hashType(ArrayRef< uint8_t > RecordData, ArrayRef< GloballyHashedType > PreviousTypes, ArrayRef< GloballyHashedType > PreviousIds)
Given a sequence of bytes representing a record, compute a global hash for this record.
Definition: TypeHashing.cpp:33
std::array< uint8_t, 8 > Hash
Definition: TypeHashing.h:89
friend bool operator!=(const GloballyHashedType &L, const GloballyHashedType &R)
Definition: TypeHashing.h:98
A locally hashed type represents a straightforward hash code of a serialized record.
Definition: TypeHashing.h:35
static std::vector< LocallyHashedType > hashTypeCollection(TypeCollection &Types)
Definition: TypeHashing.h:54
static LLVM_ABI LocallyHashedType hashType(ArrayRef< uint8_t > RecordData)
Given a type, compute its local hash.
Definition: TypeHashing.cpp:28
ArrayRef< uint8_t > RecordData
Definition: TypeHashing.h:37
static std::vector< LocallyHashedType > hashTypes(Range &&Records)
Given a sequence of types, compute all of the local hashes.
Definition: TypeHashing.h:44
static void format(const codeview::GloballyHashedType &V, llvm::raw_ostream &Stream, StringRef Style)
Definition: TypeHashing.h:233
static void format(const codeview::LocallyHashedType &V, llvm::raw_ostream &Stream, StringRef Style)
Definition: TypeHashing.h:225