LLVM 22.0.0git
TrieRawHashMap.h
Go to the documentation of this file.
1//===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_ADT_TRIERAWHASHMAP_H
10#define LLVM_ADT_TRIERAWHASHMAP_H
11
12#include "llvm/ADT/ArrayRef.h"
14#include <atomic>
15#include <optional>
16
17namespace llvm {
18
19class raw_ostream;
20
21/// TrieRawHashMap - is a lock-free thread-safe trie that is can be used to
22/// store/index data based on a hash value. It can be customized to work with
23/// any hash algorithm or store any data.
24///
25/// Data structure:
26/// Data node stored in the Trie contains both hash and data:
27/// struct {
28/// HashT Hash;
29/// DataT Data;
30/// };
31///
32/// Data is stored/indexed via a prefix tree, where each node in the tree can be
33/// either the root, a sub-trie or a data node. Assuming a 4-bit hash and two
34/// data objects {0001, A} and {0100, B}, it can be stored in a trie
35/// (assuming Root has 2 bits, SubTrie has 1 bit):
36/// +--------+
37/// |Root[00]| -> {0001, A}
38/// | [01]| -> {0100, B}
39/// | [10]| (empty)
40/// | [11]| (empty)
41/// +--------+
42///
43/// Inserting a new object {0010, C} will result in:
44/// +--------+ +----------+
45/// |Root[00]| -> |SubTrie[0]| -> {0001, A}
46/// | | | [1]| -> {0010, C}
47/// | | +----------+
48/// | [01]| -> {0100, B}
49/// | [10]| (empty)
50/// | [11]| (empty)
51/// +--------+
52/// Note object A is sunk down to a sub-trie during the insertion. All the
53/// nodes are inserted through compare-exchange to ensure thread-safe and
54/// lock-free.
55///
56/// To find an object in the trie, walk the tree with prefix of the hash until
57/// the data node is found. Then the hash is compared with the hash stored in
58/// the data node to see if the is the same object.
59///
60/// Hash collision is not allowed so it is recommended to use trie with a
61/// "strong" hashing algorithm. A well-distributed hash can also result in
62/// better performance and memory usage.
63///
64/// It currently does not support iteration and deletion.
65
66/// Base class for a lock-free thread-safe hash-mapped trie.
68public:
69 static constexpr size_t TrieContentBaseSize = 4;
70 static constexpr size_t DefaultNumRootBits = 6;
71 static constexpr size_t DefaultNumSubtrieBits = 4;
72
73private:
74 template <class T> struct AllocValueType {
76 alignas(T) char Content[sizeof(T)];
77 };
78
79protected:
80 template <class T>
81 static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType<T>);
82
83 template <class T>
84 static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType<T>);
85
86 template <class T>
87 static constexpr size_t DefaultContentOffset =
88 offsetof(AllocValueType<T>, Content);
89
90public:
91 static void *operator new(size_t Size) { return ::operator new(Size); }
92 void operator delete(void *Ptr) { ::operator delete(Ptr); }
93
94#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
95 LLVM_DUMP_METHOD void dump() const;
96#endif
97
98 LLVM_ABI void print(raw_ostream &OS) const;
99
100protected:
101 /// Result of a lookup. Suitable for an insertion hint. Maybe could be
102 /// expanded into an iterator of sorts, but likely not useful (visiting
103 /// everything in the trie should probably be done some way other than
104 /// through an iterator pattern).
106 protected:
107 void *get() const { return I == -2u ? P : nullptr; }
108
109 public:
110 PointerBase() noexcept = default;
111
112 private:
114 explicit PointerBase(void *Content) : P(Content), I(-2u) {}
115 PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {}
116
117 bool isHint() const { return I != -1u && I != -2u; }
118
119 void *P = nullptr;
120 unsigned I = -1u;
121 unsigned B = 0;
122 };
123
124 /// Find the stored content with hash.
125 LLVM_ABI PointerBase find(ArrayRef<uint8_t> Hash) const;
126
127 /// Insert and return the stored content.
129 insert(PointerBase Hint, ArrayRef<uint8_t> Hash,
130 function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)>
131 Constructor);
132
134
136 size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset,
137 std::optional<size_t> NumRootBits = std::nullopt,
138 std::optional<size_t> NumSubtrieBits = std::nullopt);
139
140 /// Destructor, which asserts if there's anything to do. Subclasses should
141 /// call \a destroyImpl().
142 ///
143 /// \pre \a destroyImpl() was already called.
145 LLVM_ABI void destroyImpl(function_ref<void(void *ValueMem)> Destructor);
146
148
149 // Move assignment is not supported as it is not thread-safe.
152
153 // No copy.
157
158 // Debug functions. Implementation details and not guaranteed to be
159 // thread-safe.
161 LLVM_ABI unsigned getStartBit(PointerBase P) const;
162 LLVM_ABI unsigned getNumBits(PointerBase P) const;
163 LLVM_ABI unsigned getNumSlotUsed(PointerBase P) const;
164 LLVM_ABI std::string getTriePrefixAsString(PointerBase P) const;
165 LLVM_ABI unsigned getNumTries() const;
166 // Visit next trie in the allocation chain.
168
169private:
171 const unsigned short ContentAllocSize;
172 const unsigned short ContentAllocAlign;
173 const unsigned short ContentOffset;
174 unsigned short NumRootBits;
175 unsigned short NumSubtrieBits;
176 class ImplType;
177 // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in
178 // destroyImpl.
179 std::atomic<ImplType *> ImplPtr;
180 ImplType &getOrCreateImpl();
181 ImplType *getImpl() const;
182};
183
184/// Lock-free thread-safe hash-mapped trie.
185template <class T, size_t NumHashBytes>
187public:
188 using HashT = std::array<uint8_t, NumHashBytes>;
189
191 struct value_type {
192 const HashT Hash;
194
195 value_type(value_type &&) = default;
196 value_type(const value_type &) = default;
197
199 : Hash(makeHash(Hash)), Data(Data) {}
202
203 private:
205
206 struct EmplaceTag {};
207 template <class... ArgsT>
208 value_type(ArrayRef<uint8_t> Hash, EmplaceTag, ArgsT &&...Args)
209 : Hash(makeHash(Hash)), Data(std::forward<ArgsT>(Args)...) {}
210
211 static HashT makeHash(ArrayRef<uint8_t> HashRef) {
212 HashT Hash;
213 std::copy(HashRef.begin(), HashRef.end(), Hash.data());
214 return Hash;
215 }
216 };
217
218 using ThreadSafeTrieRawHashMapBase::operator delete;
220
221#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
223#endif
224
226
227private:
228 template <class ValueT> class PointerImpl : PointerBase {
229 friend class ThreadSafeTrieRawHashMap;
230
231 ValueT *get() const {
232 return reinterpret_cast<ValueT *>(PointerBase::get());
233 }
234
235 public:
236 ValueT &operator*() const {
237 assert(get());
238 return *get();
239 }
240 ValueT *operator->() const {
241 assert(get());
242 return get();
243 }
244 explicit operator bool() const { return get(); }
245
246 PointerImpl() = default;
247
248 protected:
249 PointerImpl(PointerBase Result) : PointerBase(Result) {}
250 };
251
252public:
253 class pointer;
254 class const_pointer;
255 class pointer : public PointerImpl<value_type> {
257 friend class const_pointer;
258
259 public:
260 pointer() = default;
261
262 private:
263 pointer(PointerBase Result) : pointer::PointerImpl(Result) {}
264 };
265
266 class const_pointer : public PointerImpl<const value_type> {
268
269 public:
270 const_pointer() = default;
271 const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {}
272
273 private:
274 const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {}
275 };
276
278 public:
280 assert(Mem && "Constructor already called, or moved away");
281 return assign(::new (Mem) value_type(Hash, std::move(RHS)));
282 }
284 assert(Mem && "Constructor already called, or moved away");
285 return assign(::new (Mem) value_type(Hash, RHS));
286 }
287 template <class... ArgsT> value_type &emplace(ArgsT &&...Args) {
288 assert(Mem && "Constructor already called, or moved away");
289 return assign(::new (Mem)
290 value_type(Hash, typename value_type::EmplaceTag{},
291 std::forward<ArgsT>(Args)...));
292 }
293
295 : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) {
296 RHS.Mem = nullptr; // Moved away, cannot call.
297 }
298 ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); }
299
300 private:
301 value_type &assign(value_type *V) {
302 Mem = nullptr;
303 Result = V;
304 return *V;
305 }
307 LazyValueConstructor() = delete;
308 LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef<uint8_t> Hash)
309 : Mem(Mem), Result(Result), Hash(Hash) {
310 assert(Hash.size() == sizeof(HashT) && "Invalid hash");
311 assert(Mem && "Invalid memory for construction");
312 }
313 void *Mem;
314 value_type *&Result;
316 };
317
318 /// Insert with a hint. Default-constructed hint will work, but it's
319 /// recommended to start with a lookup to avoid overhead in object creation
320 /// if it already exists.
321 pointer insertLazy(const_pointer Hint, ArrayRef<uint8_t> Hash,
322 function_ref<void(LazyValueConstructor)> OnConstruct) {
324 Hint, Hash, [&](void *Mem, ArrayRef<uint8_t> Hash) {
325 value_type *Result = nullptr;
326 OnConstruct(LazyValueConstructor(Mem, Result, Hash));
327 return Result->Hash.data();
328 }));
329 }
330
332 function_ref<void(LazyValueConstructor)> OnConstruct) {
333 return insertLazy(const_pointer(), Hash, OnConstruct);
334 }
335
336 pointer insert(const_pointer Hint, value_type &&HashedData) {
337 return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) {
338 C(std::move(HashedData.Data));
339 });
340 }
341
342 pointer insert(const_pointer Hint, const value_type &HashedData) {
343 return insertLazy(Hint, HashedData.Hash,
344 [&](LazyValueConstructor C) { C(HashedData.Data); });
345 }
346
347 pointer find(ArrayRef<uint8_t> Hash) {
348 assert(Hash.size() == std::tuple_size<HashT>::value);
350 }
351
352 const_pointer find(ArrayRef<uint8_t> Hash) const {
353 assert(Hash.size() == std::tuple_size<HashT>::value);
355 }
356
357 ThreadSafeTrieRawHashMap(std::optional<size_t> NumRootBits = std::nullopt,
358 std::optional<size_t> NumSubtrieBits = std::nullopt)
360 DefaultContentAllocAlign<value_type>,
361 DefaultContentOffset<value_type>,
362 NumRootBits, NumSubtrieBits) {}
363
365 if constexpr (std::is_trivially_destructible<value_type>::value)
366 this->destroyImpl(nullptr);
367 else
368 this->destroyImpl(
369 [](void *P) { static_cast<value_type *>(P)->~value_type(); });
370 }
371
372 // Move constructor okay.
374
375 // No move assignment or any copy.
380};
381
382} // namespace llvm
383
384#endif // LLVM_ADT_TRIERAWHASHMAP_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define offsetof(TYPE, MEMBER)
#define I(x, y, z)
Definition MD5.cpp:58
#define T
#define P(N)
Value * RHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
LLVM_ABI PointerBase getNextTrie(PointerBase P) const
LLVM_ABI unsigned getNumTries() const
LLVM_ABI unsigned getNumBits(PointerBase P) const
ThreadSafeTrieRawHashMapBase & operator=(const ThreadSafeTrieRawHashMapBase &)=delete
LLVM_ABI unsigned getNumSlotUsed(PointerBase P) const
LLVM_ABI ~ThreadSafeTrieRawHashMapBase()
Destructor, which asserts if there's anything to do.
LLVM_ABI void destroyImpl(function_ref< void(void *ValueMem)> Destructor)
static constexpr size_t DefaultContentAllocSize
static constexpr size_t DefaultContentAllocAlign
static constexpr size_t DefaultContentOffset
static constexpr size_t DefaultNumSubtrieBits
LLVM_ABI PointerBase find(ArrayRef< uint8_t > Hash) const
Find the stored content with hash.
LLVM_DUMP_METHOD void dump() const
LLVM_ABI PointerBase getRoot() const
LLVM_ABI PointerBase insert(PointerBase Hint, ArrayRef< uint8_t > Hash, function_ref< const uint8_t *(void *Mem, ArrayRef< uint8_t > Hash)> Constructor)
Insert and return the stored content.
LLVM_ABI unsigned getStartBit(PointerBase P) const
LLVM_ABI void print(raw_ostream &OS) const
ThreadSafeTrieRawHashMapBase & operator=(ThreadSafeTrieRawHashMapBase &&RHS)=delete
LLVM_ABI std::string getTriePrefixAsString(PointerBase P) const
static constexpr size_t TrieContentBaseSize
static constexpr size_t DefaultNumRootBits
ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &)=delete
ThreadSafeTrieRawHashMap & operator=(const ThreadSafeTrieRawHashMap &)=delete
pointer insertLazy(ArrayRef< uint8_t > Hash, function_ref< void(LazyValueConstructor)> OnConstruct)
ThreadSafeTrieRawHashMap & operator=(ThreadSafeTrieRawHashMap &&)=delete
pointer find(ArrayRef< uint8_t > Hash)
std::array< uint8_t, NumHashBytes > HashT
pointer insertLazy(const_pointer Hint, ArrayRef< uint8_t > Hash, function_ref< void(LazyValueConstructor)> OnConstruct)
Insert with a hint.
const_pointer find(ArrayRef< uint8_t > Hash) const
pointer insert(const_pointer Hint, value_type &&HashedData)
pointer insert(const_pointer Hint, const value_type &HashedData)
ThreadSafeTrieRawHashMap(std::optional< size_t > NumRootBits=std::nullopt, std::optional< size_t > NumSubtrieBits=std::nullopt)
ThreadSafeTrieRawHashMap(const ThreadSafeTrieRawHashMap &)=delete
ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap &&)=default
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2235
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1847
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:851
value_type(const value_type &)=default
value_type(ArrayRef< uint8_t > Hash, T &&Data)
value_type(ArrayRef< uint8_t > Hash, const T &Data)