CCF
Loading...
Searching...
No Matches
champ_map.h
Go to the documentation of this file.
1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the Apache 2.0 License.
3#pragma once
4
5#include "ccf/ds/hash.h"
6#include "ds/ccf_assert.h"
8
9#include <algorithm>
10#include <array>
11#include <memory>
12#include <optional>
13#include <vector>
14
15namespace champ
16{
17 // A persistent hash map based on the Compressed Hash-Array Mapped Prefix-tree
18 // from 'Fast and Lean Immutable Multi-Maps on the JVM based on Heterogeneous
19 // Hash-Array Mapped Tries' by Michael J. Steindorfer and Jurgen J. Vinju
20 // (https://arxiv.org/pdf/1608.01036.pdf).
21
22 static constexpr size_t index_mask_bits = 5;
23 static constexpr size_t index_mask = (1 << index_mask_bits) - 1;
24
25 using Hash = uint32_t;
26 static constexpr size_t hash_bits = sizeof(Hash) * 8;
27
28 using SmallIndex = uint8_t;
29 static constexpr size_t small_index_bits = sizeof(SmallIndex) * 8;
30 static_assert(small_index_bits > index_mask_bits);
31
32 static constexpr size_t collision_node_bits = hash_bits % index_mask_bits;
33 static_assert(collision_node_bits > 0);
34 static constexpr SmallIndex collision_depth = hash_bits / index_mask_bits;
35 static constexpr size_t collision_bins = 1 << collision_node_bits;
36
37 static constexpr SmallIndex mask(Hash hash, SmallIndex depth)
38 {
39 return (hash >> ((Hash)depth * index_mask_bits)) & index_mask;
40 }
41
42 template <class K, class V, class H = std::hash<K>>
43 class Snapshot;
44
45 class Bitmap
46 {
47 uint32_t _bits;
48
49 public:
50 constexpr Bitmap() : _bits(0) {}
51
52 constexpr Bitmap(uint32_t bits) : _bits(bits) {}
53
54 constexpr Bitmap operator&(const Bitmap& other) const
55 {
56 return Bitmap(_bits & other._bits);
57 }
58
59 constexpr SmallIndex pop() const
60 {
61 return __builtin_popcount(_bits);
62 }
63
64 constexpr Bitmap set(SmallIndex idx) const
65 {
66 return Bitmap(_bits | ((uint32_t)1 << idx));
67 }
68
69 constexpr Bitmap clear(SmallIndex idx) const
70 {
71 return Bitmap(_bits & ~((uint32_t)1 << idx));
72 }
73
74 constexpr bool check(SmallIndex idx) const
75 {
76 return (_bits & ((uint32_t)1 << idx)) != 0;
77 }
78 };
79
80 template <class K, class V, class H>
81 struct SubNodes;
82
83 template <class K, class V>
84 struct Entry
85 {
86 K key;
88
89 Entry(K k, V v) : key(k), value(v) {}
90
91 const V* getp(const K& k) const
92 {
93 if (k == key)
94 return &value;
95 else
96 return nullptr;
97 }
98 };
99
100 template <class K, class V, class H>
101 using Node = std::shared_ptr<void>;
102
103 template <class K, class V, class H>
105 {
106 std::array<std::vector<std::shared_ptr<Entry<K, V>>>, collision_bins> bins;
107
108 const V* getp(Hash hash, const K& k) const
109 {
110 const auto idx = mask(hash, collision_depth);
111 const auto& bin = bins[idx];
112 for (const auto& node : bin)
113 {
114 if (k == node->key)
115 return &node->value;
116 }
117 return nullptr;
118 }
119
120 size_t put_mut(Hash hash, const K& k, const V& v)
121 {
122 const auto idx = mask(hash, collision_depth);
123 auto& bin = bins[idx];
124 for (size_t i = 0; i < bin.size(); ++i)
125 {
126 const auto& entry = bin[i];
127 if (k == entry->key)
128 {
129 const auto diff = map::get_serialized_size_with_padding(entry->key) +
130 map::get_serialized_size_with_padding(entry->value);
131 bin[i] = std::make_shared<Entry<K, V>>(k, v);
132 return diff;
133 }
134 }
135 bin.push_back(std::make_shared<Entry<K, V>>(k, v));
136 return 0;
137 }
138
139 size_t remove_mut(Hash hash, const K& k)
140 {
141 const auto idx = mask(hash, collision_depth);
142 auto& bin = bins[idx];
143 for (size_t i = 0; i < bin.size(); ++i)
144 {
145 const auto& entry = bin[i];
146 if (k == entry->key)
147 {
148 const auto diff = map::get_serialized_size_with_padding(entry->key) +
149 map::get_serialized_size_with_padding(entry->value);
150 bin.erase(bin.begin() + i);
151 return diff;
152 }
153 }
154 return 0;
155 }
156
157 template <class F>
158 bool foreach(F&& f) const
159 {
160 for (const auto& bin : bins)
161 {
162 for (const auto& entry : bin)
163 if (!f(entry->key, entry->value))
164 return false;
165 }
166 return true;
167 }
168 };
169
170 template <class K, class V, class H>
171 struct SubNodes
172 {
173 std::vector<Node<K, V, H>> nodes;
176
178
179 SubNodes(std::vector<Node<K, V, H>>&& ns) : nodes(std::move(ns)) {}
180
181 SubNodes(std::vector<Node<K, V, H>>&& ns, Bitmap nm, Bitmap dm) :
182 nodes(std::move(ns)),
183 node_map(nm),
184 data_map(dm)
185 {}
186
188 {
189 if (!node_map.check(idx) && !data_map.check(idx))
190 return (SmallIndex)-1;
191
192 const auto mask = Bitmap(~((uint32_t)-1 << idx));
193 if (data_map.check(idx))
194 return (data_map & mask).pop();
195
196 return data_map.pop() + (node_map & mask).pop();
197 }
198
199 const V* getp(SmallIndex depth, Hash hash, const K& k) const
200 {
201 const auto idx = mask(hash, depth);
202 const auto c_idx = compressed_idx(idx);
203
204 if (c_idx == (SmallIndex)-1)
205 return nullptr;
206
207 if (data_map.check(idx))
208 return node_as<Entry<K, V>>(c_idx)->getp(k);
209
210 if (depth == (collision_depth - 1))
211 return node_as<Collisions<K, V, H>>(c_idx)->getp(hash, k);
212
213 return node_as<SubNodes<K, V, H>>(c_idx)->getp(depth + 1, hash, k);
214 }
215
216 // Returns serialised size of overwritten (k,v) if k exists, 0 otherwise
217 size_t put_mut(SmallIndex depth, Hash hash, const K& k, const V& v)
218 {
219 const auto idx = mask(hash, depth);
220 auto c_idx = compressed_idx(idx);
221
222 if (c_idx == (SmallIndex)-1)
223 {
224 data_map = data_map.set(idx);
225 c_idx = compressed_idx(idx);
226 nodes.insert(
227 nodes.begin() + c_idx, std::make_shared<Entry<K, V>>(k, v));
228 return 0;
229 }
230
231 if (node_map.check(idx))
232 {
233 size_t insert;
234 if (depth < (collision_depth - 1))
235 {
236 auto sn = *node_as<SubNodes<K, V, H>>(c_idx);
237 insert = sn.put_mut(depth + 1, hash, k, v);
238 nodes[c_idx] = std::make_shared<SubNodes<K, V, H>>(std::move(sn));
239 }
240 else
241 {
242 auto sn = *node_as<Collisions<K, V, H>>(c_idx);
243 insert = sn.put_mut(hash, k, v);
244 nodes[c_idx] = std::make_shared<Collisions<K, V, H>>(std::move(sn));
245 }
246 return insert;
247 }
248
249 const auto& entry0 = node_as<Entry<K, V>>(c_idx);
250 if (k == entry0->key)
251 {
252 auto current_size = map::get_serialized_size_with_padding(entry0->key) +
253 map::get_serialized_size_with_padding(entry0->value);
254 nodes[c_idx] = std::make_shared<Entry<K, V>>(k, v);
255 return current_size;
256 }
257
258 if (depth < (collision_depth - 1))
259 {
260 const auto hash0 = H()(entry0->key);
261 const auto idx0 = mask(hash0, depth + 1);
262 auto sub_node =
263 SubNodes<K, V, H>({entry0}, Bitmap(0), Bitmap(0).set(idx0));
264 sub_node.put_mut(depth + 1, hash, k, v);
265
266 nodes.erase(nodes.begin() + c_idx);
267 data_map = data_map.clear(idx);
268 node_map = node_map.set(idx);
269 c_idx = compressed_idx(idx);
270 nodes.insert(
271 nodes.begin() + c_idx,
272 std::make_shared<SubNodes<K, V, H>>(std::move(sub_node)));
273 }
274 else
275 {
276 auto sub_node = Collisions<K, V, H>();
277 const auto hash0 = H()(entry0->key);
278 const auto idx0 = mask(hash0, collision_depth);
279 sub_node.bins[idx0].push_back(entry0);
280 const auto idx1 = mask(hash, collision_depth);
281 sub_node.bins[idx1].push_back(std::make_shared<Entry<K, V>>(k, v));
282
283 nodes.erase(nodes.begin() + c_idx);
284 data_map = data_map.clear(idx);
285 node_map = node_map.set(idx);
286 c_idx = compressed_idx(idx);
287 nodes.insert(
288 nodes.begin() + c_idx,
289 std::make_shared<Collisions<K, V, H>>(std::move(sub_node)));
290 }
291 return 0;
292 }
293
294 std::pair<std::shared_ptr<SubNodes<K, V, H>>, size_t> put(
295 SmallIndex depth, Hash hash, const K& k, const V& v) const
296 {
297 auto node = *this;
298 auto r = node.put_mut(depth, hash, k, v);
299 return std::make_pair(
300 std::make_shared<SubNodes<K, V, H>>(std::move(node)), r);
301 }
302
303 // Returns serialised size of removed (k,v) if k exists, 0 otherwise
304 size_t remove_mut(SmallIndex depth, Hash hash, const K& k)
305 {
306 const auto idx = mask(hash, depth);
307 const auto c_idx = compressed_idx(idx);
308
309 if (c_idx == (SmallIndex)-1)
310 return 0;
311
312 if (data_map.check(idx))
313 {
314 const auto& entry = node_as<Entry<K, V>>(c_idx);
315 if (entry->key != k)
316 return 0;
317
318 const auto diff = map::get_serialized_size_with_padding(entry->key) +
319 map::get_serialized_size_with_padding(entry->value);
320 nodes.erase(nodes.begin() + c_idx);
321 data_map = data_map.clear(idx);
322 return diff;
323 }
324
325 if (depth == (collision_depth - 1))
326 {
327 auto sn = *node_as<Collisions<K, V, H>>(c_idx);
328 const auto diff = sn.remove_mut(hash, k);
329 nodes[c_idx] = std::make_shared<Collisions<K, V, H>>(std::move(sn));
330 return diff;
331 }
332
333 auto sn = *node_as<SubNodes<K, V, H>>(c_idx);
334 const auto diff = sn.remove_mut(depth + 1, hash, k);
335 nodes[c_idx] = std::make_shared<SubNodes<K, V, H>>(std::move(sn));
336 return diff;
337 }
338
339 std::pair<std::shared_ptr<SubNodes<K, V, H>>, size_t> remove(
340 SmallIndex depth, Hash hash, const K& k) const
341 {
342 auto node = *this;
343 auto r = node.remove_mut(depth, hash, k);
344 return std::make_pair(
345 std::make_shared<SubNodes<K, V, H>>(std::move(node)), r);
346 }
347
348 template <class F>
349 bool foreach(SmallIndex depth, F&& f) const
350 {
351 const auto entries = data_map.pop();
352 for (SmallIndex i = 0; i < entries; ++i)
353 {
354 const auto& entry = node_as<Entry<K, V>>(i);
355 if (!f(entry->key, entry->value))
356 return false;
357 }
358 for (size_t i = entries; i < nodes.size(); ++i)
359 {
360 if (depth == (collision_depth - 1))
361 {
362 if (!node_as<Collisions<K, V, H>>(i)->foreach(std::forward<F>(f)))
363 return false;
364 }
365 else
366 {
367 if (!node_as<SubNodes<K, V, H>>(i)->foreach(
368 depth + 1, std::forward<F>(f)))
369 return false;
370 }
371 }
372 return true;
373 }
374
375 private:
376 template <class A>
377 const std::shared_ptr<A>& node_as(SmallIndex c_idx) const
378 {
379 return reinterpret_cast<const std::shared_ptr<A>&>(nodes[c_idx]);
380 }
381 };
382
383 template <class K, class V, class H = std::hash<K>>
384 class Map
385 {
386 private:
387 std::shared_ptr<SubNodes<K, V, H>> root;
388 size_t map_size = 0;
389 size_t serialized_size = 0;
390
391 Map(
392 std::shared_ptr<SubNodes<K, V, H>>&& root_,
393 size_t size_,
394 size_t serialized_size_) :
395 root(std::move(root_)),
396 map_size(size_),
397 serialized_size(serialized_size_)
398 {}
399
400 public:
401 using KeyType = K;
402 using ValueType = V;
404
405 Map() : root(std::make_shared<SubNodes<K, V, H>>()) {}
406
407 size_t size() const
408 {
409 return map_size;
410 }
411
412 size_t get_serialized_size() const
413 {
414 return serialized_size;
415 }
416
417 bool empty() const
418 {
419 return map_size == 0;
420 }
421
422 std::optional<V> get(const K& key) const
423 {
424 auto v = root->getp(0, H()(key), key);
425
426 if (v)
427 return *v;
428 else
429 return {};
430 }
431
432 const V* getp(const K& key) const
433 {
434 return root->getp(0, H()(key), key);
435 }
436
437 const Map<K, V, H> put(const K& key, const V& value) const
438 {
439 auto r = root->put(0, H()(key), key, value);
440 auto size_ = map_size;
441 if (r.second == 0)
442 size_++;
443
444 const auto size_change = (map::get_serialized_size_with_padding(key) +
445 map::get_serialized_size_with_padding(value)) -
446 r.second;
447
448 return Map(std::move(r.first), size_, size_change + serialized_size);
449 }
450
451 const Map<K, V, H> remove(const K& key) const
452 {
453 auto r = root->remove(0, H()(key), key);
454 auto size_ = map_size;
455 if (r.second > 0)
456 size_--;
457
458 return Map(std::move(r.first), size_, serialized_size - r.second);
459 }
460
461 template <class F>
462 bool foreach(F&& f) const
463 {
464 return root->foreach(0, std::forward<F>(f));
465 }
466
467 std::unique_ptr<Snapshot> make_snapshot() const
468 {
469 return std::make_unique<Snapshot>(*this);
470 }
471 };
472
473 template <class K, class V, class H>
475 {
476 private:
477 const Map<K, V, H> map;
478
479 struct KVTuple
480 {
481 K* k;
482 Hash h_k;
483 V* v;
484
485 KVTuple(K* k_, Hash h_k_, V* v_) : k(k_), h_k(h_k_), v(v_) {}
486 };
487
488 public:
489 Snapshot(const Map<K, V, H>& map_) : map(map_) {}
490
492 {
493 return map.get_serialized_size();
494 }
495
496 void serialize(uint8_t* data)
497 {
498 std::vector<KVTuple> ordered_state;
499 ordered_state.reserve(map.size());
500 size_t serialized_size = 0;
501
502 map.foreach([&ordered_state, &serialized_size](auto& key, auto& value) {
503 K* k = &key;
504 V* v = &value;
505 uint32_t key_size = map::get_serialized_size_with_padding(key);
506 uint32_t value_size = map::get_serialized_size_with_padding(value);
507 serialized_size += (key_size + value_size);
508
509 ordered_state.emplace_back(k, static_cast<Hash>(H()(key)), v);
510
511 return true;
512 });
513
514 // Sort keys to be able to generate byte-for-byte serialised snapshot from
515 // the same state
516 std::sort(
517 ordered_state.begin(), ordered_state.end(), [](KVTuple& i, KVTuple& j) {
518 return i.h_k < j.h_k;
519 });
520
522 serialized_size == map.get_serialized_size(),
523 "Serialized size:{}, map.get_serialized_size():{} (map count:{}, "
524 "ordered state count:{})",
525 serialized_size,
526 map.get_serialized_size(),
527 map.size(),
528 ordered_state.size());
529
530 for (const auto& p : ordered_state)
531 {
532 // Serialize the key
533 uint32_t key_size = map::serialize(*p.k, data, serialized_size);
534 map::add_padding(key_size, data, serialized_size);
535
536 // Serialize the value
537 uint32_t value_size = map::serialize(*p.v, data, serialized_size);
538 map::add_padding(value_size, data, serialized_size);
539 }
540
542 serialized_size == 0,
543 "Serialization buffer is not complete, remaining:{}/{}",
544 serialized_size,
545 map.get_serialized_size());
546 }
547 };
548
549}
#define CCF_ASSERT_FMT(expr,...)
Definition ccf_assert.h:10
Definition champ_map.h:46
constexpr Bitmap operator&(const Bitmap &other) const
Definition champ_map.h:54
constexpr Bitmap()
Definition champ_map.h:50
constexpr Bitmap clear(SmallIndex idx) const
Definition champ_map.h:69
constexpr Bitmap set(SmallIndex idx) const
Definition champ_map.h:64
constexpr bool check(SmallIndex idx) const
Definition champ_map.h:74
constexpr Bitmap(uint32_t bits)
Definition champ_map.h:52
constexpr SmallIndex pop() const
Definition champ_map.h:59
Definition champ_map.h:385
std::unique_ptr< Snapshot > make_snapshot() const
Definition champ_map.h:467
Map()
Definition champ_map.h:405
V ValueType
Definition champ_map.h:402
const Map< K, V, H > put(const K &key, const V &value) const
Definition champ_map.h:437
K KeyType
Definition champ_map.h:401
std::optional< V > get(const K &key) const
Definition champ_map.h:422
size_t get_serialized_size() const
Definition champ_map.h:412
Snapshot< K, V, H > Snapshot
Definition champ_map.h:403
const Map< K, V, H > remove(const K &key) const
Definition champ_map.h:451
size_t size() const
Definition champ_map.h:407
const V * getp(const K &key) const
Definition champ_map.h:432
bool empty() const
Definition champ_map.h:417
Definition champ_map.h:475
Snapshot(const Map< K, V, H > &map_)
Definition champ_map.h:489
size_t get_serialized_size()
Definition champ_map.h:491
void serialize(uint8_t *data)
Definition champ_map.h:496
Definition champ_map.h:16
std::shared_ptr< void > Node
Definition champ_map.h:101
uint32_t Hash
Definition champ_map.h:25
uint8_t SmallIndex
Definition champ_map.h:28
Definition map_serializers.h:11
size_t serialize(const T &t, uint8_t *&data, size_t &size)
Definition map_serializers.h:48
STL namespace.
Definition champ_map.h:105
const V * getp(Hash hash, const K &k) const
Definition champ_map.h:108
bool foreach(F &&f) const
Definition champ_map.h:158
size_t put_mut(Hash hash, const K &k, const V &v)
Definition champ_map.h:120
std::array< std::vector< std::shared_ptr< Entry< K, V > > >, collision_bins > bins
Definition champ_map.h:106
size_t remove_mut(Hash hash, const K &k)
Definition champ_map.h:139
Definition champ_map.h:85
K key
Definition champ_map.h:86
Entry(K k, V v)
Definition champ_map.h:89
V value
Definition champ_map.h:87
const V * getp(const K &k) const
Definition champ_map.h:91
Definition champ_map.h:172
SmallIndex compressed_idx(SmallIndex idx) const
Definition champ_map.h:187
SubNodes()
Definition champ_map.h:177
std::pair< std::shared_ptr< SubNodes< K, V, H > >, size_t > put(SmallIndex depth, Hash hash, const K &k, const V &v) const
Definition champ_map.h:294
Bitmap data_map
Definition champ_map.h:175
size_t remove_mut(SmallIndex depth, Hash hash, const K &k)
Definition champ_map.h:304
size_t put_mut(SmallIndex depth, Hash hash, const K &k, const V &v)
Definition champ_map.h:217
bool foreach(SmallIndex depth, F &&f) const
Definition champ_map.h:349
std::vector< Node< K, V, H > > nodes
Definition champ_map.h:173
const V * getp(SmallIndex depth, Hash hash, const K &k) const
Definition champ_map.h:199
Bitmap node_map
Definition champ_map.h:174
std::pair< std::shared_ptr< SubNodes< K, V, H > >, size_t > remove(SmallIndex depth, Hash hash, const K &k) const
Definition champ_map.h:339
SubNodes(std::vector< Node< K, V, H > > &&ns)
Definition champ_map.h:179
SubNodes(std::vector< Node< K, V, H > > &&ns, Bitmap nm, Bitmap dm)
Definition champ_map.h:181