CCF
Loading...
Searching...
No Matches
champ_map.h
Go to the documentation of this file.
1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the Apache 2.0 License.
3#pragma once
4
5#include "ccf/ds/hash.h"
6#include "ds/ccf_assert.h"
8
9#include <algorithm>
10#include <array>
11#include <memory>
12#include <optional>
13#include <utility>
14#include <vector>
15
16namespace champ
17{
18 // A persistent hash map based on the Compressed Hash-Array Mapped Prefix-tree
19 // from 'Fast and Lean Immutable Multi-Maps on the JVM based on Heterogeneous
20 // Hash-Array Mapped Tries' by Michael J. Steindorfer and Jurgen J. Vinju
21 // (https://arxiv.org/pdf/1608.01036.pdf).
22
23 static constexpr size_t index_mask_bits = 5;
24 static constexpr size_t index_mask = (1 << index_mask_bits) - 1;
25
26 using Hash = uint32_t;
27 static constexpr size_t hash_bits = sizeof(Hash) * 8;
28
29 using SmallIndex = uint8_t;
30 static constexpr size_t small_index_bits = sizeof(SmallIndex) * 8;
31 static_assert(small_index_bits > index_mask_bits);
32
33 static constexpr size_t collision_node_bits = hash_bits % index_mask_bits;
34 static_assert(collision_node_bits > 0);
35 static constexpr SmallIndex collision_depth = hash_bits / index_mask_bits;
36 static constexpr size_t collision_bins = 1 << collision_node_bits;
37
38 static constexpr SmallIndex mask(Hash hash, SmallIndex depth)
39 {
40 return (hash >> ((Hash)depth * index_mask_bits)) & index_mask;
41 }
42
43 template <class K, class V, class H = std::hash<K>>
44 class Snapshot;
45
46 class Bitmap
47 {
48 uint32_t _bits;
49
50 public:
51 constexpr Bitmap() : _bits(0) {}
52
53 constexpr Bitmap(uint32_t bits) : _bits(bits) {}
54
55 constexpr Bitmap operator&(const Bitmap& other) const
56 {
57 return {_bits & other._bits};
58 }
59
60 [[nodiscard]] constexpr SmallIndex pop() const
61 {
62 return __builtin_popcount(_bits);
63 }
64
65 [[nodiscard]] constexpr Bitmap set(SmallIndex idx) const
66 {
67 return {_bits | (static_cast<uint32_t>(1) << idx)};
68 }
69
70 [[nodiscard]] constexpr Bitmap clear(SmallIndex idx) const
71 {
72 return {_bits & ~(static_cast<uint32_t>(1) << idx)};
73 }
74
75 [[nodiscard]] constexpr bool check(SmallIndex idx) const
76 {
77 return (_bits & (static_cast<uint32_t>(1) << idx)) != 0;
78 }
79 };
80
81 template <class K, class V, class H>
82 struct SubNodes;
83
84 template <class K, class V>
85 struct Entry
86 {
87 K key;
89
90 Entry(K k, V v) : key(std::move(k)), value(std::move(v)) {}
91
92 [[nodiscard]] const V* getp(const K& k) const
93 {
94 if (k == key)
95 {
96 return &value;
97 }
98 return nullptr;
99 }
100 };
101
102 template <class K, class V, class H>
103 using Node = std::shared_ptr<void>;
104
105 template <class K, class V, class H>
107 {
108 std::array<std::vector<std::shared_ptr<Entry<K, V>>>, collision_bins> bins;
109
110 [[nodiscard]] const V* getp(Hash hash, const K& k) const
111 {
112 const auto idx = mask(hash, collision_depth);
113 const auto& bin = bins[idx];
114 for (const auto& node : bin)
115 {
116 if (k == node->key)
117 {
118 return &node->value;
119 }
120 }
121 return nullptr;
122 }
123
124 size_t put_mut(Hash hash, const K& k, const V& v)
125 {
126 const auto idx = mask(hash, collision_depth);
127 auto& bin = bins[idx];
128 for (size_t i = 0; i < bin.size(); ++i)
129 {
130 const auto& entry = bin[i];
131 if (k == entry->key)
132 {
133 const auto diff = map::get_serialized_size_with_padding(entry->key) +
134 map::get_serialized_size_with_padding(entry->value);
135 bin[i] = std::make_shared<Entry<K, V>>(k, v);
136 return diff;
137 }
138 }
139 bin.push_back(std::make_shared<Entry<K, V>>(k, v));
140 return 0;
141 }
142
143 size_t remove_mut(Hash hash, const K& k)
144 {
145 const auto idx = mask(hash, collision_depth);
146 auto& bin = bins[idx];
147 for (size_t i = 0; i < bin.size(); ++i)
148 {
149 const auto& entry = bin[i];
150 if (k == entry->key)
151 {
152 const auto diff = map::get_serialized_size_with_padding(entry->key) +
153 map::get_serialized_size_with_padding(entry->value);
154 bin.erase(bin.begin() + i);
155 return diff;
156 }
157 }
158 return 0;
159 }
160
161 template <class F>
162 bool foreach(F&& f) const
163 {
164 for (const auto& bin : bins)
165 {
166 for (const auto& entry : bin)
167 {
168 if (!std::forward<F>(f)(entry->key, entry->value))
169 {
170 return false;
171 }
172 }
173 }
174 return true;
175 }
176 };
177
178 template <class K, class V, class H>
179 struct SubNodes
180 {
181 std::vector<Node<K, V, H>> nodes;
184
185 SubNodes() = default;
186
187 SubNodes(std::vector<Node<K, V, H>>&& ns) : nodes(std::move(ns)) {}
188
189 SubNodes(std::vector<Node<K, V, H>>&& ns, Bitmap nm, Bitmap dm) :
190 nodes(std::move(ns)),
191 node_map(nm),
192 data_map(dm)
193 {}
194
195 [[nodiscard]] SmallIndex compressed_idx(SmallIndex idx) const
196 {
197 if (!node_map.check(idx) && !data_map.check(idx))
198 {
199 return static_cast<SmallIndex>(-1);
200 }
201
202 const auto mask = Bitmap(~(static_cast<uint32_t>(-1) << idx));
203 if (data_map.check(idx))
204 {
205 return (data_map & mask).pop();
206 }
207
208 return data_map.pop() + (node_map & mask).pop();
209 }
210
211 [[nodiscard]] const V* getp(SmallIndex depth, Hash hash, const K& k) const
212 {
213 const auto idx = mask(hash, depth);
214 const auto c_idx = compressed_idx(idx);
215
216 if (c_idx == static_cast<SmallIndex>(-1))
217 {
218 return nullptr;
219 }
220
221 if (data_map.check(idx))
222 {
223 return node_as<Entry<K, V>>(c_idx)->getp(k);
224 }
225
226 if (depth == (collision_depth - 1))
227 {
228 return node_as<Collisions<K, V, H>>(c_idx)->getp(hash, k);
229 }
230
231 return node_as<SubNodes<K, V, H>>(c_idx)->getp(depth + 1, hash, k);
232 }
233
234 // Returns serialised size of overwritten (k,v) if k exists, 0 otherwise
235 size_t put_mut(SmallIndex depth, Hash hash, const K& k, const V& v)
236 {
237 const auto idx = mask(hash, depth);
238 auto c_idx = compressed_idx(idx);
239
240 if (c_idx == static_cast<SmallIndex>(-1))
241 {
242 data_map = data_map.set(idx);
243 c_idx = compressed_idx(idx);
244 nodes.insert(
245 nodes.begin() + c_idx, std::make_shared<Entry<K, V>>(k, v));
246 return 0;
247 }
248
249 if (node_map.check(idx))
250 {
251 size_t insert = 0;
252 if (depth < (collision_depth - 1))
253 {
254 auto sn = *node_as<SubNodes<K, V, H>>(c_idx);
255 insert = sn.put_mut(depth + 1, hash, k, v);
256 nodes[c_idx] = std::make_shared<SubNodes<K, V, H>>(std::move(sn));
257 }
258 else
259 {
260 auto sn = *node_as<Collisions<K, V, H>>(c_idx);
261 insert = sn.put_mut(hash, k, v);
262 nodes[c_idx] = std::make_shared<Collisions<K, V, H>>(std::move(sn));
263 }
264 return insert;
265 }
266
267 const auto& entry0 = node_as<Entry<K, V>>(c_idx);
268 if (k == entry0->key)
269 {
270 auto current_size = map::get_serialized_size_with_padding(entry0->key) +
271 map::get_serialized_size_with_padding(entry0->value);
272 nodes[c_idx] = std::make_shared<Entry<K, V>>(k, v);
273 return current_size;
274 }
275
276 if (depth < (collision_depth - 1))
277 {
278 const auto hash0 = H()(entry0->key);
279 const auto idx0 = mask(hash0, depth + 1);
280 auto sub_node =
281 SubNodes<K, V, H>({entry0}, Bitmap(0), Bitmap(0).set(idx0));
282 sub_node.put_mut(depth + 1, hash, k, v);
283
284 nodes.erase(nodes.begin() + c_idx);
285 data_map = data_map.clear(idx);
286 node_map = node_map.set(idx);
287 c_idx = compressed_idx(idx);
288 nodes.insert(
289 nodes.begin() + c_idx,
290 std::make_shared<SubNodes<K, V, H>>(std::move(sub_node)));
291 }
292 else
293 {
294 auto sub_node = Collisions<K, V, H>();
295 const auto hash0 = H()(entry0->key);
296 const auto idx0 = mask(hash0, collision_depth);
297 sub_node.bins[idx0].push_back(entry0);
298 const auto idx1 = mask(hash, collision_depth);
299 sub_node.bins[idx1].push_back(std::make_shared<Entry<K, V>>(k, v));
300
301 nodes.erase(nodes.begin() + c_idx);
302 data_map = data_map.clear(idx);
303 node_map = node_map.set(idx);
304 c_idx = compressed_idx(idx);
305 nodes.insert(
306 nodes.begin() + c_idx,
307 std::make_shared<Collisions<K, V, H>>(std::move(sub_node)));
308 }
309 return 0;
310 }
311
312 [[nodiscard]] std::pair<std::shared_ptr<SubNodes<K, V, H>>, size_t> put(
313 SmallIndex depth, Hash hash, const K& k, const V& v) const
314 {
315 auto node = *this;
316 auto r = node.put_mut(depth, hash, k, v);
317 return std::make_pair(
318 std::make_shared<SubNodes<K, V, H>>(std::move(node)), r);
319 }
320
321 // Returns serialised size of removed (k,v) if k exists, 0 otherwise
322 size_t remove_mut(SmallIndex depth, Hash hash, const K& k)
323 {
324 const auto idx = mask(hash, depth);
325 const auto c_idx = compressed_idx(idx);
326
327 if (c_idx == static_cast<SmallIndex>(-1))
328 {
329 return 0;
330 }
331
332 if (data_map.check(idx))
333 {
334 const auto& entry = node_as<Entry<K, V>>(c_idx);
335 if (entry->key != k)
336 {
337 return 0;
338 }
339
340 const auto diff = map::get_serialized_size_with_padding(entry->key) +
341 map::get_serialized_size_with_padding(entry->value);
342 nodes.erase(nodes.begin() + c_idx);
343 data_map = data_map.clear(idx);
344 return diff;
345 }
346
347 if (depth == (collision_depth - 1))
348 {
349 auto sn = *node_as<Collisions<K, V, H>>(c_idx);
350 const auto diff = sn.remove_mut(hash, k);
351 nodes[c_idx] = std::make_shared<Collisions<K, V, H>>(std::move(sn));
352 return diff;
353 }
354
355 auto sn = *node_as<SubNodes<K, V, H>>(c_idx);
356 const auto diff = sn.remove_mut(depth + 1, hash, k);
357 nodes[c_idx] = std::make_shared<SubNodes<K, V, H>>(std::move(sn));
358 return diff;
359 }
360
361 [[nodiscard]] std::pair<std::shared_ptr<SubNodes<K, V, H>>, size_t> remove(
362 SmallIndex depth, Hash hash, const K& k) const
363 {
364 auto node = *this;
365 auto r = node.remove_mut(depth, hash, k);
366 return std::make_pair(
367 std::make_shared<SubNodes<K, V, H>>(std::move(node)), r);
368 }
369
370 template <class F>
371 bool foreach(SmallIndex depth, F&& f) const
372 {
373 const auto entries = data_map.pop();
374 for (SmallIndex i = 0; i < entries; ++i)
375 {
376 const auto& entry = node_as<Entry<K, V>>(i);
377 if (!std::forward<F>(f)(entry->key, entry->value))
378 {
379 return false;
380 }
381 }
382 for (size_t i = entries; i < nodes.size(); ++i)
383 {
384 if (depth == (collision_depth - 1))
385 {
386 if (!node_as<Collisions<K, V, H>>(i)->foreach(std::forward<F>(f)))
387 {
388 return false;
389 }
390 }
391 else
392 {
393 if (!node_as<SubNodes<K, V, H>>(i)->foreach(
394 depth + 1, std::forward<F>(f)))
395 {
396 return false;
397 }
398 }
399 }
400 return true;
401 }
402
403 private:
404 template <class A>
405 [[nodiscard]] const std::shared_ptr<A>& node_as(SmallIndex c_idx) const
406 {
407 return reinterpret_cast<const std::shared_ptr<A>&>(nodes[c_idx]);
408 }
409 };
410
411 template <class K, class V, class H = std::hash<K>>
412 class Map
413 {
414 private:
415 std::shared_ptr<SubNodes<K, V, H>> root;
416 size_t map_size = 0;
417 size_t serialized_size = 0;
418
419 Map(
420 std::shared_ptr<SubNodes<K, V, H>>&& root_,
421 size_t size_,
422 size_t serialized_size_) :
423 root(std::move(root_)),
424 map_size(size_),
425 serialized_size(serialized_size_)
426 {}
427
428 public:
429 using KeyType = K;
430 using ValueType = V;
432
433 Map() : root(std::make_shared<SubNodes<K, V, H>>()) {}
434
435 [[nodiscard]] size_t size() const
436 {
437 return map_size;
438 }
439
440 [[nodiscard]] size_t get_serialized_size() const
441 {
442 return serialized_size;
443 }
444
445 [[nodiscard]] bool empty() const
446 {
447 return map_size == 0;
448 }
449
450 [[nodiscard]] std::optional<V> get(const K& key) const
451 {
452 auto v = root->getp(0, H()(key), key);
453
454 if (v)
455 {
456 return *v;
457 }
458 return {};
459 }
460
461 [[nodiscard]] const V* getp(const K& key) const
462 {
463 return root->getp(0, H()(key), key);
464 }
465
466 [[nodiscard]] Map<K, V, H> put(const K& key, const V& value) const
467 {
468 auto r = root->put(0, H()(key), key, value);
469 auto size_ = map_size;
470 if (r.second == 0)
471 {
472 size_++;
473 }
474
475 const auto size_change = (map::get_serialized_size_with_padding(key) +
476 map::get_serialized_size_with_padding(value)) -
477 r.second;
478
479 return Map(std::move(r.first), size_, size_change + serialized_size);
480 }
481
482 [[nodiscard]] Map<K, V, H> remove(const K& key) const
483 {
484 auto r = root->remove(0, H()(key), key);
485 auto size_ = map_size;
486 if (r.second > 0)
487 {
488 size_--;
489 }
490
491 return Map(std::move(r.first), size_, serialized_size - r.second);
492 }
493
494 template <class F>
495 bool foreach(F&& f) const
496 {
497 return root->foreach(0, std::forward<F>(f));
498 }
499
500 [[nodiscard]] std::unique_ptr<Snapshot> make_snapshot() const
501 {
502 return std::make_unique<Snapshot>(*this);
503 }
504 };
505
506 template <class K, class V, class H>
508 {
509 private:
510 const Map<K, V, H> map;
511
512 struct KVTuple
513 {
514 K* k;
515 Hash h_k;
516 V* v;
517
518 KVTuple(K* k_, Hash h_k_, V* v_) : k(k_), h_k(h_k_), v(v_) {}
519 };
520
521 public:
522 Snapshot(const Map<K, V, H>& map_) : map(map_) {}
523
525 {
526 return map.get_serialized_size();
527 }
528
529 void serialize(uint8_t* data)
530 {
531 std::vector<KVTuple> ordered_state;
532 ordered_state.reserve(map.size());
533 size_t serialized_size = 0;
534
535 map.foreach([&ordered_state, &serialized_size](auto& key, auto& value) {
536 K* k = &key;
537 V* v = &value;
538 uint32_t key_size = map::get_serialized_size_with_padding(key);
539 uint32_t value_size = map::get_serialized_size_with_padding(value);
540 serialized_size += (key_size + value_size);
541
542 ordered_state.emplace_back(k, static_cast<Hash>(H()(key)), v);
543
544 return true;
545 });
546
547 // Sort keys to be able to generate byte-for-byte serialised snapshot from
548 // the same state
549 std::sort(
550 ordered_state.begin(), ordered_state.end(), [](KVTuple& i, KVTuple& j) {
551 return i.h_k < j.h_k;
552 });
553
555 serialized_size == map.get_serialized_size(),
556 "Serialized size:{}, map.get_serialized_size():{} (map count:{}, "
557 "ordered state count:{})",
558 serialized_size,
559 map.get_serialized_size(),
560 map.size(),
561 ordered_state.size());
562
563 for (const auto& p : ordered_state)
564 {
565 // Serialize the key
566 uint32_t key_size = map::serialize(*p.k, data, serialized_size);
567 map::add_padding(key_size, data, serialized_size);
568
569 // Serialize the value
570 uint32_t value_size = map::serialize(*p.v, data, serialized_size);
571 map::add_padding(value_size, data, serialized_size);
572 }
573
575 serialized_size == 0,
576 "Serialization buffer is not complete, remaining:{}/{}",
577 serialized_size,
578 map.get_serialized_size());
579 }
580 };
581
582}
#define CCF_ASSERT_FMT(expr,...)
Definition ccf_assert.h:10
Definition champ_map.h:47
constexpr Bitmap operator&(const Bitmap &other) const
Definition champ_map.h:55
constexpr Bitmap()
Definition champ_map.h:51
constexpr Bitmap clear(SmallIndex idx) const
Definition champ_map.h:70
constexpr Bitmap set(SmallIndex idx) const
Definition champ_map.h:65
constexpr bool check(SmallIndex idx) const
Definition champ_map.h:75
constexpr Bitmap(uint32_t bits)
Definition champ_map.h:53
constexpr SmallIndex pop() const
Definition champ_map.h:60
Definition champ_map.h:413
Map< K, V, H > remove(const K &key) const
Definition champ_map.h:482
std::unique_ptr< Snapshot > make_snapshot() const
Definition champ_map.h:500
Map()
Definition champ_map.h:433
V ValueType
Definition champ_map.h:430
K KeyType
Definition champ_map.h:429
std::optional< V > get(const K &key) const
Definition champ_map.h:450
size_t get_serialized_size() const
Definition champ_map.h:440
Snapshot< K, V, H > Snapshot
Definition champ_map.h:431
Map< K, V, H > put(const K &key, const V &value) const
Definition champ_map.h:466
size_t size() const
Definition champ_map.h:435
const V * getp(const K &key) const
Definition champ_map.h:461
bool empty() const
Definition champ_map.h:445
Definition champ_map.h:508
Snapshot(const Map< K, V, H > &map_)
Definition champ_map.h:522
size_t get_serialized_size()
Definition champ_map.h:524
void serialize(uint8_t *data)
Definition champ_map.h:529
Definition champ_map.h:17
std::shared_ptr< void > Node
Definition champ_map.h:103
uint32_t Hash
Definition champ_map.h:26
uint8_t SmallIndex
Definition champ_map.h:29
Definition map_serializers.h:11
size_t serialize(const T &t, uint8_t *&data, size_t &size)
Definition map_serializers.h:48
STL namespace.
Definition champ_map.h:107
const V * getp(Hash hash, const K &k) const
Definition champ_map.h:110
bool foreach(F &&f) const
Definition champ_map.h:162
size_t put_mut(Hash hash, const K &k, const V &v)
Definition champ_map.h:124
std::array< std::vector< std::shared_ptr< Entry< K, V > > >, collision_bins > bins
Definition champ_map.h:108
size_t remove_mut(Hash hash, const K &k)
Definition champ_map.h:143
Definition champ_map.h:86
K key
Definition champ_map.h:87
Entry(K k, V v)
Definition champ_map.h:90
V value
Definition champ_map.h:88
const V * getp(const K &k) const
Definition champ_map.h:92
Definition champ_map.h:180
SmallIndex compressed_idx(SmallIndex idx) const
Definition champ_map.h:195
std::pair< std::shared_ptr< SubNodes< K, V, H > >, size_t > put(SmallIndex depth, Hash hash, const K &k, const V &v) const
Definition champ_map.h:312
Bitmap data_map
Definition champ_map.h:183
size_t remove_mut(SmallIndex depth, Hash hash, const K &k)
Definition champ_map.h:322
size_t put_mut(SmallIndex depth, Hash hash, const K &k, const V &v)
Definition champ_map.h:235
bool foreach(SmallIndex depth, F &&f) const
Definition champ_map.h:371
std::vector< Node< K, V, H > > nodes
Definition champ_map.h:181
const V * getp(SmallIndex depth, Hash hash, const K &k) const
Definition champ_map.h:211
Bitmap node_map
Definition champ_map.h:182
std::pair< std::shared_ptr< SubNodes< K, V, H > >, size_t > remove(SmallIndex depth, Hash hash, const K &k) const
Definition champ_map.h:361
SubNodes(std::vector< Node< K, V, H > > &&ns)
Definition champ_map.h:187
SubNodes(std::vector< Node< K, V, H > > &&ns, Bitmap nm, Bitmap dm)
Definition champ_map.h:189
SubNodes()=default