CCF
Loading...
Searching...
No Matches
contiguous_set.h
Go to the documentation of this file.
1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the Apache 2.0 License.
3#pragma once
4
5#define FMT_HEADER_ONLY
6#include <fmt/format.h>
7#include <fmt/ranges.h>
8#include <numeric>
9#include <vector>
10
11namespace ccf::ds
12{
13 // Dense representation of an ordered set of values, assuming it contains
14 // some contiguous ranges of adjacent values. Stores a sequence of ranges,
15 // rather than individual values.
16 template <typename T>
18 {
19 public:
20 // Ranges are represented by their first value, and a count of additional
21 // values. This disallows negative ranges
22 using Range = std::pair<T, size_t>;
23 using Ranges = std::vector<Range>;
24
25 // Define an iterator for accessing each contained element, rather than the
26 // ranges
28 {
29 using iterator_category = std::random_access_iterator_tag;
30 using value_type = size_t;
31 using difference_type = size_t;
32 using pointer = const size_t*;
33 using reference = size_t;
34
35 using RangeIt = typename Ranges::const_iterator;
36
38 size_t offset = 0;
39
40 ConstIterator(RangeIt i, size_t o = 0) : it(i), offset(o) {}
41
42 T operator*() const
43 {
44 return it->first + offset;
45 }
46
47 bool operator==(const ConstIterator& other) const
48 {
49 return (it == other.it && offset == other.offset);
50 }
51
52 bool operator!=(const ConstIterator& other) const
53 {
54 return !(*this == other);
55 }
56
58 {
59 ++offset;
60 if (offset > it->second)
61 {
62 ++it;
63 offset = 0;
64 }
65 return (*this);
66 }
67
69 {
70 auto temp(*this);
71 ++(*this);
72 return temp;
73 }
74
76 {
77 if (offset == 0)
78 {
79 it = std::prev(it);
80 offset = it->second;
81 }
82 else
83 {
84 --offset;
85 }
86 return (*this);
87 }
88
90 {
91 auto temp(*this);
92 --(*this);
93 return temp;
94 }
95
97 {
98 if (n_ < 0)
99 {
100 return (*this) -= (size_t)-n_;
101 }
102 size_t n = n_;
103 while (offset + n > it->second)
104 {
105 n -= (it->second - offset + 1);
106 it = std::next(it);
107 offset = 0;
108 }
109 offset += n;
110 return (*this);
111 }
112
113 ConstIterator operator+(size_t n) const
114 {
115 ConstIterator copy(it, offset);
116 copy += n;
117 return copy;
118 }
119
120 friend ConstIterator operator+(size_t n, const ConstIterator& other)
121 {
122 return other + n;
123 }
124
126 {
127 while (n > offset)
128 {
129 n -= (offset + 1);
130 it = std::prev(it);
131 offset = it->second;
132 }
133 offset -= n;
134 return (*this);
135 }
136
137 ConstIterator operator-(size_t n) const
138 {
139 ConstIterator copy(it, offset);
140 copy -= n;
141 return copy;
142 }
143
145 {
146 if (it == other.it)
147 {
148 // In same range, simple diff
149 return offset - other.offset;
150 }
151
152 if (it < other.it)
153 {
154 return -(other - (*this));
155 }
156
157 // it > other.it
158 // Walk from this->it to other.it, summing all of the ranges that are
159 // passed
160 difference_type sum = std::accumulate(
161 std::reverse_iterator(it),
162 std::prev(std::reverse_iterator(other.it)),
163 offset + 1,
164 [](difference_type acc, const auto& range) {
165 return acc + range.second + 1;
166 });
167 sum += other.it->second - other.offset;
168 return sum;
169 }
170 };
171
172 private:
173 Ranges ranges;
174
175 template <typename It>
176 void init_from_iterators(It begin, It end)
177 {
178 if (!std::is_sorted(begin, end))
179 {
180 throw std::logic_error("Range must be sorted");
181 }
182
183 while (begin != end)
184 {
185 auto next = std::adjacent_find(
186 begin, end, [](const T& a, const T& b) { return a + 1 != b; });
187 if (next == end)
188 {
189 ranges.emplace_back(*begin, size_t(std::distance(begin, end)) - 1);
190 break;
191 }
192 ranges.emplace_back(*begin, size_t(std::distance(begin, next)));
193 begin = std::next(next);
194 }
195 }
196
197 void init_from_iterators(
198 const ConstIterator& begin, const ConstIterator& end)
199 {
200 // If they're in different ranges...
201 if (begin.it != end.it)
202 {
203 // first insert the end of the initial range
204 ranges.emplace_back(
205 begin.it->first + begin.offset, begin.it->second - begin.offset);
206
207 // then insert all intermediate ranges, by direct copies
208 ranges.insert(ranges.end(), std::next(begin.it), end.it);
209
210 // finally handle the final range; insert part of it if it is non-empty
211 if (end.offset != 0)
212 {
213 ranges.emplace_back(end.it->first, end.offset - 1);
214 }
215 }
216 else
217 {
218 if (begin.offset < end.offset)
219 {
220 ranges.emplace_back(
221 begin.it->first + begin.offset, end.offset - begin.offset - 1);
222 }
223 }
224 }
225
226 void maybe_merge_with_following(typename Ranges::iterator it)
227 {
228 if (it != ranges.end())
229 {
230 auto next_it = std::next(it);
231 if (next_it != ranges.end())
232 {
233 if (it->first + it->second + 1 == next_it->first)
234 {
235 it->second = it->second + 1 + next_it->second;
236 ranges.erase(next_it);
237 }
238 }
239 }
240 }
241
242 void maybe_merge_with_following(typename Ranges::reverse_iterator it)
243 {
244 if (it != ranges.rend())
245 {
246 maybe_merge_with_following(std::next(it).base());
247 }
248 }
249
250 [[nodiscard]] typename Ranges::const_iterator find_internal(
251 const T& t) const
252 {
253 Range estimated_range{t, 0};
254 auto it = std::lower_bound(ranges.begin(), ranges.end(), estimated_range);
255 if (it != ranges.end())
256 {
257 // If lower_bound found {t, n}, then return that result
258 if (it->first == t)
259 {
260 return it;
261 }
262 }
263
264 // else, most of the time, we found {x, n}, where x > t. Check if there
265 // is a previous range, and if that contains t
266 if (it != ranges.begin())
267 {
268 it = std::prev(it);
269 const T& from = it->first;
270 const T additional = it->second;
271 if (from + additional >= t)
272 {
273 return it;
274 }
275 }
276
277 return ranges.end();
278 }
279
280 public:
281 ContiguousSet() = default;
282
283 template <typename It>
285 {
286 init_from_iterators(std::forward<It>(begin), std::forward<It>(end));
287 }
288
289 ContiguousSet(const T& from, size_t additional)
290 {
291 ranges.emplace_back(from, additional);
292 }
293
294 bool operator==(const ContiguousSet& other) const
295 {
296 return ranges == other.ranges;
297 }
298
299 bool operator!=(const ContiguousSet& other) const
300 {
301 return !(*this == other);
302 }
303
304 [[nodiscard]] const Ranges& get_ranges() const
305 {
306 return ranges;
307 }
308
309 [[nodiscard]] size_t size() const
310 {
311 return std::accumulate(
312 ranges.begin(), ranges.end(), 0u, [](size_t n, const Range& r) {
313 return n + r.second + 1;
314 });
315 }
316
317 [[nodiscard]] bool empty() const
318 {
319 return ranges.empty();
320 }
321
322 bool insert(const T& t)
323 {
324 // Search backwards, to find the range with the highest starting point
325 // lower than this value. Offset by one, to find ranges adjacent to this
326 // value. eg - if inserting 5 into [{2, 1}, {6, 2}, {10, 2}], we want to
327 // find {6, 2}, and extend this range down by 1
328 const Range estimated_range(t + 1, 0);
329 auto it = std::lower_bound(
330 ranges.rbegin(), ranges.rend(), estimated_range, std::greater<>());
331
332 if (it != ranges.rend())
333 {
334 const T& from = it->first;
335 const T additional = it->second;
336 if (from <= t && t <= from + additional)
337 {
338 // Already present
339 return false;
340 }
341
342 if (from + additional + 1 == t)
343 {
344 // Adjacent to the end of the existing range
345 it->second++;
346 maybe_merge_with_following(it);
347 return true;
348 }
349
350 if (t + 1 == from)
351 {
352 // Precedes directly, extend this range by 1
353 it->first = t;
354 it->second++;
355 if (it != ranges.rend())
356 {
357 maybe_merge_with_following(std::next(it));
358 }
359 return true;
360 }
361 // Else fall through to emplace new entry
362 }
363
364 auto emplaced_it = ranges.emplace(it.base(), t, 0);
365 maybe_merge_with_following(emplaced_it);
366
367 return true;
368 }
369
370 bool erase(const T& t)
371 {
372 Range estimated_range{t, 0};
373 auto it = std::lower_bound(
374 ranges.begin(),
375 ranges.end(),
376 estimated_range,
377 // Custom comparator - ignore the second element
378 [](const Range& left, const Range& right) {
379 return left.first < right.first;
380 });
381
382 // Usually this has found the iterator _after_ the range containing t, and
383 // so we want std::prev(it). The only time when that is not the case are
384 // if it == ranges.begin() (there is no prev), or t == it->first()
385 // (lower_bound returned the correct iterator, because t is the start of a
386 // range). The latter must be additionally guarded by a check that we're
387 // not dereferencing an end iterator.
388 if (it != ranges.begin() && (it == ranges.end() || t != it->first))
389 {
390 it = std::prev(it);
391 }
392
393 if (it != ranges.end())
394 {
395 const T& from = it->first;
396 const T additional = it->second;
397 if (from <= t && t <= from + additional)
398 {
399 // Contained within this range
400 if (from == t)
401 {
402 if (additional == 0u)
403 {
404 // Remove range entirely
405 ranges.erase(it);
406 return true;
407 }
408 // Shrink start of range
409 ++it->first;
410 --it->second;
411 return true;
412 }
413
414 if (t == from + additional)
415 {
416 // Shrink end of range
417 --it->second;
418 return true;
419 }
420
421 const auto before = t - it->first - 1;
422 const auto after = it->first + it->second - t - 1;
423
424 it->second = before;
425
426 auto next_it = std::next(it);
427 ranges.emplace(next_it, t + 1, after);
428 return true;
429 }
430 }
431
432 return false;
433 }
434
435 void extend(const T& from, size_t additional)
436 {
437 for (auto n = from; n <= from + additional; ++n)
438 {
439 const auto b = insert(n);
440 }
441 }
442
443 [[nodiscard]] bool contains(const T& t) const
444 {
445 return find_internal(t) != end().it;
446 }
447
448 [[nodiscard]] ConstIterator find(const T& t) const
449 {
450 auto it = find_internal(t);
451 if (it != ranges.end())
452 {
453 return ConstIterator(it, t - it->first);
454 }
455
456 return end();
457 }
458
459 [[nodiscard]] ConstIterator lower_bound(const T& t) const
460 {
461 return std::lower_bound(begin(), end(), t);
462 }
463
464 [[nodiscard]] ConstIterator upper_bound(const T& t) const
465 {
466 return std::upper_bound(begin(), end(), t);
467 }
468
469 void clear()
470 {
471 ranges.clear();
472 }
473
474 [[nodiscard]] T front() const
475 {
476 return ranges.front().first;
477 }
478
479 [[nodiscard]] T back() const
480 {
481 const auto back = ranges.back();
482 return back.first + back.second;
483 }
484
485 [[nodiscard]] ConstIterator begin() const
486 {
487 return ConstIterator(ranges.begin());
488 }
489
490 [[nodiscard]] ConstIterator end() const
491 {
492 return ConstIterator(ranges.end());
493 }
494 };
495}
496
497FMT_BEGIN_NAMESPACE
498template <typename T>
499struct formatter<ccf::ds::ContiguousSet<T>>
500{
501 template <typename ParseContext>
502 constexpr auto parse(ParseContext& ctx)
503 {
504 return ctx.begin();
505 }
506
507 template <typename FormatContext>
508 auto format(const ccf::ds::ContiguousSet<T>& v, FormatContext& ctx) const
509 {
510 std::vector<std::string> ranges;
511 for (const auto& [from, additional] : v.get_ranges())
512 {
513 ranges.emplace_back(fmt::format("[{}->{}]", from, from + additional));
514 }
515 return format_to(
516 ctx.out(),
517 "{{{} values in {} ranges: {}}}",
518 v.size(),
519 v.get_ranges().size(),
520 fmt::join(ranges, ", "));
521 }
522};
523FMT_END_NAMESPACE
Definition contiguous_set.h:18
ConstIterator end() const
Definition contiguous_set.h:490
ContiguousSet(const T &from, size_t additional)
Definition contiguous_set.h:289
T back() const
Definition contiguous_set.h:479
bool erase(const T &t)
Definition contiguous_set.h:370
void extend(const T &from, size_t additional)
Definition contiguous_set.h:435
ContiguousSet(It &&begin, It &&end)
Definition contiguous_set.h:284
ConstIterator find(const T &t) const
Definition contiguous_set.h:448
bool insert(const T &t)
Definition contiguous_set.h:322
bool empty() const
Definition contiguous_set.h:317
T front() const
Definition contiguous_set.h:474
ConstIterator begin() const
Definition contiguous_set.h:485
bool operator==(const ContiguousSet &other) const
Definition contiguous_set.h:294
bool operator!=(const ContiguousSet &other) const
Definition contiguous_set.h:299
size_t size() const
Definition contiguous_set.h:309
bool contains(const T &t) const
Definition contiguous_set.h:443
const Ranges & get_ranges() const
Definition contiguous_set.h:304
std::pair< T, size_t > Range
Definition contiguous_set.h:22
ConstIterator lower_bound(const T &t) const
Definition contiguous_set.h:459
std::vector< Range > Ranges
Definition contiguous_set.h:23
ConstIterator upper_bound(const T &t) const
Definition contiguous_set.h:464
void clear()
Definition contiguous_set.h:469
Definition contiguous_set.h:12
Definition app_interface.h:14
Definition contiguous_set.h:28
ConstIterator & operator-=(size_t n)
Definition contiguous_set.h:125
size_t reference
Definition contiguous_set.h:33
friend ConstIterator operator+(size_t n, const ConstIterator &other)
Definition contiguous_set.h:120
ConstIterator(RangeIt i, size_t o=0)
Definition contiguous_set.h:40
ConstIterator operator+(size_t n) const
Definition contiguous_set.h:113
size_t difference_type
Definition contiguous_set.h:31
RangeIt it
Definition contiguous_set.h:37
ConstIterator operator++(int)
Definition contiguous_set.h:68
size_t value_type
Definition contiguous_set.h:30
const size_t * pointer
Definition contiguous_set.h:32
ConstIterator & operator--()
Definition contiguous_set.h:75
ConstIterator operator--(int)
Definition contiguous_set.h:89
T operator*() const
Definition contiguous_set.h:42
std::random_access_iterator_tag iterator_category
Definition contiguous_set.h:29
ConstIterator operator-(size_t n) const
Definition contiguous_set.h:137
ConstIterator & operator++()
Definition contiguous_set.h:57
size_t offset
Definition contiguous_set.h:38
bool operator==(const ConstIterator &other) const
Definition contiguous_set.h:47
ConstIterator & operator+=(difference_type n_)
Definition contiguous_set.h:96
typename Ranges::const_iterator RangeIt
Definition contiguous_set.h:35
difference_type operator-(const ConstIterator &other) const
Definition contiguous_set.h:144
bool operator!=(const ConstIterator &other) const
Definition contiguous_set.h:52
constexpr auto parse(ParseContext &ctx)
Definition contiguous_set.h:502
auto format(const ccf::ds::ContiguousSet< T > &v, FormatContext &ctx) const
Definition contiguous_set.h:508