CCF
Loading...
Searching...
No Matches
contiguous_set.h
Go to the documentation of this file.
1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the Apache 2.0 License.
3#pragma once
4
5#define FMT_HEADER_ONLY
6#include <fmt/format.h>
7#include <fmt/ranges.h>
8#include <numeric>
9#include <vector>
10
11namespace ccf::ds
12{
13 // Dense representation of an ordered set of values, assuming it contains
14 // some contiguous ranges of adjacent values. Stores a sequence of ranges,
15 // rather than individual values.
16 template <typename T>
18 {
19 public:
20 // Ranges are represented by their first value, and a count of additional
21 // values. This disallows negative ranges
22 using Range = std::pair<T, size_t>;
23 using Ranges = std::vector<Range>;
24
25 // Define an iterator for accessing each contained element, rather than the
26 // ranges
28 {
29 using iterator_category = std::random_access_iterator_tag;
30 using value_type = size_t;
31 using difference_type = size_t;
32 using pointer = const size_t*;
33 using reference = size_t;
34
35 using RangeIt = typename Ranges::const_iterator;
36
38 size_t offset = 0;
39
40 ConstIterator(RangeIt i, size_t o = 0) : it(i), offset(o) {}
41
42 T operator*() const
43 {
44 return it->first + offset;
45 }
46
47 bool operator==(const ConstIterator& other) const
48 {
49 return (it == other.it && offset == other.offset);
50 }
51
52 bool operator!=(const ConstIterator& other) const
53 {
54 return !(*this == other);
55 }
56
58 {
59 ++offset;
60 if (offset > it->second)
61 {
62 ++it;
63 offset = 0;
64 }
65 return (*this);
66 }
67
69 {
70 auto temp(*this);
71 ++(*this);
72 return temp;
73 }
74
76 {
77 if (offset == 0)
78 {
79 it = std::prev(it);
80 offset = it->second;
81 }
82 else
83 {
84 --offset;
85 }
86 return (*this);
87 }
88
90 {
91 auto temp(*this);
92 --(*this);
93 return temp;
94 }
95
97 {
98 if (n_ < 0)
99 {
100 return (*this) -= (size_t)-n_;
101 }
102 else
103 {
104 size_t n = n_;
105 while (offset + n > it->second)
106 {
107 n -= (it->second - offset + 1);
108 it = std::next(it);
109 offset = 0;
110 }
111 offset += n;
112 return (*this);
113 }
114 }
115
116 ConstIterator operator+(size_t n) const
117 {
118 ConstIterator copy(it, offset);
119 copy += n;
120 return copy;
121 }
122
123 friend ConstIterator operator+(size_t n, const ConstIterator& other)
124 {
125 return other + n;
126 }
127
129 {
130 while (n > offset)
131 {
132 n -= (offset + 1);
133 it = std::prev(it);
134 offset = it->second;
135 }
136 offset -= n;
137 return (*this);
138 }
139
140 ConstIterator operator-(size_t n) const
141 {
142 ConstIterator copy(it, offset);
143 copy -= n;
144 return copy;
145 }
146
148 {
149 if (it == other.it)
150 {
151 // In same range, simple diff
152 return offset - other.offset;
153 }
154 else if (it < other.it)
155 {
156 return -(other - (*this));
157 }
158 else
159 {
160 // it > other.it
161 // Walk from this->it to other.it, summing all of the ranges that are
162 // passed
163 difference_type sum = std::accumulate(
164 std::reverse_iterator(it),
165 std::prev(std::reverse_iterator(other.it)),
166 offset + 1,
167 [](difference_type acc, const auto& range) {
168 return acc + range.second + 1;
169 });
170 sum += other.it->second - other.offset;
171 return sum;
172 }
173 }
174 };
175
176 private:
177 Ranges ranges;
178
179 template <typename It>
180 void init_from_iterators(It begin, It end)
181 {
182 if (!std::is_sorted(begin, end))
183 {
184 throw std::logic_error("Range must be sorted");
185 }
186
187 while (begin != end)
188 {
189 auto next = std::adjacent_find(
190 begin, end, [](const T& a, const T& b) { return a + 1 != b; });
191 if (next == end)
192 {
193 ranges.emplace_back(*begin, size_t(std::distance(begin, end)) - 1);
194 break;
195 }
196 ranges.emplace_back(*begin, size_t(std::distance(begin, next)));
197 begin = std::next(next);
198 }
199 }
200
201 void init_from_iterators(
202 const ConstIterator& begin, const ConstIterator& end)
203 {
204 // If they're in different ranges...
205 if (begin.it != end.it)
206 {
207 // first insert the end of the initial range
208 ranges.emplace_back(
209 begin.it->first + begin.offset, begin.it->second - begin.offset);
210
211 // then insert all intermediate ranges, by direct copies
212 ranges.insert(ranges.end(), std::next(begin.it), end.it);
213
214 // finally handle the final range; insert part of it if it is non-empty
215 if (end.offset != 0)
216 {
217 ranges.emplace_back(end.it->first, end.offset - 1);
218 }
219 }
220 else
221 {
222 if (begin.offset < end.offset)
223 {
224 ranges.emplace_back(
225 begin.it->first + begin.offset, end.offset - begin.offset - 1);
226 }
227 }
228 }
229
230 void maybe_merge_with_following(typename Ranges::iterator it)
231 {
232 if (it != ranges.end())
233 {
234 auto next_it = std::next(it);
235 if (next_it != ranges.end())
236 {
237 if (it->first + it->second + 1 == next_it->first)
238 {
239 it->second = it->second + 1 + next_it->second;
240 ranges.erase(next_it);
241 }
242 }
243 }
244 }
245
246 void maybe_merge_with_following(typename Ranges::reverse_iterator it)
247 {
248 if (it != ranges.rend())
249 {
250 maybe_merge_with_following(std::next(it).base());
251 }
252 }
253
254 typename Ranges::const_iterator find_internal(const T& t) const
255 {
256 Range estimated_range{t, 0};
257 auto it = std::lower_bound(ranges.begin(), ranges.end(), estimated_range);
258 if (it != ranges.end())
259 {
260 // If lower_bound found {t, n}, then return that result
261 if (it->first == t)
262 {
263 return it;
264 }
265 }
266
267 // else, most of the time, we found {x, n}, where x > t. Check if there
268 // is a previous range, and if that contains t
269 if (it != ranges.begin())
270 {
271 it = std::prev(it);
272 const T& from = it->first;
273 const T additional = it->second;
274 if (from + additional >= t)
275 {
276 return it;
277 }
278 }
279
280 return ranges.end();
281 }
282
283 public:
284 ContiguousSet() = default;
285
286 template <typename It>
288 {
289 init_from_iterators(std::forward<It>(begin), std::forward<It>(end));
290 }
291
292 ContiguousSet(const T& from, size_t additional)
293 {
294 ranges.emplace_back(from, additional);
295 }
296
297 bool operator==(const ContiguousSet& other) const
298 {
299 return ranges == other.ranges;
300 }
301
302 bool operator!=(const ContiguousSet& other) const
303 {
304 return !(*this == other);
305 }
306
307 const Ranges& get_ranges() const
308 {
309 return ranges;
310 }
311
312 size_t size() const
313 {
314 return std::accumulate(
315 ranges.begin(), ranges.end(), 0u, [](size_t n, const Range& r) {
316 return n + r.second + 1;
317 });
318 }
319
320 bool empty() const
321 {
322 return ranges.empty();
323 }
324
325 bool insert(const T& t)
326 {
327 // Search backwards, to find the range with the highest starting point
328 // lower than this value. Offset by one, to find ranges adjacent to this
329 // value. eg - if inserting 5 into [{2, 1}, {6, 2}, {10, 2}], we want to
330 // find {6, 2}, and extend this range down by 1
331 const Range estimated_range(t + 1, 0);
332 auto it = std::lower_bound(
333 ranges.rbegin(), ranges.rend(), estimated_range, std::greater<>());
334
335 if (it != ranges.rend())
336 {
337 const T& from = it->first;
338 const T additional = it->second;
339 if (from <= t && t <= from + additional)
340 {
341 // Already present
342 return false;
343 }
344 else if (from + additional + 1 == t)
345 {
346 // Adjacent to the end of the existing range
347 it->second++;
348 maybe_merge_with_following(it);
349 return true;
350 }
351 else if (t + 1 == from)
352 {
353 // Precedes directly, extend this range by 1
354 it->first = t;
355 it->second++;
356 if (it != ranges.rend())
357 {
358 maybe_merge_with_following(std::next(it));
359 }
360 return true;
361 }
362 // Else fall through to emplace new entry
363 }
364
365 auto emplaced_it = ranges.emplace(it.base(), t, 0);
366 maybe_merge_with_following(emplaced_it);
367
368 return true;
369 }
370
371 bool erase(const T& t)
372 {
373 Range estimated_range{t, 0};
374 auto it = std::lower_bound(
375 ranges.begin(),
376 ranges.end(),
377 estimated_range,
378 // Custom comparator - ignore the second element
379 [](const Range& left, const Range& right) {
380 return left.first < right.first;
381 });
382
383 // Usually this has found the iterator _after_ the range containing t, and
384 // so we want std::prev(it). The only time when that is not the case are
385 // if it == ranges.begin() (there is no prev), or t == it->first()
386 // (lower_bound returned the correct iterator, because t is the start of a
387 // range). The latter must be additionally guarded by a check that we're
388 // not dereferencing an end iterator.
389 if (it != ranges.begin() && (it == ranges.end() || t != it->first))
390 {
391 it = std::prev(it);
392 }
393
394 if (it != ranges.end())
395 {
396 const T& from = it->first;
397 const T additional = it->second;
398 if (from <= t && t <= from + additional)
399 {
400 // Contained within this range
401 if (from == t)
402 {
403 if (additional == 0u)
404 {
405 // Remove range entirely
406 ranges.erase(it);
407 return true;
408 }
409 else
410 {
411 // Shrink start of range
412 ++it->first;
413 --it->second;
414 return true;
415 }
416 }
417 else if (t == from + additional)
418 {
419 // Shrink end of range
420 --it->second;
421 return true;
422 }
423 else
424 {
425 const auto before = t - it->first - 1;
426 const auto after = it->first + it->second - t - 1;
427
428 it->second = before;
429
430 auto next_it = std::next(it);
431 ranges.emplace(next_it, t + 1, after);
432 return true;
433 }
434 }
435 }
436
437 return false;
438 }
439
440 void extend(const T& from, size_t additional)
441 {
442 for (auto n = from; n <= from + additional; ++n)
443 {
444 const auto b = insert(n);
445 }
446 }
447
448 bool contains(const T& t) const
449 {
450 return find_internal(t) != end().it;
451 }
452
453 ConstIterator find(const T& t) const
454 {
455 auto it = find_internal(t);
456 if (it != ranges.end())
457 {
458 return ConstIterator(it, t - it->first);
459 }
460
461 return end();
462 }
463
464 ConstIterator lower_bound(const T& t) const
465 {
466 return std::lower_bound(begin(), end(), t);
467 }
468
469 ConstIterator upper_bound(const T& t) const
470 {
471 return std::upper_bound(begin(), end(), t);
472 }
473
474 void clear()
475 {
476 ranges.clear();
477 }
478
479 T front() const
480 {
481 return ranges.front().first;
482 }
483
484 T back() const
485 {
486 const auto back = ranges.back();
487 return back.first + back.second;
488 }
489
491 {
492 return ConstIterator(ranges.begin());
493 }
494
496 {
497 return ConstIterator(ranges.end());
498 }
499 };
500}
501
502FMT_BEGIN_NAMESPACE
503template <typename T>
504struct formatter<ccf::ds::ContiguousSet<T>>
505{
506 template <typename ParseContext>
507 constexpr auto parse(ParseContext& ctx)
508 {
509 return ctx.begin();
510 }
511
512 template <typename FormatContext>
513 auto format(const ccf::ds::ContiguousSet<T>& v, FormatContext& ctx) const
514 {
515 std::vector<std::string> ranges;
516 for (const auto& [from, additional] : v.get_ranges())
517 {
518 ranges.emplace_back(fmt::format("[{}->{}]", from, from + additional));
519 }
520 return format_to(
521 ctx.out(),
522 "{{{} values in {} ranges: {}}}",
523 v.size(),
524 v.get_ranges().size(),
525 fmt::join(ranges, ", "));
526 }
527};
528FMT_END_NAMESPACE
Definition contiguous_set.h:18
ConstIterator end() const
Definition contiguous_set.h:495
ContiguousSet(const T &from, size_t additional)
Definition contiguous_set.h:292
T back() const
Definition contiguous_set.h:484
bool erase(const T &t)
Definition contiguous_set.h:371
void extend(const T &from, size_t additional)
Definition contiguous_set.h:440
ContiguousSet(It &&begin, It &&end)
Definition contiguous_set.h:287
ConstIterator find(const T &t) const
Definition contiguous_set.h:453
bool insert(const T &t)
Definition contiguous_set.h:325
bool empty() const
Definition contiguous_set.h:320
T front() const
Definition contiguous_set.h:479
ConstIterator begin() const
Definition contiguous_set.h:490
bool operator==(const ContiguousSet &other) const
Definition contiguous_set.h:297
bool operator!=(const ContiguousSet &other) const
Definition contiguous_set.h:302
size_t size() const
Definition contiguous_set.h:312
bool contains(const T &t) const
Definition contiguous_set.h:448
const Ranges & get_ranges() const
Definition contiguous_set.h:307
std::pair< T, size_t > Range
Definition contiguous_set.h:22
ConstIterator lower_bound(const T &t) const
Definition contiguous_set.h:464
std::vector< Range > Ranges
Definition contiguous_set.h:23
ConstIterator upper_bound(const T &t) const
Definition contiguous_set.h:469
void clear()
Definition contiguous_set.h:474
Definition contiguous_set.h:12
Definition app_interface.h:14
Definition contiguous_set.h:28
ConstIterator & operator-=(size_t n)
Definition contiguous_set.h:128
size_t reference
Definition contiguous_set.h:33
friend ConstIterator operator+(size_t n, const ConstIterator &other)
Definition contiguous_set.h:123
ConstIterator(RangeIt i, size_t o=0)
Definition contiguous_set.h:40
ConstIterator operator+(size_t n) const
Definition contiguous_set.h:116
size_t difference_type
Definition contiguous_set.h:31
RangeIt it
Definition contiguous_set.h:37
ConstIterator operator++(int)
Definition contiguous_set.h:68
size_t value_type
Definition contiguous_set.h:30
const size_t * pointer
Definition contiguous_set.h:32
ConstIterator & operator--()
Definition contiguous_set.h:75
ConstIterator operator--(int)
Definition contiguous_set.h:89
T operator*() const
Definition contiguous_set.h:42
std::random_access_iterator_tag iterator_category
Definition contiguous_set.h:29
ConstIterator operator-(size_t n) const
Definition contiguous_set.h:140
ConstIterator & operator++()
Definition contiguous_set.h:57
size_t offset
Definition contiguous_set.h:38
bool operator==(const ConstIterator &other) const
Definition contiguous_set.h:47
ConstIterator & operator+=(difference_type n_)
Definition contiguous_set.h:96
typename Ranges::const_iterator RangeIt
Definition contiguous_set.h:35
difference_type operator-(const ConstIterator &other) const
Definition contiguous_set.h:147
bool operator!=(const ConstIterator &other) const
Definition contiguous_set.h:52
constexpr auto parse(ParseContext &ctx)
Definition contiguous_set.h:507
auto format(const ccf::ds::ContiguousSet< T > &v, FormatContext &ctx) const
Definition contiguous_set.h:513