comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/kj/table.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 // Copyright (c) 2018 Kenton Varda and contributors
2 // Licensed under the MIT License:
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21
22 #pragma once
23
24 #include "common.h"
25 #include "tuple.h"
26 #include "vector.h"
27 #include "function.h"
28
29 #if _MSC_VER
30 // Need _ReadWriteBarrier
31 #if _MSC_VER < 1910
32 #include <intrin.h>
33 #else
34 #include <intrin0.h>
35 #endif
36 #endif
37
38 #if KJ_DEBUG_TABLE_IMPL
39 #include "debug.h"
40 #define KJ_TABLE_IREQUIRE KJ_REQUIRE
41 #define KJ_TABLE_IASSERT KJ_ASSERT
42 #else
43 #define KJ_TABLE_IREQUIRE KJ_IREQUIRE
44 #define KJ_TABLE_IASSERT KJ_IASSERT
45 #endif
46
47 KJ_BEGIN_HEADER
48
49 namespace kj {
50
51 class String;
52
53 namespace _ { // private
54
55 template <typename Row>
56 class TableMapping;
57 template <typename Row, typename Inner>
58 using TableIterable = MappedIterable<Inner, TableMapping<Row>>;
59 template <typename Row, typename Inner>
60 using TableIterator = MappedIterator<Inner, TableMapping<Row>>;
61
62 } // namespace _ (private)
63
64 template <typename Row, typename... Indexes>
65 class Table {
66 // A table with one or more indexes. This is the KJ alternative to map, set, unordered_map, and
67 // unordered_set.
68 //
69 // Unlike a traditional map, which explicitly stores key/value pairs, a Table simply stores
70 // "rows" of arbitrary type, and then lets the application specify how these should be indexed.
71 // Rows could be indexed on a specific struct field, or they could be indexed based on a computed
72 // property. An index could be hash-based or tree-based. Multiple indexes are supported, making
73 // it easy to construct a "bimap".
74 //
75 // The table has deterministic iteration order based on the sequence of insertions and deletions.
76 // In the case of only insertions, the iteration order is the order of insertion. If deletions
77 // occur, then the current last row is moved to occupy the deleted slot. This determinism is
78 // intended to be reliable for the purpose of testing, etc.
79 //
80 // Each index is a class that looks like:
81 //
82 // class Index {
83 // public:
84 // void reserve(size_t size);
85 // // Called when Table::reserve() is called.
86 //
87 // SearchParam& keyForRow(const Row& row) const;
88 // // Given a row, return a value appropriate to pass as SearchParams to the other functions.
89 //
90 // // In all function calls below, `SearchPrams` refers to whatever parameters the index
91 // // supports for looking up a row in the table.
92 //
93 // template <typename... SearchParams>
94 // kj::Maybe<size_t> insert(kj::ArrayPtr<const Row> table, size_t pos, SearchParams&&...);
95 // // Called to indicate that we're about to insert a new row which will match the given
96 // // search parameters, and will be located at the given position. If this index disallows
97 // // duplicates and some other matching row already exists, then insert() returns the index
98 // // of that row without modifying the index. If the row does not exist, then insert()
99 // // updates the index to note that the new row is located at `pos`. Note that `table[pos]`
100 // // may not be valid yet at the time of this call; the index must go on the search params
101 // // alone.
102 // //
103 // // Insert may throw an exception, in which case the table will roll back insertion.
104 //
105 // template <typename... SearchParams>
106 // void erase(kj::ArrayPtr<const Row> table, size_t pos, SearchParams&&...);
107 // // Called to indicate that the index must remove references to row number `pos`. The
108 // // index must not attempt to access table[pos] directly -- in fact, `pos` may be equal to
109 // // `table.size()`, i.e., may be out-of-bounds (this happens when rolling back a failed
110 // // insertion). Instead, the index can use the search params to search for the row -- they
111 // // will either be the same as the params passed to insert(), or will be a single value of
112 // // type `Row&`.
113 // //
114 // // erase() called immediately after a successful insert() must not throw an exception, as
115 // // it may be called during unwind.
116 //
117 // template <typename... SearchParams>
118 // void move(kj::ArrayPtr<const Row> table, size_t oldPos, size_t newPos, SearchParams&&...);
119 // // Called when a row is about to be moved from `oldPos` to `newPos` in the table. The
120 // // index should update it to the new location. Neither `table[oldPos]` nor `table[newPos]`
121 // // is valid during the call -- use the search params to find the row. Before this call
122 // // `oldPos` is indexed and `newPos` is not -- after the call, the opposite is true.
123 // //
124 // // This should never throw; if it does the table may be corrupted.
125 //
126 // class Iterator; // Behaves like a C++ iterator over size_t values.
127 // class Iterable; // Has begin() and end() methods returning iterators.
128 //
129 // template <typename... SearchParams>
130 // Maybe<size_t> find(kj::ArrayPtr<const Row> table, SearchParams&&...) const;
131 // // Optional. Implements Table::find<Index>(...).
132 //
133 // template <typename... SearchParams>
134 // Iterator seek(kj::ArrayPtr<const Row> table, SearchParams&&...) const;
135 // // Optional. Implements Table::seek<Index>() and Table::range<Index>(...).
136 //
137 // Iterator begin() const;
138 // Iterator end() const;
139 // // Optional. Implements Table::ordered<Index>().
140 // };
141
142 public:
143 Table();
144 Table(Indexes&&... indexes);
145
146 void reserve(size_t size);
147 // Pre-allocates space for a table of the given size. Normally a Table grows by re-allocating
148 // its backing array whenever more space is needed. Reserving in advance avoids redundantly
149 // re-allocating as the table grows.
150
151 size_t size() const;
152 size_t capacity() const;
153
154 void clear();
155
156 Row* begin();
157 Row* end();
158 const Row* begin() const;
159 const Row* end() const;
160
161 Row& insert(Row&& row);
162 Row& insert(const Row& row);
163 // Inserts a new row. Throws an exception if this would violate the uniqueness constraints of any
164 // of the indexes.
165
166 template <typename Collection>
167 void insertAll(Collection&& collection);
168 template <typename Collection>
169 void insertAll(Collection& collection);
170 // Given an iterable collection of Rows, inserts all of them into this table. If the input is
171 // an rvalue, the rows will be moved rather than copied.
172 //
173 // If an insertion throws (e.g. because it violates a uniqueness constraint of some index),
174 // subsequent insertions do not occur, but previous insertions remain inserted.
175
176 template <typename UpdateFunc>
177 Row& upsert(Row&& row, UpdateFunc&& update);
178 template <typename UpdateFunc>
179 Row& upsert(const Row& row, UpdateFunc&& update);
180 // Tries to insert a new row. However, if a duplicate already exists (according to some index),
181 // then update(Row& existingRow, Row&& newRow) is called to modify the existing row.
182
183 template <typename Index, typename... Params>
184 kj::Maybe<Row&> find(Params&&... params);
185 template <typename Index, typename... Params>
186 kj::Maybe<const Row&> find(Params&&... params) const;
187 // Using the given index, search for a matching row. What parameters are accepted depends on the
188 // index. Not all indexes support this method -- "multimap" indexes may support only range().
189
190 template <typename Index, typename... Params, typename Func>
191 Row& findOrCreate(Params&&... params, Func&& createFunc);
192 // Like find(), but if the row doesn't exist, call a function to create it. createFunc() must
193 // return `Row` or something that implicitly converts to `Row`.
194 //
195 // NOTE: C++ doesn't actually properly support inferring types of a parameter pack at the
196 // beginning of an argument list, but we define a hack to support it below. Don't worry about
197 // it.
198
199 template <typename Index, typename BeginKey, typename EndKey>
200 auto range(BeginKey&& begin, EndKey&& end);
201 template <typename Index, typename BeginKey, typename EndKey>
202 auto range(BeginKey&& begin, EndKey&& end) const;
203 // Using the given index, look up a range of values, returning an iterable. What parameters are
204 // accepted depends on the index. Not all indexes support this method (in particular, unordered
205 // indexes normally don't).
206
207 template <typename Index>
208 _::TableIterable<Row, Index&> ordered();
209 template <typename Index>
210 _::TableIterable<const Row, const Index&> ordered() const;
211 // Returns an iterable over the whole table ordered using the given index. Not all indexes
212 // support this method.
213
214 template <typename Index, typename... Params>
215 auto seek(Params&&... params);
216 template <typename Index, typename... Params>
217 auto seek(Params&&... params) const;
218 // Takes same parameters as find(), but returns an iterator at the position where the search
219 // key should go. That is, this returns an iterator that points to the matching entry or, if
220 // there is no matching entry, points at the next entry after the key, in order. Or, if there
221 // is no such entry, the returned iterator is the same as ordered().end().
222 //
223 // seek() is only supported by indexes that support ordered(). It returns the same kind of
224 // iterator that ordered() uses.
225
226 template <typename Index, typename... Params>
227 bool eraseMatch(Params&&... params);
228 // Erase the row that would be matched by `find<Index>(params)`. Returns true if there was a
229 // match.
230
231 template <typename Index, typename BeginKey, typename EndKey>
232 size_t eraseRange(BeginKey&& begin, EndKey&& end);
233 // Erase the row that would be matched by `range<Index>(params)`. Returns the number of
234 // elements erased.
235
236 void erase(Row& row);
237 // Erase the given row.
238 //
239 // WARNING: This invalidates all iterators, so you can't iterate over rows and erase them this
240 // way. Use `eraseAll()` for that.
241
242 Row release(Row& row);
243 // Remove the given row from the table and return it in one operation.
244 //
245 // WARNING: This invalidates all iterators, so you can't iterate over rows and release them this
246 // way.
247
248 template <typename Predicate, typename = decltype(instance<Predicate>()(instance<Row&>()))>
249 size_t eraseAll(Predicate&& predicate);
250 // Erase all rows for which predicate(row) returns true. This scans over the entire table.
251
252 template <typename Collection, typename = decltype(instance<Collection>().begin()), bool = true>
253 size_t eraseAll(Collection&& collection);
254 // Erase all rows in the given iterable collection of rows. This carefully marks rows for
255 // deletion in a first pass then deletes them in a second.
256
257 template <size_t index = 0, typename... Params>
258 kj::Maybe<Row&> find(Params&&... params);
259 template <size_t index = 0, typename... Params>
260 kj::Maybe<const Row&> find(Params&&... params) const;
261 template <size_t index = 0, typename... Params, typename Func>
262 Row& findOrCreate(Params&&... params, Func&& createFunc);
263 template <size_t index = 0, typename BeginKey, typename EndKey>
264 auto range(BeginKey&& begin, EndKey&& end);
265 template <size_t index = 0, typename BeginKey, typename EndKey>
266 auto range(BeginKey&& begin, EndKey&& end) const;
267 template <size_t index = 0>
268 _::TableIterable<Row, TypeOfIndex<index, Tuple<Indexes...>>&> ordered();
269 template <size_t index = 0>
270 _::TableIterable<const Row, const TypeOfIndex<index, Tuple<Indexes...>>&> ordered() const;
271 template <size_t index = 0, typename... Params>
272 auto seek(Params&&... params);
273 template <size_t index = 0, typename... Params>
274 auto seek(Params&&... params) const;
275 template <size_t index = 0, typename... Params>
276 bool eraseMatch(Params&&... params);
277 template <size_t index = 0, typename BeginKey, typename EndKey>
278 size_t eraseRange(BeginKey&& begin, EndKey&& end);
279 // Methods which take an index type as a template parameter can also take an index number. This
280 // is useful particularly when you have multiple indexes of the same type but different runtime
281 // properties. Additionally, you can omit the template parameter altogether to use the first
282 // index.
283
284 template <size_t index = 0>
285 void verify();
286 // Checks the integrity of indexes, throwing an exception if there are any problems. This is
287 // intended to be called within the unit test for an index.
288
289 template <typename Index, typename First, typename... Rest>
290 Row& findOrCreate(First&& first, Rest&&... rest);
291 template <size_t index = 0, typename First, typename... Rest>
292 Row& findOrCreate(First&& first, Rest&&... rest);
293 // HACK: A parameter pack can only be inferred if it lives at the end of the argument list, so
294 // the findOrCreate() definitions from earlier won't actually work. These ones will, but we
295 // have to do some annoying things inside to regroup the arguments.
296
297 private:
298 Vector<Row> rows;
299 Tuple<Indexes...> indexes;
300
301 template <size_t index = 0, bool final = (index >= sizeof...(Indexes))>
302 class Impl;
303 template <typename Func, typename... Params>
304 class FindOrCreateImpl;
305
306 template <typename ParamsTuple, typename... Params>
307 struct FindOrCreateHack;
308
309 void eraseImpl(size_t pos);
310 template <typename Collection>
311 size_t eraseAllImpl(Collection&& collection);
312 };
313
314 template <typename Callbacks>
315 class HashIndex;
316 // A Table index based on a hash table.
317 //
318 // This implementation:
319 // * Is based on linear probing, not chaining. It is important to use a high-quality hash function.
320 // Use the KJ hashing library if possible.
321 // * Is limited to tables of 2^30 rows or less, mainly to allow for tighter packing with 32-bit
322 // integers instead of 64-bit.
323 // * Caches hash codes so that each table row need only be hashed once, and never checks equality
324 // unless hash codes have already been determined to be equal.
325 //
326 // The `Callbacks` type defines how to compute hash codes and equality. It should be defined like:
327 //
328 // class Callbacks {
329 // public:
330 // // In this interface, `SearchParams...` means whatever parameters you want to support in
331 // // a call to table.find(...). By overloading the calls to support various inputs, you can
332 // // affect what table.find(...) accepts.
333 //
334 // SearchParam& keyForRow(const Row& row);
335 // // Given a row of the table, return the SearchParams that might be passed to the other
336 // // methods to match this row.
337 //
338 // bool matches(const Row&, SearchParams&&...) const;
339 // // Returns true if the row on the left matches the search params on the right.
340 //
341 // uint hashCode(SearchParams&&...) const;
342 // // Computes the hash code of the given search params. Matching rows (as determined by
343 // // matches()) must have the same hash code. Non-matching rows should have different hash
344 // // codes, to the maximum extent possible. Non-matching rows with the same hash code hurt
345 // // performance.
346 // };
347 //
348 // If your `Callbacks` type has dynamic state, you may pass its constructor parameters as the
349 // constructor parameters to `HashIndex`.
350
351 template <typename Callbacks>
352 class TreeIndex;
353 // A Table index based on a B-tree.
354 //
355 // This allows sorted iteration over rows.
356 //
357 // The `Callbacks` type defines how to compare rows. It should be defined like:
358 //
359 // class Callbacks {
360 // public:
361 // // In this interface, `SearchParams...` means whatever parameters you want to support in
362 // // a call to table.find(...). By overloading the calls to support various inputs, you can
363 // // affect what table.find(...) accepts.
364 //
365 // SearchParam& keyForRow(const Row& row);
366 // // Given a row of the table, return the SearchParams that might be passed to the other
367 // // methods to match this row.
368 //
369 // bool isBefore(const Row&, SearchParams&&...) const;
370 // // Returns true if the row on the left comes before the search params on the right.
371 //
372 // bool matches(const Row&, SearchParams&&...) const;
373 // // Returns true if the row "matches" the search params.
374 // };
375
376 // =======================================================================================
377 // inline implementation details
378
379 namespace _ { // private
380
381 KJ_NORETURN(void throwDuplicateTableRow());
382
383 template <typename Dst, typename Src, typename = decltype(instance<Src>().size())>
384 inline void tryReserveSize(Dst& dst, Src&& src) { dst.reserve(dst.size() + src.size()); }
385 template <typename... Params>
386 inline void tryReserveSize(Params&&...) {}
387 // If `src` has a `.size()` method, call dst.reserve(dst.size() + src.size()).
388 // Otherwise, do nothing.
389
390 template <typename Row>
391 class TableMapping {
392 public:
393 TableMapping(Row* table): table(table) {}
394 Row& map(size_t i) const { return table[i]; }
395
396 private:
397 Row* table;
398 };
399
400 template <typename Row>
401 class TableUnmapping {
402 public:
403 TableUnmapping(Row* table): table(table) {}
404 size_t map(Row& row) const { return &row - table; }
405 size_t map(Row* row) const { return row - table; }
406
407 private:
408 Row* table;
409 };
410
411 template <typename Iterator>
412 class IterRange {
413 public:
414 inline IterRange(Iterator b, Iterator e): b(b), e(e) {}
415
416 inline Iterator begin() const { return b; }
417 inline Iterator end() const { return e; }
418 private:
419 Iterator b;
420 Iterator e;
421 };
422
423 template <typename Iterator>
424 inline IterRange<Decay<Iterator>> iterRange(Iterator b, Iterator e) {
425 return { b, e };
426 }
427
428 } // namespace _ (private)
429
430 template <typename Row, typename... Indexes>
431 template <size_t index>
432 class Table<Row, Indexes...>::Impl<index, false> {
433 public:
434 static void reserve(Table<Row, Indexes...>& table, size_t size) {
435 get<index>(table.indexes).reserve(size);
436 Impl<index + 1>::reserve(table, size);
437 }
438
439 static void clear(Table<Row, Indexes...>& table) {
440 get<index>(table.indexes).clear();
441 Impl<index + 1>::clear(table);
442 }
443
444 static kj::Maybe<size_t> insert(Table<Row, Indexes...>& table, size_t pos, Row& row, uint skip) {
445 if (skip == index) {
446 return Impl<index + 1>::insert(table, pos, row, skip);
447 }
448 auto& indexObj = get<index>(table.indexes);
449 KJ_IF_MAYBE(existing, indexObj.insert(table.rows.asPtr(), pos, indexObj.keyForRow(row))) {
450 return *existing;
451 }
452
453 bool success = false;
454 KJ_DEFER(if (!success) {
455 indexObj.erase(table.rows.asPtr(), pos, indexObj.keyForRow(row));
456 });
457 auto result = Impl<index + 1>::insert(table, pos, row, skip);
458 success = result == nullptr;
459 return result;
460 }
461
462 static void erase(Table<Row, Indexes...>& table, size_t pos, Row& row) {
463 auto& indexObj = get<index>(table.indexes);
464 indexObj.erase(table.rows.asPtr(), pos, indexObj.keyForRow(row));
465 Impl<index + 1>::erase(table, pos, row);
466 }
467
468 static void move(Table<Row, Indexes...>& table, size_t oldPos, size_t newPos, Row& row) {
469 auto& indexObj = get<index>(table.indexes);
470 indexObj.move(table.rows.asPtr(), oldPos, newPos, indexObj.keyForRow(row));
471 Impl<index + 1>::move(table, oldPos, newPos, row);
472 }
473 };
474
475 template <typename Row, typename... Indexes>
476 template <size_t index>
477 class Table<Row, Indexes...>::Impl<index, true> {
478 public:
479 static void reserve(Table<Row, Indexes...>& table, size_t size) {}
480 static void clear(Table<Row, Indexes...>& table) {}
481 static kj::Maybe<size_t> insert(Table<Row, Indexes...>& table, size_t pos, Row& row, uint skip) {
482 return nullptr;
483 }
484 static void erase(Table<Row, Indexes...>& table, size_t pos, Row& row) {}
485 static void move(Table<Row, Indexes...>& table, size_t oldPos, size_t newPos, Row& row) {}
486 };
487
488 template <typename Row, typename... Indexes>
489 Table<Row, Indexes...>::Table() {}
490
491 template <typename Row, typename... Indexes>
492 Table<Row, Indexes...>::Table(Indexes&&... indexes)
493 : indexes(tuple(kj::fwd<Indexes&&>(indexes)...)) {}
494
495 template <typename Row, typename... Indexes>
496 void Table<Row, Indexes...>::reserve(size_t size) {
497 rows.reserve(size);
498 Impl<>::reserve(*this, size);
499 }
500
501 template <typename Row, typename... Indexes>
502 size_t Table<Row, Indexes...>::size() const {
503 return rows.size();
504 }
505 template <typename Row, typename... Indexes>
506 void Table<Row, Indexes...>::clear() {
507 Impl<>::clear(*this);
508 rows.clear();
509 }
510 template <typename Row, typename... Indexes>
511 size_t Table<Row, Indexes...>::capacity() const {
512 return rows.capacity();
513 }
514
515 template <typename Row, typename... Indexes>
516 Row* Table<Row, Indexes...>::begin() {
517 return rows.begin();
518 }
519 template <typename Row, typename... Indexes>
520 Row* Table<Row, Indexes...>::end() {
521 return rows.end();
522 }
523 template <typename Row, typename... Indexes>
524 const Row* Table<Row, Indexes...>::begin() const {
525 return rows.begin();
526 }
527 template <typename Row, typename... Indexes>
528 const Row* Table<Row, Indexes...>::end() const {
529 return rows.end();
530 }
531
532 template <typename Row, typename... Indexes>
533 Row& Table<Row, Indexes...>::insert(Row&& row) {
534 KJ_IF_MAYBE(existing, Impl<>::insert(*this, rows.size(), row, kj::maxValue)) {
535 _::throwDuplicateTableRow();
536 } else {
537 return rows.add(kj::mv(row));
538 }
539 }
540 template <typename Row, typename... Indexes>
541 Row& Table<Row, Indexes...>::insert(const Row& row) {
542 return insert(kj::cp(row));
543 }
544
545 template <typename Row, typename... Indexes>
546 template <typename Collection>
547 void Table<Row, Indexes...>::insertAll(Collection&& collection) {
548 _::tryReserveSize(*this, collection);
549 for (auto& row: collection) {
550 insert(kj::mv(row));
551 }
552 }
553
554 template <typename Row, typename... Indexes>
555 template <typename Collection>
556 void Table<Row, Indexes...>::insertAll(Collection& collection) {
557 _::tryReserveSize(*this, collection);
558 for (auto& row: collection) {
559 insert(row);
560 }
561 }
562
563 template <typename Row, typename... Indexes>
564 template <typename UpdateFunc>
565 Row& Table<Row, Indexes...>::upsert(Row&& row, UpdateFunc&& update) {
566 KJ_IF_MAYBE(existing, Impl<>::insert(*this, rows.size(), row, kj::maxValue)) {
567 update(rows[*existing], kj::mv(row));
568 return rows[*existing];
569 } else {
570 return rows.add(kj::mv(row));
571 }
572 }
573 template <typename Row, typename... Indexes>
574 template <typename UpdateFunc>
575 Row& Table<Row, Indexes...>::upsert(const Row& row, UpdateFunc&& update) {
576 return upsert(kj::cp(row), kj::fwd<UpdateFunc>(update));
577 }
578
579 template <typename Row, typename... Indexes>
580 template <typename Index, typename... Params>
581 kj::Maybe<Row&> Table<Row, Indexes...>::find(Params&&... params) {
582 return find<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
583 }
584 template <typename Row, typename... Indexes>
585 template <size_t index, typename... Params>
586 kj::Maybe<Row&> Table<Row, Indexes...>::find(Params&&... params) {
587 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) {
588 return rows[*pos];
589 } else {
590 return nullptr;
591 }
592 }
593 template <typename Row, typename... Indexes>
594 template <typename Index, typename... Params>
595 kj::Maybe<const Row&> Table<Row, Indexes...>::find(Params&&... params) const {
596 return find<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
597 }
598 template <typename Row, typename... Indexes>
599 template <size_t index, typename... Params>
600 kj::Maybe<const Row&> Table<Row, Indexes...>::find(Params&&... params) const {
601 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) {
602 return rows[*pos];
603 } else {
604 return nullptr;
605 }
606 }
607
608 template <typename Row, typename... Indexes>
609 template <typename Func, typename... Params>
610 class Table<Row, Indexes...>::FindOrCreateImpl {
611 public:
612 template <size_t index>
613 static Row& apply(Table<Row, Indexes...>& table, Params&&... params, Func&& createFunc) {
614 auto pos = table.rows.size();
615 KJ_IF_MAYBE(existing, get<index>(table.indexes).insert(table.rows.asPtr(), pos, params...)) {
616 return table.rows[*existing];
617 } else {
618 bool success = false;
619 KJ_DEFER({
620 if (!success) {
621 get<index>(table.indexes).erase(table.rows.asPtr(), pos, params...);
622 }
623 });
624 auto& newRow = table.rows.add(createFunc());
625 KJ_DEFER({
626 if (!success) {
627 table.rows.removeLast();
628 }
629 });
630 if (Table<Row, Indexes...>::template Impl<>::insert(table, pos, newRow, index) == nullptr) {
631 success = true;
632 } else {
633 _::throwDuplicateTableRow();
634 }
635 return newRow;
636 }
637 }
638 };
639
640 template <typename Row, typename... Indexes>
641 template <typename... T, typename U, typename V, typename... W>
642 struct Table<Row, Indexes...>::FindOrCreateHack<_::Tuple<T...>, U, V, W...>
643 : public FindOrCreateHack<_::Tuple<T..., U>, V, W...> {};
644 template <typename Row, typename... Indexes>
645 template <typename... T, typename U>
646 struct Table<Row, Indexes...>::FindOrCreateHack<_::Tuple<T...>, U>
647 : public FindOrCreateImpl<U, T...> {};
648 // This awful hack works around C++'s lack of support for parameter packs anywhere other than at
649 // the end of an argument list. We accumulate all of the types except for the last one into a
650 // Tuple, then forward to FindOrCreateImpl with the last parameter as the Func.
651
652 template <typename Row, typename... Indexes>
653 template <typename Index, typename First, typename... Rest>
654 Row& Table<Row, Indexes...>::findOrCreate(First&& first, Rest&&... rest) {
655 return findOrCreate<indexOfType<Index, Tuple<Indexes...>>()>(
656 kj::fwd<First>(first), kj::fwd<Rest>(rest)...);
657 }
658 template <typename Row, typename... Indexes>
659 template <size_t index, typename First, typename... Rest>
660 Row& Table<Row, Indexes...>::findOrCreate(First&& first, Rest&&... rest) {
661 return FindOrCreateHack<_::Tuple<>, First, Rest...>::template apply<index>(
662 *this, kj::fwd<First>(first), kj::fwd<Rest>(rest)...);
663 }
664
665 template <typename Row, typename... Indexes>
666 template <typename Index, typename BeginKey, typename EndKey>
667 auto Table<Row, Indexes...>::range(BeginKey&& begin, EndKey&& end) {
668 return range<indexOfType<Index, Tuple<Indexes...>>()>(
669 kj::fwd<BeginKey>(begin), kj::fwd<EndKey>(end));
670 }
671 template <typename Row, typename... Indexes>
672 template <size_t index, typename BeginKey, typename EndKey>
673 auto Table<Row, Indexes...>::range(BeginKey&& begin, EndKey&& end) {
674 auto inner = _::iterRange(get<index>(indexes).seek(rows.asPtr(), kj::fwd<BeginKey>(begin)),
675 get<index>(indexes).seek(rows.asPtr(), kj::fwd<EndKey>(end)));
676 return _::TableIterable<Row, decltype(inner)>(kj::mv(inner), rows.begin());
677 }
678 template <typename Row, typename... Indexes>
679 template <typename Index, typename BeginKey, typename EndKey>
680 auto Table<Row, Indexes...>::range(BeginKey&& begin, EndKey&& end) const {
681 return range<indexOfType<Index, Tuple<Indexes...>>()>(
682 kj::fwd<BeginKey>(begin), kj::fwd<EndKey>(end));
683 }
684 template <typename Row, typename... Indexes>
685 template <size_t index, typename BeginKey, typename EndKey>
686 auto Table<Row, Indexes...>::range(BeginKey&& begin, EndKey&& end) const {
687 auto inner = _::iterRange(get<index>(indexes).seek(rows.asPtr(), kj::fwd<BeginKey>(begin)),
688 get<index>(indexes).seek(rows.asPtr(), kj::fwd<EndKey>(end)));
689 return _::TableIterable<const Row, decltype(inner)>(kj::mv(inner), rows.begin());
690 }
691
692 template <typename Row, typename... Indexes>
693 template <typename Index>
694 _::TableIterable<Row, Index&> Table<Row, Indexes...>::ordered() {
695 return ordered<indexOfType<Index, Tuple<Indexes...>>()>();
696 }
697 template <typename Row, typename... Indexes>
698 template <size_t index>
699 _::TableIterable<Row, TypeOfIndex<index, Tuple<Indexes...>>&> Table<Row, Indexes...>::ordered() {
700 return { get<index>(indexes), rows.begin() };
701 }
702 template <typename Row, typename... Indexes>
703 template <typename Index>
704 _::TableIterable<const Row, const Index&> Table<Row, Indexes...>::ordered() const {
705 return ordered<indexOfType<Index, Tuple<Indexes...>>()>();
706 }
707 template <typename Row, typename... Indexes>
708 template <size_t index>
709 _::TableIterable<const Row, const TypeOfIndex<index, Tuple<Indexes...>>&>
710 Table<Row, Indexes...>::ordered() const {
711 return { get<index>(indexes), rows.begin() };
712 }
713
714 template <typename Row, typename... Indexes>
715 template <typename Index, typename... Params>
716 auto Table<Row, Indexes...>::seek(Params&&... params) {
717 return seek<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
718 }
719 template <typename Row, typename... Indexes>
720 template <size_t index, typename... Params>
721 auto Table<Row, Indexes...>::seek(Params&&... params) {
722 auto inner = get<index>(indexes).seek(rows.asPtr(), kj::fwd<Params>(params)...);
723 return _::TableIterator<Row, decltype(inner)>(kj::mv(inner), rows.begin());
724 }
725 template <typename Row, typename... Indexes>
726 template <typename Index, typename... Params>
727 auto Table<Row, Indexes...>::seek(Params&&... params) const {
728 return seek<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
729 }
730 template <typename Row, typename... Indexes>
731 template <size_t index, typename... Params>
732 auto Table<Row, Indexes...>::seek(Params&&... params) const {
733 auto inner = get<index>(indexes).seek(rows.asPtr(), kj::fwd<Params>(params)...);
734 return _::TableIterator<Row, decltype(inner)>(kj::mv(inner), rows.begin());
735 }
736
737 template <typename Row, typename... Indexes>
738 template <typename Index, typename... Params>
739 bool Table<Row, Indexes...>::eraseMatch(Params&&... params) {
740 return eraseMatch<indexOfType<Index, Tuple<Indexes...>>()>(kj::fwd<Params>(params)...);
741 }
742 template <typename Row, typename... Indexes>
743 template <size_t index, typename... Params>
744 bool Table<Row, Indexes...>::eraseMatch(Params&&... params) {
745 KJ_IF_MAYBE(pos, get<index>(indexes).find(rows.asPtr(), kj::fwd<Params>(params)...)) {
746 eraseImpl(*pos);
747 return true;
748 } else {
749 return false;
750 }
751 }
752
753 template <typename Row, typename... Indexes>
754 template <typename Index, typename BeginKey, typename EndKey>
755 size_t Table<Row, Indexes...>::eraseRange(BeginKey&& begin, EndKey&& end) {
756 return eraseRange<indexOfType<Index, Tuple<Indexes...>>()>(
757 kj::fwd<BeginKey>(begin), kj::fwd<EndKey>(end));
758 }
759 template <typename Row, typename... Indexes>
760 template <size_t index, typename BeginKey, typename EndKey>
761 size_t Table<Row, Indexes...>::eraseRange(BeginKey&& begin, EndKey&& end) {
762 auto inner = _::iterRange(get<index>(indexes).seek(rows.asPtr(), kj::fwd<BeginKey>(begin)),
763 get<index>(indexes).seek(rows.asPtr(), kj::fwd<EndKey>(end)));
764 return eraseAllImpl(inner);
765 }
766
767 template <typename Row, typename... Indexes>
768 template <size_t index>
769 void Table<Row, Indexes...>::verify() {
770 get<index>(indexes).verify(rows.asPtr());
771 }
772
773 template <typename Row, typename... Indexes>
774 void Table<Row, Indexes...>::erase(Row& row) {
775 KJ_TABLE_IREQUIRE(&row >= rows.begin() && &row < rows.end(), "row is not a member of this table");
776 eraseImpl(&row - rows.begin());
777 }
778 template <typename Row, typename... Indexes>
779 void Table<Row, Indexes...>::eraseImpl(size_t pos) {
780 Impl<>::erase(*this, pos, rows[pos]);
781 size_t back = rows.size() - 1;
782 if (pos != back) {
783 Impl<>::move(*this, back, pos, rows[back]);
784 rows[pos] = kj::mv(rows[back]);
785 }
786 rows.removeLast();
787 }
788
789 template <typename Row, typename... Indexes>
790 Row Table<Row, Indexes...>::release(Row& row) {
791 KJ_TABLE_IREQUIRE(&row >= rows.begin() && &row < rows.end(), "row is not a member of this table");
792 size_t pos = &row - rows.begin();
793 Impl<>::erase(*this, pos, row);
794 Row result = kj::mv(row);
795 size_t back = rows.size() - 1;
796 if (pos != back) {
797 Impl<>::move(*this, back, pos, rows[back]);
798 row = kj::mv(rows[back]);
799 }
800 rows.removeLast();
801 return result;
802 }
803
804 template <typename Row, typename... Indexes>
805 template <typename Predicate, typename>
806 size_t Table<Row, Indexes...>::eraseAll(Predicate&& predicate) {
807 size_t count = 0;
808 for (size_t i = 0; i < rows.size();) {
809 if (predicate(rows[i])) {
810 eraseImpl(i);
811 ++count;
812 // eraseImpl() replaces the erased row with the last row, so don't increment i here; repeat
813 // with the same i.
814 } else {
815 ++i;
816 }
817 }
818 return count;
819 }
820
821 template <typename Row, typename... Indexes>
822 template <typename Collection, typename, bool>
823 size_t Table<Row, Indexes...>::eraseAll(Collection&& collection) {
824 return eraseAllImpl(MappedIterable<Collection&, _::TableUnmapping<Row>>(
825 collection, rows.begin()));
826 }
827
828 template <typename Row, typename... Indexes>
829 template <typename Collection>
830 size_t Table<Row, Indexes...>::eraseAllImpl(Collection&& collection) {
831 // We need to transform the collection of row numbers into a sequence of erasures, accounting
832 // for the fact that each erasure re-positions the last row into its slot.
833 Vector<size_t> erased;
834 _::tryReserveSize(erased, collection);
835 for (size_t pos: collection) {
836 while (pos >= rows.size() - erased.size()) {
837 // Oops, the next item to be erased is already scheduled to be moved to a different location
838 // due to a previous erasure. Figure out where it will be at this point.
839 size_t erasureNumber = rows.size() - pos - 1;
840 pos = erased[erasureNumber];
841 }
842 erased.add(pos);
843 }
844
845 // Now we can execute the sequence of erasures.
846 for (size_t pos: erased) {
847 eraseImpl(pos);
848 }
849
850 return erased.size();
851 }
852
853 // -----------------------------------------------------------------------------
854 // Hash table index
855
856 namespace _ { // private
857
858 void logHashTableInconsistency();
859
860 struct HashBucket {
861 uint hash;
862 uint value;
863
864 HashBucket() = default;
865 HashBucket(uint hash, uint pos)
866 : hash(hash), value(pos + 2) {}
867
868 inline bool isEmpty() const { return value == 0; }
869 inline bool isErased() const { return value == 1; }
870 inline bool isOccupied() const { return value >= 2; }
871 template <typename Row>
872 inline Row& getRow(ArrayPtr<Row> table) const { return table[getPos()]; }
873 template <typename Row>
874 inline const Row& getRow(ArrayPtr<const Row> table) const { return table[getPos()]; }
875 inline bool isPos(uint pos) const { return pos + 2 == value; }
876 inline uint getPos() const {
877 KJ_TABLE_IASSERT(value >= 2);
878 return value - 2;
879 }
880 inline void setEmpty() { value = 0; }
881 inline void setErased() { value = 1; }
882 inline void setPos(uint pos) { value = pos + 2; }
883 };
884
885 inline size_t probeHash(const kj::Array<HashBucket>& buckets, size_t i) {
886 // TODO(perf): Is linear probing OK or should we do something fancier?
887 if (++i == buckets.size()) {
888 return 0;
889 } else {
890 return i;
891 }
892 }
893
894 kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize);
895
896 uint chooseBucket(uint hash, uint count);
897
898 } // namespace _ (private)
899
900 template <typename Callbacks>
901 class HashIndex {
902 public:
903 HashIndex() = default;
904 template <typename... Params>
905 HashIndex(Params&&... params): cb(kj::fwd<Params>(params)...) {}
906
907 size_t capacity() {
908 // This method is for testing.
909 return buckets.size();
910 }
911
912 void reserve(size_t size) {
913 if (buckets.size() < size * 2) {
914 rehash(size);
915 }
916 }
917
918 void clear() {
919 erasedCount = 0;
920 if (buckets.size() > 0) memset(buckets.begin(), 0, buckets.asBytes().size());
921 }
922
923 template <typename Row>
924 decltype(auto) keyForRow(Row&& row) const {
925 return cb.keyForRow(kj::fwd<Row>(row));
926 }
927
928 template <typename Row, typename... Params>
929 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) {
930 if (buckets.size() * 2 < (table.size() + 1 + erasedCount) * 3) {
931 // Load factor is more than 2/3, let's rehash so that it's 1/3, i.e. double the buckets.
932 // Note that rehashing also cleans up erased entries, so we may not actually be doubling if
933 // there are a lot of erasures. Nevertheless, this gives us amortized constant time -- it
934 // would take at least O(table.size()) more insertions (whether or not erasures occur)
935 // before another rehash is needed.
936 rehash((table.size() + 1) * 3);
937 }
938
939 uint hashCode = cb.hashCode(params...);
940 Maybe<_::HashBucket&> erasedSlot;
941 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
942 auto& bucket = buckets[i];
943 if (bucket.isEmpty()) {
944 // no duplicates found
945 KJ_IF_MAYBE(s, erasedSlot) {
946 --erasedCount;
947 *s = { hashCode, uint(pos) };
948 } else {
949 bucket = { hashCode, uint(pos) };
950 }
951 return nullptr;
952 } else if (bucket.isErased()) {
953 // We can fill in the erased slot. However, we have to keep searching to make sure there
954 // are no duplicates before we do that.
955 if (erasedSlot == nullptr) {
956 erasedSlot = bucket;
957 }
958 } else if (bucket.hash == hashCode &&
959 cb.matches(bucket.getRow(table), params...)) {
960 // duplicate row
961 return size_t(bucket.getPos());
962 }
963 }
964 }
965
966 template <typename Row, typename... Params>
967 void erase(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) {
968 uint hashCode = cb.hashCode(params...);
969 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
970 auto& bucket = buckets[i];
971 if (bucket.isPos(pos)) {
972 // found it
973 ++erasedCount;
974 bucket.setErased();
975 return;
976 } else if (bucket.isEmpty()) {
977 // can't find the bucket, something is very wrong
978 _::logHashTableInconsistency();
979 return;
980 }
981 }
982 }
983
984 template <typename Row, typename... Params>
985 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, Params&&... params) {
986 uint hashCode = cb.hashCode(params...);
987 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
988 auto& bucket = buckets[i];
989 if (bucket.isPos(oldPos)) {
990 // found it
991 bucket.setPos(newPos);
992 return;
993 } else if (bucket.isEmpty()) {
994 // can't find the bucket, something is very wrong
995 _::logHashTableInconsistency();
996 return;
997 }
998 }
999 }
1000
1001 template <typename Row, typename... Params>
1002 Maybe<size_t> find(kj::ArrayPtr<Row> table, Params&&... params) const {
1003 if (buckets.size() == 0) return nullptr;
1004
1005 uint hashCode = cb.hashCode(params...);
1006 for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
1007 auto& bucket = buckets[i];
1008 if (bucket.isEmpty()) {
1009 // not found.
1010 return nullptr;
1011 } else if (bucket.isErased()) {
1012 // skip, keep searching
1013 } else if (bucket.hash == hashCode &&
1014 cb.matches(bucket.getRow(table), params...)) {
1015 // found
1016 return size_t(bucket.getPos());
1017 }
1018 }
1019 }
1020
1021 // No begin() nor end() because hash tables are not usefully ordered.
1022
1023 private:
1024 Callbacks cb;
1025 size_t erasedCount = 0;
1026 Array<_::HashBucket> buckets;
1027
1028 void rehash(size_t targetSize) {
1029 buckets = _::rehash(buckets, targetSize);
1030 erasedCount = 0;
1031 }
1032 };
1033
1034 // -----------------------------------------------------------------------------
1035 // BTree index
1036
1037 namespace _ { // private
1038
1039 KJ_ALWAYS_INLINE(void compilerBarrier());
1040 void compilerBarrier() {
1041 // Make sure that reads occurring before this call cannot be re-ordered to happen after
1042 // writes that occur after this call. We need this in a couple places below to prevent C++
1043 // strict aliasing rules from breaking things.
1044 #if _MSC_VER
1045 _ReadWriteBarrier();
1046 #else
1047 __asm__ __volatile__("": : :"memory");
1048 #endif
1049 }
1050
1051 template <typename T>
1052 inline void acopy(T* to, T* from, size_t size) { memcpy(to, from, size * sizeof(T)); }
1053 template <typename T>
1054 inline void amove(T* to, T* from, size_t size) { memmove(to, from, size * sizeof(T)); }
1055 template <typename T>
1056 inline void azero(T* ptr, size_t size) { memset(ptr, 0, size * sizeof(T)); }
1057 // memcpy/memmove/memset variants that count size in elements, not bytes.
1058 //
1059 // TODO(cleanup): These are generally useful, put them somewhere.
1060
1061 class BTreeImpl {
1062 public:
1063 class Iterator;
1064 class MaybeUint;
1065 struct NodeUnion;
1066 struct Leaf;
1067 struct Parent;
1068 struct Freelisted;
1069
1070 class SearchKey {
1071 // Passed to methods that need to search the tree. This class allows most of the B-tree
1072 // implementation to be kept out of templates, avoiding code bloat, at the cost of some
1073 // performance trade-off. In order to lessen the performance cost of virtual calls, we design
1074 // this interface so that it only needs to be called once per tree node, rather than once per
1075 // comparison.
1076
1077 public:
1078 virtual uint search(const Parent& parent) const = 0;
1079 virtual uint search(const Leaf& leaf) const = 0;
1080 // Binary search for the first key/row in the parent/leaf that is equal to or comes after the
1081 // search key.
1082
1083 virtual bool isAfter(uint rowIndex) const = 0;
1084 // Returns true if the key comes after the value in the given row.
1085 };
1086
1087 BTreeImpl();
1088 ~BTreeImpl() noexcept(false);
1089
1090 KJ_DISALLOW_COPY(BTreeImpl);
1091 BTreeImpl(BTreeImpl&& other);
1092 BTreeImpl& operator=(BTreeImpl&& other);
1093
1094 void logInconsistency() const;
1095
1096 void reserve(size_t size);
1097
1098 void clear();
1099
1100 Iterator begin() const;
1101 Iterator end() const;
1102
1103 Iterator search(const SearchKey& searchKey) const;
1104 // Find the "first" row (in sorted order) for which searchKey.isAfter(rowNumber) returns true.
1105
1106 Iterator insert(const SearchKey& searchKey);
1107 // Like search() but ensures that there is room in the leaf node to insert a new row.
1108
1109 void erase(uint row, const SearchKey& searchKey);
1110 // Erase the given row number from the tree. searchKey.isAfter() returns true for the given row
1111 // and all rows after it.
1112
1113 void renumber(uint oldRow, uint newRow, const SearchKey& searchKey);
1114 // Renumber the given row from oldRow to newRow. searchKey.isAfter() returns true for oldRow and
1115 // all rows after it. (It will not be called on newRow.)
1116
1117 void verify(size_t size, FunctionParam<bool(uint, uint)>);
1118
1119 private:
1120 NodeUnion* tree; // allocated with aligned_alloc aligned to cache lines
1121 uint treeCapacity;
1122 uint height; // height of *parent* tree -- does not include the leaf level
1123 uint freelistHead;
1124 uint freelistSize;
1125 uint beginLeaf;
1126 uint endLeaf;
1127 void growTree(uint minCapacity = 0);
1128
1129 template <typename T>
1130 struct AllocResult;
1131
1132 template <typename T>
1133 inline AllocResult<T> alloc();
1134 inline void free(uint pos);
1135
1136 inline uint split(Parent& src, uint srcPos, Parent& dst, uint dstPos);
1137 inline uint split(Leaf& dst, uint dstPos, Leaf& src, uint srcPos);
1138 inline void merge(Parent& dst, uint dstPos, uint pivot, Parent& src);
1139 inline void merge(Leaf& dst, uint dstPos, uint pivot, Leaf& src);
1140 inline void move(Parent& dst, uint dstPos, Parent& src);
1141 inline void move(Leaf& dst, uint dstPos, Leaf& src);
1142 inline void rotateLeft(
1143 Parent& left, Parent& right, Parent& parent, uint indexInParent, MaybeUint*& fixup);
1144 inline void rotateLeft(
1145 Leaf& left, Leaf& right, Parent& parent, uint indexInParent, MaybeUint*& fixup);
1146 inline void rotateRight(Parent& left, Parent& right, Parent& parent, uint indexInParent);
1147 inline void rotateRight(Leaf& left, Leaf& right, Parent& parent, uint indexInParent);
1148
1149 template <typename Node>
1150 inline Node& insertHelper(const SearchKey& searchKey,
1151 Node& node, Parent* parent, uint indexInParent, uint pos);
1152
1153 template <typename Node>
1154 inline Node& eraseHelper(
1155 Node& node, Parent* parent, uint indexInParent, uint pos, MaybeUint*& fixup);
1156
1157 size_t verifyNode(size_t size, FunctionParam<bool(uint, uint)>&,
1158 uint pos, uint height, MaybeUint maxRow);
1159
1160 static const NodeUnion EMPTY_NODE;
1161 };
1162
1163 class BTreeImpl::MaybeUint {
1164 // A nullable uint, using the value zero to mean null and shifting all other values up by 1.
1165 public:
1166 MaybeUint() = default;
1167 inline MaybeUint(uint i): i(i - 1) {}
1168 inline MaybeUint(decltype(nullptr)): i(0) {}
1169
1170 inline bool operator==(decltype(nullptr)) const { return i == 0; }
1171 inline bool operator==(uint j) const { return i == j + 1; }
1172 inline bool operator==(const MaybeUint& other) const { return i == other.i; }
1173 inline bool operator!=(decltype(nullptr)) const { return i != 0; }
1174 inline bool operator!=(uint j) const { return i != j + 1; }
1175 inline bool operator!=(const MaybeUint& other) const { return i != other.i; }
1176
1177 inline MaybeUint& operator=(decltype(nullptr)) { i = 0; return *this; }
1178 inline MaybeUint& operator=(uint j) { i = j + 1; return *this; }
1179
1180 inline uint operator*() const { KJ_TABLE_IREQUIRE(i != 0); return i - 1; }
1181
1182 template <typename Func>
1183 inline bool check(Func& func) const { return i != 0 && func(i - 1); }
1184 // Equivalent to *this != nullptr && func(**this)
1185
1186 kj::String toString() const;
1187
1188 private:
1189 uint i;
1190 };
1191
1192 struct BTreeImpl::Leaf {
1193 uint next;
1194 uint prev;
1195 // Pointers to next and previous nodes at the same level, used for fast iteration.
1196
1197 static constexpr size_t NROWS = 14;
1198 MaybeUint rows[NROWS];
1199 // Pointers to table rows, offset by 1 so that 0 is an empty value.
1200
1201 inline bool isFull() const;
1202 inline bool isMostlyFull() const;
1203 inline bool isHalfFull() const;
1204
1205 inline void insert(uint i, uint newRow) {
1206 KJ_TABLE_IREQUIRE(rows[Leaf::NROWS - 1] == nullptr); // check not full
1207
1208 amove(rows + i + 1, rows + i, Leaf::NROWS - (i + 1));
1209 rows[i] = newRow;
1210 }
1211
1212 inline void erase(uint i) {
1213 KJ_TABLE_IREQUIRE(rows[0] != nullptr); // check not empty
1214
1215 amove(rows + i, rows + i + 1, Leaf::NROWS - (i + 1));
1216 rows[Leaf::NROWS - 1] = nullptr;
1217 }
1218
1219 inline uint size() const {
1220 static_assert(Leaf::NROWS == 14, "logic here needs updating");
1221
1222 // Binary search for first empty element in `rows`, or return 14 if no empty elements. We do
1223 // this in a branch-free manner. Since there are 15 possible results (0 through 14, inclusive),
1224 // this isn't a perfectly balanced binary search. We carefully choose the split points so that
1225 // there's no way we'll try to dereference row[14] or later (which would be a buffer overflow).
1226 uint i = (rows[6] != nullptr) * 7;
1227 i += (rows[i + 3] != nullptr) * 4;
1228 i += (rows[i + 1] != nullptr) * 2;
1229 i += (rows[i ] != nullptr);
1230 return i;
1231 }
1232
1233 template <typename Func>
1234 inline uint binarySearch(Func& predicate) const {
1235 // Binary search to find first row for which predicate(row) is false.
1236
1237 static_assert(Leaf::NROWS == 14, "logic here needs updating");
1238
1239 // See comments in size().
1240 uint i = (rows[6].check(predicate)) * 7;
1241 i += (rows[i + 3].check(predicate)) * 4;
1242 i += (rows[i + 1].check(predicate)) * 2;
1243 if (i != 6) { // don't redundantly check row 6
1244 i += (rows[i ].check(predicate));
1245 }
1246 return i;
1247 }
1248 };
1249
1250 struct BTreeImpl::Parent {
1251 uint unused;
1252 // Not used. May be arbitrarily non-zero due to overlap with Freelisted::nextOffset.
1253
1254 static constexpr size_t NKEYS = 7;
1255 MaybeUint keys[NKEYS];
1256 // Pointers to table rows, offset by 1 so that 0 is an empty value.
1257 //
1258 // Each keys[i] specifies the table row which is the "last" row found under children[i].
1259 //
1260 // Note that `keys` has size 7 but `children` has size 8. `children[8]`'s "last row" is not
1261 // recorded here, because the Parent's Parent records it instead. (Or maybe the Parent's Parent's
1262 // Parent, if this Parent is `children[8]` of its own Parent. And so on.)
1263
1264 static constexpr size_t NCHILDREN = NKEYS + 1;
1265 uint children[NCHILDREN];
1266 // Pointers to children. Not offset because the root is always at position 0, and a pointer
1267 // to the root would be nonsensical.
1268
1269 inline bool isFull() const;
1270 inline bool isMostlyFull() const;
1271 inline bool isHalfFull() const;
1272 inline void initRoot(uint key, uint leftChild, uint rightChild);
1273 inline void insertAfter(uint i, uint splitKey, uint child);
1274 inline void eraseAfter(uint i);
1275
1276 inline uint keyCount() const {
1277 static_assert(Parent::NKEYS == 7, "logic here needs updating");
1278
1279 // Binary search for first empty element in `keys`, or return 7 if no empty elements. We do
1280 // this in a branch-free manner. Since there are 8 possible results (0 through 7, inclusive),
1281 // this is a perfectly balanced binary search.
1282 uint i = (keys[3] != nullptr) * 4;
1283 i += (keys[i + 1] != nullptr) * 2;
1284 i += (keys[i ] != nullptr);
1285 return i;
1286 }
1287
1288 template <typename Func>
1289 inline uint binarySearch(Func& predicate) const {
1290 // Binary search to find first key for which predicate(key) is false.
1291
1292 static_assert(Parent::NKEYS == 7, "logic here needs updating");
1293
1294 // See comments in size().
1295 uint i = (keys[3].check(predicate)) * 4;
1296 i += (keys[i + 1].check(predicate)) * 2;
1297 i += (keys[i ].check(predicate));
1298 return i;
1299 }
1300 };
1301
1302 struct BTreeImpl::Freelisted {
1303 int nextOffset;
1304 // The next node in the freelist is at: this + 1 + nextOffset
1305 //
1306 // Hence, newly-allocated space can initialize this to zero.
1307
1308 uint zero[15];
1309 // Freelisted entries are always zero'd.
1310 };
1311
1312 struct BTreeImpl::NodeUnion {
1313 union {
1314 Freelisted freelist;
1315 // If this node is in the freelist.
1316
1317 Leaf leaf;
1318 // If this node is a leaf.
1319
1320 Parent parent;
1321 // If this node is not a leaf.
1322 };
1323
1324 inline operator Leaf&() { return leaf; }
1325 inline operator Parent&() { return parent; }
1326 inline operator const Leaf&() const { return leaf; }
1327 inline operator const Parent&() const { return parent; }
1328 };
1329
1330 static_assert(sizeof(BTreeImpl::Parent) == 64,
1331 "BTreeImpl::Parent should be optimized to fit a cache line");
1332 static_assert(sizeof(BTreeImpl::Leaf) == 64,
1333 "BTreeImpl::Leaf should be optimized to fit a cache line");
1334 static_assert(sizeof(BTreeImpl::Freelisted) == 64,
1335 "BTreeImpl::Freelisted should be optimized to fit a cache line");
1336 static_assert(sizeof(BTreeImpl::NodeUnion) == 64,
1337 "BTreeImpl::NodeUnion should be optimized to fit a cache line");
1338
1339 bool BTreeImpl::Leaf::isFull() const {
1340 return rows[Leaf::NROWS - 1] != nullptr;
1341 }
1342 bool BTreeImpl::Leaf::isMostlyFull() const {
1343 return rows[Leaf::NROWS / 2] != nullptr;
1344 }
1345 bool BTreeImpl::Leaf::isHalfFull() const {
1346 KJ_TABLE_IASSERT(rows[Leaf::NROWS / 2 - 1] != nullptr);
1347 return rows[Leaf::NROWS / 2] == nullptr;
1348 }
1349
1350 bool BTreeImpl::Parent::isFull() const {
1351 return keys[Parent::NKEYS - 1] != nullptr;
1352 }
1353 bool BTreeImpl::Parent::isMostlyFull() const {
1354 return keys[Parent::NKEYS / 2] != nullptr;
1355 }
1356 bool BTreeImpl::Parent::isHalfFull() const {
1357 KJ_TABLE_IASSERT(keys[Parent::NKEYS / 2 - 1] != nullptr);
1358 return keys[Parent::NKEYS / 2] == nullptr;
1359 }
1360
1361 class BTreeImpl::Iterator {
1362 public:
1363 Iterator(const NodeUnion* tree, const Leaf* leaf, uint row)
1364 : tree(tree), leaf(leaf), row(row) {}
1365
1366 size_t operator*() const {
1367 KJ_TABLE_IREQUIRE(row < Leaf::NROWS && leaf->rows[row] != nullptr,
1368 "tried to dereference end() iterator");
1369 return *leaf->rows[row];
1370 }
1371
1372 inline Iterator& operator++() {
1373 KJ_TABLE_IREQUIRE(leaf->rows[row] != nullptr, "B-tree iterator overflow");
1374 ++row;
1375 if (row >= Leaf::NROWS || leaf->rows[row] == nullptr) {
1376 if (leaf->next == 0) {
1377 // at end; stay on current leaf
1378 } else {
1379 leaf = &tree[leaf->next].leaf;
1380 row = 0;
1381 }
1382 }
1383 return *this;
1384 }
1385 inline Iterator operator++(int) {
1386 Iterator other = *this;
1387 ++*this;
1388 return other;
1389 }
1390
1391 inline Iterator& operator--() {
1392 if (row == 0) {
1393 KJ_TABLE_IREQUIRE(leaf->prev != 0, "B-tree iterator underflow");
1394 leaf = &tree[leaf->prev].leaf;
1395 row = leaf->size() - 1;
1396 } else {
1397 --row;
1398 }
1399 return *this;
1400 }
1401 inline Iterator operator--(int) {
1402 Iterator other = *this;
1403 --*this;
1404 return other;
1405 }
1406
1407 inline bool operator==(const Iterator& other) const {
1408 return leaf == other.leaf && row == other.row;
1409 }
1410 inline bool operator!=(const Iterator& other) const {
1411 return leaf != other.leaf || row != other.row;
1412 }
1413
1414 bool isEnd() {
1415 return row == Leaf::NROWS || leaf->rows[row] == nullptr;
1416 }
1417
1418 void insert(BTreeImpl& impl, uint newRow) {
1419 KJ_TABLE_IASSERT(impl.tree == tree);
1420 const_cast<Leaf*>(leaf)->insert(row, newRow);
1421 }
1422
1423 void erase(BTreeImpl& impl) {
1424 KJ_TABLE_IASSERT(impl.tree == tree);
1425 const_cast<Leaf*>(leaf)->erase(row);
1426 }
1427
1428 void replace(BTreeImpl& impl, uint newRow) {
1429 KJ_TABLE_IASSERT(impl.tree == tree);
1430 const_cast<Leaf*>(leaf)->rows[row] = newRow;
1431 }
1432
1433 private:
1434 const NodeUnion* tree;
1435 const Leaf* leaf;
1436 uint row;
1437 };
1438
1439 inline BTreeImpl::Iterator BTreeImpl::begin() const {
1440 return { tree, &tree[beginLeaf].leaf, 0 };
1441 }
1442 inline BTreeImpl::Iterator BTreeImpl::end() const {
1443 auto& leaf = tree[endLeaf].leaf;
1444 return { tree, &leaf, leaf.size() };
1445 }
1446
1447 } // namespace _ (private)
1448
1449 template <typename Callbacks>
1450 class TreeIndex {
1451 public:
1452 TreeIndex() = default;
1453 template <typename... Params>
1454 TreeIndex(Params&&... params): cb(kj::fwd<Params>(params)...) {}
1455
1456 template <typename Row>
1457 void verify(kj::ArrayPtr<Row> table) {
1458 impl.verify(table.size(), [&](uint i, uint j) {
1459 return cb.isBefore(table[i], table[j]);
1460 });
1461 }
1462
1463 inline void reserve(size_t size) { impl.reserve(size); }
1464 inline void clear() { impl.clear(); }
1465 inline auto begin() const { return impl.begin(); }
1466 inline auto end() const { return impl.end(); }
1467
1468 template <typename Row>
1469 decltype(auto) keyForRow(Row&& row) const {
1470 return cb.keyForRow(kj::fwd<Row>(row));
1471 }
1472
1473 template <typename Row, typename... Params>
1474 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) {
1475 auto iter = impl.insert(searchKey(table, params...));
1476
1477 if (!iter.isEnd() && cb.matches(table[*iter], params...)) {
1478 return *iter;
1479 } else {
1480 iter.insert(impl, pos);
1481 return nullptr;
1482 }
1483 }
1484
1485 template <typename Row, typename... Params>
1486 void erase(kj::ArrayPtr<Row> table, size_t pos, Params&&... params) {
1487 impl.erase(pos, searchKeyForErase(table, pos, params...));
1488 }
1489
1490 template <typename Row, typename... Params>
1491 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, Params&&... params) {
1492 impl.renumber(oldPos, newPos, searchKey(table, params...));
1493 }
1494
1495 template <typename Row, typename... Params>
1496 Maybe<size_t> find(kj::ArrayPtr<Row> table, Params&&... params) const {
1497 auto iter = impl.search(searchKey(table, params...));
1498
1499 if (!iter.isEnd() && cb.matches(table[*iter], params...)) {
1500 return size_t(*iter);
1501 } else {
1502 return nullptr;
1503 }
1504 }
1505
1506 template <typename Row, typename... Params>
1507 _::BTreeImpl::Iterator seek(kj::ArrayPtr<Row> table, Params&&... params) const {
1508 return impl.search(searchKey(table, params...));
1509 }
1510
1511 private:
1512 Callbacks cb;
1513 _::BTreeImpl impl;
1514
1515 template <typename Predicate>
1516 class SearchKeyImpl: public _::BTreeImpl::SearchKey {
1517 public:
1518 SearchKeyImpl(Predicate&& predicate)
1519 : predicate(kj::mv(predicate)) {}
1520
1521 uint search(const _::BTreeImpl::Parent& parent) const override {
1522 return parent.binarySearch(predicate);
1523 }
1524 uint search(const _::BTreeImpl::Leaf& leaf) const override {
1525 return leaf.binarySearch(predicate);
1526 }
1527 bool isAfter(uint rowIndex) const override {
1528 return predicate(rowIndex);
1529 }
1530
1531 private:
1532 Predicate predicate;
1533 };
1534
1535 template <typename Row, typename... Params>
1536 inline auto searchKey(kj::ArrayPtr<Row>& table, Params&... params) const {
1537 auto predicate = [&](uint i) { return cb.isBefore(table[i], params...); };
1538 return SearchKeyImpl<decltype(predicate)>(kj::mv(predicate));
1539 }
1540
1541 template <typename Row, typename... Params>
1542 inline auto searchKeyForErase(kj::ArrayPtr<Row>& table, uint pos, Params&... params) const {
1543 // When erasing, the table entry for the erased row may already be invalid, so we must avoid
1544 // accessing it.
1545 auto predicate = [&,pos](uint i) {
1546 return i != pos && cb.isBefore(table[i], params...);
1547 };
1548 return SearchKeyImpl<decltype(predicate)>(kj::mv(predicate));
1549 }
1550 };
1551
1552 // -----------------------------------------------------------------------------
1553 // Insertion order index
1554
1555 class InsertionOrderIndex {
1556 // Table index which allows iterating over elements in order of insertion. This index cannot
1557 // be used for Table::find(), but can be used for Table::ordered().
1558
1559 struct Link;
1560 public:
1561 InsertionOrderIndex();
1562 InsertionOrderIndex(const InsertionOrderIndex&) = delete;
1563 InsertionOrderIndex& operator=(const InsertionOrderIndex&) = delete;
1564 InsertionOrderIndex(InsertionOrderIndex&& other);
1565 InsertionOrderIndex& operator=(InsertionOrderIndex&& other);
1566 ~InsertionOrderIndex() noexcept(false);
1567
1568 class Iterator {
1569 public:
1570 Iterator(const Link* links, uint pos)
1571 : links(links), pos(pos) {}
1572
1573 inline size_t operator*() const {
1574 KJ_TABLE_IREQUIRE(pos != 0, "can't dereference end() iterator");
1575 return pos - 1;
1576 };
1577
1578 inline Iterator& operator++() {
1579 pos = links[pos].next;
1580 return *this;
1581 }
1582 inline Iterator operator++(int) {
1583 Iterator result = *this;
1584 ++*this;
1585 return result;
1586 }
1587 inline Iterator& operator--() {
1588 pos = links[pos].prev;
1589 return *this;
1590 }
1591 inline Iterator operator--(int) {
1592 Iterator result = *this;
1593 --*this;
1594 return result;
1595 }
1596
1597 inline bool operator==(const Iterator& other) const {
1598 return pos == other.pos;
1599 }
1600 inline bool operator!=(const Iterator& other) const {
1601 return pos != other.pos;
1602 }
1603
1604 private:
1605 const Link* links;
1606 uint pos;
1607 };
1608
1609 template <typename Row>
1610 Row& keyForRow(Row& row) const { return row; }
1611
1612 void reserve(size_t size);
1613 void clear();
1614 inline Iterator begin() const { return Iterator(links, links[0].next); }
1615 inline Iterator end() const { return Iterator(links, 0); }
1616
1617 template <typename Row>
1618 kj::Maybe<size_t> insert(kj::ArrayPtr<Row> table, size_t pos, const Row& row) {
1619 return insertImpl(pos);
1620 }
1621
1622 template <typename Row>
1623 void erase(kj::ArrayPtr<Row> table, size_t pos, const Row& row) {
1624 eraseImpl(pos);
1625 }
1626
1627 template <typename Row>
1628 void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos, const Row& row) {
1629 return moveImpl(oldPos, newPos);
1630 }
1631
1632 private:
1633 struct Link {
1634 uint next;
1635 uint prev;
1636 };
1637
1638 uint capacity;
1639 Link* links;
1640 // links[0] is special: links[0].next points to the first link, links[0].prev points to the last.
1641 // links[n+1] corresponds to row n.
1642
1643 kj::Maybe<size_t> insertImpl(size_t pos);
1644 void eraseImpl(size_t pos);
1645 void moveImpl(size_t oldPos, size_t newPos);
1646
1647 static const Link EMPTY_LINK;
1648 };
1649
1650 } // namespace kj
1651
1652 KJ_END_HEADER