/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BSET_H
#define _BCACHEFS_BSET_H

#include <linux/kernel.h>
#include <linux/types.h>

#include "bcachefs.h"
#include "bkey.h"
#include "bkey_methods.h"
#include "btree_types.h"
#include "util.h" /* for time_stats */
#include "vstructs.h"

/*
 * BKEYS:
 *
 * A bkey contains a key, a size field, a variable number of pointers, and some
 * ancillary flag bits.
 *
 * We use two different functions for validating bkeys, bkey_invalid() and
 * bkey_deleted().
 *
 * The one exception to the rule that ptr_invalid() filters out invalid keys is
 * that it also filters out keys of size 0 - these are keys that have been
 * completely overwritten. It'd be safe to delete these in memory while leaving
 * them on disk, just unnecessary work - so we filter them out when resorting
 * instead.
 *
 * We can't filter out stale keys when we're resorting, because garbage
 * collection needs to find them to ensure bucket gens don't wrap around -
 * unless we're rewriting the btree node those stale keys still exist on disk.
 *
 * We also implement functions here for removing some number of sectors from
 * the front or the back of a bkey - this is mainly used for fixing overlapping
 * extents, by removing the overlapping sectors from the older key.
 *
 * BSETS:
 *
 * A bset is an array of bkeys laid out contiguously in memory in sorted order,
 * along with a header. A btree node is made up of a number of these, written
 * at different times.
 *
 * There could be many of them on disk, but we never allow there to be more
 * than 4 in memory - we lazily resort as needed.
 *
 * We implement code here for creating and maintaining auxiliary search trees
 * (described below) for searching an individual bset, and on top of that we
 * implement a btree iterator.
 *
 * BTREE ITERATOR:
 *
 * Most of the code in bcache doesn't care about an individual bset - it needs
 * to search entire btree nodes and iterate over them in sorted order.
 *
 * The btree iterator code serves both functions; it iterates through the keys
 * in a btree node in sorted order, starting from either keys after a specific
 * point (if you pass it a search key) or the start of the btree node.
 *
 * AUXILIARY SEARCH TREES:
 *
 * Since keys are variable length, we can't use a binary search on a bset - we
 * wouldn't be able to find the start of the next key. But binary searches are
 * slow anyway, due to terrible cache behaviour; bcache originally used binary
 * searches and that code topped out at under 50k lookups/second.
 *
 * So we need to construct some sort of lookup table. Since we only insert keys
 * into the last (unwritten) set, most of the keys within a given btree node
 * are usually in sets that are mostly constant. We use two different types of
 * lookup tables to take advantage of this.
 *
 * Both lookup tables share in common that they don't index every key in the
 * set; they index one key every BSET_CACHELINE bytes, and then a linear search
 * is used for the rest.
 *
 * For sets that have been written to disk and are no longer being inserted
 * into, we construct a binary search tree in an array - traversing a binary
 * search tree in an array gives excellent locality of reference and is very
 * fast, since both children of any node are adjacent to each other in memory
 * (and their grandchildren, and great grandchildren...) - this means
 * prefetching can be used to great effect.
 *
 * It's quite useful performance-wise to keep these nodes small - not just
 * because they're more likely to be in L2, but also because we can prefetch
 * more nodes on a single cacheline and thus prefetch more iterations in
 * advance when traversing this tree.
 *
 * Nodes in the auxiliary search tree must contain both a key to compare
 * against (we don't want to fetch the key from the set, that would defeat the
 * purpose), and a pointer to the key. We use a few tricks to compress both of
 * these.
 *
 * To compress the pointer, we take advantage of the fact that one node in the
 * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We
 * have a function (to_inorder()) that takes the index of a node in a binary
 * tree and returns what its index would be in an inorder traversal, so we
 * only have to store the low bits of the offset.
 *
 * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To
 * compress that, we take advantage of the fact that when we're traversing the
 * search tree at every iteration we know that both our search key and the key
 * we're looking for lie within some range - bounded by our previous
 * comparisons. (We special case the start of a search so that this is true
 * even at the root of the tree).
 *
 * So if we know the key we're looking for is between a and b, and a and b
 * don't differ higher than bit 50, we don't need to check anything higher
 * than bit 50.
 *
 * We don't usually need the rest of the bits, either; we only need enough
 * bits to partition the key range we're currently checking. Consider key n -
 * the key our auxiliary search tree node corresponds to, and key p, the key
 * immediately preceding n. The lowest bit we need to store in the auxiliary
 * search tree is the highest bit that differs between n and p.
 *
 * Note that this could be bit 0 - we might sometimes need all 84 bits to do
 * the comparison. But we'd really like our nodes in the auxiliary search tree
 * to be of fixed size.
 *
 * The solution is to make them fixed size, and when we're constructing a node
 * check if p and n differed in the bits we needed them to. If they didn't we
 * flag that node, and when doing lookups we fall back to comparing against
 * the real key. As long as this doesn't happen too often (and it seems to
 * reliably happen a bit less than 1% of the time), we win - even on failures,
 * that key is then more likely to be in cache than if we were doing binary
 * searches all the way, since we're touching so much less memory.
 *
 * The keys in the auxiliary search tree are stored in (software) floating
 * point, with an exponent and a mantissa. The exponent needs to be big enough
 * to address all the bits in the original key, but the number of bits in the
 * mantissa is somewhat arbitrary; more bits just gets us fewer failures.
 *
 * We need 7 bits for the exponent and 3 bits for the key's offset (since keys
 * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes.
 * We need one node per 128 bytes in the btree node, which means the auxiliary
 * search trees take up 3% as much memory as the btree itself.
 *
 * Constructing these auxiliary search trees is moderately expensive, and we
 * don't want to be constantly rebuilding the search tree for the last set
 * whenever we insert another key into it. For the unwritten set, we use a
 * much simpler lookup table - it's just a flat array, so index i in the
 * lookup table corresponds to the i'th range of BSET_CACHELINE bytes in the
 * set. Indexing within each byte range works the same as with the auxiliary
 * search trees.
 *
 * These are much easier to keep up to date when we insert a key - we do it
 * somewhat lazily; when we shift a key up we usually just increment the
 * pointer to it, only when it would overflow do we go to the trouble of
 * finding the first key in that range of bytes again.
 */

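/*
 * Illustrative sketch of the to_inorder() mapping described above, for the
 * simple case of a perfect binary tree with `depth` levels; the name and the
 * perfect-tree restriction are ours - the real helper also handles trees
 * that aren't completely full. Node j is a 1-based index into the
 * breadth-first array layout; the return value is its 1-based position in an
 * inorder traversal:
 */
static inline unsigned bset_to_inorder_example(unsigned j, unsigned depth)
{
	unsigned level	= fls(j);			/* root is level 1 */
	unsigned pos	= j ^ (1U << (level - 1));	/* 0-based position within its level */

	/* a node at (level, pos) lands at inorder index (2 * pos + 1) << (depth - level) */
	return ((pos << 1) | 1) << (depth - level);
}
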
enum bset_aux_tree_type {
	BSET_NO_AUX_TREE,
	BSET_RO_AUX_TREE,
	BSET_RW_AUX_TREE,
};

#define BSET_TREE_NR_TYPES	3

#define BSET_NO_AUX_TREE_VAL	(U16_MAX)
#define BSET_RW_AUX_TREE_VAL	(U16_MAX - 1)

static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree *t)
{
	switch (t->extra) {
	case BSET_NO_AUX_TREE_VAL:
		EBUG_ON(t->size);
		return BSET_NO_AUX_TREE;
	case BSET_RW_AUX_TREE_VAL:
		EBUG_ON(!t->size);
		return BSET_RW_AUX_TREE;
	default:
		EBUG_ON(!t->size);
		return BSET_RO_AUX_TREE;
	}
}

/*
 * BSET_CACHELINE was originally intended to match the hardware cacheline size -
 * it used to be 64, but I realized the lookup code would touch slightly less
 * memory if it was 128.
 *
 * It defines the number of bytes (in struct bset) per struct bkey_float in
 * the auxiliary search tree - when we're done searching the bkey_float tree
 * we have this many bytes left that we do a linear search over.
 *
 * Since (after level 5) every level of the bset_tree is on a new cacheline,
 * we're touching one fewer cacheline in the bset tree in exchange for one more
 * cacheline in the linear search - but the linear search might stop before it
 * gets to the second cacheline.
 */

#define BSET_CACHELINE		128

static inline size_t btree_keys_cachelines(const struct btree *b)
{
	return (1U << b->byte_order) / BSET_CACHELINE;
}

static inline size_t btree_aux_data_bytes(const struct btree *b)
{
	return btree_keys_cachelines(b) * 8;
}

static inline size_t btree_aux_data_u64s(const struct btree *b)
{
	return btree_aux_data_bytes(b) / sizeof(u64);
}

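/*
 * Worked example (assuming a 4k btree node, i.e. b->byte_order == 12):
 * btree_keys_cachelines() is 4096 / 128 = 32, btree_aux_data_bytes()
 * reserves 32 * 8 = 256 bytes, and btree_aux_data_u64s() is 32 u64s of
 * lookup table space.
 */
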
typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);

static inline void
__bkey_unpack_key_format_checked(const struct btree *b,
				 struct bkey *dst,
				 const struct bkey_packed *src)
{
#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
	{
		compiled_unpack_fn unpack_fn = b->aux_data;
		unpack_fn(dst, src);

		if (bch2_expensive_debug_checks) {
			struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);

			BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
		}
	}
#else
	*dst = __bch2_bkey_unpack_key(&b->format, src);
#endif
}

static inline struct bkey
bkey_unpack_key_format_checked(const struct btree *b,
			       const struct bkey_packed *src)
{
	struct bkey dst;

	__bkey_unpack_key_format_checked(b, &dst, src);
	return dst;
}

static inline void __bkey_unpack_key(const struct btree *b,
				     struct bkey *dst,
				     const struct bkey_packed *src)
{
	if (likely(bkey_packed(src)))
		__bkey_unpack_key_format_checked(b, dst, src);
	else
		*dst = *packed_to_bkey_c(src);
}

/**
 * bkey_unpack_key -- unpack just the key, not the value
 */
static inline struct bkey bkey_unpack_key(const struct btree *b,
					  const struct bkey_packed *src)
{
	return likely(bkey_packed(src))
		? bkey_unpack_key_format_checked(b, src)
		: *packed_to_bkey_c(src);
}

static inline struct bpos
bkey_unpack_pos_format_checked(const struct btree *b,
			       const struct bkey_packed *src)
{
#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
	return bkey_unpack_key_format_checked(b, src).p;
#else
	return __bkey_unpack_pos(&b->format, src);
#endif
}

static inline struct bpos bkey_unpack_pos(const struct btree *b,
					  const struct bkey_packed *src)
{
	return likely(bkey_packed(src))
		? bkey_unpack_pos_format_checked(b, src)
		: packed_to_bkey_c(src)->p;
}

/* Disassembled bkeys */

static inline struct bkey_s_c bkey_disassemble(struct btree *b,
					       const struct bkey_packed *k,
					       struct bkey *u)
{
	__bkey_unpack_key(b, u, k);

	return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
}

/* non const version: */
static inline struct bkey_s __bkey_disassemble(struct btree *b,
					       struct bkey_packed *k,
					       struct bkey *u)
{
	__bkey_unpack_key(b, u, k);

	return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
}

#define for_each_bset(_b, _t)						\
	for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)

#define bset_tree_for_each_key(_b, _t, _k)				\
	for (_k = btree_bkey_first(_b, _t);				\
	     _k != btree_bkey_last(_b, _t);				\
	     _k = bkey_next_skip_noops(_k, btree_bkey_last(_b, _t)))

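/*
 * Illustrative usage sketch (hypothetical function, not part of this
 * interface): walk every key in every bset of a node with the two iteration
 * macros above, counting keys that aren't deleted. Assumes the caller holds
 * whatever lock protects the node.
 */
static inline unsigned bset_count_live_keys_example(struct btree *b)
{
	struct bset_tree *t;
	struct bkey_packed *k;
	unsigned nr = 0;

	for_each_bset(b, t)
		bset_tree_for_each_key(b, t, k)
			if (!bkey_deleted(k))
				nr++;

	return nr;
}
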
static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
{
	return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
}

static inline bool bset_has_rw_aux_tree(struct bset_tree *t)
{
	return bset_aux_tree_type(t) == BSET_RW_AUX_TREE;
}

static inline void bch2_bset_set_no_aux_tree(struct btree *b,
					     struct bset_tree *t)
{
	BUG_ON(t < b->set);

	for (; t < b->set + ARRAY_SIZE(b->set); t++) {
		t->size = 0;
		t->extra = BSET_NO_AUX_TREE_VAL;
		t->aux_data_offset = U16_MAX;
	}
}

static inline void btree_node_set_format(struct btree *b,
					 struct bkey_format f)
{
	int len;

	b->format	= f;
	b->nr_key_bits	= bkey_format_key_bits(&f);

	len = bch2_compile_bkey_format(&b->format, b->aux_data);
	BUG_ON(len < 0 || len > U8_MAX);

	b->unpack_fn_len = len;

	bch2_bset_set_no_aux_tree(b, b->set);
}

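/*
 * Returns where the next bset would start: bsets are block aligned, so e.g.
 * with block_bytes == 512 and vstruct_bytes(i) == 1000, the next bset starts
 * 1024 bytes past the last one (1000 rounded up to a block).
 */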
static inline struct bset *bset_next_set(struct btree *b,
					 unsigned block_bytes)
{
	struct bset *i = btree_bset_last(b);

	EBUG_ON(!is_power_of_2(block_bytes));

	return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
}

void bch2_btree_keys_init(struct btree *);

void bch2_bset_init_first(struct btree *, struct bset *);
void bch2_bset_init_next(struct bch_fs *, struct btree *,
			 struct btree_node_entry *);
void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
void bch2_bset_fix_invalidated_key(struct btree *, struct bkey_packed *);

void bch2_bset_insert(struct btree *, struct btree_node_iter *,
		      struct bkey_packed *, struct bkey_i *, unsigned);
void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned);

/* Bkey utility code */

/* packed or unpacked */
static inline int bkey_cmp_p_or_unp(const struct btree *b,
				    const struct bkey_packed *l,
				    const struct bkey_packed *r_packed,
				    const struct bpos *r)
{
	EBUG_ON(r_packed && !bkey_packed(r_packed));

	if (unlikely(!bkey_packed(l)))
		return bkey_cmp(packed_to_bkey_c(l)->p, *r);

	if (likely(r_packed))
		return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b);

	return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
}

static inline struct bset_tree *
bch2_bkey_to_bset_inlined(struct btree *b, struct bkey_packed *k)
{
	unsigned offset = __btree_node_key_to_offset(b, k);
	struct bset_tree *t;

	for_each_bset(b, t)
		if (offset <= t->end_offset) {
			EBUG_ON(offset < btree_bkey_first_offset(t));
			return t;
		}

	BUG();
}

struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);

struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *,
					  struct bkey_packed *, unsigned);

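/*
 * The filter argument is a minimum key type: bch2_bkey_prev_all() returns
 * whatever physically precedes k, including deleted keys, while
 * bch2_bkey_prev() skips anything at or below KEY_TYPE_discard:
 */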
static inline struct bkey_packed *
bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
{
	return bch2_bkey_prev_filter(b, t, k, 0);
}

static inline struct bkey_packed *
bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
{
	return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1);
}

enum bch_extent_overlap {
	BCH_EXTENT_OVERLAP_ALL		= 0,
	BCH_EXTENT_OVERLAP_BACK		= 1,
	BCH_EXTENT_OVERLAP_FRONT	= 2,
	BCH_EXTENT_OVERLAP_MIDDLE	= 3,
};

/*
 * Returns how k overlaps with m: bit 0 is set if k starts after m starts (k
 * overlaps the back of m), bit 1 if k ends before m ends (k overlaps the
 * front of m); both clear means k covers all of m, both set means k lies
 * strictly inside m.
 */
static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
							   const struct bkey *m)
{
	int cmp1 = bkey_cmp(k->p, m->p) < 0;
	int cmp2 = bkey_cmp(bkey_start_pos(k),
			    bkey_start_pos(m)) > 0;

	return (cmp1 << 1) + cmp2;
}

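/*
 * Worked example (hypothetical extents): if m spans sectors [0, 100) and k
 * spans [50, 150), then k starts after m starts (cmp2 == 1) but does not end
 * before m ends (cmp1 == 0), so bch2_extent_overlap(k, m) returns
 * BCH_EXTENT_OVERLAP_BACK.
 */
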
/* Btree key iteration */

void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
			       const struct bkey_packed *,
			       const struct bkey_packed *);
void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
			       struct bpos *);
void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
					  struct btree *);
struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
						  struct btree *,
						  struct bset_tree *);

void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *);
void bch2_btree_node_iter_set_drop(struct btree_node_iter *,
				   struct btree_node_iter_set *);
void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *);

#define btree_node_iter_for_each(_iter, _set)				\
	for (_set = (_iter)->data;					\
	     _set < (_iter)->data + ARRAY_SIZE((_iter)->data) &&	\
	     (_set)->k != (_set)->end;					\
	     _set++)

static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter,
					     unsigned i)
{
	return iter->data[i].k == iter->data[i].end;
}

static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
{
	return __btree_node_iter_set_end(iter, 0);
}

/*
 * When keys compare equal, deleted keys compare first:
 *
 * XXX: only need to compare pointers for keys that are both within a
 * btree_node_iterator - we need to break ties for prev() to work correctly
 */
static inline int bkey_iter_cmp(const struct btree *b,
				const struct bkey_packed *l,
				const struct bkey_packed *r)
{
	return bch2_bkey_cmp_packed(b, l, r)
		?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
		?: cmp_int(l, r);
}

static inline int btree_node_iter_cmp(const struct btree *b,
				      struct btree_node_iter_set l,
				      struct btree_node_iter_set r)
{
	return bkey_iter_cmp(b,
			__btree_node_offset_to_key(b, l.k),
			__btree_node_offset_to_key(b, r.k));
}

/* These assume r (the search key) is not a deleted key: */
static inline int bkey_iter_pos_cmp(const struct btree *b,
				    const struct bkey_packed *l,
				    const struct bpos *r)
{
	return bkey_cmp_left_packed(b, l, r)
		?: -((int) bkey_deleted(l));
}

static inline int bkey_iter_cmp_p_or_unp(const struct btree *b,
					 const struct bkey_packed *l,
					 const struct bkey_packed *r_packed,
					 const struct bpos *r)
{
	return bkey_cmp_p_or_unp(b, l, r_packed, r)
		?: -((int) bkey_deleted(l));
}

static inline struct bkey_packed *
__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
				struct btree *b)
{
	return __btree_node_offset_to_key(b, iter->data->k);
}

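/*
 * Peek at the next key whose type is >= min_key_type, popping any
 * smaller-typed keys (e.g. KEY_TYPE_deleted) off the iterator on the way:
 */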
static inline struct bkey_packed *
bch2_btree_node_iter_peek_filter(struct btree_node_iter *iter,
				 struct btree *b,
				 unsigned min_key_type)
{
	while (!bch2_btree_node_iter_end(iter)) {
		struct bkey_packed *k = __bch2_btree_node_iter_peek_all(iter, b);

		if (k->type >= min_key_type)
			return k;

		bch2_btree_node_iter_advance(iter, b);
	}

	return NULL;
}

static inline struct bkey_packed *
bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
			      struct btree *b)
{
	return bch2_btree_node_iter_peek_filter(iter, b, 0);
}

static inline struct bkey_packed *
bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
{
	return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1);
}

static inline struct bkey_packed *
bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b)
{
	struct bkey_packed *ret = bch2_btree_node_iter_peek_all(iter, b);

	if (ret)
		bch2_btree_node_iter_advance(iter, b);

	return ret;
}

struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
						  struct btree *);
struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *,
						     struct btree *, unsigned);

static inline struct bkey_packed *
bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b)
{
	return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1);
}

struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
						 struct btree *,
						 struct bkey *);

#define for_each_btree_node_key_unpack(b, k, iter, unpacked)		\
	for (bch2_btree_node_iter_init_from_start((iter), (b));	\
	     (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
	     bch2_btree_node_iter_advance(iter, b))

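/*
 * Illustrative usage sketch (hypothetical function): sum the sizes of all
 * keys in a node, letting the macro above handle iterator setup, unpacking
 * and advancing:
 */
static inline u64 btree_node_sectors_example(struct btree *b)
{
	struct btree_node_iter iter;
	struct bkey unpacked;
	struct bkey_s_c k;
	u64 sectors = 0;

	for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
		sectors += k.k->size;

	return sectors;
}
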
/* Accounting: */

static inline void btree_keys_account_key(struct btree_nr_keys *n,
					  unsigned bset,
					  struct bkey_packed *k,
					  int sign)
{
	n->live_u64s		+= k->u64s * sign;
	n->bset_u64s[bset]	+= k->u64s * sign;

	if (bkey_packed(k))
		n->packed_keys	+= sign;
	else
		n->unpacked_keys += sign;
}

static inline void btree_keys_account_val_delta(struct btree *b,
						struct bkey_packed *k,
						int delta)
{
	struct bset_tree *t = bch2_bkey_to_bset(b, k);

	b->nr.live_u64s			+= delta;
	b->nr.bset_u64s[t - b->set]	+= delta;
}

#define btree_keys_account_key_add(_nr, _bset_idx, _k)		\
	btree_keys_account_key(_nr, _bset_idx, _k, 1)
#define btree_keys_account_key_drop(_nr, _bset_idx, _k)	\
	btree_keys_account_key(_nr, _bset_idx, _k, -1)

#define btree_account_key_add(_b, _k)				\
	btree_keys_account_key(&(_b)->nr,			\
		bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1)
#define btree_account_key_drop(_b, _k)				\
	btree_keys_account_key(&(_b)->nr,			\
		bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1)

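/*
 * Sketch of how the accounting macros pair up (hypothetical overwrite path):
 *
 *	btree_account_key_drop(b, old);
 *	... overwrite old with new in place ...
 *	btree_account_key_add(b, new);
 *
 * keeping b->nr.live_u64s and the per-bset counts consistent.
 */
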
struct bset_stats {
	struct {
		size_t nr, bytes;
	} sets[BSET_TREE_NR_TYPES];

	size_t floats;
	size_t failed;
};

void bch2_btree_keys_stats(struct btree *, struct bset_stats *);
void bch2_bfloat_to_text(struct printbuf *, struct btree *,
			 struct bkey_packed *);

/* Debug stuff */

void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned);
void bch2_dump_btree_node(struct bch_fs *, struct btree *);
void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);

#ifdef CONFIG_BCACHEFS_DEBUG

void __bch2_verify_btree_nr_keys(struct btree *);
void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
			    struct bkey_packed *, unsigned);

#else

static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
					       struct btree *b) {}
static inline void bch2_verify_insert_pos(struct btree *b,
					  struct bkey_packed *where,
					  struct bkey_packed *insert,
					  unsigned clobber_u64s) {}
#endif

static inline void bch2_verify_btree_nr_keys(struct btree *b)
{
	if (bch2_debug_check_btree_accounting)
		__bch2_verify_btree_nr_keys(b);
}

#endif /* _BCACHEFS_BSET_H */