Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Code for working with individual keys, and sorted sets of keys with in a | |
4 | * btree node | |
5 | * | |
6 | * Copyright 2012 Google, Inc. | |
7 | */ | |
8 | ||
9 | #include "bcachefs.h" | |
10 | #include "btree_cache.h" | |
11 | #include "bset.h" | |
12 | #include "eytzinger.h" | |
13 | #include "trace.h" | |
14 | #include "util.h" | |
15 | ||
16 | #include <asm/unaligned.h> | |
17 | #include <linux/console.h> | |
18 | #include <linux/random.h> | |
19 | #include <linux/prefetch.h> | |
20 | ||
271a3d3a KO |
21 | static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *, |
22 | struct btree *); | |
23 | ||
63f1a598 KO |
24 | static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter) |
25 | { | |
26 | unsigned n = ARRAY_SIZE(iter->data); | |
27 | ||
28 | while (n && __btree_node_iter_set_end(iter, n - 1)) | |
29 | --n; | |
30 | ||
31 | return n; | |
32 | } | |
33 | ||
1c6fdbd8 KO |
34 | struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k) |
35 | { | |
216c9fac | 36 | return bch2_bkey_to_bset_inlined(b, k); |
1c6fdbd8 KO |
37 | } |
38 | ||
39 | /* | |
40 | * There are never duplicate live keys in the btree - but including keys that | |
41 | * have been flagged as deleted (and will be cleaned up later) we _will_ see | |
42 | * duplicates. | |
43 | * | |
44 | * Thus the sort order is: usual key comparison first, but for keys that compare | |
45 | * equal the deleted key(s) come first, and the (at most one) live version comes | |
46 | * last. | |
47 | * | |
48 | * The main reason for this is insertion: to handle overwrites, we first iterate | |
49 | * over keys that compare equal to our insert key, and then insert immediately | |
50 | * prior to the first key greater than the key we're inserting - our insert | |
51 | * position will be after all keys that compare equal to our insert key, which | |
52 | * by the time we actually do the insert will all be deleted. | |
53 | */ | |
54 | ||
a34782a0 KO |
55 | void bch2_dump_bset(struct bch_fs *c, struct btree *b, |
56 | struct bset *i, unsigned set) | |
1c6fdbd8 KO |
57 | { |
58 | struct bkey_packed *_k, *_n; | |
a34782a0 KO |
59 | struct bkey uk, n; |
60 | struct bkey_s_c k; | |
fa8e94fa | 61 | struct printbuf buf = PRINTBUF; |
1c6fdbd8 KO |
62 | |
63 | if (!i->u64s) | |
64 | return; | |
65 | ||
a34782a0 | 66 | for (_k = i->start; |
1c6fdbd8 | 67 | _k < vstruct_last(i); |
a34782a0 | 68 | _k = _n) { |
ac2ccddc | 69 | _n = bkey_p_next(_k); |
1c6fdbd8 | 70 | |
a34782a0 | 71 | k = bkey_disassemble(b, _k, &uk); |
fa8e94fa KO |
72 | |
73 | printbuf_reset(&buf); | |
a34782a0 | 74 | if (c) |
fa8e94fa | 75 | bch2_bkey_val_to_text(&buf, c, k); |
a34782a0 | 76 | else |
fa8e94fa | 77 | bch2_bkey_to_text(&buf, k.k); |
24e0c3f8 | 78 | printk(KERN_ERR "block %u key %5zu: %s\n", set, |
fa8e94fa | 79 | _k->_data - i->_data, buf.buf); |
1c6fdbd8 KO |
80 | |
81 | if (_n == vstruct_last(i)) | |
82 | continue; | |
83 | ||
84 | n = bkey_unpack_key(b, _n); | |
85 | ||
e88a75eb | 86 | if (bpos_lt(n.p, k.k->p)) { |
1c6fdbd8 KO |
87 | printk(KERN_ERR "Key skipped backwards\n"); |
88 | continue; | |
89 | } | |
90 | ||
e88a75eb | 91 | if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p)) |
1c6fdbd8 KO |
92 | printk(KERN_ERR "Duplicate keys\n"); |
93 | } | |
fa8e94fa KO |
94 | |
95 | printbuf_exit(&buf); | |
1c6fdbd8 KO |
96 | } |
97 | ||
a34782a0 | 98 | void bch2_dump_btree_node(struct bch_fs *c, struct btree *b) |
1c6fdbd8 KO |
99 | { |
100 | struct bset_tree *t; | |
101 | ||
102 | console_lock(); | |
103 | for_each_bset(b, t) | |
a34782a0 | 104 | bch2_dump_bset(c, b, bset(b, t), t - b->set); |
1c6fdbd8 KO |
105 | console_unlock(); |
106 | } | |
107 | ||
108 | void bch2_dump_btree_node_iter(struct btree *b, | |
109 | struct btree_node_iter *iter) | |
110 | { | |
111 | struct btree_node_iter_set *set; | |
fa8e94fa | 112 | struct printbuf buf = PRINTBUF; |
1c6fdbd8 | 113 | |
63f1a598 KO |
114 | printk(KERN_ERR "btree node iter with %u/%u sets:\n", |
115 | __btree_node_iter_used(iter), b->nsets); | |
1c6fdbd8 KO |
116 | |
117 | btree_node_iter_for_each(iter, set) { | |
118 | struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); | |
119 | struct bset_tree *t = bch2_bkey_to_bset(b, k); | |
120 | struct bkey uk = bkey_unpack_key(b, k); | |
1c6fdbd8 | 121 | |
fa8e94fa KO |
122 | printbuf_reset(&buf); |
123 | bch2_bkey_to_text(&buf, &uk); | |
63f1a598 | 124 | printk(KERN_ERR "set %zu key %u: %s\n", |
fa8e94fa | 125 | t - b->set, set->k, buf.buf); |
1c6fdbd8 | 126 | } |
fa8e94fa KO |
127 | |
128 | printbuf_exit(&buf); | |
1c6fdbd8 KO |
129 | } |
130 | ||
131 | #ifdef CONFIG_BCACHEFS_DEBUG | |
132 | ||
1c6fdbd8 KO |
133 | void __bch2_verify_btree_nr_keys(struct btree *b) |
134 | { | |
135 | struct bset_tree *t; | |
136 | struct bkey_packed *k; | |
137 | struct btree_nr_keys nr = { 0 }; | |
138 | ||
139 | for_each_bset(b, t) | |
ad44bdc3 | 140 | bset_tree_for_each_key(b, t, k) |
c052cf82 | 141 | if (!bkey_deleted(k)) |
1c6fdbd8 KO |
142 | btree_keys_account_key_add(&nr, t - b->set, k); |
143 | ||
144 | BUG_ON(memcmp(&nr, &b->nr, sizeof(nr))); | |
145 | } | |
146 | ||
271a3d3a KO |
147 | static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, |
148 | struct btree *b) | |
1c6fdbd8 | 149 | { |
271a3d3a KO |
150 | struct btree_node_iter iter = *_iter; |
151 | const struct bkey_packed *k, *n; | |
152 | ||
153 | k = bch2_btree_node_iter_peek_all(&iter, b); | |
154 | __bch2_btree_node_iter_advance(&iter, b); | |
155 | n = bch2_btree_node_iter_peek_all(&iter, b); | |
1c6fdbd8 KO |
156 | |
157 | bkey_unpack_key(b, k); | |
158 | ||
159 | if (n && | |
a00fd8c5 | 160 | bkey_iter_cmp(b, k, n) > 0) { |
271a3d3a | 161 | struct btree_node_iter_set *set; |
1c6fdbd8 KO |
162 | struct bkey ku = bkey_unpack_key(b, k); |
163 | struct bkey nu = bkey_unpack_key(b, n); | |
fa8e94fa KO |
164 | struct printbuf buf1 = PRINTBUF; |
165 | struct printbuf buf2 = PRINTBUF; | |
1c6fdbd8 | 166 | |
a34782a0 | 167 | bch2_dump_btree_node(NULL, b); |
fa8e94fa KO |
168 | bch2_bkey_to_text(&buf1, &ku); |
169 | bch2_bkey_to_text(&buf2, &nu); | |
271a3d3a | 170 | printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n", |
fa8e94fa | 171 | buf1.buf, buf2.buf); |
271a3d3a KO |
172 | printk(KERN_ERR "iter was:"); |
173 | ||
174 | btree_node_iter_for_each(_iter, set) { | |
96dea3d5 KO |
175 | struct bkey_packed *k2 = __btree_node_offset_to_key(b, set->k); |
176 | struct bset_tree *t = bch2_bkey_to_bset(b, k2); | |
271a3d3a | 177 | printk(" [%zi %zi]", t - b->set, |
96dea3d5 | 178 | k2->_data - bset(b, t)->_data); |
271a3d3a KO |
179 | } |
180 | panic("\n"); | |
1c6fdbd8 KO |
181 | } |
182 | } | |
183 | ||
184 | void bch2_btree_node_iter_verify(struct btree_node_iter *iter, | |
271a3d3a | 185 | struct btree *b) |
1c6fdbd8 | 186 | { |
1fe08f31 | 187 | struct btree_node_iter_set *set, *s2; |
63f1a598 | 188 | struct bkey_packed *k, *p; |
1c6fdbd8 | 189 | struct bset_tree *t; |
1c6fdbd8 | 190 | |
63f1a598 KO |
191 | if (bch2_btree_node_iter_end(iter)) |
192 | return; | |
193 | ||
1fe08f31 | 194 | /* Verify no duplicates: */ |
67e0dd8f KO |
195 | btree_node_iter_for_each(iter, set) { |
196 | BUG_ON(set->k > set->end); | |
1fe08f31 KO |
197 | btree_node_iter_for_each(iter, s2) |
198 | BUG_ON(set != s2 && set->end == s2->end); | |
67e0dd8f | 199 | } |
1c6fdbd8 | 200 | |
1fe08f31 | 201 | /* Verify that set->end is correct: */ |
1c6fdbd8 | 202 | btree_node_iter_for_each(iter, set) { |
1fe08f31 KO |
203 | for_each_bset(b, t) |
204 | if (set->end == t->end_offset) | |
205 | goto found; | |
206 | BUG(); | |
207 | found: | |
208 | BUG_ON(set->k < btree_bkey_first_offset(t) || | |
209 | set->k >= t->end_offset); | |
1c6fdbd8 KO |
210 | } |
211 | ||
1fe08f31 KO |
212 | /* Verify iterator is sorted: */ |
213 | btree_node_iter_for_each(iter, set) | |
214 | BUG_ON(set != iter->data && | |
271a3d3a | 215 | btree_node_iter_cmp(b, set[-1], set[0]) > 0); |
63f1a598 KO |
216 | |
217 | k = bch2_btree_node_iter_peek_all(iter, b); | |
218 | ||
219 | for_each_bset(b, t) { | |
220 | if (iter->data[0].end == t->end_offset) | |
221 | continue; | |
222 | ||
223 | p = bch2_bkey_prev_all(b, t, | |
224 | bch2_btree_node_iter_bset_pos(iter, b, t)); | |
225 | ||
226 | BUG_ON(p && bkey_iter_cmp(b, k, p) < 0); | |
227 | } | |
1c6fdbd8 KO |
228 | } |
229 | ||
271a3d3a KO |
230 | void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, |
231 | struct bkey_packed *insert, unsigned clobber_u64s) | |
1c6fdbd8 KO |
232 | { |
233 | struct bset_tree *t = bch2_bkey_to_bset(b, where); | |
271a3d3a | 234 | struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where); |
5cfd6977 | 235 | struct bkey_packed *next = (void *) ((u64 *) where->_data + clobber_u64s); |
fa8e94fa KO |
236 | struct printbuf buf1 = PRINTBUF; |
237 | struct printbuf buf2 = PRINTBUF; | |
271a3d3a KO |
238 | #if 0 |
239 | BUG_ON(prev && | |
a00fd8c5 | 240 | bkey_iter_cmp(b, prev, insert) > 0); |
271a3d3a KO |
241 | #else |
242 | if (prev && | |
a00fd8c5 | 243 | bkey_iter_cmp(b, prev, insert) > 0) { |
271a3d3a KO |
244 | struct bkey k1 = bkey_unpack_key(b, prev); |
245 | struct bkey k2 = bkey_unpack_key(b, insert); | |
1c6fdbd8 | 246 | |
a34782a0 | 247 | bch2_dump_btree_node(NULL, b); |
fa8e94fa KO |
248 | bch2_bkey_to_text(&buf1, &k1); |
249 | bch2_bkey_to_text(&buf2, &k2); | |
271a3d3a KO |
250 | |
251 | panic("prev > insert:\n" | |
c201e2d9 KO |
252 | "prev key %s\n" |
253 | "insert key %s\n", | |
fa8e94fa | 254 | buf1.buf, buf2.buf); |
1c6fdbd8 | 255 | } |
271a3d3a KO |
256 | #endif |
257 | #if 0 | |
258 | BUG_ON(next != btree_bkey_last(b, t) && | |
a00fd8c5 | 259 | bkey_iter_cmp(b, insert, next) > 0); |
271a3d3a KO |
260 | #else |
261 | if (next != btree_bkey_last(b, t) && | |
a00fd8c5 | 262 | bkey_iter_cmp(b, insert, next) > 0) { |
271a3d3a KO |
263 | struct bkey k1 = bkey_unpack_key(b, insert); |
264 | struct bkey k2 = bkey_unpack_key(b, next); | |
1c6fdbd8 | 265 | |
a34782a0 | 266 | bch2_dump_btree_node(NULL, b); |
fa8e94fa KO |
267 | bch2_bkey_to_text(&buf1, &k1); |
268 | bch2_bkey_to_text(&buf2, &k2); | |
271a3d3a KO |
269 | |
270 | panic("insert > next:\n" | |
c201e2d9 KO |
271 | "insert key %s\n" |
272 | "next key %s\n", | |
fa8e94fa | 273 | buf1.buf, buf2.buf); |
1c6fdbd8 | 274 | } |
271a3d3a KO |
275 | #endif |
276 | } | |
277 | ||
1c6fdbd8 KO |
278 | #else |
279 | ||
280 | static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter, | |
271a3d3a | 281 | struct btree *b) {} |
1c6fdbd8 KO |
282 | |
283 | #endif | |
284 | ||
285 | /* Auxiliary search trees */ | |
286 | ||
58404bb2 KO |
287 | #define BFLOAT_FAILED_UNPACKED U8_MAX |
288 | #define BFLOAT_FAILED U8_MAX | |
1c6fdbd8 | 289 | |
1c6fdbd8 KO |
290 | struct bkey_float { |
291 | u8 exponent; | |
292 | u8 key_offset; | |
b904a799 KO |
293 | u16 mantissa; |
294 | }; | |
295 | #define BKEY_MANTISSA_BITS 16 | |
1c6fdbd8 KO |
296 | |
297 | static unsigned bkey_float_byte_offset(unsigned idx) | |
298 | { | |
b904a799 | 299 | return idx * sizeof(struct bkey_float); |
1c6fdbd8 KO |
300 | } |
301 | ||
302 | struct ro_aux_tree { | |
5cfd6977 KO |
303 | u8 nothing[0]; |
304 | struct bkey_float f[]; | |
1c6fdbd8 KO |
305 | }; |
306 | ||
307 | struct rw_aux_tree { | |
308 | u16 offset; | |
309 | struct bpos k; | |
310 | }; | |
311 | ||
1c6fdbd8 KO |
312 | static unsigned bset_aux_tree_buf_end(const struct bset_tree *t) |
313 | { | |
314 | BUG_ON(t->aux_data_offset == U16_MAX); | |
315 | ||
316 | switch (bset_aux_tree_type(t)) { | |
317 | case BSET_NO_AUX_TREE: | |
318 | return t->aux_data_offset; | |
319 | case BSET_RO_AUX_TREE: | |
320 | return t->aux_data_offset + | |
b904a799 KO |
321 | DIV_ROUND_UP(t->size * sizeof(struct bkey_float) + |
322 | t->size * sizeof(u8), 8); | |
1c6fdbd8 KO |
323 | case BSET_RW_AUX_TREE: |
324 | return t->aux_data_offset + | |
325 | DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8); | |
326 | default: | |
327 | BUG(); | |
328 | } | |
329 | } | |
330 | ||
331 | static unsigned bset_aux_tree_buf_start(const struct btree *b, | |
332 | const struct bset_tree *t) | |
333 | { | |
334 | return t == b->set | |
335 | ? DIV_ROUND_UP(b->unpack_fn_len, 8) | |
336 | : bset_aux_tree_buf_end(t - 1); | |
337 | } | |
338 | ||
339 | static void *__aux_tree_base(const struct btree *b, | |
340 | const struct bset_tree *t) | |
341 | { | |
342 | return b->aux_data + t->aux_data_offset * 8; | |
343 | } | |
344 | ||
345 | static struct ro_aux_tree *ro_aux_tree_base(const struct btree *b, | |
346 | const struct bset_tree *t) | |
347 | { | |
348 | EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); | |
349 | ||
350 | return __aux_tree_base(b, t); | |
351 | } | |
352 | ||
353 | static u8 *ro_aux_tree_prev(const struct btree *b, | |
354 | const struct bset_tree *t) | |
355 | { | |
356 | EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE); | |
357 | ||
358 | return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size); | |
359 | } | |
360 | ||
1c6fdbd8 KO |
361 | static struct bkey_float *bkey_float(const struct btree *b, |
362 | const struct bset_tree *t, | |
363 | unsigned idx) | |
364 | { | |
b904a799 | 365 | return ro_aux_tree_base(b, t)->f + idx; |
1c6fdbd8 KO |
366 | } |
367 | ||
d108efc2 | 368 | static void bset_aux_tree_verify(const struct btree *b) |
1c6fdbd8 KO |
369 | { |
370 | #ifdef CONFIG_BCACHEFS_DEBUG | |
d108efc2 | 371 | const struct bset_tree *t; |
1c6fdbd8 KO |
372 | |
373 | for_each_bset(b, t) { | |
374 | if (t->aux_data_offset == U16_MAX) | |
375 | continue; | |
376 | ||
377 | BUG_ON(t != b->set && | |
378 | t[-1].aux_data_offset == U16_MAX); | |
379 | ||
380 | BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t)); | |
381 | BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); | |
382 | BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); | |
383 | } | |
384 | #endif | |
385 | } | |
386 | ||
29364f34 | 387 | void bch2_btree_keys_init(struct btree *b) |
1c6fdbd8 KO |
388 | { |
389 | unsigned i; | |
390 | ||
391 | b->nsets = 0; | |
392 | memset(&b->nr, 0, sizeof(b->nr)); | |
29364f34 | 393 | |
1c6fdbd8 KO |
394 | for (i = 0; i < MAX_BSETS; i++) |
395 | b->set[i].data_offset = U16_MAX; | |
396 | ||
397 | bch2_bset_set_no_aux_tree(b, b->set); | |
398 | } | |
399 | ||
400 | /* Binary tree stuff for auxiliary search trees */ | |
401 | ||
402 | /* | |
403 | * Cacheline/offset <-> bkey pointer arithmetic: | |
404 | * | |
405 | * t->tree is a binary search tree in an array; each node corresponds to a key | |
406 | * in one cacheline in t->set (BSET_CACHELINE bytes). | |
407 | * | |
408 | * This means we don't have to store the full index of the key that a node in | |
409 | * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and | |
410 | * then bkey_float->m gives us the offset within that cacheline, in units of 8 | |
411 | * bytes. | |
412 | * | |
413 | * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to | |
414 | * make this work. | |
415 | * | |
416 | * To construct the bfloat for an arbitrary key we need to know what the key | |
417 | * immediately preceding it is: we have to check if the two keys differ in the | |
418 | * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size | |
419 | * of the previous key so we can walk backwards to it from t->tree[j]'s key. | |
420 | */ | |
421 | ||
422 | static inline void *bset_cacheline(const struct btree *b, | |
423 | const struct bset_tree *t, | |
424 | unsigned cacheline) | |
425 | { | |
426 | return (void *) round_down((unsigned long) btree_bkey_first(b, t), | |
427 | L1_CACHE_BYTES) + | |
428 | cacheline * BSET_CACHELINE; | |
429 | } | |
430 | ||
431 | static struct bkey_packed *cacheline_to_bkey(const struct btree *b, | |
432 | const struct bset_tree *t, | |
433 | unsigned cacheline, | |
434 | unsigned offset) | |
435 | { | |
436 | return bset_cacheline(b, t, cacheline) + offset * 8; | |
437 | } | |
438 | ||
439 | static unsigned bkey_to_cacheline(const struct btree *b, | |
440 | const struct bset_tree *t, | |
441 | const struct bkey_packed *k) | |
442 | { | |
443 | return ((void *) k - bset_cacheline(b, t, 0)) / BSET_CACHELINE; | |
444 | } | |
445 | ||
446 | static ssize_t __bkey_to_cacheline_offset(const struct btree *b, | |
447 | const struct bset_tree *t, | |
448 | unsigned cacheline, | |
449 | const struct bkey_packed *k) | |
450 | { | |
451 | return (u64 *) k - (u64 *) bset_cacheline(b, t, cacheline); | |
452 | } | |
453 | ||
454 | static unsigned bkey_to_cacheline_offset(const struct btree *b, | |
455 | const struct bset_tree *t, | |
456 | unsigned cacheline, | |
457 | const struct bkey_packed *k) | |
458 | { | |
459 | size_t m = __bkey_to_cacheline_offset(b, t, cacheline, k); | |
460 | ||
461 | EBUG_ON(m > U8_MAX); | |
462 | return m; | |
463 | } | |
464 | ||
465 | static inline struct bkey_packed *tree_to_bkey(const struct btree *b, | |
466 | const struct bset_tree *t, | |
467 | unsigned j) | |
468 | { | |
469 | return cacheline_to_bkey(b, t, | |
72492d55 | 470 | __eytzinger1_to_inorder(j, t->size - 1, t->extra), |
1c6fdbd8 KO |
471 | bkey_float(b, t, j)->key_offset); |
472 | } | |
473 | ||
474 | static struct bkey_packed *tree_to_prev_bkey(const struct btree *b, | |
475 | const struct bset_tree *t, | |
476 | unsigned j) | |
477 | { | |
478 | unsigned prev_u64s = ro_aux_tree_prev(b, t)[j]; | |
479 | ||
5cfd6977 | 480 | return (void *) ((u64 *) tree_to_bkey(b, t, j)->_data - prev_u64s); |
1c6fdbd8 KO |
481 | } |
482 | ||
483 | static struct rw_aux_tree *rw_aux_tree(const struct btree *b, | |
484 | const struct bset_tree *t) | |
485 | { | |
486 | EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); | |
487 | ||
488 | return __aux_tree_base(b, t); | |
489 | } | |
490 | ||
491 | /* | |
492 | * For the write set - the one we're currently inserting keys into - we don't | |
493 | * maintain a full search tree, we just keep a simple lookup table in t->prev. | |
494 | */ | |
495 | static struct bkey_packed *rw_aux_to_bkey(const struct btree *b, | |
496 | struct bset_tree *t, | |
497 | unsigned j) | |
498 | { | |
499 | return __btree_node_offset_to_key(b, rw_aux_tree(b, t)[j].offset); | |
500 | } | |
501 | ||
502 | static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t, | |
503 | unsigned j, struct bkey_packed *k) | |
504 | { | |
505 | EBUG_ON(k >= btree_bkey_last(b, t)); | |
506 | ||
507 | rw_aux_tree(b, t)[j] = (struct rw_aux_tree) { | |
508 | .offset = __btree_node_key_to_offset(b, k), | |
509 | .k = bkey_unpack_pos(b, k), | |
510 | }; | |
511 | } | |
512 | ||
513 | static void bch2_bset_verify_rw_aux_tree(struct btree *b, | |
514 | struct bset_tree *t) | |
515 | { | |
516 | struct bkey_packed *k = btree_bkey_first(b, t); | |
517 | unsigned j = 0; | |
518 | ||
29364f34 | 519 | if (!bch2_expensive_debug_checks) |
1c6fdbd8 KO |
520 | return; |
521 | ||
522 | BUG_ON(bset_has_ro_aux_tree(t)); | |
523 | ||
524 | if (!bset_has_rw_aux_tree(t)) | |
525 | return; | |
526 | ||
527 | BUG_ON(t->size < 1); | |
528 | BUG_ON(rw_aux_to_bkey(b, t, j) != k); | |
529 | ||
530 | goto start; | |
531 | while (1) { | |
532 | if (rw_aux_to_bkey(b, t, j) == k) { | |
e88a75eb | 533 | BUG_ON(!bpos_eq(rw_aux_tree(b, t)[j].k, |
1c6fdbd8 KO |
534 | bkey_unpack_pos(b, k))); |
535 | start: | |
536 | if (++j == t->size) | |
537 | break; | |
538 | ||
539 | BUG_ON(rw_aux_tree(b, t)[j].offset <= | |
540 | rw_aux_tree(b, t)[j - 1].offset); | |
541 | } | |
542 | ||
ac2ccddc | 543 | k = bkey_p_next(k); |
1c6fdbd8 KO |
544 | BUG_ON(k >= btree_bkey_last(b, t)); |
545 | } | |
546 | } | |
547 | ||
548 | /* returns idx of first entry >= offset: */ | |
549 | static unsigned rw_aux_tree_bsearch(struct btree *b, | |
550 | struct bset_tree *t, | |
551 | unsigned offset) | |
552 | { | |
617391ba KO |
553 | unsigned bset_offs = offset - btree_bkey_first_offset(t); |
554 | unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t); | |
555 | unsigned idx = bset_u64s ? bset_offs * t->size / bset_u64s : 0; | |
1c6fdbd8 KO |
556 | |
557 | EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); | |
617391ba KO |
558 | EBUG_ON(!t->size); |
559 | EBUG_ON(idx > t->size); | |
1c6fdbd8 | 560 | |
617391ba KO |
561 | while (idx < t->size && |
562 | rw_aux_tree(b, t)[idx].offset < offset) | |
563 | idx++; | |
1c6fdbd8 | 564 | |
617391ba KO |
565 | while (idx && |
566 | rw_aux_tree(b, t)[idx - 1].offset >= offset) | |
567 | idx--; | |
1c6fdbd8 | 568 | |
617391ba KO |
569 | EBUG_ON(idx < t->size && |
570 | rw_aux_tree(b, t)[idx].offset < offset); | |
571 | EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset); | |
572 | EBUG_ON(idx + 1 < t->size && | |
573 | rw_aux_tree(b, t)[idx].offset == | |
574 | rw_aux_tree(b, t)[idx + 1].offset); | |
1c6fdbd8 | 575 | |
617391ba | 576 | return idx; |
1c6fdbd8 KO |
577 | } |
578 | ||
1c6fdbd8 KO |
579 | static inline unsigned bkey_mantissa(const struct bkey_packed *k, |
580 | const struct bkey_float *f, | |
581 | unsigned idx) | |
582 | { | |
583 | u64 v; | |
584 | ||
585 | EBUG_ON(!bkey_packed(k)); | |
586 | ||
587 | v = get_unaligned((u64 *) (((u8 *) k->_data) + (f->exponent >> 3))); | |
588 | ||
589 | /* | |
590 | * In little endian, we're shifting off low bits (and then the bits we | |
591 | * want are at the low end), in big endian we're shifting off high bits | |
592 | * (and then the bits we want are at the high end, so we shift them | |
593 | * back down): | |
594 | */ | |
595 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |
596 | v >>= f->exponent & 7; | |
597 | #else | |
b904a799 | 598 | v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS; |
1c6fdbd8 | 599 | #endif |
b904a799 | 600 | return (u16) v; |
1c6fdbd8 KO |
601 | } |
602 | ||
73bd774d KO |
603 | static __always_inline void make_bfloat(struct btree *b, struct bset_tree *t, |
604 | unsigned j, | |
605 | struct bkey_packed *min_key, | |
606 | struct bkey_packed *max_key) | |
1c6fdbd8 KO |
607 | { |
608 | struct bkey_float *f = bkey_float(b, t, j); | |
609 | struct bkey_packed *m = tree_to_bkey(b, t, j); | |
9ae82fe6 KO |
610 | struct bkey_packed *l = is_power_of_2(j) |
611 | ? min_key | |
612 | : tree_to_prev_bkey(b, t, j >> ffs(j)); | |
613 | struct bkey_packed *r = is_power_of_2(j + 1) | |
614 | ? max_key | |
615 | : tree_to_bkey(b, t, j >> (ffz(j) + 1)); | |
1c6fdbd8 KO |
616 | unsigned mantissa; |
617 | int shift, exponent, high_bit; | |
618 | ||
1c6fdbd8 KO |
619 | /* |
620 | * for failed bfloats, the lookup code falls back to comparing against | |
621 | * the original key. | |
622 | */ | |
623 | ||
1bdb67e8 | 624 | if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) || |
1c6fdbd8 KO |
625 | !b->nr_key_bits) { |
626 | f->exponent = BFLOAT_FAILED_UNPACKED; | |
627 | return; | |
628 | } | |
629 | ||
630 | /* | |
631 | * The greatest differing bit of l and r is the first bit we must | |
632 | * include in the bfloat mantissa we're creating in order to do | |
633 | * comparisons - that bit always becomes the high bit of | |
634 | * bfloat->mantissa, and thus the exponent we're calculating here is | |
635 | * the position of what will become the low bit in bfloat->mantissa: | |
636 | * | |
637 | * Note that this may be negative - we may be running off the low end | |
638 | * of the key: we handle this later: | |
639 | */ | |
640 | high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r), | |
b904a799 KO |
641 | min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1); |
642 | exponent = high_bit - (BKEY_MANTISSA_BITS - 1); | |
1c6fdbd8 KO |
643 | |
644 | /* | |
645 | * Then we calculate the actual shift value, from the start of the key | |
646 | * (k->_data), to get the key bits starting at exponent: | |
647 | */ | |
648 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |
649 | shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent; | |
650 | ||
b904a799 | 651 | EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64); |
1c6fdbd8 KO |
652 | #else |
653 | shift = high_bit_offset + | |
654 | b->nr_key_bits - | |
655 | exponent - | |
b904a799 | 656 | BKEY_MANTISSA_BITS; |
1c6fdbd8 KO |
657 | |
658 | EBUG_ON(shift < KEY_PACKED_BITS_START); | |
659 | #endif | |
660 | EBUG_ON(shift < 0 || shift >= BFLOAT_FAILED); | |
661 | ||
662 | f->exponent = shift; | |
663 | mantissa = bkey_mantissa(m, f, j); | |
664 | ||
665 | /* | |
666 | * If we've got garbage bits, set them to all 1s - it's legal for the | |
667 | * bfloat to compare larger than the original key, but not smaller: | |
668 | */ | |
669 | if (exponent < 0) | |
670 | mantissa |= ~(~0U << -exponent); | |
671 | ||
b904a799 | 672 | f->mantissa = mantissa; |
1c6fdbd8 KO |
673 | } |
674 | ||
675 | /* bytes remaining - only valid for last bset: */ | |
d108efc2 | 676 | static unsigned __bset_tree_capacity(const struct btree *b, const struct bset_tree *t) |
1c6fdbd8 KO |
677 | { |
678 | bset_aux_tree_verify(b); | |
679 | ||
680 | return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64); | |
681 | } | |
682 | ||
d108efc2 | 683 | static unsigned bset_ro_tree_capacity(const struct btree *b, const struct bset_tree *t) |
1c6fdbd8 | 684 | { |
b904a799 KO |
685 | return __bset_tree_capacity(b, t) / |
686 | (sizeof(struct bkey_float) + sizeof(u8)); | |
1c6fdbd8 KO |
687 | } |
688 | ||
d108efc2 | 689 | static unsigned bset_rw_tree_capacity(const struct btree *b, const struct bset_tree *t) |
1c6fdbd8 KO |
690 | { |
691 | return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree); | |
692 | } | |
693 | ||
9ae82fe6 | 694 | static noinline void __build_rw_aux_tree(struct btree *b, struct bset_tree *t) |
1c6fdbd8 KO |
695 | { |
696 | struct bkey_packed *k; | |
697 | ||
698 | t->size = 1; | |
699 | t->extra = BSET_RW_AUX_TREE_VAL; | |
700 | rw_aux_tree(b, t)[0].offset = | |
701 | __btree_node_key_to_offset(b, btree_bkey_first(b, t)); | |
702 | ||
ad44bdc3 | 703 | bset_tree_for_each_key(b, t, k) { |
1c6fdbd8 KO |
704 | if (t->size == bset_rw_tree_capacity(b, t)) |
705 | break; | |
706 | ||
707 | if ((void *) k - (void *) rw_aux_to_bkey(b, t, t->size - 1) > | |
708 | L1_CACHE_BYTES) | |
709 | rw_aux_tree_set(b, t, t->size++, k); | |
710 | } | |
711 | } | |
712 | ||
9ae82fe6 | 713 | static noinline void __build_ro_aux_tree(struct btree *b, struct bset_tree *t) |
1c6fdbd8 KO |
714 | { |
715 | struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t); | |
9ae82fe6 | 716 | struct bkey_i min_key, max_key; |
1c6fdbd8 KO |
717 | unsigned j, cacheline = 1; |
718 | ||
1c6fdbd8 KO |
719 | t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)), |
720 | bset_ro_tree_capacity(b, t)); | |
721 | retry: | |
722 | if (t->size < 2) { | |
723 | t->size = 0; | |
724 | t->extra = BSET_NO_AUX_TREE_VAL; | |
725 | return; | |
726 | } | |
727 | ||
728 | t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1; | |
729 | ||
730 | /* First we figure out where the first key in each cacheline is */ | |
72492d55 | 731 | eytzinger1_for_each(j, t->size - 1) { |
1c6fdbd8 | 732 | while (bkey_to_cacheline(b, t, k) < cacheline) |
ac2ccddc | 733 | prev = k, k = bkey_p_next(k); |
1c6fdbd8 KO |
734 | |
735 | if (k >= btree_bkey_last(b, t)) { | |
736 | /* XXX: this path sucks */ | |
737 | t->size--; | |
738 | goto retry; | |
739 | } | |
740 | ||
741 | ro_aux_tree_prev(b, t)[j] = prev->u64s; | |
742 | bkey_float(b, t, j)->key_offset = | |
743 | bkey_to_cacheline_offset(b, t, cacheline++, k); | |
744 | ||
745 | EBUG_ON(tree_to_prev_bkey(b, t, j) != prev); | |
746 | EBUG_ON(tree_to_bkey(b, t, j) != k); | |
747 | } | |
748 | ||
ad44bdc3 | 749 | while (k != btree_bkey_last(b, t)) |
ac2ccddc | 750 | prev = k, k = bkey_p_next(k); |
1c6fdbd8 | 751 | |
c7e04e22 KO |
752 | if (!bkey_pack_pos(bkey_to_packed(&min_key), b->data->min_key, b)) { |
753 | bkey_init(&min_key.k); | |
754 | min_key.k.p = b->data->min_key; | |
755 | } | |
756 | ||
757 | if (!bkey_pack_pos(bkey_to_packed(&max_key), b->data->max_key, b)) { | |
758 | bkey_init(&max_key.k); | |
3ce8b463 | 759 | max_key.k.p = b->data->max_key; |
c7e04e22 | 760 | } |
9ae82fe6 | 761 | |
1c6fdbd8 | 762 | /* Then we build the tree */ |
72492d55 | 763 | eytzinger1_for_each(j, t->size - 1) |
17563164 KO |
764 | make_bfloat(b, t, j, |
765 | bkey_to_packed(&min_key), | |
766 | bkey_to_packed(&max_key)); | |
1c6fdbd8 KO |
767 | } |
768 | ||
769 | static void bset_alloc_tree(struct btree *b, struct bset_tree *t) | |
770 | { | |
771 | struct bset_tree *i; | |
772 | ||
773 | for (i = b->set; i != t; i++) | |
774 | BUG_ON(bset_has_rw_aux_tree(i)); | |
775 | ||
776 | bch2_bset_set_no_aux_tree(b, t); | |
777 | ||
778 | /* round up to next cacheline: */ | |
779 | t->aux_data_offset = round_up(bset_aux_tree_buf_start(b, t), | |
780 | SMP_CACHE_BYTES / sizeof(u64)); | |
781 | ||
782 | bset_aux_tree_verify(b); | |
783 | } | |
784 | ||
785 | void bch2_bset_build_aux_tree(struct btree *b, struct bset_tree *t, | |
786 | bool writeable) | |
787 | { | |
788 | if (writeable | |
789 | ? bset_has_rw_aux_tree(t) | |
790 | : bset_has_ro_aux_tree(t)) | |
791 | return; | |
792 | ||
793 | bset_alloc_tree(b, t); | |
794 | ||
795 | if (!__bset_tree_capacity(b, t)) | |
796 | return; | |
797 | ||
798 | if (writeable) | |
799 | __build_rw_aux_tree(b, t); | |
800 | else | |
801 | __build_ro_aux_tree(b, t); | |
802 | ||
803 | bset_aux_tree_verify(b); | |
804 | } | |
805 | ||
806 | void bch2_bset_init_first(struct btree *b, struct bset *i) | |
807 | { | |
808 | struct bset_tree *t; | |
809 | ||
810 | BUG_ON(b->nsets); | |
811 | ||
812 | memset(i, 0, sizeof(*i)); | |
813 | get_random_bytes(&i->seq, sizeof(i->seq)); | |
814 | SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); | |
815 | ||
816 | t = &b->set[b->nsets++]; | |
817 | set_btree_bset(b, t, i); | |
818 | } | |
819 | ||
820 | void bch2_bset_init_next(struct bch_fs *c, struct btree *b, | |
821 | struct btree_node_entry *bne) | |
822 | { | |
823 | struct bset *i = &bne->keys; | |
824 | struct bset_tree *t; | |
825 | ||
826 | BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c)); | |
827 | BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b))); | |
828 | BUG_ON(b->nsets >= MAX_BSETS); | |
829 | ||
830 | memset(i, 0, sizeof(*i)); | |
831 | i->seq = btree_bset_first(b)->seq; | |
832 | SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); | |
833 | ||
834 | t = &b->set[b->nsets++]; | |
835 | set_btree_bset(b, t, i); | |
836 | } | |
837 | ||
838 | /* | |
839 | * find _some_ key in the same bset as @k that precedes @k - not necessarily the | |
840 | * immediate predecessor: | |
841 | */ | |
842 | static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t, | |
843 | struct bkey_packed *k) | |
844 | { | |
845 | struct bkey_packed *p; | |
846 | unsigned offset; | |
847 | int j; | |
848 | ||
849 | EBUG_ON(k < btree_bkey_first(b, t) || | |
850 | k > btree_bkey_last(b, t)); | |
851 | ||
852 | if (k == btree_bkey_first(b, t)) | |
853 | return NULL; | |
854 | ||
855 | switch (bset_aux_tree_type(t)) { | |
856 | case BSET_NO_AUX_TREE: | |
857 | p = btree_bkey_first(b, t); | |
858 | break; | |
859 | case BSET_RO_AUX_TREE: | |
860 | j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k)); | |
861 | ||
862 | do { | |
863 | p = j ? tree_to_bkey(b, t, | |
864 | __inorder_to_eytzinger1(j--, | |
72492d55 | 865 | t->size - 1, t->extra)) |
1c6fdbd8 KO |
866 | : btree_bkey_first(b, t); |
867 | } while (p >= k); | |
868 | break; | |
869 | case BSET_RW_AUX_TREE: | |
870 | offset = __btree_node_key_to_offset(b, k); | |
871 | j = rw_aux_tree_bsearch(b, t, offset); | |
872 | p = j ? rw_aux_to_bkey(b, t, j - 1) | |
873 | : btree_bkey_first(b, t); | |
874 | break; | |
875 | } | |
876 | ||
877 | return p; | |
878 | } | |
879 | ||
880 | struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, | |
881 | struct bset_tree *t, | |
882 | struct bkey_packed *k, | |
883 | unsigned min_key_type) | |
884 | { | |
885 | struct bkey_packed *p, *i, *ret = NULL, *orig_k = k; | |
886 | ||
887 | while ((p = __bkey_prev(b, t, k)) && !ret) { | |
ac2ccddc | 888 | for (i = p; i != k; i = bkey_p_next(i)) |
1c6fdbd8 KO |
889 | if (i->type >= min_key_type) |
890 | ret = i; | |
891 | ||
892 | k = p; | |
893 | } | |
894 | ||
29364f34 | 895 | if (bch2_expensive_debug_checks) { |
1c6fdbd8 KO |
896 | BUG_ON(ret >= orig_k); |
897 | ||
ad44bdc3 | 898 | for (i = ret |
ac2ccddc | 899 | ? bkey_p_next(ret) |
ad44bdc3 | 900 | : btree_bkey_first(b, t); |
1c6fdbd8 | 901 | i != orig_k; |
ac2ccddc | 902 | i = bkey_p_next(i)) |
1c6fdbd8 KO |
903 | BUG_ON(i->type >= min_key_type); |
904 | } | |
905 | ||
906 | return ret; | |
907 | } | |
908 | ||
909 | /* Insert */ | |
910 | ||
1c6fdbd8 KO |
911 | static void bch2_bset_fix_lookup_table(struct btree *b, |
912 | struct bset_tree *t, | |
913 | struct bkey_packed *_where, | |
914 | unsigned clobber_u64s, | |
915 | unsigned new_u64s) | |
916 | { | |
917 | int shift = new_u64s - clobber_u64s; | |
918 | unsigned l, j, where = __btree_node_key_to_offset(b, _where); | |
919 | ||
920 | EBUG_ON(bset_has_ro_aux_tree(t)); | |
921 | ||
922 | if (!bset_has_rw_aux_tree(t)) | |
923 | return; | |
924 | ||
617391ba | 925 | /* returns first entry >= where */ |
1c6fdbd8 KO |
926 | l = rw_aux_tree_bsearch(b, t, where); |
927 | ||
1c6fdbd8 KO |
928 | if (!l) /* never delete first entry */ |
929 | l++; | |
930 | else if (l < t->size && | |
931 | where < t->end_offset && | |
932 | rw_aux_tree(b, t)[l].offset == where) | |
933 | rw_aux_tree_set(b, t, l++, _where); | |
934 | ||
935 | /* l now > where */ | |
936 | ||
937 | for (j = l; | |
938 | j < t->size && | |
939 | rw_aux_tree(b, t)[j].offset < where + clobber_u64s; | |
940 | j++) | |
941 | ; | |
942 | ||
943 | if (j < t->size && | |
944 | rw_aux_tree(b, t)[j].offset + shift == | |
945 | rw_aux_tree(b, t)[l - 1].offset) | |
946 | j++; | |
947 | ||
948 | memmove(&rw_aux_tree(b, t)[l], | |
949 | &rw_aux_tree(b, t)[j], | |
950 | (void *) &rw_aux_tree(b, t)[t->size] - | |
951 | (void *) &rw_aux_tree(b, t)[j]); | |
952 | t->size -= j - l; | |
953 | ||
954 | for (j = l; j < t->size; j++) | |
3e3e02e6 | 955 | rw_aux_tree(b, t)[j].offset += shift; |
1c6fdbd8 KO |
956 | |
957 | EBUG_ON(l < t->size && | |
958 | rw_aux_tree(b, t)[l].offset == | |
959 | rw_aux_tree(b, t)[l - 1].offset); | |
960 | ||
961 | if (t->size < bset_rw_tree_capacity(b, t) && | |
962 | (l < t->size | |
963 | ? rw_aux_tree(b, t)[l].offset | |
964 | : t->end_offset) - | |
965 | rw_aux_tree(b, t)[l - 1].offset > | |
966 | L1_CACHE_BYTES / sizeof(u64)) { | |
967 | struct bkey_packed *start = rw_aux_to_bkey(b, t, l - 1); | |
968 | struct bkey_packed *end = l < t->size | |
969 | ? rw_aux_to_bkey(b, t, l) | |
970 | : btree_bkey_last(b, t); | |
971 | struct bkey_packed *k = start; | |
972 | ||
973 | while (1) { | |
ac2ccddc | 974 | k = bkey_p_next(k); |
1c6fdbd8 KO |
975 | if (k == end) |
976 | break; | |
977 | ||
978 | if ((void *) k - (void *) start >= L1_CACHE_BYTES) { | |
979 | memmove(&rw_aux_tree(b, t)[l + 1], | |
980 | &rw_aux_tree(b, t)[l], | |
981 | (void *) &rw_aux_tree(b, t)[t->size] - | |
982 | (void *) &rw_aux_tree(b, t)[l]); | |
983 | t->size++; | |
984 | rw_aux_tree_set(b, t, l, k); | |
985 | break; | |
986 | } | |
987 | } | |
988 | } | |
989 | ||
990 | bch2_bset_verify_rw_aux_tree(b, t); | |
991 | bset_aux_tree_verify(b); | |
992 | } | |
993 | ||
994 | void bch2_bset_insert(struct btree *b, | |
995 | struct btree_node_iter *iter, | |
996 | struct bkey_packed *where, | |
997 | struct bkey_i *insert, | |
998 | unsigned clobber_u64s) | |
999 | { | |
1000 | struct bkey_format *f = &b->format; | |
1001 | struct bset_tree *t = bset_tree_last(b); | |
1002 | struct bkey_packed packed, *src = bkey_to_packed(insert); | |
1003 | ||
1004 | bch2_bset_verify_rw_aux_tree(b, t); | |
271a3d3a | 1005 | bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s); |
1c6fdbd8 KO |
1006 | |
1007 | if (bch2_bkey_pack_key(&packed, &insert->k, f)) | |
1008 | src = &packed; | |
1009 | ||
c052cf82 | 1010 | if (!bkey_deleted(&insert->k)) |
1c6fdbd8 KO |
1011 | btree_keys_account_key_add(&b->nr, t - b->set, src); |
1012 | ||
1013 | if (src->u64s != clobber_u64s) { | |
5cfd6977 KO |
1014 | u64 *src_p = (u64 *) where->_data + clobber_u64s; |
1015 | u64 *dst_p = (u64 *) where->_data + src->u64s; | |
1c6fdbd8 KO |
1016 | |
1017 | EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) < | |
1018 | (int) clobber_u64s - src->u64s); | |
1019 | ||
1020 | memmove_u64s(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); | |
1021 | le16_add_cpu(&bset(b, t)->u64s, src->u64s - clobber_u64s); | |
1022 | set_btree_bset_end(b, t); | |
1023 | } | |
1024 | ||
d598a9b7 | 1025 | memcpy_u64s_small(where, src, |
1c6fdbd8 KO |
1026 | bkeyp_key_u64s(f, src)); |
1027 | memcpy_u64s(bkeyp_val(f, where), &insert->v, | |
1028 | bkeyp_val_u64s(f, src)); | |
1029 | ||
fdf22400 KO |
1030 | if (src->u64s != clobber_u64s) |
1031 | bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s); | |
1c6fdbd8 | 1032 | |
1c6fdbd8 KO |
1033 | bch2_verify_btree_nr_keys(b); |
1034 | } | |
1035 | ||
1036 | void bch2_bset_delete(struct btree *b, | |
1037 | struct bkey_packed *where, | |
1038 | unsigned clobber_u64s) | |
1039 | { | |
1040 | struct bset_tree *t = bset_tree_last(b); | |
5cfd6977 | 1041 | u64 *src_p = (u64 *) where->_data + clobber_u64s; |
1c6fdbd8 KO |
1042 | u64 *dst_p = where->_data; |
1043 | ||
1044 | bch2_bset_verify_rw_aux_tree(b, t); | |
1045 | ||
1046 | EBUG_ON(le16_to_cpu(bset(b, t)->u64s) < clobber_u64s); | |
1047 | ||
1048 | memmove_u64s_down(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); | |
1049 | le16_add_cpu(&bset(b, t)->u64s, -clobber_u64s); | |
1050 | set_btree_bset_end(b, t); | |
1051 | ||
1052 | bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, 0); | |
1053 | } | |
1054 | ||
1055 | /* Lookup */ | |
1056 | ||
1057 | __flatten | |
1058 | static struct bkey_packed *bset_search_write_set(const struct btree *b, | |
1059 | struct bset_tree *t, | |
2649b514 | 1060 | struct bpos *search) |
1c6fdbd8 KO |
1061 | { |
1062 | unsigned l = 0, r = t->size; | |
1063 | ||
1064 | while (l + 1 != r) { | |
1065 | unsigned m = (l + r) >> 1; | |
1066 | ||
e88a75eb | 1067 | if (bpos_lt(rw_aux_tree(b, t)[m].k, *search)) |
1c6fdbd8 KO |
1068 | l = m; |
1069 | else | |
1070 | r = m; | |
1071 | } | |
1072 | ||
1073 | return rw_aux_to_bkey(b, t, l); | |
1074 | } | |
1075 | ||
c4537686 KO |
1076 | static inline void prefetch_four_cachelines(void *p) |
1077 | { | |
6f152b0f | 1078 | #ifdef CONFIG_X86_64 |
2eba51a6 BH |
1079 | asm("prefetcht0 (-127 + 64 * 0)(%0);" |
1080 | "prefetcht0 (-127 + 64 * 1)(%0);" | |
1081 | "prefetcht0 (-127 + 64 * 2)(%0);" | |
1082 | "prefetcht0 (-127 + 64 * 3)(%0);" | |
c4537686 KO |
1083 | : |
1084 | : "r" (p + 127)); | |
1085 | #else | |
1086 | prefetch(p + L1_CACHE_BYTES * 0); | |
1087 | prefetch(p + L1_CACHE_BYTES * 1); | |
1088 | prefetch(p + L1_CACHE_BYTES * 2); | |
1089 | prefetch(p + L1_CACHE_BYTES * 3); | |
1090 | #endif | |
1091 | } | |
1092 | ||
58404bb2 KO |
1093 | static inline bool bkey_mantissa_bits_dropped(const struct btree *b, |
1094 | const struct bkey_float *f, | |
1095 | unsigned idx) | |
1096 | { | |
1097 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | |
1098 | unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits; | |
1099 | ||
1100 | return f->exponent > key_bits_start; | |
1101 | #else | |
1102 | unsigned key_bits_end = high_bit_offset + b->nr_key_bits; | |
58404bb2 | 1103 | |
b904a799 | 1104 | return f->exponent + BKEY_MANTISSA_BITS < key_bits_end; |
58404bb2 KO |
1105 | #endif |
1106 | } | |
1107 | ||
1c6fdbd8 KO |
1108 | __flatten |
1109 | static struct bkey_packed *bset_search_tree(const struct btree *b, | |
d108efc2 KO |
1110 | const struct bset_tree *t, |
1111 | const struct bpos *search, | |
1c6fdbd8 KO |
1112 | const struct bkey_packed *packed_search) |
1113 | { | |
1114 | struct ro_aux_tree *base = ro_aux_tree_base(b, t); | |
c4537686 | 1115 | struct bkey_float *f; |
58404bb2 KO |
1116 | struct bkey_packed *k; |
1117 | unsigned inorder, n = 1, l, r; | |
1118 | int cmp; | |
1c6fdbd8 | 1119 | |
c4537686 KO |
1120 | do { |
1121 | if (likely(n << 4 < t->size)) | |
b904a799 | 1122 | prefetch(&base->f[n << 4]); |
1c6fdbd8 | 1123 | |
b904a799 | 1124 | f = &base->f[n]; |
58404bb2 KO |
1125 | if (unlikely(f->exponent >= BFLOAT_FAILED)) |
1126 | goto slowpath; | |
1127 | ||
b904a799 | 1128 | l = f->mantissa; |
58404bb2 KO |
1129 | r = bkey_mantissa(packed_search, f, n); |
1130 | ||
1131 | if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n)) | |
1132 | goto slowpath; | |
1133 | ||
1134 | n = n * 2 + (l < r); | |
1135 | continue; | |
1136 | slowpath: | |
1137 | k = tree_to_bkey(b, t, n); | |
1138 | cmp = bkey_cmp_p_or_unp(b, k, packed_search, search); | |
1139 | if (!cmp) | |
1140 | return k; | |
1141 | ||
1142 | n = n * 2 + (cmp < 0); | |
1c6fdbd8 KO |
1143 | } while (n < t->size); |
1144 | ||
72492d55 | 1145 | inorder = __eytzinger1_to_inorder(n >> 1, t->size - 1, t->extra); |
1c6fdbd8 KO |
1146 | |
1147 | /* | |
1148 | * n would have been the node we recursed to - the low bit tells us if | |
1149 | * we recursed left or recursed right. | |
1150 | */ | |
b904a799 KO |
1151 | if (likely(!(n & 1))) { |
1152 | --inorder; | |
1153 | if (unlikely(!inorder)) | |
1c6fdbd8 | 1154 | return btree_bkey_first(b, t); |
b904a799 | 1155 | |
72492d55 | 1156 | f = &base->f[eytzinger1_prev(n >> 1, t->size - 1)]; |
1c6fdbd8 | 1157 | } |
b904a799 KO |
1158 | |
1159 | return cacheline_to_bkey(b, t, inorder, f->key_offset); | |
1c6fdbd8 KO |
1160 | } |
1161 | ||
c4537686 KO |
1162 | static __always_inline __flatten |
1163 | struct bkey_packed *__bch2_bset_search(struct btree *b, | |
1c6fdbd8 | 1164 | struct bset_tree *t, |
a00fd8c5 | 1165 | struct bpos *search, |
a00fd8c5 | 1166 | const struct bkey_packed *lossy_packed_search) |
1c6fdbd8 | 1167 | { |
1c6fdbd8 KO |
1168 | |
1169 | /* | |
1170 | * First, we search for a cacheline, then lastly we do a linear search | |
1171 | * within that cacheline. | |
1172 | * | |
1173 | * To search for the cacheline, there's three different possibilities: | |
1174 | * * The set is too small to have a search tree, so we just do a linear | |
1175 | * search over the whole set. | |
1176 | * * The set is the one we're currently inserting into; keeping a full | |
1177 | * auxiliary search tree up to date would be too expensive, so we | |
1178 | * use a much simpler lookup table to do a binary search - | |
1179 | * bset_search_write_set(). | |
1180 | * * Or we use the auxiliary search tree we constructed earlier - | |
1181 | * bset_search_tree() | |
1182 | */ | |
1183 | ||
1184 | switch (bset_aux_tree_type(t)) { | |
1185 | case BSET_NO_AUX_TREE: | |
c4537686 | 1186 | return btree_bkey_first(b, t); |
1c6fdbd8 | 1187 | case BSET_RW_AUX_TREE: |
2649b514 | 1188 | return bset_search_write_set(b, t, search); |
1c6fdbd8 | 1189 | case BSET_RO_AUX_TREE: |
c4537686 KO |
1190 | return bset_search_tree(b, t, search, lossy_packed_search); |
1191 | default: | |
439c172b | 1192 | BUG(); |
1c6fdbd8 | 1193 | } |
c4537686 | 1194 | } |
1c6fdbd8 | 1195 | |
c4537686 KO |
1196 | static __always_inline __flatten |
1197 | struct bkey_packed *bch2_bset_search_linear(struct btree *b, | |
1198 | struct bset_tree *t, | |
1199 | struct bpos *search, | |
1200 | struct bkey_packed *packed_search, | |
1201 | const struct bkey_packed *lossy_packed_search, | |
1202 | struct bkey_packed *m) | |
1203 | { | |
1c6fdbd8 KO |
1204 | if (lossy_packed_search) |
1205 | while (m != btree_bkey_last(b, t) && | |
9626aeb1 KO |
1206 | bkey_iter_cmp_p_or_unp(b, m, |
1207 | lossy_packed_search, search) < 0) | |
ac2ccddc | 1208 | m = bkey_p_next(m); |
1c6fdbd8 KO |
1209 | |
1210 | if (!packed_search) | |
1211 | while (m != btree_bkey_last(b, t) && | |
9626aeb1 | 1212 | bkey_iter_pos_cmp(b, m, search) < 0) |
ac2ccddc | 1213 | m = bkey_p_next(m); |
1c6fdbd8 | 1214 | |
29364f34 | 1215 | if (bch2_expensive_debug_checks) { |
1c6fdbd8 KO |
1216 | struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); |
1217 | ||
1218 | BUG_ON(prev && | |
9626aeb1 KO |
1219 | bkey_iter_cmp_p_or_unp(b, prev, |
1220 | packed_search, search) >= 0); | |
1c6fdbd8 KO |
1221 | } |
1222 | ||
1223 | return m; | |
1224 | } | |
1225 | ||
1226 | /* Btree node iterator */ | |
1227 | ||
a00fd8c5 KO |
1228 | static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter, |
1229 | struct btree *b, | |
1230 | const struct bkey_packed *k, | |
1231 | const struct bkey_packed *end) | |
1232 | { | |
1233 | if (k != end) { | |
1234 | struct btree_node_iter_set *pos; | |
1235 | ||
1236 | btree_node_iter_for_each(iter, pos) | |
1237 | ; | |
1238 | ||
1239 | BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data)); | |
1240 | *pos = (struct btree_node_iter_set) { | |
1241 | __btree_node_key_to_offset(b, k), | |
1242 | __btree_node_key_to_offset(b, end) | |
1243 | }; | |
1244 | } | |
1245 | } | |
1246 | ||
1c6fdbd8 KO |
1247 | void bch2_btree_node_iter_push(struct btree_node_iter *iter, |
1248 | struct btree *b, | |
1249 | const struct bkey_packed *k, | |
1250 | const struct bkey_packed *end) | |
1251 | { | |
1252 | __bch2_btree_node_iter_push(iter, b, k, end); | |
1253 | bch2_btree_node_iter_sort(iter, b); | |
1254 | } | |
1255 | ||
3e3e02e6 | 1256 | noinline __flatten __cold |
1c6fdbd8 | 1257 | static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, |
a00fd8c5 | 1258 | struct btree *b, struct bpos *search) |
1c6fdbd8 | 1259 | { |
2649b514 | 1260 | struct bkey_packed *k; |
1c6fdbd8 | 1261 | |
a00fd8c5 | 1262 | trace_bkey_pack_pos_fail(search); |
1c6fdbd8 | 1263 | |
2649b514 | 1264 | bch2_btree_node_iter_init_from_start(iter, b); |
1c6fdbd8 | 1265 | |
2649b514 KO |
1266 | while ((k = bch2_btree_node_iter_peek(iter, b)) && |
1267 | bkey_iter_pos_cmp(b, k, search) < 0) | |
1268 | bch2_btree_node_iter_advance(iter, b); | |
1c6fdbd8 KO |
1269 | } |
1270 | ||
1271 | /** | |
96dea3d5 | 1272 | * bch2_btree_node_iter_init - initialize a btree node iterator, starting from a |
1c6fdbd8 KO |
1273 | * given position |
1274 | * | |
96dea3d5 KO |
1275 | * @iter: iterator to initialize |
1276 | * @b: btree node to search | |
1277 | * @search: search key | |
1278 | * | |
1c6fdbd8 KO |
1279 | * Main entry point to the lookup code for individual btree nodes: |
1280 | * | |
1281 | * NOTE: | |
1282 | * | |
1283 | * When you don't filter out deleted keys, btree nodes _do_ contain duplicate | |
1284 | * keys. This doesn't matter for most code, but it does matter for lookups. | |
1285 | * | |
1286 | * Some adjacent keys with a string of equal keys: | |
1287 | * i j k k k k l m | |
1288 | * | |
1289 | * If you search for k, the lookup code isn't guaranteed to return you any | |
1290 | * specific k. The lookup code is conceptually doing a binary search and | |
1291 | * iterating backwards is very expensive so if the pivot happens to land at the | |
1292 | * last k that's what you'll get. | |
1293 | * | |
1294 | * This works out ok, but it's something to be aware of: | |
1295 | * | |
1296 | * - For non extents, we guarantee that the live key comes last - see | |
1297 | * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't | |
1298 | * see will only be deleted keys you don't care about. | |
1299 | * | |
1300 | * - For extents, deleted keys sort last (see the comment at the top of this | |
1301 | * file). But when you're searching for extents, you actually want the first | |
1302 | * key strictly greater than your search key - an extent that compares equal | |
1303 | * to the search key is going to have 0 sectors after the search key. | |
1304 | * | |
1305 | * But this does mean that we can't just search for | |
e751c01a | 1306 | * bpos_successor(start_of_range) to get the first extent that overlaps with |
1c6fdbd8 KO |
1307 | * the range we want - if we're unlucky and there's an extent that ends |
1308 | * exactly where we searched, then there could be a deleted key at the same | |
1309 | * position and we'd get that when we search instead of the preceding extent | |
1310 | * we needed. | |
1311 | * | |
1312 | * So we've got to search for start_of_range, then after the lookup iterate | |
1313 | * past any extents that compare equal to the position we searched for. | |
1314 | */ | |
c4e065c2 | 1315 | __flatten |
1c6fdbd8 | 1316 | void bch2_btree_node_iter_init(struct btree_node_iter *iter, |
a00fd8c5 | 1317 | struct btree *b, struct bpos *search) |
1c6fdbd8 | 1318 | { |
1c6fdbd8 | 1319 | struct bkey_packed p, *packed_search = NULL; |
c4e065c2 | 1320 | struct btree_node_iter_set *pos = iter->data; |
c4537686 KO |
1321 | struct bkey_packed *k[MAX_BSETS]; |
1322 | unsigned i; | |
1c6fdbd8 | 1323 | |
e88a75eb KO |
1324 | EBUG_ON(bpos_lt(*search, b->data->min_key)); |
1325 | EBUG_ON(bpos_gt(*search, b->data->max_key)); | |
1c6fdbd8 KO |
1326 | bset_aux_tree_verify(b); |
1327 | ||
271a3d3a | 1328 | memset(iter, 0, sizeof(*iter)); |
1c6fdbd8 | 1329 | |
a00fd8c5 | 1330 | switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) { |
1c6fdbd8 KO |
1331 | case BKEY_PACK_POS_EXACT: |
1332 | packed_search = &p; | |
1333 | break; | |
1334 | case BKEY_PACK_POS_SMALLER: | |
1335 | packed_search = NULL; | |
1336 | break; | |
1337 | case BKEY_PACK_POS_FAIL: | |
a00fd8c5 | 1338 | btree_node_iter_init_pack_failed(iter, b, search); |
1c6fdbd8 KO |
1339 | return; |
1340 | } | |
1341 | ||
c4537686 KO |
1342 | for (i = 0; i < b->nsets; i++) { |
1343 | k[i] = __bch2_bset_search(b, b->set + i, search, &p); | |
1344 | prefetch_four_cachelines(k[i]); | |
1345 | } | |
1346 | ||
1347 | for (i = 0; i < b->nsets; i++) { | |
1348 | struct bset_tree *t = b->set + i; | |
c4e065c2 KO |
1349 | struct bkey_packed *end = btree_bkey_last(b, t); |
1350 | ||
c4537686 KO |
1351 | k[i] = bch2_bset_search_linear(b, t, search, |
1352 | packed_search, &p, k[i]); | |
1353 | if (k[i] != end) | |
c4e065c2 | 1354 | *pos++ = (struct btree_node_iter_set) { |
c4537686 | 1355 | __btree_node_key_to_offset(b, k[i]), |
c4e065c2 KO |
1356 | __btree_node_key_to_offset(b, end) |
1357 | }; | |
1358 | } | |
1c6fdbd8 KO |
1359 | |
1360 | bch2_btree_node_iter_sort(iter, b); | |
1361 | } | |
1362 | ||
1363 | void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter, | |
271a3d3a | 1364 | struct btree *b) |
1c6fdbd8 KO |
1365 | { |
1366 | struct bset_tree *t; | |
1367 | ||
271a3d3a | 1368 | memset(iter, 0, sizeof(*iter)); |
1c6fdbd8 KO |
1369 | |
1370 | for_each_bset(b, t) | |
1371 | __bch2_btree_node_iter_push(iter, b, | |
1372 | btree_bkey_first(b, t), | |
1373 | btree_bkey_last(b, t)); | |
1374 | bch2_btree_node_iter_sort(iter, b); | |
1375 | } | |
1376 | ||
1377 | struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter, | |
1378 | struct btree *b, | |
1379 | struct bset_tree *t) | |
1380 | { | |
1381 | struct btree_node_iter_set *set; | |
1382 | ||
1383 | btree_node_iter_for_each(iter, set) | |
1384 | if (set->end == t->end_offset) | |
1385 | return __btree_node_offset_to_key(b, set->k); | |
1386 | ||
1387 | return btree_bkey_last(b, t); | |
1388 | } | |
1389 | ||
1390 | static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter, | |
1391 | struct btree *b, | |
1392 | unsigned first) | |
1393 | { | |
1394 | bool ret; | |
1395 | ||
271a3d3a | 1396 | if ((ret = (btree_node_iter_cmp(b, |
1c6fdbd8 KO |
1397 | iter->data[first], |
1398 | iter->data[first + 1]) > 0))) | |
1399 | swap(iter->data[first], iter->data[first + 1]); | |
1400 | return ret; | |
1401 | } | |
1402 | ||
1403 | void bch2_btree_node_iter_sort(struct btree_node_iter *iter, | |
1404 | struct btree *b) | |
1405 | { | |
1406 | /* unrolled bubble sort: */ | |
1407 | ||
1408 | if (!__btree_node_iter_set_end(iter, 2)) { | |
1409 | btree_node_iter_sort_two(iter, b, 0); | |
1410 | btree_node_iter_sort_two(iter, b, 1); | |
1411 | } | |
1412 | ||
1413 | if (!__btree_node_iter_set_end(iter, 1)) | |
1414 | btree_node_iter_sort_two(iter, b, 0); | |
1415 | } | |
1416 | ||
1417 | void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter, | |
1418 | struct btree_node_iter_set *set) | |
1419 | { | |
1420 | struct btree_node_iter_set *last = | |
1421 | iter->data + ARRAY_SIZE(iter->data) - 1; | |
1422 | ||
1423 | memmove(&set[0], &set[1], (void *) last - (void *) set); | |
1424 | *last = (struct btree_node_iter_set) { 0, 0 }; | |
1425 | } | |
1426 | ||
1427 | static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, | |
1428 | struct btree *b) | |
1429 | { | |
1430 | iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s; | |
1431 | ||
1432 | EBUG_ON(iter->data->k > iter->data->end); | |
1433 | ||
1434 | if (unlikely(__btree_node_iter_set_end(iter, 0))) { | |
005def8f KO |
1435 | /* avoid an expensive memmove call: */ |
1436 | iter->data[0] = iter->data[1]; | |
1437 | iter->data[1] = iter->data[2]; | |
1438 | iter->data[2] = (struct btree_node_iter_set) { 0, 0 }; | |
1c6fdbd8 KO |
1439 | return; |
1440 | } | |
1441 | ||
1442 | if (__btree_node_iter_set_end(iter, 1)) | |
1443 | return; | |
1444 | ||
1445 | if (!btree_node_iter_sort_two(iter, b, 0)) | |
1446 | return; | |
1447 | ||
1448 | if (__btree_node_iter_set_end(iter, 2)) | |
1449 | return; | |
1450 | ||
1451 | btree_node_iter_sort_two(iter, b, 1); | |
1452 | } | |
1453 | ||
1c6fdbd8 KO |
1454 | void bch2_btree_node_iter_advance(struct btree_node_iter *iter, |
1455 | struct btree *b) | |
1456 | { | |
29364f34 | 1457 | if (bch2_expensive_debug_checks) { |
f13f5a8c KO |
1458 | bch2_btree_node_iter_verify(iter, b); |
1459 | bch2_btree_node_iter_next_check(iter, b); | |
1460 | } | |
1461 | ||
271a3d3a | 1462 | __bch2_btree_node_iter_advance(iter, b); |
1c6fdbd8 KO |
1463 | } |
1464 | ||
1c6fdbd8 KO |
1465 | /* |
1466 | * Expensive: | |
1467 | */ | |
e67ab045 KO |
1468 | struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, |
1469 | struct btree *b) | |
1c6fdbd8 KO |
1470 | { |
1471 | struct bkey_packed *k, *prev = NULL; | |
1c6fdbd8 KO |
1472 | struct btree_node_iter_set *set; |
1473 | struct bset_tree *t; | |
ac10a961 | 1474 | unsigned end = 0; |
1c6fdbd8 | 1475 | |
29364f34 | 1476 | if (bch2_expensive_debug_checks) |
2dac0eae | 1477 | bch2_btree_node_iter_verify(iter, b); |
1c6fdbd8 KO |
1478 | |
1479 | for_each_bset(b, t) { | |
e67ab045 KO |
1480 | k = bch2_bkey_prev_all(b, t, |
1481 | bch2_btree_node_iter_bset_pos(iter, b, t)); | |
1c6fdbd8 | 1482 | if (k && |
a00fd8c5 | 1483 | (!prev || bkey_iter_cmp(b, k, prev) > 0)) { |
1c6fdbd8 KO |
1484 | prev = k; |
1485 | end = t->end_offset; | |
1486 | } | |
1487 | } | |
1488 | ||
1489 | if (!prev) | |
e67ab045 | 1490 | return NULL; |
1c6fdbd8 KO |
1491 | |
1492 | /* | |
1493 | * We're manually memmoving instead of just calling sort() to ensure the | |
1494 | * prev we picked ends up in slot 0 - sort won't necessarily put it | |
1495 | * there because of duplicate deleted keys: | |
1496 | */ | |
1497 | btree_node_iter_for_each(iter, set) | |
1498 | if (set->end == end) | |
1499 | goto found; | |
1500 | ||
1501 | BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]); | |
1502 | found: | |
1503 | BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data)); | |
1504 | ||
1505 | memmove(&iter->data[1], | |
1506 | &iter->data[0], | |
1507 | (void *) set - (void *) &iter->data[0]); | |
1508 | ||
1509 | iter->data[0].k = __btree_node_key_to_offset(b, prev); | |
1510 | iter->data[0].end = end; | |
1c6fdbd8 | 1511 | |
29364f34 | 1512 | if (bch2_expensive_debug_checks) |
2dac0eae | 1513 | bch2_btree_node_iter_verify(iter, b); |
e67ab045 KO |
1514 | return prev; |
1515 | } | |
1c6fdbd8 | 1516 | |
c052cf82 KO |
1517 | struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *iter, |
1518 | struct btree *b) | |
e67ab045 KO |
1519 | { |
1520 | struct bkey_packed *prev; | |
1521 | ||
1522 | do { | |
1523 | prev = bch2_btree_node_iter_prev_all(iter, b); | |
c052cf82 | 1524 | } while (prev && bkey_deleted(prev)); |
1c6fdbd8 KO |
1525 | |
1526 | return prev; | |
1527 | } | |
1528 | ||
1529 | struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter, | |
1530 | struct btree *b, | |
1531 | struct bkey *u) | |
1532 | { | |
1533 | struct bkey_packed *k = bch2_btree_node_iter_peek(iter, b); | |
1534 | ||
1535 | return k ? bkey_disassemble(b, k, u) : bkey_s_c_null; | |
1536 | } | |
1537 | ||
1538 | /* Mergesort */ | |
1539 | ||
a345b0f3 | 1540 | void bch2_btree_keys_stats(const struct btree *b, struct bset_stats *stats) |
1c6fdbd8 | 1541 | { |
a345b0f3 | 1542 | const struct bset_tree *t; |
1c6fdbd8 KO |
1543 | |
1544 | for_each_bset(b, t) { | |
1545 | enum bset_aux_tree_type type = bset_aux_tree_type(t); | |
1546 | size_t j; | |
1547 | ||
1548 | stats->sets[type].nr++; | |
1549 | stats->sets[type].bytes += le16_to_cpu(bset(b, t)->u64s) * | |
1550 | sizeof(u64); | |
1551 | ||
1552 | if (bset_has_ro_aux_tree(t)) { | |
1553 | stats->floats += t->size - 1; | |
1554 | ||
1555 | for (j = 1; j < t->size; j++) | |
58404bb2 KO |
1556 | stats->failed += |
1557 | bkey_float(b, t, j)->exponent == | |
1558 | BFLOAT_FAILED; | |
1c6fdbd8 KO |
1559 | } |
1560 | } | |
1561 | } | |
1562 | ||
319f9ac3 KO |
1563 | void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, |
1564 | struct bkey_packed *k) | |
1c6fdbd8 KO |
1565 | { |
1566 | struct bset_tree *t = bch2_bkey_to_bset(b, k); | |
1bdb67e8 | 1567 | struct bkey uk; |
b564513c | 1568 | unsigned j, inorder; |
1c6fdbd8 | 1569 | |
1c6fdbd8 | 1570 | if (!bset_has_ro_aux_tree(t)) |
319f9ac3 | 1571 | return; |
1c6fdbd8 | 1572 | |
b564513c KO |
1573 | inorder = bkey_to_cacheline(b, t, k); |
1574 | if (!inorder || inorder >= t->size) | |
319f9ac3 | 1575 | return; |
b564513c | 1576 | |
72492d55 | 1577 | j = __inorder_to_eytzinger1(inorder, t->size - 1, t->extra); |
b564513c | 1578 | if (k != tree_to_bkey(b, t, j)) |
319f9ac3 | 1579 | return; |
b564513c KO |
1580 | |
1581 | switch (bkey_float(b, t, j)->exponent) { | |
58404bb2 | 1582 | case BFLOAT_FAILED: |
b564513c | 1583 | uk = bkey_unpack_key(b, k); |
401ec4db | 1584 | prt_printf(out, |
319f9ac3 | 1585 | " failed unpacked at depth %u\n" |
f020bfcd KO |
1586 | "\t", |
1587 | ilog2(j)); | |
1588 | bch2_bpos_to_text(out, uk.p); | |
401ec4db | 1589 | prt_printf(out, "\n"); |
319f9ac3 | 1590 | break; |
b564513c | 1591 | } |
1c6fdbd8 | 1592 | } |