bcachefs: kill trans_for_each_path_from()
fs/bcachefs/btree_iter.c

// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "replicas.h"
#include "snapshot.h"
#include "trace.h"

#include <linux/random.h>
#include <linux/prefetch.h>

static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
static inline void btree_path_list_add(struct btree_trans *,
				       btree_path_idx_t, btree_path_idx_t);

static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter)
{
#ifdef TRACK_PATH_ALLOCATED
	return iter->ip_allocated;
#else
	return 0;
#endif
}

static btree_path_idx_t btree_path_alloc(struct btree_trans *, btree_path_idx_t);
static void bch2_trans_srcu_lock(struct btree_trans *);

static inline int __btree_path_cmp(const struct btree_path *l,
				   enum btree_id r_btree_id,
				   bool r_cached,
				   struct bpos r_pos,
				   unsigned r_level)
{
	/*
	 * Must match lock ordering as defined by __bch2_btree_node_lock:
	 */
	return cmp_int(l->btree_id, r_btree_id) ?:
		cmp_int((int) l->cached, (int) r_cached) ?:
		bpos_cmp(l->pos, r_pos) ?:
		-cmp_int(l->level, r_level);
}

static inline int btree_path_cmp(const struct btree_path *l,
				 const struct btree_path *r)
{
	return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
}

static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_successor(p);
	} else {
		p = bpos_nosnap_successor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_predecessor(p);
	} else {
		p = bpos_nosnap_predecessor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

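/*
 * For extents, the iterator's position is the start of the extent we're
 * searching for, but extent keys are indexed by (and compared against) their
 * end position - so search from the successor of pos to find the extent that
 * covers it:
 */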
static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
{
	struct bpos pos = iter->pos;

	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	    !bkey_eq(pos, POS_MAX))
		pos = bkey_successor(iter, pos);
	return pos;
}

static inline bool btree_path_pos_before_node(struct btree_path *path,
					      struct btree *b)
{
	return bpos_lt(path->pos, b->data->min_key);
}

static inline bool btree_path_pos_after_node(struct btree_path *path,
					     struct btree *b)
{
	return bpos_gt(path->pos, b->key.k.p);
}

static inline bool btree_path_pos_in_node(struct btree_path *path,
					  struct btree *b)
{
	return path->btree_id == b->c.btree_id &&
		!btree_path_pos_before_node(path, b) &&
		!btree_path_pos_after_node(path, b);
}

/* Btree iterator: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_cached(struct btree_trans *trans,
					  struct btree_path *path)
{
	struct bkey_cached *ck;
	bool locked = btree_node_locked(path, 0);

	if (!bch2_btree_node_relock(trans, path, 0))
		return;

	ck = (void *) path->l[0].b;
	BUG_ON(ck->key.btree_id != path->btree_id ||
	       !bkey_eq(ck->key.pos, path->pos));

	if (!locked)
		btree_node_unlock(trans, path, 0);
}

static void bch2_btree_path_verify_level(struct btree_trans *trans,
					 struct btree_path *path, unsigned level)
{
	struct btree_path_level *l;
	struct btree_node_iter tmp;
	bool locked;
	struct bkey_packed *p, *k;
	struct printbuf buf1 = PRINTBUF;
	struct printbuf buf2 = PRINTBUF;
	struct printbuf buf3 = PRINTBUF;
	const char *msg;

	if (!bch2_debug_check_iterators)
		return;

	l	= &path->l[level];
	tmp	= l->iter;
	locked	= btree_node_locked(path, level);

	if (path->cached) {
		if (!level)
			bch2_btree_path_verify_cached(trans, path);
		return;
	}

	if (!btree_path_node(path, level))
		return;

	if (!bch2_btree_node_relock_notrace(trans, path, level))
		return;

	BUG_ON(!btree_path_pos_in_node(path, l->b));

	bch2_btree_node_iter_verify(&l->iter, l->b);

	/*
	 * For interior nodes, the iterator will have skipped past deleted keys:
	 */
	p = level
		? bch2_btree_node_iter_prev(&tmp, l->b)
		: bch2_btree_node_iter_prev_all(&tmp, l->b);
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);

	if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
		msg = "before";
		goto err;
	}

	if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		msg = "after";
		goto err;
	}

	if (!locked)
		btree_node_unlock(trans, path, level);
	return;
err:
	bch2_bpos_to_text(&buf1, path->pos);

	if (p) {
		struct bkey uk = bkey_unpack_key(l->b, p);

		bch2_bkey_to_text(&buf2, &uk);
	} else {
		prt_printf(&buf2, "(none)");
	}

	if (k) {
		struct bkey uk = bkey_unpack_key(l->b, k);

		bch2_bkey_to_text(&buf3, &uk);
	} else {
		prt_printf(&buf3, "(none)");
	}

	panic("path should be %s key at level %u:\n"
	      "path pos %s\n"
	      "prev key %s\n"
	      "cur key %s\n",
	      msg, level, buf1.buf, buf2.buf, buf3.buf);
}

static void bch2_btree_path_verify(struct btree_trans *trans,
				   struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	unsigned i;

	EBUG_ON(path->btree_id >= BTREE_ID_NR);

	for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
		if (!path->l[i].b) {
			BUG_ON(!path->cached &&
			       bch2_btree_id_root(c, path->btree_id)->b->c.level > i);
			break;
		}

		bch2_btree_path_verify_level(trans, path, i);
	}

	bch2_btree_path_verify_locks(path);
}

void bch2_trans_verify_paths(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify(trans, path);
}

static void bch2_btree_iter_verify(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;

	BUG_ON(iter->btree_id >= BTREE_ID_NR);

	BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != btree_iter_path(trans, iter)->cached);

	BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));

	BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       !btree_type_has_snapshot_field(iter->btree_id));

	if (iter->update_path)
		bch2_btree_path_verify(trans, &trans->paths[iter->update_path]);
	bch2_btree_path_verify(trans, btree_iter_path(trans, iter));
}

static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
	BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
	       !iter->pos.snapshot);

	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       iter->pos.snapshot != iter->snapshot);

	BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) ||
	       bkey_gt(iter->pos, iter->k.p));
}

static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
{
	struct btree_trans *trans = iter->trans;
	struct btree_iter copy;
	struct bkey_s_c prev;
	int ret = 0;

	if (!bch2_debug_check_iterators)
		return 0;

	if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
		return 0;

	if (bkey_err(k) || !k.k)
		return 0;

	BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
					  iter->snapshot,
					  k.k->p.snapshot));

	bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
			     BTREE_ITER_NOPRESERVE|
			     BTREE_ITER_ALL_SNAPSHOTS);
	prev = bch2_btree_iter_prev(&copy);
	if (!prev.k)
		goto out;

	ret = bkey_err(prev);
	if (ret)
		goto out;

	if (bkey_eq(prev.k->p, k.k->p) &&
	    bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
				      prev.k->p.snapshot) > 0) {
		struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;

		bch2_bkey_to_text(&buf1, k.k);
		bch2_bkey_to_text(&buf2, prev.k);

		panic("iter snap %u\n"
		      "k %s\n"
		      "prev %s\n",
		      iter->snapshot,
		      buf1.buf, buf2.buf);
	}
out:
	bch2_trans_iter_exit(trans, &copy);
	return ret;
}

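/*
 * Assert that somewhere in the transaction we hold a should_be_locked path
 * with a lock covering @pos in btree @id - i.e. that it's safe to rely on a
 * previously read value at that position:
 */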
void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
			    struct bpos pos, bool key_cache)
{
	struct btree_path *path;
	struct trans_for_each_path_inorder_iter iter;
	struct printbuf buf = PRINTBUF;

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder(trans, path, iter) {
		int cmp = cmp_int(path->btree_id, id) ?:
			cmp_int(path->cached, key_cache);

		if (cmp > 0)
			break;
		if (cmp < 0)
			continue;

		if (!btree_node_locked(path, 0) ||
		    !path->should_be_locked)
			continue;

		if (!key_cache) {
			if (bkey_ge(pos, path->l[0].b->data->min_key) &&
			    bkey_le(pos, path->l[0].b->key.k.p))
				return;
		} else {
			if (bkey_eq(pos, path->pos))
				return;
		}
	}

	bch2_dump_trans_paths_updates(trans);
	bch2_bpos_to_text(&buf, pos);

	panic("not locked: %s %s%s\n",
	      bch2_btree_id_str(id), buf.buf,
	      key_cache ? " cached" : "");
}

#else

static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
						struct btree_path *path, unsigned l) {}
static inline void bch2_btree_path_verify(struct btree_trans *trans,
					  struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }

#endif

/* Btree path: fixups after btree updates */

static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
					struct btree *b,
					struct bset_tree *t,
					struct bkey_packed *k)
{
	struct btree_node_iter_set *set;

	btree_node_iter_for_each(iter, set)
		if (set->end == t->end_offset) {
			set->k = __btree_node_key_to_offset(b, k);
			bch2_btree_node_iter_sort(iter, b);
			return;
		}

	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
}

static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
					       struct btree *b,
					       struct bkey_packed *where)
{
	struct btree_path_level *l = &path->l[b->c.level];

	if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
		return;

	if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
		bch2_btree_node_iter_advance(&l->iter, l->b);
}

void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
				      struct btree *b,
				      struct bkey_packed *where)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path) {
		__bch2_btree_path_fix_key_modified(path, b, where);
		bch2_btree_path_verify_level(trans, path, b->c.level);
	}
}

static void __bch2_btree_node_iter_fix(struct btree_path *path,
				       struct btree *b,
				       struct btree_node_iter *node_iter,
				       struct bset_tree *t,
				       struct bkey_packed *where,
				       unsigned clobber_u64s,
				       unsigned new_u64s)
{
	const struct bkey_packed *end = btree_bkey_last(b, t);
	struct btree_node_iter_set *set;
	unsigned offset = __btree_node_key_to_offset(b, where);
	int shift = new_u64s - clobber_u64s;
	unsigned old_end = t->end_offset - shift;
	unsigned orig_iter_pos = node_iter->data[0].k;
	bool iter_current_key_modified =
		orig_iter_pos >= offset &&
		orig_iter_pos <= offset + clobber_u64s;

	btree_node_iter_for_each(node_iter, set)
		if (set->end == old_end)
			goto found;

	/* didn't find the bset in the iterator - might have to readd it: */
	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		bch2_btree_node_iter_push(node_iter, b, where, end);
		goto fixup_done;
	} else {
		/* Iterator is after key that changed */
		return;
	}
found:
	set->end = t->end_offset;

	/* Iterator hasn't gotten to the key that changed yet: */
	if (set->k < offset)
		return;

	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		set->k = offset;
	} else if (set->k < offset + clobber_u64s) {
		set->k = offset + new_u64s;
		if (set->k == set->end)
			bch2_btree_node_iter_set_drop(node_iter, set);
	} else {
		/* Iterator is after key that changed */
		set->k = (int) set->k + shift;
		return;
	}

	bch2_btree_node_iter_sort(node_iter, b);
fixup_done:
	if (node_iter->data[0].k != orig_iter_pos)
		iter_current_key_modified = true;

	/*
	 * When a new key is added, and the node iterator now points to that
	 * key, the iterator might have skipped past deleted keys that should
	 * come after the key the iterator now points to. We have to rewind to
	 * before those deleted keys - otherwise
	 * bch2_btree_node_iter_prev_all() breaks:
	 */
	if (!bch2_btree_node_iter_end(node_iter) &&
	    iter_current_key_modified &&
	    b->c.level) {
		struct bkey_packed *k, *k2, *p;

		k = bch2_btree_node_iter_peek_all(node_iter, b);

		for_each_bset(b, t) {
			bool set_pos = false;

			if (node_iter->data[0].end == t->end_offset)
				continue;

			k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);

			while ((p = bch2_bkey_prev_all(b, t, k2)) &&
			       bkey_iter_cmp(b, k, p) < 0) {
				k2 = p;
				set_pos = true;
			}

			if (set_pos)
				btree_node_iter_set_set_pos(node_iter,
							    b, t, k2);
		}
	}
}

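/*
 * Called after an insert or overwrite shifts keys within a bset in @b: fix up
 * the node iterators of every path pointing into @b so they still reference
 * the same logical position:
 */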
void bch2_btree_node_iter_fix(struct btree_trans *trans,
			      struct btree_path *path,
			      struct btree *b,
			      struct btree_node_iter *node_iter,
			      struct bkey_packed *where,
			      unsigned clobber_u64s,
			      unsigned new_u64s)
{
	struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
	struct btree_path *linked;

	if (node_iter != &path->l[b->c.level].iter) {
		__bch2_btree_node_iter_fix(path, b, node_iter, t,
					   where, clobber_u64s, new_u64s);

		if (bch2_debug_check_iterators)
			bch2_btree_node_iter_verify(node_iter, b);
	}

	trans_for_each_path_with_node(trans, b, linked) {
		__bch2_btree_node_iter_fix(linked, b,
					   &linked->l[b->c.level].iter, t,
					   where, clobber_u64s, new_u64s);
		bch2_btree_path_verify_level(trans, linked, b->c.level);
	}
}

/* Btree path level: pointer to a particular btree node and node iter */

static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
						  struct btree_path_level *l,
						  struct bkey *u,
						  struct bkey_packed *k)
{
	if (unlikely(!k)) {
		/*
		 * signal to bch2_btree_iter_peek_slot() that we're currently at
		 * a hole
		 */
		u->type = KEY_TYPE_deleted;
		return bkey_s_c_null;
	}

	return bkey_disassemble(l->b, k, u);
}

static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
							struct btree_path_level *l,
							struct bkey *u)
{
	return __btree_iter_unpack(c, l, u,
			bch2_btree_node_iter_peek_all(&l->iter, l->b));
}

static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_peek(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->key.k.p;
	trans->paths_sorted = false;
	bch2_btree_path_verify_level(trans, path, l - path->l);
	return k;
}

static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_prev(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->data->min_key;
	trans->paths_sorted = false;
	bch2_btree_path_verify_level(trans, path, l - path->l);
	return k;
}

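/*
 * Step the node iterator forward until it points at a key >= path->pos; if
 * that would take more than @max_advance steps, give up and return false so
 * the caller can reinitialize the iterator with a full search instead:
 */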
static inline bool btree_path_advance_to_pos(struct btree_path *path,
					     struct btree_path_level *l,
					     int max_advance)
{
	struct bkey_packed *k;
	int nr_advanced = 0;

	while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
	       bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		if (max_advance > 0 && nr_advanced >= max_advance)
			return false;

		bch2_btree_node_iter_advance(&l->iter, l->b);
		nr_advanced++;
	}

	return true;
}

static inline void __btree_path_level_init(struct btree_path *path,
					   unsigned level)
{
	struct btree_path_level *l = &path->l[level];

	bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

	/*
	 * Iterators to interior nodes should always be pointed at the first non
	 * whiteout:
	 */
	if (level)
		bch2_btree_node_iter_peek(&l->iter, l->b);
}

void bch2_btree_path_level_init(struct btree_trans *trans,
				struct btree_path *path,
				struct btree *b)
{
	BUG_ON(path->cached);

	EBUG_ON(!btree_path_pos_in_node(path, b));

	path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
	path->l[b->c.level].b = b;
	__btree_path_level_init(path, b->c.level);
}

/* Btree path: fixups after btree node updates: */

static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, struct btree *b)
{
	struct bch_fs *c = trans->c;

	trans_for_each_update(trans, i)
		if (!i->cached &&
		    i->level	== b->c.level &&
		    i->btree_id	== b->c.btree_id &&
		    bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
		    bpos_cmp(i->k->k.p, b->data->max_key) <= 0) {
			i->old_v = bch2_btree_path_peek_slot(trans->paths + i->path, &i->old_k).v;

			if (unlikely(trans->journal_replay_not_finished)) {
				struct bkey_i *j_k =
					bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
								    i->k->k.p);

				if (j_k) {
					i->old_k = j_k->k;
					i->old_v = &j_k->v;
				}
			}
		}
}

/*
 * A btree node is being replaced - update the iterator to point to the new
 * node:
 */
void bch2_trans_node_add(struct btree_trans *trans,
			 struct btree_path *path,
			 struct btree *b)
{
	struct btree_path *prev;

	BUG_ON(!btree_path_pos_in_node(path, b));

	while ((prev = prev_btree_path(trans, path)) &&
	       btree_path_pos_in_node(prev, b))
		path = prev;

	for (;
	     path && btree_path_pos_in_node(path, b);
	     path = next_btree_path(trans, path))
		if (path->uptodate == BTREE_ITER_UPTODATE && !path->cached) {
			enum btree_node_locked_type t =
				btree_lock_want(path, b->c.level);

			if (t != BTREE_NODE_UNLOCKED) {
				btree_node_unlock(trans, path, b->c.level);
				six_lock_increment(&b->c.lock, (enum six_lock_type) t);
				mark_btree_node_locked(trans, path, b->c.level, t);
			}

			bch2_btree_path_level_init(trans, path, b);
		}

	bch2_trans_revalidate_updates_in_node(trans, b);
}

/*
 * A btree node has been modified in such a way as to invalidate iterators - fix
 * them:
 */
void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path)
		__btree_path_level_init(path, b->c.level);

	bch2_trans_revalidate_updates_in_node(trans, b);
}

/* Btree path: traverse, set_pos: */

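/*
 * Lock the root node of the btree @path points at, re-reading the root
 * pointer and retrying if the root was replaced between reading the pointer
 * and taking the lock:
 */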
static inline int btree_path_lock_root(struct btree_trans *trans,
				       struct btree_path *path,
				       unsigned depth_want,
				       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b;
	enum six_lock_type lock_type;
	unsigned i;
	int ret;

	EBUG_ON(path->nodes_locked);

	while (1) {
		b = READ_ONCE(*rootp);
		path->level = READ_ONCE(b->c.level);

		if (unlikely(path->level < depth_want)) {
			/*
			 * the root is at a lower depth than the depth we want:
			 * got to the end of the btree, or we're walking nodes
			 * greater than some depth and there are no nodes >=
			 * that depth
			 */
			path->level = depth_want;
			for (i = path->level; i < BTREE_MAX_DEPTH; i++)
				path->l[i].b = NULL;
			return 1;
		}

		lock_type = __btree_lock_want(path, path->level);
		ret = btree_node_lock(trans, path, &b->c,
				      path->level, lock_type, trace_ip);
		if (unlikely(ret)) {
			if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
				continue;
			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				return ret;
			BUG();
		}

		if (likely(b == READ_ONCE(*rootp) &&
			   b->c.level == path->level &&
			   !race_fault())) {
			for (i = 0; i < path->level; i++)
				path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
			path->l[path->level].b = b;
			for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
				path->l[i].b = NULL;

			mark_btree_node_locked(trans, path, path->level,
					       (enum btree_node_locked_type) lock_type);
			bch2_btree_path_level_init(trans, path, b);
			return 0;
		}

		six_unlock_type(&b->c.lock, lock_type);
	}
}

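/*
 * Prefetch the next few child nodes we expect to traverse: the prefetch count
 * depends on the btree level, and is much higher before BCH_FS_started is
 * set, i.e. during recovery:
 */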
noinline
static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree_node_iter node_iter = l->iter;
	struct bkey_packed *k;
	struct bkey_buf tmp;
	unsigned nr = test_bit(BCH_FS_started, &c->flags)
		? (path->level > 1 ? 0 : 2)
		: (path->level > 1 ? 1 : 16);
	bool was_locked = btree_node_locked(path, path->level);
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	while (nr-- && !ret) {
		if (!bch2_btree_node_relock(trans, path, path->level))
			break;

		bch2_btree_node_iter_advance(&node_iter, l->b);
		k = bch2_btree_node_iter_peek(&node_iter, l->b);
		if (!k)
			break;

		bch2_bkey_buf_unpack(&tmp, c, l->b, k);
		ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
					       path->level - 1);
	}

	if (!was_locked)
		btree_node_unlock(trans, path, path->level);

	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
				 struct btree_and_journal_iter *jiter)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	struct bkey_buf tmp;
	unsigned nr = test_bit(BCH_FS_started, &c->flags)
		? (path->level > 1 ? 0 : 2)
		: (path->level > 1 ? 1 : 16);
	bool was_locked = btree_node_locked(path, path->level);
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	while (nr-- && !ret) {
		if (!bch2_btree_node_relock(trans, path, path->level))
			break;

		bch2_btree_and_journal_iter_advance(jiter);
		k = bch2_btree_and_journal_iter_peek(jiter);
		if (!k.k)
			break;

		bch2_bkey_buf_reassemble(&tmp, c, k);
		ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
					       path->level - 1);
	}

	if (!was_locked)
		btree_node_unlock(trans, path, path->level);

	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
					    struct btree_path *path,
					    unsigned plevel, struct btree *b)
{
	struct btree_path_level *l = &path->l[plevel];
	bool locked = btree_node_locked(path, plevel);
	struct bkey_packed *k;
	struct bch_btree_ptr_v2 *bp;

	if (!bch2_btree_node_relock(trans, path, plevel))
		return;

	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
	BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);

	bp = (void *) bkeyp_val(&l->b->format, k);
	bp->mem_ptr = (unsigned long)b;

	if (!locked)
		btree_node_unlock(trans, path, plevel);
}

static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
						     struct btree_path *path,
						     unsigned flags,
						     struct bkey_buf *out)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree_and_journal_iter jiter;
	struct bkey_s_c k;
	int ret = 0;

	__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);

	k = bch2_btree_and_journal_iter_peek(&jiter);

	bch2_bkey_buf_reassemble(out, c, k);

	if (flags & BTREE_ITER_PREFETCH)
		ret = btree_path_prefetch_j(trans, path, &jiter);

	bch2_btree_and_journal_iter_exit(&jiter);
	return ret;
}

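/*
 * Descend one level: use the node iterator at the current level to find the
 * key for the child node covering path->pos (consulting the journal as well
 * if journal replay isn't finished), then lock the child and initialize the
 * node iterator for the new level:
 */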
static __always_inline int btree_path_down(struct btree_trans *trans,
					   struct btree_path *path,
					   unsigned flags,
					   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree *b;
	unsigned level = path->level - 1;
	enum six_lock_type lock_type = __btree_lock_want(path, level);
	struct bkey_buf tmp;
	int ret;

	EBUG_ON(!btree_node_locked(path, path->level));

	bch2_bkey_buf_init(&tmp);

	if (unlikely(trans->journal_replay_not_finished)) {
		ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
		if (ret)
			goto err;
	} else {
		bch2_bkey_buf_unpack(&tmp, c, l->b,
				bch2_btree_node_iter_peek(&l->iter, l->b));

		if (flags & BTREE_ITER_PREFETCH) {
			ret = btree_path_prefetch(trans, path);
			if (ret)
				goto err;
		}
	}

	b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
	ret = PTR_ERR_OR_ZERO(b);
	if (unlikely(ret))
		goto err;

	if (likely(!trans->journal_replay_not_finished &&
		   tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
	    unlikely(b != btree_node_mem_ptr(tmp.k)))
		btree_node_mem_ptr_set(trans, path, level + 1, b);

	if (btree_node_read_locked(path, level + 1))
		btree_node_unlock(trans, path, level + 1);

	mark_btree_node_locked(trans, path, level,
			       (enum btree_node_locked_type) lock_type);
	path->level = level;
	bch2_btree_path_level_init(trans, path, b);

	bch2_btree_path_verify_locks(path);
err:
	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

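/*
 * Traverse every path in the transaction, in sorted (lock) order - the
 * fallback when traversing paths individually keeps failing with transaction
 * restarts:
 */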
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_path *path;
	unsigned long trace_ip = _RET_IP_;
	int i, ret = 0;

	if (trans->in_traverse_all)
		return -BCH_ERR_transaction_restart_in_traverse_all;

	trans->in_traverse_all = true;
retry_all:
	trans->restarted = 0;
	trans->last_restarted_ip = 0;

	trans_for_each_path(trans, path)
		path->should_be_locked = false;

	btree_trans_sort_paths(trans);

	bch2_trans_unlock(trans);
	cond_resched();

	if (unlikely(trans->memory_allocation_failure)) {
		struct closure cl;

		closure_init_stack(&cl);

		do {
			ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
			closure_sync(&cl);
		} while (ret);
	}

	/* Now, redo traversals in correct order: */
	i = 0;
	while (i < trans->nr_sorted) {
		btree_path_idx_t idx = trans->sorted[i];

		/*
		 * Traversing a path can cause another path to be added at about
		 * the same position:
		 */
		if (trans->paths[idx].uptodate) {
			__btree_path_get(&trans->paths[idx], false);
			ret = bch2_btree_path_traverse_one(trans, idx, 0, _THIS_IP_);
			__btree_path_put(&trans->paths[idx], false);

			if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
			    bch2_err_matches(ret, ENOMEM))
				goto retry_all;
			if (ret)
				goto err;
		} else {
			i++;
		}
	}

	/*
	 * We used to assert that all paths had been traversed here
	 * (path->uptodate < BTREE_ITER_NEED_TRAVERSE); however, since
	 * path->should_be_locked is not set yet, we might have unlocked and
	 * then failed to relock a path - that's fine.
	 */
err:
	bch2_btree_cache_cannibalize_unlock(trans);

	trans->in_traverse_all = false;

	trace_and_count(c, trans_traverse_all, trans, trace_ip);
	return ret;
}

static inline bool btree_path_check_pos_in_node(struct btree_path *path,
						unsigned l, int check_pos)
{
	if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
		return false;
	if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
		return false;
	return true;
}

static inline bool btree_path_good_node(struct btree_trans *trans,
					struct btree_path *path,
					unsigned l, int check_pos)
{
	return is_btree_node(path, l) &&
		bch2_btree_node_relock(trans, path, l) &&
		btree_path_check_pos_in_node(path, l, check_pos);
}

static void btree_path_set_level_down(struct btree_trans *trans,
				      struct btree_path *path,
				      unsigned new_level)
{
	unsigned l;

	path->level = new_level;

	for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
		if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
			btree_node_unlock(trans, path, l);

	btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
	bch2_btree_path_verify(trans, path);
}

static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
							 struct btree_path *path,
							 int check_pos)
{
	unsigned i, l = path->level;
again:
	while (btree_path_node(path, l) &&
	       !btree_path_good_node(trans, path, l, check_pos))
		__btree_path_set_level_up(trans, path, l++);

	/* If we need intent locks, take them too: */
	for (i = l + 1;
	     i < path->locks_want && btree_path_node(path, i);
	     i++)
		if (!bch2_btree_node_relock(trans, path, i)) {
			while (l <= i)
				__btree_path_set_level_up(trans, path, l++);
			goto again;
		}

	return l;
}

static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
						     struct btree_path *path,
						     int check_pos)
{
	return likely(btree_node_locked(path, path->level) &&
		      btree_path_check_pos_in_node(path, path->level, check_pos))
		? path->level
		: __btree_path_up_until_good_node(trans, path, check_pos);
}

/*
 * This is the main state machine for walking down the btree - walks down to a
 * specified depth
 *
 * Returns 0 on success, -EIO on error (error reading in a btree node).
 *
 * On error, caller (peek_node()/peek_key()) must return NULL; the error is
 * stashed in the iterator and returned from bch2_trans_exit().
 */
int bch2_btree_path_traverse_one(struct btree_trans *trans,
				 btree_path_idx_t path_idx,
				 unsigned flags,
				 unsigned long trace_ip)
{
	struct btree_path *path = &trans->paths[path_idx];
	unsigned depth_want = path->level;
	int ret = -((int) trans->restarted);

	if (unlikely(ret))
		goto out;

	if (unlikely(!trans->srcu_held))
		bch2_trans_srcu_lock(trans);

	/*
	 * Ensure we obey path->should_be_locked: if it's set, we can't unlock
	 * and re-traverse the path without a transaction restart:
	 */
	if (path->should_be_locked) {
		ret = bch2_btree_path_relock(trans, path, trace_ip);
		goto out;
	}

	if (path->cached) {
		ret = bch2_btree_path_traverse_cached(trans, path, flags);
		goto out;
	}

	path = &trans->paths[path_idx];

	if (unlikely(path->level >= BTREE_MAX_DEPTH))
		goto out;

	path->level = btree_path_up_until_good_node(trans, path, 0);

	EBUG_ON(btree_path_node(path, path->level) &&
		!btree_node_locked(path, path->level));

	/*
	 * Note: path->nodes[path->level] may be temporarily NULL here - that
	 * would indicate to other code that we got to the end of the btree,
	 * here it indicates that relocking the root failed - it's critical that
	 * btree_path_lock_root() comes next and that it can't fail
	 */
	while (path->level > depth_want) {
		ret = btree_path_node(path, path->level)
			? btree_path_down(trans, path, flags, trace_ip)
			: btree_path_lock_root(trans, path, depth_want, trace_ip);
		if (unlikely(ret)) {
			if (ret == 1) {
				/*
				 * No nodes at this level - got to the end of
				 * the btree:
				 */
				ret = 0;
				goto out;
			}

			__bch2_btree_path_unlock(trans, path);
			path->level = depth_want;
			path->l[path->level].b = ERR_PTR(ret);
			goto out;
		}
	}

	path->uptodate = BTREE_ITER_UPTODATE;
out:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
		panic("ret %s (%i) trans->restarted %s (%i)\n",
		      bch2_err_str(ret), ret,
		      bch2_err_str(trans->restarted), trans->restarted);
	bch2_btree_path_verify(trans, path);
	return ret;
}

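/*
 * Note: copying a path also copies its references to the btree node locks it
 * holds, so the six lock counts have to be bumped to match:
 */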
static inline void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
				   struct btree_path *src)
{
	unsigned i, offset = offsetof(struct btree_path, pos);

	memcpy((void *) dst + offset,
	       (void *) src + offset,
	       sizeof(struct btree_path) - offset);

	for (i = 0; i < BTREE_MAX_DEPTH; i++) {
		unsigned t = btree_node_locked_type(dst, i);

		if (t != BTREE_NODE_UNLOCKED)
			six_lock_increment(&dst->l[i].b->c.lock, t);
	}
}

static btree_path_idx_t btree_path_clone(struct btree_trans *trans, btree_path_idx_t src,
					 bool intent)
{
	btree_path_idx_t new = btree_path_alloc(trans, src);

	btree_path_copy(trans, trans->paths + new, trans->paths + src);
	__btree_path_get(trans->paths + new, intent);
	return new;
}

__flatten
btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *trans,
			btree_path_idx_t path, bool intent, unsigned long ip)
{
	__btree_path_put(trans->paths + path, intent);
	path = btree_path_clone(trans, path, intent);
	trans->paths[path].preserve = false;
	return path;
}

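/*
 * Reposition a path: walk up until we're at a node that covers the new
 * position (keeping the locks we already hold), fix up the node iterator, and
 * mark the path as needing traversal if we had to walk up past our original
 * level:
 */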
btree_path_idx_t __must_check
__bch2_btree_path_set_pos(struct btree_trans *trans,
			  btree_path_idx_t path_idx, struct bpos new_pos,
			  bool intent, unsigned long ip)
{
	int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos);

	bch2_trans_verify_not_in_restart(trans);
	EBUG_ON(!trans->paths[path_idx].ref);

	path_idx = bch2_btree_path_make_mut(trans, path_idx, intent, ip);

	struct btree_path *path = trans->paths + path_idx;
	path->pos		= new_pos;
	trans->paths_sorted	= false;

	if (unlikely(path->cached)) {
		btree_node_unlock(trans, path, 0);
		path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		goto out;
	}

	unsigned level = btree_path_up_until_good_node(trans, path, cmp);

	if (btree_path_node(path, level)) {
		struct btree_path_level *l = &path->l[level];

		BUG_ON(!btree_node_locked(path, level));
		/*
		 * We might have to skip over many keys, or just a few: try
		 * advancing the node iterator, and if we have to skip over too
		 * many keys just reinit it (or if we're rewinding, since that
		 * is expensive).
		 */
		if (cmp < 0 ||
		    !btree_path_advance_to_pos(path, l, 8))
			bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

		/*
		 * Iterators to interior nodes should always be pointed at the first non
		 * whiteout:
		 */
		if (unlikely(level))
			bch2_btree_node_iter_peek(&l->iter, l->b);
	}

	if (unlikely(level != path->level)) {
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		__bch2_btree_path_unlock(trans, path);
	}
out:
	bch2_btree_path_verify(trans, path);
	return path_idx;
}

/* Btree path: main interface: */

static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
{
	struct btree_path *sib;

	sib = prev_btree_path(trans, path);
	if (sib && !btree_path_cmp(sib, path))
		return sib;

	sib = next_btree_path(trans, path);
	if (sib && !btree_path_cmp(sib, path))
		return sib;

	return NULL;
}

static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
{
	struct btree_path *sib;

	sib = prev_btree_path(trans, path);
	if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
		return sib;

	sib = next_btree_path(trans, path);
	if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
		return sib;

	return NULL;
}

static inline void __bch2_path_free(struct btree_trans *trans, btree_path_idx_t path)
{
	__bch2_btree_path_unlock(trans, trans->paths + path);
	btree_path_list_remove(trans, trans->paths + path);
	__clear_bit(path, trans->paths_allocated);
}

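/*
 * Drop a reference: if it was the last reference, free the path - but only if
 * a duplicate path can inherit its preserve/should_be_locked state, or it
 * doesn't need preserving at all:
 */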
void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool intent)
{
	struct btree_path *path = trans->paths + path_idx, *dup;

	EBUG_ON(path->idx != path_idx);

	if (!__btree_path_put(path, intent))
		return;

	dup = path->preserve
		? have_path_at_pos(trans, path)
		: have_node_at_pos(trans, path);

	if (!dup && !(!path->preserve && !is_btree_node(path, path->level)))
		return;

	if (path->should_be_locked &&
	    !trans->restarted &&
	    (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
		return;

	if (dup) {
		dup->preserve		|= path->preserve;
		dup->should_be_locked	|= path->should_be_locked;
	}

	__bch2_path_free(trans, path_idx);
}

static void bch2_path_put_nokeep(struct btree_trans *trans, btree_path_idx_t path,
				 bool intent)
{
	EBUG_ON(trans->paths[path].idx != path);

	if (!__btree_path_put(trans->paths + path, intent))
		return;

	__bch2_path_free(trans, path);
}

void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
{
	panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
	      trans->restart_count, restart_count,
	      (void *) trans->last_begin_ip);
}

void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
{
	panic("in transaction restart: %s, last restarted by %pS\n",
	      bch2_err_str(trans->restarted),
	      (void *) trans->last_restarted_ip);
}

noinline __cold
void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
{
	prt_printf(buf, "transaction updates for %s journal seq %llu",
		   trans->fn, trans->journal_res.seq);
	prt_newline(buf);
	printbuf_indent_add(buf, 2);

	trans_for_each_update(trans, i) {
		struct bkey_s_c old = { &i->old_k, i->old_v };

		prt_printf(buf, "update: btree=%s cached=%u %pS",
			   bch2_btree_id_str(i->btree_id),
			   i->cached,
			   (void *) i->ip_allocated);
		prt_newline(buf);

		prt_printf(buf, " old ");
		bch2_bkey_val_to_text(buf, trans->c, old);
		prt_newline(buf);

		prt_printf(buf, " new ");
		bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
		prt_newline(buf);
	}

	for (struct jset_entry *e = trans->journal_entries;
	     e != btree_trans_journal_entries_top(trans);
	     e = vstruct_next(e))
		bch2_journal_entry_to_text(buf, trans->c, e);

	printbuf_indent_sub(buf, 2);
}

noinline __cold
void bch2_dump_trans_updates(struct btree_trans *trans)
{
	struct printbuf buf = PRINTBUF;

	bch2_trans_updates_to_text(&buf, trans);
	bch2_print_string_as_lines(KERN_ERR, buf.buf);
	printbuf_exit(&buf);
}

static void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
{
	struct btree_path *path = trans->paths + path_idx;

	prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ",
		   path_idx, path->ref, path->intent_ref,
		   path->preserve ? 'P' : ' ',
		   path->should_be_locked ? 'S' : ' ',
		   bch2_btree_id_str(path->btree_id),
		   path->level);
	bch2_bpos_to_text(out, path->pos);

	prt_printf(out, " locks %u", path->nodes_locked);
#ifdef TRACK_PATH_ALLOCATED
	prt_printf(out, " %pS", (void *) path->ip_allocated);
#endif
	prt_newline(out);
}

static noinline __cold
void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans,
				bool nosort)
{
	struct trans_for_each_path_inorder_iter iter;

	if (!nosort)
		btree_trans_sort_paths(trans);

	trans_for_each_path_idx_inorder(trans, iter)
		bch2_btree_path_to_text(out, trans, iter.path_idx);
}

noinline __cold
void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
{
	__bch2_trans_paths_to_text(out, trans, false);
}

static noinline __cold
void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
{
	struct printbuf buf = PRINTBUF;

	__bch2_trans_paths_to_text(&buf, trans, nosort);
	bch2_trans_updates_to_text(&buf, trans);

	bch2_print_string_as_lines(KERN_ERR, buf.buf);
	printbuf_exit(&buf);
}

noinline __cold
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
{
	__bch2_dump_trans_paths_updates(trans, false);
}

noinline __cold
static void bch2_trans_update_max_paths(struct btree_trans *trans)
{
	struct btree_transaction_stats *s = btree_trans_stats(trans);
	struct printbuf buf = PRINTBUF;
	size_t nr = bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX);

	if (!s)
		return;

	bch2_trans_paths_to_text(&buf, trans);

	if (!buf.allocation_failure) {
		mutex_lock(&s->lock);
		if (nr > s->nr_max_paths) {
			s->nr_max_paths = nr;
			swap(s->max_paths_text, buf.buf);
		}
		mutex_unlock(&s->lock);
	}

	printbuf_exit(&buf);

	trans->nr_max_paths = nr;
}

noinline __cold
int __bch2_btree_trans_too_many_iters(struct btree_trans *trans)
{
	if (trace_trans_restart_too_many_iters_enabled()) {
		struct printbuf buf = PRINTBUF;

		bch2_trans_paths_to_text(&buf, trans);
		trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf);
		printbuf_exit(&buf);
	}

	count_event(trans->c, trans_restart_too_many_iters);

	return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
}

static noinline void btree_path_overflow(struct btree_trans *trans)
{
	bch2_dump_trans_paths_updates(trans);
	panic("trans path overflow\n");
}

static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans,
						btree_path_idx_t pos)
{
	btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, BTREE_ITER_MAX);

	if (unlikely(idx == BTREE_ITER_MAX))
		btree_path_overflow(trans);

	BUG_ON(idx > BTREE_ITER_MAX);

	/*
	 * Do this before marking the new path as allocated, since it won't be
	 * initialized yet:
	 */
	if (unlikely(idx > trans->nr_max_paths))
		bch2_trans_update_max_paths(trans);

	__set_bit(idx, trans->paths_allocated);

	struct btree_path *path = &trans->paths[idx];
	path->idx		= idx;
	path->ref		= 0;
	path->intent_ref	= 0;
	path->nodes_locked	= 0;

	btree_path_list_add(trans, pos, idx);
	trans->paths_sorted = false;
	return idx;
}

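/*
 * Get a path pointing at @pos in @btree_id: an existing path is reused and
 * repositioned if it matches on btree, cached-ness and level, otherwise a new
 * one is allocated. The caller owns a reference, released with
 * bch2_path_put():
 */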
btree_path_idx_t bch2_path_get(struct btree_trans *trans,
			       enum btree_id btree_id, struct bpos pos,
			       unsigned locks_want, unsigned level,
			       unsigned flags, unsigned long ip)
{
	struct btree_path *path;
	bool cached = flags & BTREE_ITER_CACHED;
	bool intent = flags & BTREE_ITER_INTENT;
	struct trans_for_each_path_inorder_iter iter;
	btree_path_idx_t path_pos = 0;

	bch2_trans_verify_not_in_restart(trans);
	bch2_trans_verify_locks(trans);

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder(trans, path, iter) {
		if (__btree_path_cmp(path,
				     btree_id,
				     cached,
				     pos,
				     level) > 0)
			break;

		path_pos = iter.path_idx;
	}

	if (path_pos &&
	    trans->paths[path_pos].cached	== cached &&
	    trans->paths[path_pos].btree_id	== btree_id &&
	    trans->paths[path_pos].level	== level) {
		__btree_path_get(trans->paths + path_pos, intent);
		path = trans->paths + bch2_btree_path_set_pos(trans,
						path_pos, pos, intent, ip);
	} else {
		path = trans->paths + btree_path_alloc(trans, path_pos);

		__btree_path_get(path, intent);
		path->pos			= pos;
		path->btree_id			= btree_id;
		path->cached			= cached;
		path->uptodate			= BTREE_ITER_NEED_TRAVERSE;
		path->should_be_locked		= false;
		path->level			= level;
		path->locks_want		= locks_want;
		path->nodes_locked		= 0;
		for (unsigned i = 0; i < ARRAY_SIZE(path->l); i++)
			path->l[i].b		= ERR_PTR(-BCH_ERR_no_btree_node_init);
#ifdef TRACK_PATH_ALLOCATED
		path->ip_allocated		= ip;
#endif
		trans->paths_sorted		= false;
	}

	if (!(flags & BTREE_ITER_NOPRESERVE))
		path->preserve = true;

	if (path->intent_ref)
		locks_want = max(locks_want, level + 1);

	/*
	 * If the path has locks_want greater than requested, we don't downgrade
	 * it here - on transaction restart because btree node split needs to
	 * upgrade locks, we might be putting/getting the iterator again.
	 * Downgrading iterators only happens via bch2_trans_downgrade(), after
	 * a successful transaction commit.
	 */

	locks_want = min(locks_want, BTREE_MAX_DEPTH);
	if (locks_want > path->locks_want)
		bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);

	return path->idx;
}

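/*
 * Return the key the path currently points at; if the path is at a hole, a
 * deleted key positioned at path->pos is synthesized in *u so callers always
 * get a valid struct bkey back:
 */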
struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
{
	struct btree_path_level *l = path_l(path);
	struct bkey_packed *_k;
	struct bkey_s_c k;

	if (unlikely(!l->b))
		return bkey_s_c_null;

	EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
	EBUG_ON(!btree_node_locked(path, path->level));

	if (!path->cached) {
		_k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
		k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;

		EBUG_ON(k.k && bkey_deleted(k.k) && bpos_eq(k.k->p, path->pos));

		if (!k.k || !bpos_eq(path->pos, k.k->p))
			goto hole;
	} else {
		struct bkey_cached *ck = (void *) path->l[0].b;

		EBUG_ON(ck &&
			(path->btree_id != ck->key.btree_id ||
			 !bkey_eq(path->pos, ck->key.pos)));
		if (!ck || !ck->valid)
			return bkey_s_c_null;

		*u = ck->k->k;
		k = bkey_i_to_s_c(ck->k);
	}

	return k;
hole:
	bkey_init(u);
	u->p = path->pos;
	return (struct bkey_s_c) { u, NULL };
}

/* Btree iterators: */

int __must_check
__bch2_btree_iter_traverse(struct btree_iter *iter)
{
	return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
}

int __must_check
bch2_btree_iter_traverse(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	int ret;

	iter->path = bch2_btree_path_set_pos(trans, iter->path,
					btree_iter_search_key(iter),
					iter->flags & BTREE_ITER_INTENT,
					btree_iter_ip_allocated(iter));

	ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
	if (ret)
		return ret;

	btree_path_set_should_be_locked(trans->paths + iter->path);
	return 0;
}

/* Iterate across nodes (leaf and interior nodes) */

struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	struct btree *b = NULL;
	int ret;

	EBUG_ON(trans->paths[iter->path].cached);
	bch2_btree_iter_verify(iter);

	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
	if (ret)
		goto err;

	struct btree_path *path = btree_iter_path(trans, iter);
	b = btree_path_node(path, path->level);
	if (!b)
		goto out;

	BUG_ON(bpos_lt(b->key.k.p, iter->pos));

	bkey_init(&iter->k);
	iter->k.p = iter->pos = b->key.k.p;

	iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
					iter->flags & BTREE_ITER_INTENT,
					btree_iter_ip_allocated(iter));
	btree_path_set_should_be_locked(btree_iter_path(trans, iter));
out:
	bch2_btree_iter_verify_entry_exit(iter);
	bch2_btree_iter_verify(iter);

	return b;
err:
	b = ERR_PTR(ret);
	goto out;
}

struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
{
	struct btree *b;

	while (b = bch2_btree_iter_peek_node(iter),
	       bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
		bch2_trans_begin(iter->trans);

	return b;
}

1742
2dac0eae 1743struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
1c6fdbd8 1744{
67e0dd8f 1745 struct btree_trans *trans = iter->trans;
f21566f1 1746 struct btree *b = NULL;
1c6fdbd8
KO
1747 int ret;
1748
07f383c7 1749 EBUG_ON(trans->paths[iter->path].cached);
12344c7c 1750 bch2_trans_verify_not_in_restart(trans);
7e1a3aa9 1751 bch2_btree_iter_verify(iter);
1c6fdbd8 1752
07f383c7
KO
1753 struct btree_path *path = btree_iter_path(trans, iter);
1754
979735df 1755 /* already at end? */
67e0dd8f 1756 if (!btree_path_node(path, path->level))
979735df 1757 return NULL;
1c6fdbd8 1758
979735df
KO
1759 /* got to end? */
1760 if (!btree_path_node(path, path->level + 1)) {
8bfe14e8 1761 btree_path_set_level_up(trans, path);
979735df
KO
1762 return NULL;
1763 }
1c6fdbd8 1764
979735df 1765 if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
8bfe14e8 1766 __bch2_btree_path_unlock(trans, path);
315c9ba6
KO
1767 path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
1768 path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
7071878b 1769 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
674cfc26 1770 trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path);
549d173c 1771 ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
d355c6f4 1772 goto err;
979735df 1773 }
1c6fdbd8 1774
979735df 1775 b = btree_path_node(path, path->level + 1);
1c6fdbd8 1776
e88a75eb 1777 if (bpos_eq(iter->pos, b->key.k.p)) {
315c9ba6 1778 __btree_path_set_level_up(trans, path, path->level++);
979735df 1779 } else {
1c6fdbd8
KO
1780 /*
1781 * Haven't gotten to the end of the parent node: go back down to
1782 * the next child node
1783 */
07f383c7
KO
1784 iter->path = bch2_btree_path_set_pos(trans, iter->path,
1785 bpos_successor(iter->pos),
1786 iter->flags & BTREE_ITER_INTENT,
1787 btree_iter_ip_allocated(iter));
1c6fdbd8 1788
07f383c7 1789 path = btree_iter_path(trans, iter);
c4bce586 1790 btree_path_set_level_down(trans, path, iter->min_depth);
345ca825 1791
07f383c7 1792 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
d355c6f4
KO
1793 if (ret)
1794 goto err;
1c6fdbd8 1795
07f383c7 1796 path = btree_iter_path(trans, iter);
67e0dd8f 1797 b = path->l[path->level].b;
1c6fdbd8
KO
1798 }
1799
f21566f1 1800 bkey_init(&iter->k);
67e0dd8f 1801 iter->k.p = iter->pos = b->key.k.p;
502027a8 1802
07f383c7 1803 iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
ee2c6ea7
KO
1804 iter->flags & BTREE_ITER_INTENT,
1805 btree_iter_ip_allocated(iter));
07f383c7
KO
1806 btree_path_set_should_be_locked(btree_iter_path(trans, iter));
1807 EBUG_ON(btree_iter_path(trans, iter)->uptodate);
f21566f1
KO
1808out:
1809 bch2_btree_iter_verify_entry_exit(iter);
1810 bch2_btree_iter_verify(iter);
2dac0eae 1811
1c6fdbd8 1812 return b;
d355c6f4
KO
1813err:
1814 b = ERR_PTR(ret);
1815 goto out;
1c6fdbd8
KO
1816}
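
/*
 * [Editor's example - not part of the original file] A minimal sketch of
 * walking every node in a btree with the node iterators above. The btree id
 * and the empty loop body are hypothetical; a production loop must also
 * handle BCH_ERR_transaction_restart returned by bch2_btree_iter_next_node()
 * (the in-tree for_each_btree_node() macros take care of this).
 */
static void __maybe_unused example_walk_btree_nodes(struct btree_trans *trans)
{
	struct btree_iter iter;
	struct btree *b;

	bch2_trans_node_iter_init(trans, &iter, BTREE_ID_extents, POS_MIN,
				  0, 0, 0);

	for (b = bch2_btree_iter_peek_node_and_restart(&iter);
	     b != NULL && !IS_ERR(b);
	     b = bch2_btree_iter_next_node(&iter)) {
		/* inspect b->key, b->level, etc. here */
	}

	bch2_trans_iter_exit(trans, &iter);
}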
1817
1818/* Iterate across keys (in leaf nodes only) */
1819
e0ba3b64 1820inline bool bch2_btree_iter_advance(struct btree_iter *iter)
434094be 1821{
e56978c8
KO
1822 struct bpos pos = iter->k.p;
1823 bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
1824 ? bpos_eq(pos, SPOS_MAX)
1825 : bkey_eq(pos, SPOS_MAX));
3d495595 1826
e56978c8
KO
1827 if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
1828 pos = bkey_successor(iter, pos);
1829 bch2_btree_iter_set_pos(iter, pos);
1830 return ret;
3d495595
KO
1831}
1832
e0ba3b64 1833inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
3d495595
KO
1834{
1835 struct bpos pos = bkey_start_pos(&iter->k);
e88a75eb
KO
1836 bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
1837 ? bpos_eq(pos, POS_MIN)
1838 : bkey_eq(pos, POS_MIN));
3d495595 1839
7e1a3aa9 1840 if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
e751c01a 1841 pos = bkey_predecessor(iter, pos);
3d495595 1842 bch2_btree_iter_set_pos(iter, pos);
7e1a3aa9 1843 return ret;
434094be
KO
1844}
1845
3763cb95 1846static noinline
67e0dd8f 1847struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
818664f5 1848{
07f383c7 1849 struct btree_trans *trans = iter->trans;
5288e66a 1850 struct bkey_i *ret = NULL;
818664f5 1851
07f383c7 1852 trans_for_each_update(trans, i) {
5288e66a
KO
1853 if (i->btree_id < iter->btree_id)
1854 continue;
1855 if (i->btree_id > iter->btree_id)
818664f5 1856 break;
07f383c7 1857 if (bpos_lt(i->k->k.p, btree_iter_path(trans, iter)->pos))
5288e66a 1858 continue;
12ce5b7d
KO
1859 if (i->key_cache_already_flushed)
1860 continue;
e88a75eb 1861 if (!ret || bpos_lt(i->k->k.p, ret->k.p))
5288e66a
KO
1862 ret = i->k;
1863 }
818664f5 1864
5288e66a
KO
1865 return ret;
1866}
1867
3763cb95
KO
1868static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
1869{
1870 return iter->flags & BTREE_ITER_WITH_UPDATES
1871 ? __bch2_btree_trans_peek_updates(iter)
1872 : NULL;
1873}
1874
73bd774d
KO
1875static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
1876 struct btree_iter *iter,
1877 struct bpos end_pos)
30525f68 1878{
07f383c7
KO
1879 struct btree_path *path = btree_iter_path(trans, iter);
1880
ad9c7992 1881 return bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
07f383c7
KO
1882 path->level,
1883 path->pos,
ad9c7992
KO
1884 end_pos,
1885 &iter->journal_idx);
30525f68
KO
1886}
1887
5222a460
KO
1888static noinline
1889struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
1890 struct btree_iter *iter)
1891{
07f383c7
KO
1892 struct btree_path *path = btree_iter_path(trans, iter);
1893 struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos);
5222a460 1894
5650bb46 1895 if (k) {
5222a460
KO
1896 iter->k = k->k;
1897 return bkey_i_to_s_c(k);
1898 } else {
1899 return bkey_s_c_null;
1900 }
1901}
1902
1903static noinline
1904struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
1905 struct btree_iter *iter,
1906 struct bkey_s_c k)
1907{
07f383c7 1908 struct btree_path *path = btree_iter_path(trans, iter);
5222a460 1909 struct bkey_i *next_journal =
30525f68 1910 bch2_btree_journal_peek(trans, iter,
07f383c7 1911 k.k ? k.k->p : path_l(path)->b->key.k.p);
5222a460 1912
5650bb46 1913 if (next_journal) {
5222a460
KO
1914 iter->k = next_journal->k;
1915 k = bkey_i_to_s_c(next_journal);
1916 }
1917
1918 return k;
1919}
1920
f7b6ca23
KO
1921/*
1922 * Checks btree key cache for key at iter->pos and returns it if present, or
1923 * bkey_s_c_null:
1924 */
1925static noinline
dcced069 1926struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
f7b6ca23
KO
1927{
1928 struct btree_trans *trans = iter->trans;
1929 struct bch_fs *c = trans->c;
1930 struct bkey u;
52bf51b9 1931 struct bkey_s_c k;
f7b6ca23
KO
1932 int ret;
1933
1617d56d
KO
1934 if ((iter->flags & BTREE_ITER_KEY_CACHE_FILL) &&
1935 bpos_eq(iter->pos, pos))
1936 return bkey_s_c_null;
1937
f7b6ca23
KO
1938 if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
1939 return bkey_s_c_null;
1940
1941 if (!iter->key_cache_path)
07f383c7 1942 iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
f7b6ca23 1943 iter->flags & BTREE_ITER_INTENT, 0,
087e53c2 1944 iter->flags|BTREE_ITER_CACHED|
ee2c6ea7
KO
1945 BTREE_ITER_CACHED_NOFILL,
1946 _THIS_IP_);
f7b6ca23 1947
07f383c7 1948 iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
ee2c6ea7
KO
1949 iter->flags & BTREE_ITER_INTENT,
1950 btree_iter_ip_allocated(iter));
f7b6ca23 1951
07f383c7 1952 ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
dcced069 1953 iter->flags|BTREE_ITER_CACHED) ?:
07f383c7 1954 bch2_btree_path_relock(trans, btree_iter_path(trans, iter), _THIS_IP_);
f7b6ca23
KO
1955 if (unlikely(ret))
1956 return bkey_s_c_err(ret);
1957
07f383c7 1958 btree_path_set_should_be_locked(trans->paths + iter->key_cache_path);
f7b6ca23 1959
07f383c7 1960 k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u);
52bf51b9
KO
1961 if (k.k && !bkey_err(k)) {
1962 iter->k = u;
1963 k.k = &iter->k;
1964 }
1965 return k;
f7b6ca23
KO
1966}
1967
a1e82d35 1968static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
1c6fdbd8 1969{
67e0dd8f 1970 struct btree_trans *trans = iter->trans;
360746bf 1971 struct bkey_i *next_update;
f7b6ca23 1972 struct bkey_s_c k, k2;
8e432d98 1973 int ret;
1c6fdbd8 1974
07f383c7 1975 EBUG_ON(btree_iter_path(trans, iter)->cached);
7e1a3aa9 1976 bch2_btree_iter_verify(iter);
1c6fdbd8 1977
1c6fdbd8 1978 while (1) {
969576ec
KO
1979 struct btree_path_level *l;
1980
07f383c7 1981 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
ee2c6ea7
KO
1982 iter->flags & BTREE_ITER_INTENT,
1983 btree_iter_ip_allocated(iter));
c8476a4e 1984
07f383c7 1985 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
e6e024e9
KO
1986 if (unlikely(ret)) {
1987 /* ensure that iter->k is consistent with iter->pos: */
1988 bch2_btree_iter_set_pos(iter, iter->pos);
1989 k = bkey_s_c_err(ret);
1990 goto out;
1991 }
1c6fdbd8 1992
07f383c7
KO
1993 struct btree_path *path = btree_iter_path(trans, iter);
1994 l = path_l(path);
969576ec
KO
1995
1996 if (unlikely(!l->b)) {
1997 /* No btree nodes at requested level: */
1998 bch2_btree_iter_set_pos(iter, SPOS_MAX);
1999 k = bkey_s_c_null;
2000 goto out;
2001 }
2002
07f383c7 2003 btree_path_set_should_be_locked(path);
f7b6ca23 2004
969576ec 2005 k = btree_path_level_peek_all(trans->c, l, &iter->k);
e6e024e9 2006
f7b6ca23
KO
2007 if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
2008 k.k &&
2009 (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
8192f8a5
KO
2010 k = k2;
2011 ret = bkey_err(k);
f7b6ca23 2012 if (ret) {
f7b6ca23
KO
2013 bch2_btree_iter_set_pos(iter, iter->pos);
2014 goto out;
2015 }
f7b6ca23
KO
2016 }
2017
5222a460
KO
2018 if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
2019 k = btree_trans_peek_journal(trans, iter, k);
2020
2021 next_update = btree_trans_peek_updates(iter);
e6e024e9 2022
818664f5 2023 if (next_update &&
e88a75eb
KO
2024 bpos_le(next_update->k.p,
2025 k.k ? k.k->p : l->b->key.k.p)) {
5288e66a 2026 iter->k = next_update->k;
818664f5 2027 k = bkey_i_to_s_c(next_update);
5288e66a 2028 }
818664f5 2029
5222a460
KO
2030 if (k.k && bkey_deleted(k.k)) {
2031 /*
2032 * If we've got a whiteout, and it's after the search
2033 * key, advance the search key to the whiteout instead
2034 * of just after the whiteout - it might be a btree
2035 * whiteout, with a real key at the same position, since
2036	 * in the btree, deleted keys sort before non-deleted keys.
2037 */
e88a75eb 2038 search_key = !bpos_eq(search_key, k.k->p)
5222a460
KO
2039 ? k.k->p
2040 : bpos_successor(k.k->p);
2041 continue;
2042 }
2043
818664f5 2044 if (likely(k.k)) {
c075ff70 2045 break;
e88a75eb 2046 } else if (likely(!bpos_eq(l->b->key.k.p, SPOS_MAX))) {
e6e024e9 2047 /* Advance to next leaf node: */
969576ec 2048 search_key = bpos_successor(l->b->key.k.p);
e6e024e9
KO
2049 } else {
2050 /* End of btree: */
c8476a4e
KO
2051 bch2_btree_iter_set_pos(iter, SPOS_MAX);
2052 k = bkey_s_c_null;
2053 goto out;
2054 }
1c6fdbd8 2055 }
a1e82d35 2056out:
a1e82d35
KO
2057 bch2_btree_iter_verify(iter);
2058
2059 return k;
2060}
2061
2062/**
96dea3d5
KO
2063 * bch2_btree_iter_peek_upto() - returns first key greater than or equal to
2064 * iterator's current position
2065 * @iter: iterator to peek from
2066 * @end: search limit: returns keys less than or equal to @end
2067 *
2068 * Returns: key if found, or an error extractable with bkey_err().
a1e82d35 2069 */
85d8cf16 2070struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
a1e82d35
KO
2071{
2072 struct btree_trans *trans = iter->trans;
2073 struct bpos search_key = btree_iter_search_key(iter);
2074 struct bkey_s_c k;
85d8cf16 2075 struct bpos iter_pos;
a1e82d35
KO
2076 int ret;
2077
c72f687a 2078 EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX));
b0babf2a 2079
1f2d9192 2080 if (iter->update_path) {
07f383c7 2081 bch2_path_put_nokeep(trans, iter->update_path,
7dcbdbd8 2082 iter->flags & BTREE_ITER_INTENT);
07f383c7 2083 iter->update_path = 0;
1f2d9192
KO
2084 }
2085
a1e82d35
KO
2086 bch2_btree_iter_verify_entry_exit(iter);
2087
2088 while (1) {
2089 k = __bch2_btree_iter_peek(iter, search_key);
c72f687a
KO
2090 if (unlikely(!k.k))
2091 goto end;
2092 if (unlikely(bkey_err(k)))
fd211bc7 2093 goto out_no_locked;
a1e82d35 2094
85d8cf16 2095 /*
fa014953
KO
2096 * We need to check against @end before FILTER_SNAPSHOTS because
2097 * if we get to a different inode that requested we might be
2098 * seeing keys for a different snapshot tree that will all be
2099 * filtered out.
2100 *
2101 * But we can't do the full check here, because bkey_start_pos()
2102 * isn't monotonically increasing before FILTER_SNAPSHOTS, and
2103 * that's what we check against in extents mode:
85d8cf16 2104 */
fa014953 2105 if (k.k->p.inode > end.inode)
c72f687a 2106 goto end;
85d8cf16 2107
1f2d9192 2108 if (iter->update_path &&
07f383c7
KO
2109 !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) {
2110 bch2_path_put_nokeep(trans, iter->update_path,
7dcbdbd8 2111 iter->flags & BTREE_ITER_INTENT);
07f383c7 2112 iter->update_path = 0;
1f2d9192
KO
2113 }
2114
2115 if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
2116 (iter->flags & BTREE_ITER_INTENT) &&
2117 !(iter->flags & BTREE_ITER_IS_EXTENTS) &&
2118 !iter->update_path) {
2119 struct bpos pos = k.k->p;
2120
2121 if (pos.snapshot < iter->snapshot) {
2122 search_key = bpos_successor(k.k->p);
2123 continue;
2124 }
2125
2126 pos.snapshot = iter->snapshot;
2127
2128 /*
2129 * advance, same as on exit for iter->path, but only up
2130 * to snapshot
2131 */
07f383c7 2132 __btree_path_get(trans->paths + iter->path, iter->flags & BTREE_ITER_INTENT);
1f2d9192
KO
2133 iter->update_path = iter->path;
2134
07f383c7
KO
2135 iter->update_path = bch2_btree_path_set_pos(trans,
2136 iter->update_path, pos,
ee2c6ea7
KO
2137 iter->flags & BTREE_ITER_INTENT,
2138 _THIS_IP_);
07f383c7 2139 ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags);
ef073286
KO
2140 if (unlikely(ret)) {
2141 k = bkey_s_c_err(ret);
2142 goto out_no_locked;
2143 }
1f2d9192
KO
2144 }
2145
a1e82d35
KO
2146 /*
2147 * We can never have a key in a leaf node at POS_MAX, so
2148 * we don't have to check these successor() calls:
2149 */
2150 if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
2151 !bch2_snapshot_is_ancestor(trans->c,
2152 iter->snapshot,
2153 k.k->p.snapshot)) {
2154 search_key = bpos_successor(k.k->p);
2155 continue;
2156 }
2157
2158 if (bkey_whiteout(k.k) &&
2159 !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
2160 search_key = bkey_successor(iter, k.k->p);
2161 continue;
2162 }
2163
fa014953
KO
2164 /*
2165	 * iter->pos should be monotonically increasing, and always be
2166 * equal to the key we just returned - except extents can
2167 * straddle iter->pos:
2168 */
2169 if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
2170 iter_pos = k.k->p;
2171 else
2172 iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
2173
2174 if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
2175 ? bkey_gt(iter_pos, end)
2176 : bkey_ge(iter_pos, end)))
2177 goto end;
2178
a1e82d35
KO
2179 break;
2180 }
2181
85d8cf16 2182 iter->pos = iter_pos;
1f2d9192 2183
07f383c7 2184 iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
ee2c6ea7
KO
2185 iter->flags & BTREE_ITER_INTENT,
2186 btree_iter_ip_allocated(iter));
fd211bc7 2187
07f383c7 2188 btree_path_set_should_be_locked(btree_iter_path(trans, iter));
fd211bc7 2189out_no_locked:
1f2d9192 2190 if (iter->update_path) {
07f383c7 2191 ret = bch2_btree_path_relock(trans, trans->paths + iter->update_path, _THIS_IP_);
fd211bc7 2192 if (unlikely(ret))
549d173c 2193 k = bkey_s_c_err(ret);
fd211bc7 2194 else
07f383c7 2195 btree_path_set_should_be_locked(trans->paths + iter->update_path);
1f2d9192 2196 }
1f2d9192 2197
a1e82d35 2198 if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
c075ff70
KO
2199 iter->pos.snapshot = iter->snapshot;
2200
a1e82d35
KO
2201 ret = bch2_btree_iter_verify_ret(iter, k);
2202 if (unlikely(ret)) {
2203 bch2_btree_iter_set_pos(iter, iter->pos);
2204 k = bkey_s_c_err(ret);
2205 }
67e0dd8f 2206
7e1a3aa9 2207 bch2_btree_iter_verify_entry_exit(iter);
c075ff70 2208
1c6fdbd8 2209 return k;
c72f687a
KO
2210end:
2211 bch2_btree_iter_set_pos(iter, end);
2212 k = bkey_s_c_null;
2213 goto out_no_locked;
1c6fdbd8
KO
2214}
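
/*
 * [Editor's example - not part of the original file] A sketch of the typical
 * forward scan built on bch2_btree_iter_peek_upto(); btree id, positions and
 * the loop body are hypothetical. Restart handling is elided - real callers
 * wrap this in a bch2_trans_begin() retry loop or use the
 * for_each_btree_key_upto() macros.
 */
static int __maybe_unused example_scan_range(struct btree_trans *trans,
					     struct bpos start, struct bpos end)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, start, 0);

	while (1) {
		k = bch2_btree_iter_peek_upto(&iter, end);
		ret = bkey_err(k);
		if (ret || !k.k)
			break;

		/* process k here */

		if (!bch2_btree_iter_advance(&iter))
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}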
2215
f4b61341 2216/**
96dea3d5 2217 * bch2_btree_iter_next() - returns first key greater than iterator's current
f4b61341 2218 * position
96dea3d5
KO
2219 * @iter: iterator to peek from
2220 *
2221 * Returns: key if found, or an error extractable with bkey_err().
f4b61341 2222 */
1c6fdbd8
KO
2223struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
2224{
e0ba3b64 2225 if (!bch2_btree_iter_advance(iter))
2e70ce56 2226 return bkey_s_c_null;
f4b61341 2227
2dac0eae 2228 return bch2_btree_iter_peek(iter);
1c6fdbd8
KO
2229}
2230
ccf5a109 2231/**
96dea3d5 2232 * bch2_btree_iter_peek_prev() - returns first key less than or equal to
ccf5a109 2233 * iterator's current position
96dea3d5
KO
2234 * @iter: iterator to peek from
2235 *
2236 * Returns: key if found, or an error extractable with bkey_err().
ccf5a109
KO
2237 */
2238struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
1c6fdbd8 2239{
67e0dd8f 2240 struct btree_trans *trans = iter->trans;
c8476a4e 2241 struct bpos search_key = iter->pos;
c075ff70 2242 struct btree_path *saved_path = NULL;
1c6fdbd8 2243 struct bkey_s_c k;
c075ff70
KO
2244 struct bkey saved_k;
2245 const struct bch_val *saved_v;
1c6fdbd8
KO
2246 int ret;
2247
07f383c7
KO
2248 EBUG_ON(btree_iter_path(trans, iter)->cached ||
2249 btree_iter_path(trans, iter)->level);
5288e66a 2250 EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
5222a460
KO
2251
2252 if (iter->flags & BTREE_ITER_WITH_JOURNAL)
2253 return bkey_s_c_err(-EIO);
2254
7e1a3aa9
KO
2255 bch2_btree_iter_verify(iter);
2256 bch2_btree_iter_verify_entry_exit(iter);
2257
c075ff70
KO
2258 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2259 search_key.snapshot = U32_MAX;
2260
1c6fdbd8 2261 while (1) {
07f383c7 2262 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
ee2c6ea7
KO
2263 iter->flags & BTREE_ITER_INTENT,
2264 btree_iter_ip_allocated(iter));
c8476a4e 2265
07f383c7 2266 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
7e1a3aa9 2267 if (unlikely(ret)) {
e6e024e9
KO
2268 /* ensure that iter->k is consistent with iter->pos: */
2269 bch2_btree_iter_set_pos(iter, iter->pos);
7e1a3aa9 2270 k = bkey_s_c_err(ret);
fd211bc7 2271 goto out_no_locked;
7e1a3aa9 2272 }
1c6fdbd8 2273
07f383c7
KO
2274 struct btree_path *path = btree_iter_path(trans, iter);
2275
2276 k = btree_path_level_peek(trans, path, &path->l[0], &iter->k);
3d495595
KO
2277 if (!k.k ||
2278 ((iter->flags & BTREE_ITER_IS_EXTENTS)
e88a75eb
KO
2279 ? bpos_ge(bkey_start_pos(k.k), search_key)
2280 : bpos_gt(k.k->p, search_key)))
07f383c7 2281 k = btree_path_level_prev(trans, path, &path->l[0], &iter->k);
ccf5a109 2282
e6e024e9 2283 if (likely(k.k)) {
c075ff70
KO
2284 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
2285 if (k.k->p.snapshot == iter->snapshot)
2286 goto got_key;
2287
2288 /*
2289 * If we have a saved candidate, and we're no
2290 * longer at the same _key_ (not pos), return
2291 * that candidate
2292 */
e88a75eb 2293 if (saved_path && !bkey_eq(k.k->p, saved_k.p)) {
07f383c7 2294 bch2_path_put_nokeep(trans, iter->path,
c075ff70 2295 iter->flags & BTREE_ITER_INTENT);
07f383c7 2296 iter->path = saved_path->idx;
c075ff70
KO
2297 saved_path = NULL;
2298 iter->k = saved_k;
2299 k.v = saved_v;
2300 goto got_key;
2301 }
2302
0bc64d7e 2303 if (bch2_snapshot_is_ancestor(trans->c,
c075ff70
KO
2304 iter->snapshot,
2305 k.k->p.snapshot)) {
2306 if (saved_path)
74e600c1 2307 bch2_path_put_nokeep(trans, saved_path->idx,
c075ff70 2308 iter->flags & BTREE_ITER_INTENT);
07f383c7 2309 saved_path = trans->paths + btree_path_clone(trans, iter->path,
c075ff70 2310 iter->flags & BTREE_ITER_INTENT);
07f383c7 2311 path = btree_iter_path(trans, iter);
c075ff70
KO
2312 saved_k = *k.k;
2313 saved_v = k.v;
2314 }
2315
2316 search_key = bpos_predecessor(k.k->p);
2317 continue;
2318 }
2319got_key:
2320 if (bkey_whiteout(k.k) &&
2321 !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
2322 search_key = bkey_predecessor(iter, k.k->p);
2323 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2324 search_key.snapshot = U32_MAX;
2325 continue;
2326 }
2327
07f383c7 2328 btree_path_set_should_be_locked(path);
1c6fdbd8 2329 break;
07f383c7 2330 } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) {
e6e024e9 2331 /* Advance to previous leaf node: */
07f383c7 2332 search_key = bpos_predecessor(path->l[0].b->data->min_key);
e6e024e9
KO
2333 } else {
2334 /* Start of btree: */
c8476a4e 2335 bch2_btree_iter_set_pos(iter, POS_MIN);
7e1a3aa9 2336 k = bkey_s_c_null;
fd211bc7 2337 goto out_no_locked;
7e1a3aa9 2338 }
ccf5a109 2339 }
1c6fdbd8 2340
e88a75eb 2341 EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos));
3d495595
KO
2342
2343 /* Extents can straddle iter->pos: */
e88a75eb 2344 if (bkey_lt(k.k->p, iter->pos))
3d495595 2345 iter->pos = k.k->p;
c075ff70
KO
2346
2347 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2348 iter->pos.snapshot = iter->snapshot;
fd211bc7 2349out_no_locked:
c075ff70 2350 if (saved_path)
74e600c1 2351 bch2_path_put_nokeep(trans, saved_path->idx, iter->flags & BTREE_ITER_INTENT);
67e0dd8f 2352
7e1a3aa9
KO
2353 bch2_btree_iter_verify_entry_exit(iter);
2354 bch2_btree_iter_verify(iter);
67e0dd8f 2355
1c6fdbd8
KO
2356 return k;
2357}
2358
ccf5a109 2359/**
96dea3d5 2360 * bch2_btree_iter_prev() - returns first key less than iterator's current
ccf5a109 2361 * position
96dea3d5
KO
2362 * @iter: iterator to peek from
2363 *
2364 * Returns: key if found, or an error extractable with bkey_err().
ccf5a109
KO
2365 */
2366struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
2367{
e0ba3b64 2368 if (!bch2_btree_iter_rewind(iter))
2e70ce56 2369 return bkey_s_c_null;
ccf5a109 2370
2e70ce56 2371 return bch2_btree_iter_peek_prev(iter);
ccf5a109
KO
2372}
2373
c3801239 2374struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
271a3d3a 2375{
9f6bd307 2376 struct btree_trans *trans = iter->trans;
953ee28a 2377 struct bpos search_key;
271a3d3a 2378 struct bkey_s_c k;
c3801239
KO
2379 int ret;
2380
7e1a3aa9
KO
2381 bch2_btree_iter_verify(iter);
2382 bch2_btree_iter_verify_entry_exit(iter);
07f383c7 2383 EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE));
7e1a3aa9 2384
1d214eb1
KO
2385 /* extents can't span inode numbers: */
2386 if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
953ee28a 2387 unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
1d214eb1
KO
2388 if (iter->pos.inode == KEY_INODE_MAX)
2389 return bkey_s_c_null;
c3801239 2390
1d214eb1
KO
2391 bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
2392 }
8042b5b7 2393
953ee28a 2394 search_key = btree_iter_search_key(iter);
07f383c7 2395 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
ee2c6ea7
KO
2396 iter->flags & BTREE_ITER_INTENT,
2397 btree_iter_ip_allocated(iter));
953ee28a 2398
07f383c7 2399 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
23dfb3a2
KO
2400 if (unlikely(ret)) {
2401 k = bkey_s_c_err(ret);
2402 goto out_no_locked;
2403 }
1c6fdbd8 2404
c075ff70
KO
2405 if ((iter->flags & BTREE_ITER_CACHED) ||
2406 !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
953ee28a 2407 struct bkey_i *next_update;
1c6fdbd8 2408
5222a460 2409 if ((next_update = btree_trans_peek_updates(iter)) &&
e88a75eb 2410 bpos_eq(next_update->k.p, iter->pos)) {
1d214eb1
KO
2411 iter->k = next_update->k;
2412 k = bkey_i_to_s_c(next_update);
5222a460 2413 goto out;
1d214eb1
KO
2414 }
2415
5222a460
KO
2416 if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
2417 (k = btree_trans_peek_slot_journal(trans, iter)).k)
2418 goto out;
2419
f7b6ca23 2420 if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
dcced069 2421 (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
8192f8a5 2422 if (!bkey_err(k))
f7b6ca23 2423 iter->k = *k.k;
8192f8a5
KO
2424 /* We're not returning a key from iter->path: */
2425 goto out_no_locked;
f7b6ca23
KO
2426 }
2427
07f383c7 2428 k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k);
f42238b5
KO
2429 if (unlikely(!k.k))
2430 goto out_no_locked;
1d214eb1
KO
2431 } else {
2432 struct bpos next;
c72f687a
KO
2433 struct bpos end = iter->pos;
2434
2435 if (iter->flags & BTREE_ITER_IS_EXTENTS)
2436 end.offset = U64_MAX;
1d214eb1 2437
07f383c7 2438 EBUG_ON(btree_iter_path(trans, iter)->level);
2ae4573e 2439
b1d87f52 2440 if (iter->flags & BTREE_ITER_INTENT) {
67e0dd8f 2441 struct btree_iter iter2;
b1d87f52 2442
67e0dd8f 2443 bch2_trans_copy_iter(&iter2, iter);
85d8cf16 2444 k = bch2_btree_iter_peek_upto(&iter2, end);
b1d87f52 2445
67e0dd8f
KO
2446 if (k.k && !bkey_err(k)) {
2447 iter->k = iter2.k;
2448 k.k = &iter->k;
2449 }
2450 bch2_trans_iter_exit(trans, &iter2);
b1d87f52
KO
2451 } else {
2452 struct bpos pos = iter->pos;
2453
c72f687a 2454 k = bch2_btree_iter_peek_upto(iter, end);
23dfb3a2
KO
2455 if (unlikely(bkey_err(k)))
2456 bch2_btree_iter_set_pos(iter, pos);
2457 else
2458 iter->pos = pos;
b1d87f52 2459 }
1d214eb1
KO
2460
2461 if (unlikely(bkey_err(k)))
f42238b5 2462 goto out_no_locked;
1d214eb1
KO
2463
2464 next = k.k ? bkey_start_pos(k.k) : POS_MAX;
2465
e88a75eb 2466 if (bkey_lt(iter->pos, next)) {
1d214eb1
KO
2467 bkey_init(&iter->k);
2468 iter->k.p = iter->pos;
c075ff70
KO
2469
2470 if (iter->flags & BTREE_ITER_IS_EXTENTS) {
2471 bch2_key_resize(&iter->k,
2472 min_t(u64, KEY_SIZE_MAX,
2473 (next.inode == iter->pos.inode
2474 ? next.offset
2475 : KEY_OFFSET_MAX) -
2476 iter->pos.offset));
2477 EBUG_ON(!iter->k.size);
2478 }
1d214eb1
KO
2479
2480 k = (struct bkey_s_c) { &iter->k, NULL };
1d214eb1 2481 }
271a3d3a 2482 }
5222a460 2483out:
07f383c7 2484 btree_path_set_should_be_locked(btree_iter_path(trans, iter));
fd211bc7 2485out_no_locked:
7e1a3aa9
KO
2486 bch2_btree_iter_verify_entry_exit(iter);
2487 bch2_btree_iter_verify(iter);
c075ff70
KO
2488 ret = bch2_btree_iter_verify_ret(iter, k);
2489 if (unlikely(ret))
2490 return bkey_s_c_err(ret);
66a0a497 2491
63f1a598 2492 return k;
1c6fdbd8
KO
2493}
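
/*
 * [Editor's example - not part of the original file] A sketch of a point
 * lookup with bch2_btree_iter_peek_slot(): unlike peek(), it returns a key
 * for the slot at iter->pos even in a hole (as a zero-size deleted key).
 * The btree id and the -ENOENT convention here are hypothetical.
 */
static int __maybe_unused example_lookup(struct btree_trans *trans,
					 struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos, 0);

	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (!ret && bkey_deleted(k.k))
		ret = -ENOENT;	/* hole: nothing stored at pos */

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}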
2494
1c6fdbd8
KO
2495struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
2496{
e0ba3b64 2497 if (!bch2_btree_iter_advance(iter))
2e70ce56 2498 return bkey_s_c_null;
1c6fdbd8 2499
2dac0eae 2500 return bch2_btree_iter_peek_slot(iter);
1c6fdbd8
KO
2501}
2502
18fc6ae5
KO
2503struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
2504{
e0ba3b64 2505 if (!bch2_btree_iter_rewind(iter))
18fc6ae5
KO
2506 return bkey_s_c_null;
2507
2508 return bch2_btree_iter_peek_slot(iter);
2509}
2510
0f2ea655
KO
2511struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
2512{
2513 struct bkey_s_c k;
2514
2515 while (btree_trans_too_many_iters(iter->trans) ||
2516 (k = bch2_btree_iter_peek_type(iter, iter->flags),
2517 bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
2518 bch2_trans_begin(iter->trans);
2519
2520 return k;
2521}
2522
1c6fdbd8
KO
2523/* new transactional stuff: */
2524
0423fb71
KO
2525#ifdef CONFIG_BCACHEFS_DEBUG
2526static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
2527{
67e0dd8f 2528 struct btree_path *path;
0423fb71
KO
2529 unsigned i;
2530
f8fd5871 2531 BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, BTREE_ITER_MAX) - 1);
0423fb71 2532
67e0dd8f
KO
2533 trans_for_each_path(trans, path) {
2534 BUG_ON(path->sorted_idx >= trans->nr_sorted);
2535 BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
0423fb71
KO
2536 }
2537
2538 for (i = 0; i < trans->nr_sorted; i++) {
2539 unsigned idx = trans->sorted[i];
2540
6e92d155 2541 BUG_ON(!test_bit(idx, trans->paths_allocated));
67e0dd8f 2542 BUG_ON(trans->paths[idx].sorted_idx != i);
0423fb71
KO
2543 }
2544}
0423fb71
KO
2545
2546static void btree_trans_verify_sorted(struct btree_trans *trans)
2547{
67e0dd8f 2548 struct btree_path *path, *prev = NULL;
1f75ba4e 2549 struct trans_for_each_path_inorder_iter iter;
0423fb71 2550
ee4d17d0
KO
2551 if (!bch2_debug_check_iterators)
2552 return;
2553
1f75ba4e 2554 trans_for_each_path_inorder(trans, path, iter) {
eb7bd15f 2555 if (prev && btree_path_cmp(prev, path) > 0) {
5c0bb66a 2556 __bch2_dump_trans_paths_updates(trans, true);
eb7bd15f
KO
2557 panic("trans paths out of order!\n");
2558 }
67e0dd8f 2559 prev = path;
0423fb71 2560 }
0423fb71 2561}
068bcaa5
KO
2562#else
2563static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
2564static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
2565#endif
0423fb71 2566
068bcaa5 2567void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
0423fb71
KO
2568{
2569 int i, l = 0, r = trans->nr_sorted, inc = 1;
2570 bool swapped;
2571
068bcaa5
KO
2572 btree_trans_verify_sorted_refs(trans);
2573
2574 if (trans->paths_sorted)
2575 goto out;
2576
0423fb71
KO
2577 /*
2578	 * Cocktail shaker sort: this is efficient because the paths will
5c0bb66a	2579	 * already be mostly sorted.
0423fb71
KO
2580 */
2581 do {
2582 swapped = false;
2583
2584 for (i = inc > 0 ? l : r - 2;
2585 i + 1 < r && i >= l;
2586 i += inc) {
67e0dd8f
KO
2587 if (btree_path_cmp(trans->paths + trans->sorted[i],
2588 trans->paths + trans->sorted[i + 1]) > 0) {
0423fb71 2589 swap(trans->sorted[i], trans->sorted[i + 1]);
67e0dd8f
KO
2590 trans->paths[trans->sorted[i]].sorted_idx = i;
2591 trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
0423fb71
KO
2592 swapped = true;
2593 }
2594 }
2595
2596 if (inc > 0)
2597 --r;
2598 else
2599 l++;
2600 inc = -inc;
2601 } while (swapped);
2602
67e0dd8f 2603 trans->paths_sorted = true;
068bcaa5 2604out:
0423fb71
KO
2605 btree_trans_verify_sorted(trans);
2606}
2607
67e0dd8f
KO
2608static inline void btree_path_list_remove(struct btree_trans *trans,
2609 struct btree_path *path)
0423fb71
KO
2610{
2611 unsigned i;
2612
67e0dd8f 2613 EBUG_ON(path->sorted_idx >= trans->nr_sorted);
0423fb71
KO
2614#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
2615 trans->nr_sorted--;
67e0dd8f
KO
2616 memmove_u64s_down_small(trans->sorted + path->sorted_idx,
2617 trans->sorted + path->sorted_idx + 1,
2618 DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
0423fb71 2619#else
67e0dd8f 2620 array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
0423fb71 2621#endif
67e0dd8f
KO
2622 for (i = path->sorted_idx; i < trans->nr_sorted; i++)
2623 trans->paths[trans->sorted[i]].sorted_idx = i;
0423fb71 2624
67e0dd8f 2625 path->sorted_idx = U8_MAX;
0423fb71
KO
2626}
2627
67e0dd8f 2628static inline void btree_path_list_add(struct btree_trans *trans,
07f383c7
KO
2629 btree_path_idx_t pos,
2630 btree_path_idx_t path_idx)
0423fb71 2631{
07f383c7 2632 struct btree_path *path = trans->paths + path_idx;
0423fb71
KO
2633 unsigned i;
2634
07f383c7 2635 path->sorted_idx = pos ? trans->paths[pos].sorted_idx + 1 : trans->nr_sorted;
0423fb71
KO
2636
2637#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
67e0dd8f
KO
2638 memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
2639 trans->sorted + path->sorted_idx,
2640 DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
0423fb71 2641 trans->nr_sorted++;
07f383c7 2642 trans->sorted[path->sorted_idx] = path_idx;
0423fb71 2643#else
07f383c7 2644 array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx);
0423fb71
KO
2645#endif
2646
67e0dd8f
KO
2647 for (i = path->sorted_idx; i < trans->nr_sorted; i++)
2648 trans->paths[trans->sorted[i]].sorted_idx = i;
64bc0011 2649
0423fb71 2650 btree_trans_verify_sorted_refs(trans);
7c26ecae
KO
2651}
2652
67e0dd8f 2653void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
64bc0011 2654{
1f2d9192 2655 if (iter->update_path)
07f383c7 2656 bch2_path_put_nokeep(trans, iter->update_path,
1f2d9192 2657 iter->flags & BTREE_ITER_INTENT);
11f11737 2658 if (iter->path)
07f383c7 2659 bch2_path_put(trans, iter->path,
11f11737 2660 iter->flags & BTREE_ITER_INTENT);
f7b6ca23 2661 if (iter->key_cache_path)
07f383c7 2662 bch2_path_put(trans, iter->key_cache_path,
f7b6ca23 2663 iter->flags & BTREE_ITER_INTENT);
07f383c7
KO
2664 iter->path = 0;
2665 iter->update_path = 0;
2666 iter->key_cache_path = 0;
2667 iter->trans = NULL;
64bc0011
KO
2668}
2669
c96f108b
KO
2670void bch2_trans_iter_init_outlined(struct btree_trans *trans,
2671 struct btree_iter *iter,
2672 enum btree_id btree_id, struct bpos pos,
2673 unsigned flags)
2674{
2675 bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
ee2c6ea7
KO
2676 bch2_btree_iter_flags(trans, btree_id, flags),
2677 _RET_IP_);
424eb881
KO
2678}
2679
67e0dd8f
KO
2680void bch2_trans_node_iter_init(struct btree_trans *trans,
2681 struct btree_iter *iter,
2682 enum btree_id btree_id,
2683 struct bpos pos,
2684 unsigned locks_want,
2685 unsigned depth,
2686 unsigned flags)
1c6fdbd8 2687{
1e81f89b
KO
2688 flags |= BTREE_ITER_NOT_EXTENTS;
2689 flags |= __BTREE_ITER_ALL_SNAPSHOTS;
2690 flags |= BTREE_ITER_ALL_SNAPSHOTS;
c96f108b
KO
2691
2692 bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth,
ee2c6ea7
KO
2693 __bch2_btree_iter_flags(trans, btree_id, flags),
2694 _RET_IP_);
c96f108b
KO
2695
2696 iter->min_depth = depth;
2697
07f383c7
KO
2698 struct btree_path *path = btree_iter_path(trans, iter);
2699 BUG_ON(path->locks_want < min(locks_want, BTREE_MAX_DEPTH));
2700 BUG_ON(path->level != depth);
2701 BUG_ON(iter->min_depth != depth);
67e0dd8f 2702}
7c26ecae 2703
67e0dd8f
KO
2704void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
2705{
07f383c7
KO
2706 struct btree_trans *trans = src->trans;
2707
67e0dd8f
KO
2708 *dst = *src;
2709 if (src->path)
07f383c7 2710 __btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT);
1f2d9192 2711 if (src->update_path)
07f383c7
KO
2712 __btree_path_get(trans->paths + src->update_path, src->flags & BTREE_ITER_INTENT);
2713 dst->key_cache_path = 0;
1c6fdbd8
KO
2714}
2715
14d8f26a 2716void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
1c6fdbd8 2717{
616928c3 2718 unsigned new_top = trans->mem_top + size;
e06af207
KO
2719 unsigned old_bytes = trans->mem_bytes;
2720 unsigned new_bytes = roundup_pow_of_two(new_top);
78367aaa 2721 int ret;
14d8f26a 2722 void *new_mem;
73a117d2
KO
2723 void *p;
2724
616928c3
KO
2725 trans->mem_max = max(trans->mem_max, new_top);
2726
14d8f26a 2727 WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
e131b6aa 2728
78367aaa
KO
2729 new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
2730 if (unlikely(!new_mem)) {
2731 bch2_trans_unlock(trans);
2732
2733 new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
2734 if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
2735 new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
2736 new_bytes = BTREE_TRANS_MEM_MAX;
2737 kfree(trans->mem);
2738 }
1c6fdbd8 2739
78367aaa
KO
2740 if (!new_mem)
2741 return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
2742
2743 trans->mem = new_mem;
2744 trans->mem_bytes = new_bytes;
2745
2746 ret = bch2_trans_relock(trans);
2747 if (ret)
2748 return ERR_PTR(ret);
2749 }
1c6fdbd8 2750
14d8f26a
KO
2751 trans->mem = new_mem;
2752 trans->mem_bytes = new_bytes;
1c6fdbd8 2753
14d8f26a
KO
2754 if (old_bytes) {
2755 trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
2756 return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
1c6fdbd8
KO
2757 }
2758
20bceecb 2759 p = trans->mem + trans->mem_top;
1c6fdbd8 2760 trans->mem_top += size;
7ed158f2 2761 memset(p, 0, size);
20bceecb 2762 return p;
1c6fdbd8
KO
2763}
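
/*
 * [Editor's example - not part of the original file] A sketch of allocating
 * from the transaction's bump allocator via the bch2_trans_kmalloc() wrapper
 * around __bch2_trans_kmalloc() above. Note that growing the buffer restarts
 * the transaction (-BCH_ERR_transaction_restart_mem_realloced), so the error
 * must be propagated to the retry loop.
 */
static int __maybe_unused example_trans_alloc(struct btree_trans *trans)
{
	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));

	if (IS_ERR(k))
		return PTR_ERR(k);	/* possibly a transaction restart */

	bkey_init(&k->k);		/* memory comes back zeroed */
	return 0;
}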
2764
f82755e4
KO
2765static inline void check_srcu_held_too_long(struct btree_trans *trans)
2766{
2767 WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
2768 "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
2769 (jiffies - trans->srcu_lock_time) / HZ);
2770}
2771
c4accde4 2772void bch2_trans_srcu_unlock(struct btree_trans *trans)
e242b92a 2773{
c4accde4
KO
2774 if (trans->srcu_held) {
2775 struct bch_fs *c = trans->c;
2776 struct btree_path *path;
e242b92a 2777
c4accde4
KO
2778 trans_for_each_path(trans, path)
2779 if (path->cached && !btree_node_locked(path, 0))
2780 path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
e242b92a 2781
f82755e4 2782 check_srcu_held_too_long(trans);
c4accde4
KO
2783 srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
2784 trans->srcu_held = false;
2785 }
2786}
2787
79904fa2 2788static void bch2_trans_srcu_lock(struct btree_trans *trans)
c4accde4
KO
2789{
2790 if (!trans->srcu_held) {
2791 trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
2792 trans->srcu_lock_time = jiffies;
2793 trans->srcu_held = true;
2794 }
e242b92a
KO
2795}
2796
d38494c4 2797/**
955af634	2798 * bch2_trans_begin() - reset a transaction after an interrupted attempt
d38494c4 2799 * @trans: transaction to reset
d38494c4 2800 *
96dea3d5
KO
2801 * Returns: current restart counter, to be used with trans_was_restarted()
2802 *
549d173c
KO
2803 * While iterating over or updating nodes, an attempt to lock a btree node
2804 * may return BCH_ERR_transaction_restart when the trylock fails. When this
2805 * occurs, bch2_trans_begin() should be called and the transaction retried.
d38494c4 2806 */
e941ae7d 2807u32 bch2_trans_begin(struct btree_trans *trans)
1c6fdbd8 2808{
67e0dd8f 2809 struct btree_path *path;
f375d6ca 2810 u64 now;
a8e00bd4 2811
0fbf71f8 2812 bch2_trans_reset_updates(trans);
1c6fdbd8 2813
c497df8b 2814 trans->restart_count++;
163e885a 2815 trans->mem_top = 0;
24de63da 2816 trans->journal_entries = NULL;
bf7b87a4 2817
67e0dd8f 2818 trans_for_each_path(trans, path) {
25a77231
KO
2819 path->should_be_locked = false;
2820
e1f7fa06
KO
2821 /*
2822 * If the transaction wasn't restarted, we're presuming to be
2823		 * doing something new: don't keep iterators except the ones that
2824		 * are in use - except for the subvolumes btree:
2825 */
2826 if (!trans->restarted && path->btree_id != BTREE_ID_subvolumes)
2827 path->preserve = false;
2828
67e0dd8f
KO
2829 /*
2830 * XXX: we probably shouldn't be doing this if the transaction
2831 * was restarted, but currently we still overflow transaction
2832 * iterators if we do that
2833 */
2834 if (!path->ref && !path->preserve)
74e600c1 2835 __bch2_path_free(trans, path->idx);
67e0dd8f 2836 else
25a77231 2837 path->preserve = false;
67e0dd8f
KO
2838 }
2839
f375d6ca 2840 now = local_clock();
43de721a
KO
2841 if (!trans->restarted &&
2842 (need_resched() ||
f375d6ca 2843 now - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
49c7cd9d 2844 drop_locks_do(trans, (cond_resched(), 0));
f375d6ca 2845 now = local_clock();
43de721a 2846 }
f375d6ca 2847 trans->last_begin_time = now;
50dc0f69 2848
c4accde4
KO
2849 if (unlikely(trans->srcu_held &&
2850 time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
2851 bch2_trans_srcu_unlock(trans);
e242b92a 2852
5e2d8be8 2853 trans->last_begin_ip = _RET_IP_;
60b55388 2854 if (trans->restarted) {
67e0dd8f 2855 bch2_btree_path_traverse_all(trans);
60b55388
KO
2856 trans->notrace_relock_fail = false;
2857 }
e5af273f 2858
e941ae7d
KO
2859 return trans->restart_count;
2860}
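
/*
 * [Editor's example - not part of the original file] The canonical retry
 * loop around bch2_trans_begin(), per the comment above; do_work is a
 * hypothetical transactional operation passed in by the caller. In-tree
 * code usually spells this with helpers such as lockrestart_do().
 */
static int __maybe_unused example_retry_loop(struct btree_trans *trans,
					     int (*do_work)(struct btree_trans *))
{
	int ret;

	do {
		bch2_trans_begin(trans);
		ret = do_work(trans);
	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));

	return ret;
}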
2861
6bd68ec2 2862static struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
1a21bf98 2863{
6bd68ec2 2864 struct btree_trans *trans;
1a21bf98 2865
6bd68ec2
KO
2866 if (IS_ENABLED(__KERNEL__)) {
2867 trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
2868 if (trans)
2869 return trans;
ff5b741c 2870 }
1a21bf98 2871
6bd68ec2
KO
2872 trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
2873 /*
2874 * paths need to be zeroed, bch2_check_for_deadlock looks at
2875 * paths in other threads
2876 */
2877 memset(&trans->paths, 0, sizeof(trans->paths));
2878 return trans;
1a21bf98
KO
2879}
2880
307e3c13
KO
2881const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
2882
2883unsigned bch2_trans_get_fn_idx(const char *fn)
4aba7d45
KO
2884{
2885 unsigned i;
2886
307e3c13
KO
2887 for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
2888 if (!bch2_btree_transaction_fns[i] ||
2889 bch2_btree_transaction_fns[i] == fn) {
2890 bch2_btree_transaction_fns[i] = fn;
4aba7d45
KO
2891 return i;
2892 }
2893
2894 pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
2895 return i;
2896}
2897
6bd68ec2 2898struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
c0ebe3e4 2899 __acquires(&c->btree_trans_barrier)
1c6fdbd8 2900{
6bd68ec2 2901 struct btree_trans *trans;
5c0bb66a
KO
2902 struct btree_transaction_stats *s;
2903
6bd68ec2
KO
2904 trans = bch2_trans_alloc(c);
2905
ae1ede58 2906 memset(trans, 0, sizeof(*trans));
1c6fdbd8 2907 trans->c = c;
307e3c13
KO
2908 trans->fn = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
2909 ? bch2_btree_transaction_fns[fn_idx] : NULL;
d7e4e513 2910 trans->last_begin_time = local_clock();
307e3c13 2911 trans->fn_idx = fn_idx;
33bd5d06 2912 trans->locking_wait.task = current;
5222a460 2913 trans->journal_replay_not_finished =
8a443d3e
KO
2914 unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) &&
2915 atomic_inc_not_zero(&c->journal_keys.ref);
33bd5d06 2916 closure_init_stack(&trans->ref);
20bceecb 2917
f8fd5871
KO
2918 trans->paths_allocated[0] = 1;
2919
616928c3 2920 s = btree_trans_stats(trans);
1f69368c 2921 if (s && s->max_mem) {
616928c3
KO
2922 unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);
2923
0b5c9f59 2924 trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL);
e131b6aa
KO
2925
2926		if (unlikely(!trans->mem)) {
2927 trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
2928 trans->mem_bytes = BTREE_TRANS_MEM_MAX;
2929 } else {
0b5c9f59 2930 trans->mem_bytes = expected_mem_bytes;
e131b6aa 2931 }
616928c3 2932 }
920e69bc
KO
2933
2934 if (s) {
1f69368c 2935 trans->nr_max_paths = s->nr_max_paths;
24de63da 2936 trans->journal_entries_size = s->journal_entries_size;
920e69bc 2937 }
5c0bb66a 2938
c4accde4 2939 trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
e242b92a 2940 trans->srcu_lock_time = jiffies;
c4accde4 2941 trans->srcu_held = true;
876c7af3 2942
b84d42c3 2943 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
50b13bee
KO
2944 struct btree_trans *pos;
2945
a5b696ee 2946 seqmutex_lock(&c->btree_trans_lock);
50b13bee 2947 list_for_each_entry(pos, &c->btree_trans_list, list) {
6623c0fc
KO
2948 /*
2949 * We'd much prefer to be stricter here and completely
2950 * disallow multiple btree_trans in the same thread -
2951 * but the data move path calls bch2_write when we
2952 * already have a btree_trans initialized.
2953 */
2954 BUG_ON(trans->locking_wait.task->pid == pos->locking_wait.task->pid &&
2955 bch2_trans_locked(pos));
2956
33bd5d06 2957 if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) {
50b13bee
KO
2958 list_add_tail(&trans->list, &pos->list);
2959 goto list_add_done;
2960 }
2961 }
2962 list_add_tail(&trans->list, &c->btree_trans_list);
2963list_add_done:
a5b696ee 2964 seqmutex_unlock(&c->btree_trans_lock);
b84d42c3 2965 }
6bd68ec2
KO
2966
2967 return trans;
1c6fdbd8
KO
2968}
2969
67e0dd8f
KO
2970static void check_btree_paths_leaked(struct btree_trans *trans)
2971{
2972#ifdef CONFIG_BCACHEFS_DEBUG
2973 struct bch_fs *c = trans->c;
2974 struct btree_path *path;
2975
2976 trans_for_each_path(trans, path)
2977 if (path->ref)
2978 goto leaked;
2979 return;
2980leaked:
669f87a5 2981 bch_err(c, "btree paths leaked from %s!", trans->fn);
67e0dd8f
KO
2982 trans_for_each_path(trans, path)
2983 if (path->ref)
2984 printk(KERN_ERR " btree %s %pS\n",
88dfe193 2985 bch2_btree_id_str(path->btree_id),
67e0dd8f
KO
2986 (void *) path->ip_allocated);
2987 /* Be noisy about this: */
2988 bch2_fatal_error(c);
2989#endif
2990}
2991
6bd68ec2 2992void bch2_trans_put(struct btree_trans *trans)
c0ebe3e4 2993 __releases(&c->btree_trans_barrier)
1c6fdbd8 2994{
1a21bf98 2995 struct bch_fs *c = trans->c;
616928c3 2996 struct btree_transaction_stats *s = btree_trans_stats(trans);
1a21bf98 2997
ece254b2 2998 bch2_trans_unlock(trans);
1c6fdbd8 2999
a5b696ee
KO
3000 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
3001 seqmutex_lock(&c->btree_trans_lock);
3002 list_del(&trans->list);
3003 seqmutex_unlock(&c->btree_trans_lock);
3004 }
3005
33bd5d06
KO
3006 closure_sync(&trans->ref);
3007
616928c3
KO
3008 if (s)
3009 s->max_mem = max(s->max_mem, trans->mem_max);
3010
67e0dd8f 3011 trans_for_each_update(trans, i)
7f9821a7 3012 __btree_path_put(trans->paths + i->path, true);
67e0dd8f 3013 trans->nr_updates = 0;
509d3e0a 3014
67e0dd8f 3015 check_btree_paths_leaked(trans);
50dc0f69 3016
f82755e4
KO
3017 if (trans->srcu_held) {
3018 check_srcu_held_too_long(trans);
c4accde4 3019 srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
f82755e4 3020 }
876c7af3 3021
9620c3ec
KO
3022 if (trans->fs_usage_deltas) {
3023 if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
3024 REPLICAS_DELTA_LIST_MAX)
3025 mempool_free(trans->fs_usage_deltas,
67e0dd8f 3026 &c->replicas_delta_pool);
9620c3ec
KO
3027 else
3028 kfree(trans->fs_usage_deltas);
3029 }
e131b6aa 3030
8a443d3e
KO
3031 if (unlikely(trans->journal_replay_not_finished))
3032 bch2_journal_keys_put(c);
3033
e131b6aa 3034 if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
67e0dd8f 3035 mempool_free(trans->mem, &c->btree_trans_mem_pool);
e131b6aa
KO
3036 else
3037 kfree(trans->mem);
1a21bf98 3038
6bd68ec2
KO
3039 /* Userspace doesn't have a real percpu implementation: */
3040 if (IS_ENABLED(__KERNEL__))
3041 trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
3042 if (trans)
3043 mempool_free(trans, &c->btree_trans_pool);
1c6fdbd8 3044}
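
/*
 * [Editor's example - not part of the original file] A sketch of the full
 * transaction lifecycle: bch2_trans_get() is the wrapper macro around
 * __bch2_trans_get() above, and pairing it with bch2_trans_put() is what
 * keeps check_btree_paths_leaked() quiet. The work in between is elided.
 */
static int __maybe_unused example_trans_lifecycle(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);

	/* ... init iterators, peek/update, commit, retry on restart ... */

	bch2_trans_put(trans);
	return 0;
}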
36e9d698 3045
b3d1e6ca 3046static void __maybe_unused
afbc7194
KO
3047bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
3048 struct btree_bkey_cached_common *b)
d211b408 3049{
c240c3a9
KO
3050 struct six_lock_count c = six_lock_counts(&b->lock);
3051 struct task_struct *owner;
3052 pid_t pid;
3053
3054 rcu_read_lock();
3055 owner = READ_ONCE(b->lock.owner);
3e3e02e6 3056 pid = owner ? owner->pid : 0;
c240c3a9
KO
3057 rcu_read_unlock();
3058
afbc7194
KO
3059 prt_tab(out);
3060 prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b',
88dfe193 3061 b->level, bch2_btree_id_str(b->btree_id));
4e6defd1 3062 bch2_bpos_to_text(out, btree_node_pos(b));
c240c3a9 3063
afbc7194
KO
3064 prt_tab(out);
3065 prt_printf(out, " locks %u:%u:%u held by pid %u",
c240c3a9 3066 c.n[0], c.n[1], c.n[2], pid);
d211b408
KO
3067}
3068
50b13bee 3069void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
5c1d808a 3070{
67e0dd8f 3071 struct btree_path *path;
86b74451 3072 struct btree_bkey_cached_common *b;
c7ce2732 3073 static char lock_types[] = { 'r', 'i', 'w' };
33981244 3074 struct task_struct *task = READ_ONCE(trans->locking_wait.task);
f154c3eb 3075 unsigned l, idx;
495aabed 3076
afbc7194
KO
3077 if (!out->nr_tabstops) {
3078 printbuf_tabstop_push(out, 16);
3079 printbuf_tabstop_push(out, 32);
3080 }
3081
33981244 3082 prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn);
495aabed 3083
f154c3eb 3084 trans_for_each_path_safe(trans, path, idx) {
50b13bee
KO
3085 if (!path->nodes_locked)
3086 continue;
495aabed 3087
50b13bee 3088 prt_printf(out, " path %u %c l=%u %s:",
1f75ba4e 3089 idx,
50b13bee
KO
3090 path->cached ? 'c' : 'b',
3091 path->level,
88dfe193 3092 bch2_btree_id_str(path->btree_id));
50b13bee 3093 bch2_bpos_to_text(out, path->pos);
afbc7194 3094 prt_newline(out);
50b13bee
KO
3095
3096 for (l = 0; l < BTREE_MAX_DEPTH; l++) {
86b74451 3097 if (btree_node_locked(path, l) &&
315c9ba6 3098 !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) {
d4263e56
KO
3099 prt_printf(out, " %c l=%u ",
3100 lock_types[btree_node_locked_type(path, l)], l);
afbc7194
KO
3101 bch2_btree_bkey_cached_common_to_text(out, b);
3102 prt_newline(out);
495aabed
KO
3103 }
3104 }
50b13bee 3105 }
495aabed 3106
50b13bee
KO
3107 b = READ_ONCE(trans->locking);
3108 if (b) {
9d7f2a41
KO
3109 prt_printf(out, " blocked for %lluus on",
3110 div_u64(local_clock() - trans->locking_wait.start_time,
3111 1000));
afbc7194
KO
3112 prt_newline(out);
3113 prt_printf(out, " %c", lock_types[trans->locking_wait.lock_want]);
3114 bch2_btree_bkey_cached_common_to_text(out, b);
3115 prt_newline(out);
495aabed 3116 }
495aabed
KO
3117}
3118
36e9d698
KO
3119void bch2_fs_btree_iter_exit(struct bch_fs *c)
3120{
4aba7d45 3121 struct btree_transaction_stats *s;
6bd68ec2
KO
3122 struct btree_trans *trans;
3123 int cpu;
3124
3125 trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list);
3126 if (trans)
3127 panic("%s leaked btree_trans\n", trans->fn);
3128
3129 if (c->btree_trans_bufs)
3130 for_each_possible_cpu(cpu)
3131 kfree(per_cpu_ptr(c->btree_trans_bufs, cpu)->trans);
3132 free_percpu(c->btree_trans_bufs);
4aba7d45
KO
3133
3134 for (s = c->btree_transaction_stats;
3135 s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
5c0bb66a
KO
3136 s++) {
3137 kfree(s->max_paths_text);
4aba7d45 3138 bch2_time_stats_exit(&s->lock_hold_times);
5c0bb66a 3139 }
4aba7d45 3140
99fafb04
KO
3141 if (c->btree_trans_barrier_initialized)
3142 cleanup_srcu_struct(&c->btree_trans_barrier);
e131b6aa 3143 mempool_exit(&c->btree_trans_mem_pool);
6bd68ec2 3144 mempool_exit(&c->btree_trans_pool);
36e9d698
KO
3145}
3146
50a8a732 3147void bch2_fs_btree_iter_init_early(struct bch_fs *c)
36e9d698 3148{
4aba7d45 3149 struct btree_transaction_stats *s;
36e9d698 3150
4aba7d45
KO
3151 for (s = c->btree_transaction_stats;
3152 s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
5c0bb66a 3153 s++) {
4aba7d45 3154 bch2_time_stats_init(&s->lock_hold_times);
5c0bb66a
KO
3155 mutex_init(&s->lock);
3156 }
4aba7d45 3157
495aabed 3158 INIT_LIST_HEAD(&c->btree_trans_list);
a5b696ee 3159 seqmutex_init(&c->btree_trans_lock);
50a8a732
TB
3160}
3161
3162int bch2_fs_btree_iter_init(struct bch_fs *c)
3163{
3164 int ret;
495aabed 3165
6bd68ec2
KO
3166 c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
3167 if (!c->btree_trans_bufs)
3168 return -ENOMEM;
3169
3170 ret = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1,
3171 sizeof(struct btree_trans)) ?:
e131b6aa 3172 mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
99fafb04
KO
3173 BTREE_TRANS_MEM_MAX) ?:
3174 init_srcu_struct(&c->btree_trans_barrier);
3175 if (!ret)
3176 c->btree_trans_barrier_initialized = true;
3177 return ret;
36e9d698 3178}