bcachefs: bch2_btree_iter_peek_slot() now works on interior nodes
fs/bcachefs/btree_iter.c (linux-block.git)
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "recovery.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/prefetch.h>

static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
				       struct btree_path *);

static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);

/*
 * Unlocks before scheduling
 * Note: does not revalidate iterator
 */
static inline int bch2_trans_cond_resched(struct btree_trans *trans)
{
	if (need_resched() || race_fault()) {
		bch2_trans_unlock(trans);
		schedule();
		return bch2_trans_relock(trans) ? 0 : -EINTR;
	} else {
		return 0;
	}
}

static inline int __btree_path_cmp(const struct btree_path *l,
				   enum btree_id	r_btree_id,
				   bool			r_cached,
				   struct bpos		r_pos,
				   unsigned		r_level)
{
	/*
	 * Must match lock ordering as defined by __bch2_btree_node_lock:
	 */
	return   cmp_int(l->btree_id,	r_btree_id) ?:
		 cmp_int((int) l->cached,	(int) r_cached) ?:
		 bpos_cmp(l->pos,	r_pos) ?:
		-cmp_int(l->level,	r_level);
}

static inline int btree_path_cmp(const struct btree_path *l,
				 const struct btree_path *r)
{
	return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
}

static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_successor(p);
	} else {
		p = bpos_nosnap_successor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_predecessor(p);
	} else {
		p = bpos_nosnap_predecessor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

static inline bool is_btree_node(struct btree_path *path, unsigned l)
{
	return l < BTREE_MAX_DEPTH &&
		(unsigned long) path->l[l].b >= 128;
}

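/*
 * The position we actually search the btree for differs from iter->pos for
 * extents iterators: extents are indexed by their end position, so we search
 * for the successor of iter->pos.
 */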
static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
{
	struct bpos pos = iter->pos;

	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	    bkey_cmp(pos, POS_MAX))
		pos = bkey_successor(iter, pos);
	return pos;
}

static inline bool btree_path_pos_before_node(struct btree_path *path,
					      struct btree *b)
{
	return bpos_cmp(path->pos, b->data->min_key) < 0;
}

static inline bool btree_path_pos_after_node(struct btree_path *path,
					     struct btree *b)
{
	return bpos_cmp(b->key.k.p, path->pos) < 0;
}

static inline bool btree_path_pos_in_node(struct btree_path *path,
					  struct btree *b)
{
	return path->btree_id == b->c.btree_id &&
		!btree_path_pos_before_node(path, b) &&
		!btree_path_pos_after_node(path, b);
}

/* Btree node locking: */

void bch2_btree_node_unlock_write(struct btree_trans *trans,
			struct btree_path *path, struct btree *b)
{
	bch2_btree_node_unlock_write_inlined(trans, path, b);
}

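/*
 * Write lock slowpath: other paths in this transaction may hold read locks on
 * @b, so their count is subtracted from the lock's reader count before
 * blocking in six_lock_write(), and added back once the write lock is held.
 */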
void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *linked;
	unsigned readers = 0;

	trans_for_each_path(trans, linked)
		if (linked->l[b->c.level].b == b &&
		    btree_node_read_locked(linked, b->c.level))
			readers++;

	/*
	 * Must drop our read locks before calling six_lock_write() -
	 * six_unlock() won't do wakeups until the reader count
	 * goes to 0, and it's safe because we have the node intent
	 * locked:
	 */
	if (!b->c.lock.readers)
		atomic64_sub(__SIX_VAL(read_lock, readers),
			     &b->c.lock.state.counter);
	else
		this_cpu_sub(*b->c.lock.readers, readers);

	six_lock_write(&b->c.lock, NULL, NULL);

	if (!b->c.lock.readers)
		atomic64_add(__SIX_VAL(read_lock, readers),
			     &b->c.lock.state.counter);
	else
		this_cpu_add(*b->c.lock.readers, readers);
}

bool __bch2_btree_node_relock(struct btree_trans *trans,
			      struct btree_path *path, unsigned level)
{
	struct btree *b = btree_path_node(path, level);
	int want = __btree_lock_want(path, level);

	if (!is_btree_node(path, level))
		goto fail;

	if (race_fault())
		goto fail;

	if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
	    (btree_node_lock_seq_matches(path, b, level) &&
	     btree_node_lock_increment(trans, b, level, want))) {
		mark_btree_node_locked(path, level, want);
		return true;
	}
fail:
	trace_btree_node_relock_fail(trans->fn, _RET_IP_,
				     path->btree_id,
				     &path->pos,
				     (unsigned long) b,
				     path->l[level].lock_seq,
				     is_btree_node(path, level) ? b->c.lock.state.seq : 0);
	return false;
}

bool bch2_btree_node_upgrade(struct btree_trans *trans,
			     struct btree_path *path, unsigned level)
{
	struct btree *b = path->l[level].b;

	if (!is_btree_node(path, level))
		return false;

	switch (btree_lock_want(path, level)) {
	case BTREE_NODE_UNLOCKED:
		BUG_ON(btree_node_locked(path, level));
		return true;
	case BTREE_NODE_READ_LOCKED:
		BUG_ON(btree_node_intent_locked(path, level));
		return bch2_btree_node_relock(trans, path, level);
	case BTREE_NODE_INTENT_LOCKED:
		break;
	}

	if (btree_node_intent_locked(path, level))
		return true;

	if (race_fault())
		return false;

	if (btree_node_locked(path, level)
	    ? six_lock_tryupgrade(&b->c.lock)
	    : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
		goto success;

	if (btree_node_lock_seq_matches(path, b, level) &&
	    btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
		btree_node_unlock(path, level);
		goto success;
	}

	return false;
success:
	mark_btree_node_intent_locked(path, level);
	return true;
}

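/*
 * Relock (or upgrade, if @upgrade is set) every level from path->level up to
 * path->locks_want; on failure the path is fully unlocked and marked as
 * needing a traverse.
 */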
static inline bool btree_path_get_locks(struct btree_trans *trans,
					struct btree_path *path,
					bool upgrade)
{
	unsigned l = path->level;
	int fail_idx = -1;

	do {
		if (!btree_path_node(path, l))
			break;

		if (!(upgrade
		      ? bch2_btree_node_upgrade(trans, path, l)
		      : bch2_btree_node_relock(trans, path, l)))
			fail_idx = l;

		l++;
	} while (l < path->locks_want);

	/*
	 * When we fail to get a lock, we have to ensure that any child nodes
	 * can't be relocked so bch2_btree_path_traverse has to walk back up to
	 * the node that we failed to relock:
	 */
	if (fail_idx >= 0) {
		__bch2_btree_path_unlock(path);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);

		do {
			path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
			--fail_idx;
		} while (fail_idx >= 0);
	}

	if (path->uptodate == BTREE_ITER_NEED_RELOCK)
		path->uptodate = BTREE_ITER_UPTODATE;

	bch2_trans_verify_locks(trans);

	return path->uptodate < BTREE_ITER_NEED_RELOCK;
}

static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
				  bool cached)
{
	return !cached
		? container_of(_b, struct btree, c)->key.k.p
		: container_of(_b, struct bkey_cached, c)->key.pos;
}

/* Slowpath: */
bool __bch2_btree_node_lock(struct btree_trans *trans,
			    struct btree_path *path,
			    struct btree *b,
			    struct bpos pos, unsigned level,
			    enum six_lock_type type,
			    six_lock_should_sleep_fn should_sleep_fn, void *p,
			    unsigned long ip)
{
	struct btree_path *linked;
	unsigned reason;

	/* Check if it's safe to block: */
	trans_for_each_path(trans, linked) {
		if (!linked->nodes_locked)
			continue;

		/*
		 * Can't block taking an intent lock if we have _any_ nodes read
		 * locked:
		 *
		 * - Our read lock blocks another thread with an intent lock on
		 *   the same node from getting a write lock, and thus from
		 *   dropping its intent lock
		 *
		 * - And the other thread may have multiple nodes intent locked:
		 *   both the node we want to intent lock, and the node we
		 *   already have read locked - deadlock:
		 */
		if (type == SIX_LOCK_intent &&
		    linked->nodes_locked != linked->nodes_intent_locked) {
			reason = 1;
			goto deadlock;
		}

		if (linked->btree_id != path->btree_id) {
			if (linked->btree_id < path->btree_id)
				continue;

			reason = 3;
			goto deadlock;
		}

		/*
		 * Within the same btree, non-cached paths come before cached
		 * paths:
		 */
		if (linked->cached != path->cached) {
			if (!linked->cached)
				continue;

			reason = 4;
			goto deadlock;
		}

		/*
		 * Interior nodes must be locked before their descendants: if
		 * another path has possible descendants locked of the node
		 * we're about to lock, it must have the ancestors locked too:
		 */
		if (level > __fls(linked->nodes_locked)) {
			reason = 5;
			goto deadlock;
		}

		/* Must lock btree nodes in key order: */
		if (btree_node_locked(linked, level) &&
		    bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
						 linked->cached)) <= 0) {
			reason = 7;
			goto deadlock;
		}
	}

	return btree_node_lock_type(trans, path, b, pos, level,
				    type, should_sleep_fn, p);
deadlock:
	trace_trans_restart_would_deadlock(trans->fn, ip,
			trans->in_traverse_all, reason,
			linked->btree_id,
			linked->cached,
			&linked->pos,
			path->btree_id,
			path->cached,
			&pos);
	btree_trans_restart(trans);
	return false;
}

/* Btree iterator locking: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_locks(struct btree_path *path)
{
	unsigned l;

	if (!path->nodes_locked) {
		BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
		       btree_path_node(path, path->level));
		return;
	}

	for (l = 0; btree_path_node(path, l); l++)
		BUG_ON(btree_lock_want(path, l) !=
		       btree_node_locked_type(path, l));
}

void bch2_trans_verify_locks(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify_locks(path);
}
#else
static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
#endif

/* Btree path locking: */

/*
 * Only for btree_cache.c - only relocks intent locks
 */
bool bch2_btree_path_relock_intent(struct btree_trans *trans,
				   struct btree_path *path)
{
	unsigned l;

	for (l = path->level;
	     l < path->locks_want && btree_path_node(path, l);
	     l++) {
		if (!bch2_btree_node_relock(trans, path, l)) {
			__bch2_btree_path_unlock(path);
			btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
			trace_trans_restart_relock_path_intent(trans->fn, _RET_IP_,
						path->btree_id, &path->pos);
			btree_trans_restart(trans);
			return false;
		}
	}

	return true;
}

noinline __flatten
static bool __bch2_btree_path_relock(struct btree_trans *trans,
			struct btree_path *path, unsigned long trace_ip)
{
	bool ret = btree_path_get_locks(trans, path, false);

	if (!ret) {
		trace_trans_restart_relock_path(trans->fn, trace_ip,
						path->btree_id, &path->pos);
		btree_trans_restart(trans);
	}
	return ret;
}

static inline bool bch2_btree_path_relock(struct btree_trans *trans,
			struct btree_path *path, unsigned long trace_ip)
{
	return btree_node_locked(path, path->level)
		? true
		: __bch2_btree_path_relock(trans, path, trace_ip);
}

bool __bch2_btree_path_upgrade(struct btree_trans *trans,
			       struct btree_path *path,
			       unsigned new_locks_want)
{
	struct btree_path *linked;

	EBUG_ON(path->locks_want >= new_locks_want);

	path->locks_want = new_locks_want;

	if (btree_path_get_locks(trans, path, true))
		return true;

	/*
	 * XXX: this is ugly - we'd prefer to not be mucking with other
	 * iterators in the btree_trans here.
	 *
	 * On failure to upgrade the iterator, setting iter->locks_want and
	 * calling get_locks() is sufficient to make bch2_btree_path_traverse()
	 * get the locks we want on transaction restart.
	 *
	 * But if this iterator was a clone, on transaction restart what we did
	 * to this iterator isn't going to be preserved.
	 *
	 * Possibly we could add an iterator field for the parent iterator when
	 * an iterator is a copy - for now, we'll just upgrade any other
	 * iterators with the same btree id.
	 *
	 * The code below used to be needed to ensure ancestor nodes get locked
	 * before interior nodes - now that's handled by
	 * bch2_btree_path_traverse_all().
	 */
	if (!path->cached && !trans->in_traverse_all)
		trans_for_each_path(trans, linked)
			if (linked != path &&
			    linked->cached == path->cached &&
			    linked->btree_id == path->btree_id &&
			    linked->locks_want < new_locks_want) {
				linked->locks_want = new_locks_want;
				btree_path_get_locks(trans, linked, true);
			}

	return false;
}

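/*
 * Drop locks at levels we no longer want locked (>= new_locks_want); the lock
 * at path->level itself is downgraded from intent to read rather than dropped.
 */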
void __bch2_btree_path_downgrade(struct btree_path *path,
				 unsigned new_locks_want)
{
	unsigned l;

	EBUG_ON(path->locks_want < new_locks_want);

	path->locks_want = new_locks_want;

	while (path->nodes_locked &&
	       (l = __fls(path->nodes_locked)) >= path->locks_want) {
		if (l > path->level) {
			btree_node_unlock(path, l);
		} else {
			if (btree_node_intent_locked(path, l)) {
				six_lock_downgrade(&path->l[l].b->c.lock);
				path->nodes_intent_locked ^= 1 << l;
			}
			break;
		}
	}

	bch2_btree_path_verify_locks(path);
}

void bch2_trans_downgrade(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_downgrade(path);
}

/* Btree transaction locking: */

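/*
 * Attempt to relock every path that was marked should_be_locked; returns false
 * (and restarts the transaction) if any of them can't be relocked.
 */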
bool bch2_trans_relock(struct btree_trans *trans)
{
	struct btree_path *path;

	if (unlikely(trans->restarted))
		return false;

	trans_for_each_path(trans, path)
		if (path->should_be_locked &&
		    !bch2_btree_path_relock(trans, path, _RET_IP_)) {
			trace_trans_restart_relock(trans->fn, _RET_IP_,
					path->btree_id, &path->pos);
			BUG_ON(!trans->restarted);
			return false;
		}
	return true;
}

void bch2_trans_unlock(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		__bch2_btree_path_unlock(path);
}

/* Btree iterator: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_cached(struct btree_trans *trans,
					  struct btree_path *path)
{
	struct bkey_cached *ck;
	bool locked = btree_node_locked(path, 0);

	if (!bch2_btree_node_relock(trans, path, 0))
		return;

	ck = (void *) path->l[0].b;
	BUG_ON(ck->key.btree_id != path->btree_id ||
	       bkey_cmp(ck->key.pos, path->pos));

	if (!locked)
		btree_node_unlock(path, 0);
}

static void bch2_btree_path_verify_level(struct btree_trans *trans,
				struct btree_path *path, unsigned level)
{
	struct btree_path_level *l;
	struct btree_node_iter tmp;
	bool locked;
	struct bkey_packed *p, *k;
	struct printbuf buf1 = PRINTBUF;
	struct printbuf buf2 = PRINTBUF;
	struct printbuf buf3 = PRINTBUF;
	const char *msg;

	if (!bch2_debug_check_iterators)
		return;

	l	= &path->l[level];
	tmp	= l->iter;
	locked	= btree_node_locked(path, level);

	if (path->cached) {
		if (!level)
			bch2_btree_path_verify_cached(trans, path);
		return;
	}

	if (!btree_path_node(path, level))
		return;

	if (!bch2_btree_node_relock(trans, path, level))
		return;

	BUG_ON(!btree_path_pos_in_node(path, l->b));

	bch2_btree_node_iter_verify(&l->iter, l->b);

	/*
	 * For interior nodes, the iterator will have skipped past deleted keys:
	 */
	p = level
		? bch2_btree_node_iter_prev(&tmp, l->b)
		: bch2_btree_node_iter_prev_all(&tmp, l->b);
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);

	if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
		msg = "before";
		goto err;
	}

	if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		msg = "after";
		goto err;
	}

	if (!locked)
		btree_node_unlock(path, level);
	return;
err:
	bch2_bpos_to_text(&buf1, path->pos);

	if (p) {
		struct bkey uk = bkey_unpack_key(l->b, p);
		bch2_bkey_to_text(&buf2, &uk);
	} else {
		pr_buf(&buf2, "(none)");
	}

	if (k) {
		struct bkey uk = bkey_unpack_key(l->b, k);
		bch2_bkey_to_text(&buf3, &uk);
	} else {
		pr_buf(&buf3, "(none)");
	}

	panic("path should be %s key at level %u:\n"
	      "path pos %s\n"
	      "prev key %s\n"
	      "cur key %s\n",
	      msg, level, buf1.buf, buf2.buf, buf3.buf);
}

static void bch2_btree_path_verify(struct btree_trans *trans,
				   struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	unsigned i;

	EBUG_ON(path->btree_id >= BTREE_ID_NR);

	for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
		if (!path->l[i].b) {
			BUG_ON(!path->cached &&
			       c->btree_roots[path->btree_id].b->c.level > i);
			break;
		}

		bch2_btree_path_verify_level(trans, path, i);
	}

	bch2_btree_path_verify_locks(path);
}

void bch2_trans_verify_paths(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify(trans, path);
}

static void bch2_btree_iter_verify(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;

	BUG_ON(iter->btree_id >= BTREE_ID_NR);

	BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached);

	BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));

	BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       !btree_type_has_snapshots(iter->btree_id));

	if (iter->update_path)
		bch2_btree_path_verify(trans, iter->update_path);
	bch2_btree_path_verify(trans, iter->path);
}

static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
	BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
	       !iter->pos.snapshot);

	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       iter->pos.snapshot != iter->snapshot);

	BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
	       bkey_cmp(iter->pos, iter->k.p) > 0);
}

static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
{
	struct btree_trans *trans = iter->trans;
	struct btree_iter copy;
	struct bkey_s_c prev;
	int ret = 0;

	if (!bch2_debug_check_iterators)
		return 0;

	if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
		return 0;

	if (bkey_err(k) || !k.k)
		return 0;

	BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
					  iter->snapshot,
					  k.k->p.snapshot));

	bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
			     BTREE_ITER_NOPRESERVE|
			     BTREE_ITER_ALL_SNAPSHOTS);
	prev = bch2_btree_iter_prev(&copy);
	if (!prev.k)
		goto out;

	ret = bkey_err(prev);
	if (ret)
		goto out;

	if (!bkey_cmp(prev.k->p, k.k->p) &&
	    bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
				      prev.k->p.snapshot) > 0) {
		struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;

		bch2_bkey_to_text(&buf1, k.k);
		bch2_bkey_to_text(&buf2, prev.k);

		panic("iter snap %u\n"
		      "k    %s\n"
		      "prev %s\n",
		      iter->snapshot,
		      buf1.buf, buf2.buf);
	}
out:
	bch2_trans_iter_exit(trans, &copy);
	return ret;
}

void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
			    struct bpos pos, bool key_cache)
{
	struct btree_path *path;
	unsigned idx;
	struct printbuf buf = PRINTBUF;

	trans_for_each_path_inorder(trans, path, idx) {
		int cmp = cmp_int(path->btree_id, id) ?:
			cmp_int(path->cached, key_cache);

		if (cmp > 0)
			break;
		if (cmp < 0)
			continue;

		if (!(path->nodes_locked & 1) ||
		    !path->should_be_locked)
			continue;

		if (!key_cache) {
			if (bkey_cmp(pos, path->l[0].b->data->min_key) >= 0 &&
			    bkey_cmp(pos, path->l[0].b->key.k.p) <= 0)
				return;
		} else {
			if (!bkey_cmp(pos, path->pos))
				return;
		}
	}

	bch2_dump_trans_paths_updates(trans);
	bch2_bpos_to_text(&buf, pos);

	panic("not locked: %s %s%s\n",
	      bch2_btree_ids[id], buf.buf,
	      key_cache ? " cached" : "");
}

#else

static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
						struct btree_path *path, unsigned l) {}
static inline void bch2_btree_path_verify(struct btree_trans *trans,
					  struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }

#endif

/* Btree path: fixups after btree updates */

static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
					struct btree *b,
					struct bset_tree *t,
					struct bkey_packed *k)
{
	struct btree_node_iter_set *set;

	btree_node_iter_for_each(iter, set)
		if (set->end == t->end_offset) {
			set->k = __btree_node_key_to_offset(b, k);
			bch2_btree_node_iter_sort(iter, b);
			return;
		}

	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
}

static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
					       struct btree *b,
					       struct bkey_packed *where)
{
	struct btree_path_level *l = &path->l[b->c.level];

	if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
		return;

	if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
		bch2_btree_node_iter_advance(&l->iter, l->b);
}

void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
				      struct btree *b,
				      struct bkey_packed *where)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path) {
		__bch2_btree_path_fix_key_modified(path, b, where);
		bch2_btree_path_verify_level(trans, path, b->c.level);
	}
}

static void __bch2_btree_node_iter_fix(struct btree_path *path,
				       struct btree *b,
				       struct btree_node_iter *node_iter,
				       struct bset_tree *t,
				       struct bkey_packed *where,
				       unsigned clobber_u64s,
				       unsigned new_u64s)
{
	const struct bkey_packed *end = btree_bkey_last(b, t);
	struct btree_node_iter_set *set;
	unsigned offset = __btree_node_key_to_offset(b, where);
	int shift = new_u64s - clobber_u64s;
	unsigned old_end = t->end_offset - shift;
	unsigned orig_iter_pos = node_iter->data[0].k;
	bool iter_current_key_modified =
		orig_iter_pos >= offset &&
		orig_iter_pos <= offset + clobber_u64s;

	btree_node_iter_for_each(node_iter, set)
		if (set->end == old_end)
			goto found;

	/* didn't find the bset in the iterator - might have to readd it: */
	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		bch2_btree_node_iter_push(node_iter, b, where, end);
		goto fixup_done;
	} else {
		/* Iterator is after key that changed */
		return;
	}
found:
	set->end = t->end_offset;

	/* Iterator hasn't gotten to the key that changed yet: */
	if (set->k < offset)
		return;

	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		set->k = offset;
	} else if (set->k < offset + clobber_u64s) {
		set->k = offset + new_u64s;
		if (set->k == set->end)
			bch2_btree_node_iter_set_drop(node_iter, set);
	} else {
		/* Iterator is after key that changed */
		set->k = (int) set->k + shift;
		return;
	}

	bch2_btree_node_iter_sort(node_iter, b);
fixup_done:
	if (node_iter->data[0].k != orig_iter_pos)
		iter_current_key_modified = true;

	/*
	 * When a new key is added, and the node iterator now points to that
	 * key, the iterator might have skipped past deleted keys that should
	 * come after the key the iterator now points to. We have to rewind to
	 * before those deleted keys - otherwise
	 * bch2_btree_node_iter_prev_all() breaks:
	 */
	if (!bch2_btree_node_iter_end(node_iter) &&
	    iter_current_key_modified &&
	    b->c.level) {
		struct bset_tree *t;
		struct bkey_packed *k, *k2, *p;

		k = bch2_btree_node_iter_peek_all(node_iter, b);

		for_each_bset(b, t) {
			bool set_pos = false;

			if (node_iter->data[0].end == t->end_offset)
				continue;

			k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);

			while ((p = bch2_bkey_prev_all(b, t, k2)) &&
			       bkey_iter_cmp(b, k, p) < 0) {
				k2 = p;
				set_pos = true;
			}

			if (set_pos)
				btree_node_iter_set_set_pos(node_iter,
							    b, t, k2);
		}
	}
}

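/*
 * Called when a key has been inserted into or deleted from btree node @b: fix
 * up @node_iter and the node iterators of every linked path pointing at @b so
 * they remain valid.
 */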
void bch2_btree_node_iter_fix(struct btree_trans *trans,
			      struct btree_path *path,
			      struct btree *b,
			      struct btree_node_iter *node_iter,
			      struct bkey_packed *where,
			      unsigned clobber_u64s,
			      unsigned new_u64s)
{
	struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
	struct btree_path *linked;

	if (node_iter != &path->l[b->c.level].iter) {
		__bch2_btree_node_iter_fix(path, b, node_iter, t,
					   where, clobber_u64s, new_u64s);

		if (bch2_debug_check_iterators)
			bch2_btree_node_iter_verify(node_iter, b);
	}

	trans_for_each_path_with_node(trans, b, linked) {
		__bch2_btree_node_iter_fix(linked, b,
					   &linked->l[b->c.level].iter, t,
					   where, clobber_u64s, new_u64s);
		bch2_btree_path_verify_level(trans, linked, b->c.level);
	}
}

/* Btree path level: pointer to a particular btree node and node iter */

static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
						  struct btree_path_level *l,
						  struct bkey *u,
						  struct bkey_packed *k)
{
	if (unlikely(!k)) {
		/*
		 * signal to bch2_btree_iter_peek_slot() that we're currently at
		 * a hole
		 */
		u->type = KEY_TYPE_deleted;
		return bkey_s_c_null;
	}

	return bkey_disassemble(l->b, k, u);
}

static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
							struct btree_path_level *l,
							struct bkey *u)
{
	return __btree_iter_unpack(c, l, u,
			bch2_btree_node_iter_peek_all(&l->iter, l->b));
}

static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_peek(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->key.k.p;
	trans->paths_sorted = false;
	return k;
}

static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_prev(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->data->min_key;
	trans->paths_sorted = false;
	return k;
}

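/*
 * Advance the node iterator past keys that compare before path->pos; gives up
 * and returns false if that would take more than @max_advance steps, in which
 * case the caller reinitializes the node iterator instead.
 */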
static inline bool btree_path_advance_to_pos(struct btree_path *path,
					     struct btree_path_level *l,
					     int max_advance)
{
	struct bkey_packed *k;
	int nr_advanced = 0;

	while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
	       bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		if (max_advance > 0 && nr_advanced >= max_advance)
			return false;

		bch2_btree_node_iter_advance(&l->iter, l->b);
		nr_advanced++;
	}

	return true;
}

/*
 * Verify that iterator for parent node points to child node:
 */
static void btree_path_verify_new_node(struct btree_trans *trans,
				       struct btree_path *path, struct btree *b)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l;
	unsigned plevel;
	bool parent_locked;
	struct bkey_packed *k;

	if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
		return;

	if (trans->journal_replay_not_finished)
		return;

	plevel = b->c.level + 1;
	if (!btree_path_node(path, plevel))
		return;

	parent_locked = btree_node_locked(path, plevel);

	if (!bch2_btree_node_relock(trans, path, plevel))
		return;

	l = &path->l[plevel];
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
	if (!k ||
	    bkey_deleted(k) ||
	    bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
		struct printbuf buf1 = PRINTBUF;
		struct printbuf buf2 = PRINTBUF;
		struct printbuf buf3 = PRINTBUF;
		struct printbuf buf4 = PRINTBUF;
		struct bkey uk = bkey_unpack_key(b, k);

		bch2_dump_btree_node(c, l->b);
		bch2_bpos_to_text(&buf1, path->pos);
		bch2_bkey_to_text(&buf2, &uk);
		bch2_bpos_to_text(&buf3, b->data->min_key);
		bch2_bpos_to_text(&buf4, b->data->max_key);
		panic("parent iter doesn't point to new node:\n"
		      "iter pos %s %s\n"
		      "iter key %s\n"
		      "new node %s-%s\n",
		      bch2_btree_ids[path->btree_id],
		      buf1.buf, buf2.buf, buf3.buf, buf4.buf);
	}

	if (!parent_locked)
		btree_node_unlock(path, plevel);
}

static inline void __btree_path_level_init(struct btree_path *path,
					   unsigned level)
{
	struct btree_path_level *l = &path->l[level];

	bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

	/*
	 * Iterators to interior nodes should always be pointed at the first non
	 * whiteout:
	 */
	if (level)
		bch2_btree_node_iter_peek(&l->iter, l->b);
}

static inline void btree_path_level_init(struct btree_trans *trans,
					 struct btree_path *path,
					 struct btree *b)
{
	BUG_ON(path->cached);

	btree_path_verify_new_node(trans, path, b);

	EBUG_ON(!btree_path_pos_in_node(path, b));
	EBUG_ON(b->c.lock.state.seq & 1);

	path->l[b->c.level].lock_seq = b->c.lock.state.seq;
	path->l[b->c.level].b = b;
	__btree_path_level_init(path, b->c.level);
}

/* Btree path: fixups after btree node updates: */

/*
 * A btree node is being replaced - update the iterator to point to the new
 * node:
 */
void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		if (!path->cached &&
		    btree_path_pos_in_node(path, b)) {
			enum btree_node_locked_type t =
				btree_lock_want(path, b->c.level);

			if (path->nodes_locked &&
			    t != BTREE_NODE_UNLOCKED) {
				btree_node_unlock(path, b->c.level);
				six_lock_increment(&b->c.lock, (enum six_lock_type) t);
				mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t);
			}

			btree_path_level_init(trans, path, b);
		}
}

/*
 * A btree node has been modified in such a way as to invalidate iterators - fix
 * them:
 */
void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path)
		__btree_path_level_init(path, b->c.level);
}

/* Btree path: traverse, set_pos: */

static int lock_root_check_fn(struct six_lock *lock, void *p)
{
	struct btree *b = container_of(lock, struct btree, c.lock);
	struct btree **rootp = p;

	return b == *rootp ? 0 : -1;
}

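/*
 * Lock the btree root, dealing with the root being freed and reallocated while
 * we wait for the lock: lock_root_check_fn() rechecks that the node we locked
 * is still the root, and we retry if it isn't.
 */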
static inline int btree_path_lock_root(struct btree_trans *trans,
				       struct btree_path *path,
				       unsigned depth_want,
				       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree *b, **rootp = &c->btree_roots[path->btree_id].b;
	enum six_lock_type lock_type;
	unsigned i;

	EBUG_ON(path->nodes_locked);

	while (1) {
		b = READ_ONCE(*rootp);
		path->level = READ_ONCE(b->c.level);

		if (unlikely(path->level < depth_want)) {
			/*
			 * the root is at a lower depth than the depth we want:
			 * got to the end of the btree, or we're walking nodes
			 * greater than some depth and there are no nodes >=
			 * that depth
			 */
			path->level = depth_want;
			for (i = path->level; i < BTREE_MAX_DEPTH; i++)
				path->l[i].b = NULL;
			return 1;
		}

		lock_type = __btree_lock_want(path, path->level);
		if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX,
					      path->level, lock_type,
					      lock_root_check_fn, rootp,
					      trace_ip))) {
			if (trans->restarted)
				return -EINTR;
			continue;
		}

		if (likely(b == READ_ONCE(*rootp) &&
			   b->c.level == path->level &&
			   !race_fault())) {
			for (i = 0; i < path->level; i++)
				path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
			path->l[path->level].b = b;
			for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
				path->l[i].b = NULL;

			mark_btree_node_locked(path, path->level, lock_type);
			btree_path_level_init(trans, path, b);
			return 0;
		}

		six_unlock_type(&b->c.lock, lock_type);
	}
}

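/*
 * Prefetch the next few child nodes after the one we're about to descend into,
 * so that sequential traversal doesn't stall on every btree node read.
 */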
noinline
static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree_node_iter node_iter = l->iter;
	struct bkey_packed *k;
	struct bkey_buf tmp;
	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
		? (path->level > 1 ? 0 :  2)
		: (path->level > 1 ? 1 : 16);
	bool was_locked = btree_node_locked(path, path->level);
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	while (nr-- && !ret) {
		if (!bch2_btree_node_relock(trans, path, path->level))
			break;

		bch2_btree_node_iter_advance(&node_iter, l->b);
		k = bch2_btree_node_iter_peek(&node_iter, l->b);
		if (!k)
			break;

		bch2_bkey_buf_unpack(&tmp, c, l->b, k);
		ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
					       path->level - 1);
	}

	if (!was_locked)
		btree_node_unlock(path, path->level);

	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
				 struct btree_and_journal_iter *jiter)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	struct bkey_buf tmp;
	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
		? (path->level > 1 ? 0 :  2)
		: (path->level > 1 ? 1 : 16);
	bool was_locked = btree_node_locked(path, path->level);
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	while (nr-- && !ret) {
		if (!bch2_btree_node_relock(trans, path, path->level))
			break;

		bch2_btree_and_journal_iter_advance(jiter);
		k = bch2_btree_and_journal_iter_peek(jiter);
		if (!k.k)
			break;

		bch2_bkey_buf_reassemble(&tmp, c, k);
		ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
					       path->level - 1);
	}

	if (!was_locked)
		btree_node_unlock(path, path->level);

	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
					    struct btree_path *path,
					    unsigned plevel, struct btree *b)
{
	struct btree_path_level *l = &path->l[plevel];
	bool locked = btree_node_locked(path, plevel);
	struct bkey_packed *k;
	struct bch_btree_ptr_v2 *bp;

	if (!bch2_btree_node_relock(trans, path, plevel))
		return;

	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
	BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);

	bp = (void *) bkeyp_val(&l->b->format, k);
	bp->mem_ptr = (unsigned long)b;

	if (!locked)
		btree_node_unlock(path, plevel);
}

static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
						     struct btree_path *path,
						     unsigned flags,
						     struct bkey_buf *out)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree_and_journal_iter jiter;
	struct bkey_s_c k;
	int ret = 0;

	__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);

	k = bch2_btree_and_journal_iter_peek(&jiter);

	bch2_bkey_buf_reassemble(out, c, k);

	if (flags & BTREE_ITER_PREFETCH)
		ret = btree_path_prefetch_j(trans, path, &jiter);

	bch2_btree_and_journal_iter_exit(&jiter);
	return ret;
}

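/*
 * Descend one level: read the child pointer at the current node iterator
 * position (from the in-memory node, or merged with journal keys if journal
 * replay hasn't finished yet), then get and lock the child node.
 */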
static __always_inline int btree_path_down(struct btree_trans *trans,
					   struct btree_path *path,
					   unsigned flags,
					   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree *b;
	unsigned level = path->level - 1;
	enum six_lock_type lock_type = __btree_lock_want(path, level);
	struct bkey_buf tmp;
	int ret;

	EBUG_ON(!btree_node_locked(path, path->level));

	bch2_bkey_buf_init(&tmp);

	if (unlikely(trans->journal_replay_not_finished)) {
		ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
		if (ret)
			goto err;
	} else {
		bch2_bkey_buf_unpack(&tmp, c, l->b,
				     bch2_btree_node_iter_peek(&l->iter, l->b));

		if (flags & BTREE_ITER_PREFETCH) {
			ret = btree_path_prefetch(trans, path);
			if (ret)
				goto err;
		}
	}

	b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
	ret = PTR_ERR_OR_ZERO(b);
	if (unlikely(ret))
		goto err;

	mark_btree_node_locked(path, level, lock_type);
	btree_path_level_init(trans, path, b);

	if (likely(!trans->journal_replay_not_finished &&
		   tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
	    unlikely(b != btree_node_mem_ptr(tmp.k)))
		btree_node_mem_ptr_set(trans, path, level + 1, b);

	if (btree_node_read_locked(path, level + 1))
		btree_node_unlock(path, level + 1);
	path->level = level;

	bch2_btree_path_verify_locks(path);
err:
	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
				   unsigned, unsigned long);

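/*
 * Traverse every path in the transaction, in sorted order, after dropping all
 * locks: this is how we recover from lock ordering violations (-EINTR
 * restarts) and from failing to allocate a btree node in memory.
 */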
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_path *path, *prev;
	unsigned long trace_ip = _RET_IP_;
	int i, ret = 0;

	if (trans->in_traverse_all)
		return -EINTR;

	trans->in_traverse_all = true;
retry_all:
	prev = NULL;
	trans->restarted = false;

	trans_for_each_path(trans, path)
		path->should_be_locked = false;

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder_reverse(trans, path, i) {
		if (prev) {
			if (path->btree_id == prev->btree_id &&
			    path->locks_want < prev->locks_want)
				__bch2_btree_path_upgrade(trans, path, prev->locks_want);
			else if (!path->locks_want && prev->locks_want)
				__bch2_btree_path_upgrade(trans, path, 1);
		}

		prev = path;
	}

	bch2_trans_unlock(trans);
	cond_resched();

	if (unlikely(trans->memory_allocation_failure)) {
		struct closure cl;

		closure_init_stack(&cl);

		do {
			ret = bch2_btree_cache_cannibalize_lock(c, &cl);
			closure_sync(&cl);
		} while (ret);
	}

	/* Now, redo traversals in correct order: */
	i = 0;
	while (i < trans->nr_sorted) {
		path = trans->paths + trans->sorted[i];

		/*
		 * Traversing a path can cause another path to be added at about
		 * the same position:
		 */
		if (path->uptodate) {
			ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
			if (ret == -EINTR || ret == -ENOMEM)
				goto retry_all;
			if (ret)
				goto err;
		} else {
			i++;
		}
	}

	/*
	 * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock()
	 * and relock(), relock() won't relock since path->should_be_locked
	 * isn't set yet, which is all fine
	 */
	trans_for_each_path(trans, path)
		BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
err:
	bch2_btree_cache_cannibalize_unlock(c);

	trans->in_traverse_all = false;

	trace_trans_traverse_all(trans->fn, trace_ip);
	return ret;
}

static inline bool btree_path_good_node(struct btree_trans *trans,
					struct btree_path *path,
					unsigned l, int check_pos)
{
	if (!is_btree_node(path, l) ||
	    !bch2_btree_node_relock(trans, path, l))
		return false;

	if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
		return false;
	if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
		return false;
	return true;
}

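/*
 * Walk up the path, dropping levels whose node can't be relocked or no longer
 * covers the position we're searching for; returns the first level that's
 * still good (and also takes any intent locks needed above it).
 */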
static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
						     struct btree_path *path,
						     int check_pos)
{
	unsigned i, l = path->level;

	while (btree_path_node(path, l) &&
	       !btree_path_good_node(trans, path, l, check_pos)) {
		btree_node_unlock(path, l);
		path->l[l].b = BTREE_ITER_NO_NODE_UP;
		l++;
	}

	/* If we need intent locks, take them too: */
	for (i = l + 1;
	     i < path->locks_want && btree_path_node(path, i);
	     i++)
		if (!bch2_btree_node_relock(trans, path, i))
			while (l <= i) {
				btree_node_unlock(path, l);
				path->l[l].b = BTREE_ITER_NO_NODE_UP;
				l++;
			}

	return l;
}

/*
 * This is the main state machine for walking down the btree - walks down to a
 * specified depth
 *
 * Returns 0 on success, -EIO on error (error reading in a btree node).
 *
 * On error, caller (peek_node()/peek_key()) must return NULL; the error is
 * stashed in the iterator and returned from bch2_trans_exit().
 */
static int btree_path_traverse_one(struct btree_trans *trans,
				   struct btree_path *path,
				   unsigned flags,
				   unsigned long trace_ip)
{
	unsigned depth_want = path->level;
	int ret = 0;

	if (unlikely(trans->restarted)) {
		ret = -EINTR;
		goto out;
	}

	/*
	 * Ensure we obey path->should_be_locked: if it's set, we can't unlock
	 * and re-traverse the path without a transaction restart:
	 */
	if (path->should_be_locked) {
		ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR;
		goto out;
	}

	if (path->cached) {
		ret = bch2_btree_path_traverse_cached(trans, path, flags);
		goto out;
	}

	if (unlikely(path->level >= BTREE_MAX_DEPTH))
		goto out;

	path->level = btree_path_up_until_good_node(trans, path, 0);

	/*
	 * Note: path->nodes[path->level] may be temporarily NULL here - that
	 * would indicate to other code that we got to the end of the btree,
	 * here it indicates that relocking the root failed - it's critical that
	 * btree_path_lock_root() comes next and that it can't fail
	 */
	while (path->level > depth_want) {
		ret = btree_path_node(path, path->level)
			? btree_path_down(trans, path, flags, trace_ip)
			: btree_path_lock_root(trans, path, depth_want, trace_ip);
		if (unlikely(ret)) {
			if (ret == 1) {
				/*
				 * No nodes at this level - got to the end of
				 * the btree:
				 */
				ret = 0;
				goto out;
			}

			__bch2_btree_path_unlock(path);
			path->level = depth_want;

			if (ret == -EIO)
				path->l[path->level].b =
					BTREE_ITER_NO_NODE_ERROR;
			else
				path->l[path->level].b =
					BTREE_ITER_NO_NODE_DOWN;
			goto out;
		}
	}

	path->uptodate = BTREE_ITER_UPTODATE;
out:
	BUG_ON((ret == -EINTR) != !!trans->restarted);
	bch2_btree_path_verify(trans, path);
	return ret;
}

int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
					  struct btree_path *path, unsigned flags)
{
	if (path->uptodate < BTREE_ITER_NEED_RELOCK)
		return 0;

	return  bch2_trans_cond_resched(trans) ?:
		btree_path_traverse_one(trans, path, flags, _RET_IP_);
}

static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
			    struct btree_path *src)
{
	unsigned i, offset = offsetof(struct btree_path, pos);

	memcpy((void *) dst + offset,
	       (void *) src + offset,
	       sizeof(struct btree_path) - offset);

	for (i = 0; i < BTREE_MAX_DEPTH; i++)
		if (btree_node_locked(dst, i))
			six_lock_increment(&dst->l[i].b->c.lock,
					   __btree_lock_want(dst, i));

	trans->paths_sorted = false;
}

static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
					   bool intent)
{
	struct btree_path *new = btree_path_alloc(trans, src);

	btree_path_copy(trans, new, src);
	__btree_path_get(new, intent);
	return new;
}

struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
			 struct btree_path *path, bool intent)
{
	__btree_path_put(path, intent);
	path = btree_path_clone(trans, path, intent);
	path->preserve = false;
#ifdef CONFIG_BCACHEFS_DEBUG
	path->ip_allocated = _RET_IP_;
#endif
	path->should_be_locked = false;
	return path;
}

struct btree_path * __must_check
__bch2_btree_path_set_pos(struct btree_trans *trans,
		   struct btree_path *path, struct bpos new_pos,
		   bool intent, int cmp)
{
	unsigned l = path->level;

	EBUG_ON(trans->restarted);
	EBUG_ON(!path->ref);

	path = bch2_btree_path_make_mut(trans, path, intent);

	path->pos		= new_pos;
	trans->paths_sorted	= false;

	if (unlikely(path->cached)) {
		btree_node_unlock(path, 0);
		path->l[0].b = BTREE_ITER_NO_NODE_CACHED;
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		goto out;
	}

	l = btree_path_up_until_good_node(trans, path, cmp);

	if (btree_path_node(path, l)) {
		BUG_ON(!btree_node_locked(path, l));
		/*
		 * We might have to skip over many keys, or just a few: try
		 * advancing the node iterator, and if we have to skip over too
		 * many keys just reinit it (or if we're rewinding, since that
		 * is expensive).
		 */
		if (cmp < 0 ||
		    !btree_path_advance_to_pos(path, &path->l[l], 8))
			__btree_path_level_init(path, l);
	}

	if (l != path->level) {
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		__bch2_btree_path_unlock(path);
	}
out:
	bch2_btree_path_verify(trans, path);
	return path;
}

/* Btree path: main interface: */

static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
{
	struct btree_path *next;

	next = prev_btree_path(trans, path);
	if (next && !btree_path_cmp(next, path))
		return next;

	next = next_btree_path(trans, path);
	if (next && !btree_path_cmp(next, path))
		return next;

	return NULL;
}

static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
{
	struct btree_path *next;

	next = prev_btree_path(trans, path);
	if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
		return next;

	next = next_btree_path(trans, path);
	if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
		return next;

	return NULL;
}

static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
{
	__bch2_btree_path_unlock(path);
	btree_path_list_remove(trans, path);
	trans->paths_allocated &= ~(1ULL << path->idx);
}

1763void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
1764{
1765 struct btree_path *dup;
1766
1767 EBUG_ON(trans->paths + path->idx != path);
1768 EBUG_ON(!path->ref);
1769
1770 if (!__btree_path_put(path, intent))
1771 return;
1772
1773 /*
1774 * Perhaps instead we should check for duplicate paths in traverse_all:
1775 */
1776 if (path->preserve &&
1777 (dup = have_path_at_pos(trans, path))) {
1778 dup->preserve = true;
1779 path->preserve = false;
9a74f63c 1780 goto free;
67e0dd8f
KO
1781 }
1782
1783 if (!path->preserve &&
9a74f63c
KO
1784 (dup = have_node_at_pos(trans, path)))
1785 goto free;
1786 return;
1787free:
1788 if (path->should_be_locked &&
1789 !btree_node_locked(dup, path->level))
1790 return;
1791
1792 dup->should_be_locked |= path->should_be_locked;
1793 __bch2_path_free(trans, path);
67e0dd8f
KO
1794}
1795
8570d775 1796void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
67e0dd8f 1797{
67e0dd8f 1798 struct btree_insert_entry *i;
2158fe46 1799
8570d775
KO
1800 pr_buf(buf, "transaction updates for %s journal seq %llu",
1801 trans->fn, trans->journal_res.seq);
1802 pr_newline(buf);
1803 pr_indent_push(buf, 2);
2158fe46
KO
1804
1805 trans_for_each_update(trans, i) {
1806 struct bkey_s_c old = { &i->old_k, i->old_v };
1807
8570d775 1808 pr_buf(buf, "update: btree=%s cached=%u %pS",
2158fe46 1809 bch2_btree_ids[i->btree_id],
8570d775
KO
1810 i->cached,
1811 (void *) i->ip_allocated);
1812 pr_newline(buf);
1813
1814 pr_buf(buf, " old ");
1815 bch2_bkey_val_to_text(buf, trans->c, old);
1816 pr_newline(buf);
1817
1818 pr_buf(buf, " new ");
1819 bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
1820 pr_newline(buf);
2158fe46
KO
1821 }
1822
8570d775
KO
1823 pr_indent_pop(buf, 2);
1824}
1825
1826noinline __cold
1827void bch2_dump_trans_updates(struct btree_trans *trans)
1828{
1829 struct printbuf buf = PRINTBUF;
1830
1831 bch2_trans_updates_to_text(&buf, trans);
1832 bch_err(trans->c, "%s", buf.buf);
1833 printbuf_exit(&buf);
2158fe46
KO
1834}
1835
1836noinline __cold
1837void bch2_dump_trans_paths_updates(struct btree_trans *trans)
1838{
1839 struct btree_path *path;
1840 struct printbuf buf = PRINTBUF;
67e0dd8f 1841 unsigned idx;
67e0dd8f
KO
1842
1843 btree_trans_sort_paths(trans);
1844
fa8e94fa 1845 trans_for_each_path_inorder(trans, path, idx) {
2158fe46 1846 printbuf_reset(&buf);
fa8e94fa 1847
2158fe46 1848 bch2_bpos_to_text(&buf, path->pos);
fa8e94fa 1849
a0a07c59 1850 printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n",
67e0dd8f 1851 path->idx, path->ref, path->intent_ref,
32b26e8c
KO
1852 path->should_be_locked ? " S" : "",
1853 path->preserve ? " P" : "",
67e0dd8f 1854 bch2_btree_ids[path->btree_id],
a0a07c59 1855 path->level,
2158fe46 1856 buf.buf,
32b26e8c 1857 path->nodes_locked,
67e0dd8f
KO
1858#ifdef CONFIG_BCACHEFS_DEBUG
1859 (void *) path->ip_allocated
1860#else
1861 NULL
1862#endif
1863 );
fa8e94fa 1864 }
67e0dd8f 1865
2158fe46 1866 printbuf_exit(&buf);
60816d9b 1867
2158fe46 1868 bch2_dump_trans_updates(trans);
67e0dd8f
KO
1869}
1870
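/*
 * Allocate a path from the transaction's fixed-size array: pick a free slot
 * from the paths_allocated bitmap (dumping paths and panicking on overflow),
 * zero its reference counts and lock state, and link it into the sorted path
 * list after @pos.
 */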
1871static struct btree_path *btree_path_alloc(struct btree_trans *trans,
1872 struct btree_path *pos)
1873{
1874 struct btree_path *path;
1875 unsigned idx;
1876
1877 if (unlikely(trans->paths_allocated ==
1878 ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
1879 bch2_dump_trans_paths_updates(trans);
1880 panic("trans path overflow\n");
1881 }
1882
1883 idx = __ffs64(~trans->paths_allocated);
1884 trans->paths_allocated |= 1ULL << idx;
1885
1886 path = &trans->paths[idx];
1887
1888 path->idx = idx;
1889 path->ref = 0;
1890 path->intent_ref = 0;
1891 path->nodes_locked = 0;
1892 path->nodes_intent_locked = 0;
1893
1894 btree_path_list_add(trans, pos, path);
1895 return path;
1896}
1897
f3e1f444 1898struct btree_path *bch2_path_get(struct btree_trans *trans,
67e0dd8f
KO
1899 enum btree_id btree_id, struct bpos pos,
1900 unsigned locks_want, unsigned level,
f3e1f444 1901 unsigned flags)
67e0dd8f 1902{
807dda8c 1903 struct btree_path *path, *path_pos = NULL;
f3e1f444
KO
1904 bool cached = flags & BTREE_ITER_CACHED;
1905 bool intent = flags & BTREE_ITER_INTENT;
67e0dd8f
KO
1906 int i;
1907
1908 BUG_ON(trans->restarted);
7071878b
KO
1909 bch2_trans_verify_locks(trans);
1910
eb7bd15f 1911 btree_trans_sort_paths(trans);
67e0dd8f 1912
807dda8c 1913 btree_trans_sort_paths(trans);
67e0dd8f 1914
807dda8c
KO
1915 trans_for_each_path_inorder(trans, path, i) {
1916 if (__btree_path_cmp(path,
1917 btree_id,
1918 cached,
1919 pos,
1920 level) > 0)
1921 break;
67e0dd8f 1922
807dda8c 1923 path_pos = path;
67e0dd8f
KO
1924 }
1925
807dda8c
KO
1926 if (path_pos &&
1927 path_pos->cached == cached &&
1928 path_pos->btree_id == btree_id &&
1929 path_pos->level == level) {
1930 __btree_path_get(path_pos, intent);
ce91abd6 1931 path = bch2_btree_path_set_pos(trans, path_pos, pos, intent);
67e0dd8f 1932 } else {
807dda8c
KO
1933 path = btree_path_alloc(trans, path_pos);
1934 path_pos = NULL;
67e0dd8f
KO
1935
1936 __btree_path_get(path, intent);
1937 path->pos = pos;
1938 path->btree_id = btree_id;
1939 path->cached = cached;
67e0dd8f
KO
1940 path->uptodate = BTREE_ITER_NEED_TRAVERSE;
1941 path->should_be_locked = false;
1942 path->level = level;
1943 path->locks_want = locks_want;
1944 path->nodes_locked = 0;
1945 path->nodes_intent_locked = 0;
1946 for (i = 0; i < ARRAY_SIZE(path->l); i++)
1947 path->l[i].b = BTREE_ITER_NO_NODE_INIT;
1948#ifdef CONFIG_BCACHEFS_DEBUG
1949 path->ip_allocated = _RET_IP_;
1950#endif
1951 trans->paths_sorted = false;
1952 }
1953
f3e1f444
KO
1954 if (!(flags & BTREE_ITER_NOPRESERVE))
1955 path->preserve = true;
1956
67e0dd8f
KO
1957 if (path->intent_ref)
1958 locks_want = max(locks_want, level + 1);
1959
1960 /*
1961 * If the path has locks_want greater than requested, we don't downgrade
1962 * it here - if the transaction restarts because a btree node split needs to
1963 * upgrade locks, we might be putting/getting this iterator again.
1964 * Downgrading iterators only happens via bch2_trans_downgrade(), after
1965 * a successful transaction commit.
1966 */
1967
1968 locks_want = min(locks_want, BTREE_MAX_DEPTH);
1969 if (locks_want > path->locks_want) {
1970 path->locks_want = locks_want;
bc82d08b 1971 btree_path_get_locks(trans, path, true);
67e0dd8f
KO
1972 }
1973
67e0dd8f
KO
1974 return path;
1975}
1976
1977inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
1978{
1979
1980 struct bkey_s_c k;
1981
67e0dd8f
KO
1982 if (!path->cached) {
1983 struct btree_path_level *l = path_l(path);
f7b6ca23
KO
1984 struct bkey_packed *_k;
1985
1986 EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
67e0dd8f 1987
f7b6ca23 1988 _k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
67e0dd8f
KO
1989 k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
1990
1991 EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
1992
1993 if (!k.k || bpos_cmp(path->pos, k.k->p))
1994 goto hole;
1995 } else {
1996 struct bkey_cached *ck = (void *) path->l[0].b;
1997
f7b6ca23
KO
1998 EBUG_ON(ck &&
1999 (path->btree_id != ck->key.btree_id ||
2000 bkey_cmp(path->pos, ck->key.pos)));
67e0dd8f 2001
f7b6ca23
KO
2002 /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
2003 if (unlikely(!ck || !ck->valid))
2004 return bkey_s_c_null;
2005
2006 EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
67e0dd8f 2007
2e63e180 2008 *u = ck->k->k;
67e0dd8f
KO
2009 k = bkey_i_to_s_c(ck->k);
2010 }
2011
2012 return k;
2013hole:
2014 bkey_init(u);
2015 u->p = path->pos;
2016 return (struct bkey_s_c) { u, NULL };
2017}
2018
2019/* Btree iterators: */
2020
db92f2ea
KO
2021int __must_check
2022__bch2_btree_iter_traverse(struct btree_iter *iter)
2023{
2024 return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
2025}
2026
08070cba
KO
2027int __must_check
2028bch2_btree_iter_traverse(struct btree_iter *iter)
2029{
66a0a497
KO
2030 int ret;
2031
ce91abd6 2032 iter->path = bch2_btree_path_set_pos(iter->trans, iter->path,
67e0dd8f
KO
2033 btree_iter_search_key(iter),
2034 iter->flags & BTREE_ITER_INTENT);
08070cba 2035
67e0dd8f 2036 ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
66a0a497
KO
2037 if (ret)
2038 return ret;
2039
67e0dd8f 2040 iter->path->should_be_locked = true;
66a0a497 2041 return 0;
08070cba
KO
2042}
2043
1c6fdbd8
KO
2044/* Iterate across nodes (leaf and interior nodes) */
2045
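/*
 * Illustrative sketch (not from the upstream source): walking every leaf node
 * of a btree with the node iterators below; the btree id is an arbitrary
 * example and the trans is assumed to already be initialized:
 *
 *	struct btree_iter iter;
 *	struct btree *b;
 *
 *	bch2_trans_node_iter_init(trans, &iter, BTREE_ID_extents, POS_MIN,
 *				  0, 0, 0);
 *	for (b = bch2_btree_iter_peek_node(&iter);
 *	     b && !IS_ERR(b);
 *	     b = bch2_btree_iter_next_node(&iter)) {
 *		// visit b
 *	}
 *	bch2_trans_iter_exit(trans, &iter);
 */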
2046struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
2047{
502027a8 2048 struct btree_trans *trans = iter->trans;
f21566f1 2049 struct btree *b = NULL;
1c6fdbd8
KO
2050 int ret;
2051
67e0dd8f 2052 EBUG_ON(iter->path->cached);
7e1a3aa9 2053 bch2_btree_iter_verify(iter);
1c6fdbd8 2054
502027a8 2055 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
1c6fdbd8 2056 if (ret)
d355c6f4 2057 goto err;
1c6fdbd8 2058
67e0dd8f 2059 b = btree_path_node(iter->path, iter->path->level);
1c6fdbd8 2060 if (!b)
f21566f1 2061 goto out;
1c6fdbd8 2062
4cf91b02 2063 BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0);
1c6fdbd8 2064
f21566f1 2065 bkey_init(&iter->k);
67e0dd8f 2066 iter->k.p = iter->pos = b->key.k.p;
502027a8 2067
ce91abd6 2068 iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
502027a8 2069 iter->flags & BTREE_ITER_INTENT);
67e0dd8f 2070 iter->path->should_be_locked = true;
502027a8 2071 BUG_ON(iter->path->uptodate);
f21566f1
KO
2072out:
2073 bch2_btree_iter_verify_entry_exit(iter);
2074 bch2_btree_iter_verify(iter);
2dac0eae 2075
1c6fdbd8 2076 return b;
d355c6f4
KO
2077err:
2078 b = ERR_PTR(ret);
2079 goto out;
1c6fdbd8
KO
2080}
2081
2dac0eae 2082struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
1c6fdbd8 2083{
67e0dd8f
KO
2084 struct btree_trans *trans = iter->trans;
2085 struct btree_path *path = iter->path;
f21566f1 2086 struct btree *b = NULL;
979735df 2087 unsigned l;
1c6fdbd8
KO
2088 int ret;
2089
979735df 2090 BUG_ON(trans->restarted);
67e0dd8f 2091 EBUG_ON(iter->path->cached);
7e1a3aa9 2092 bch2_btree_iter_verify(iter);
1c6fdbd8 2093
979735df 2094 /* already at end? */
67e0dd8f 2095 if (!btree_path_node(path, path->level))
979735df 2096 return NULL;
1c6fdbd8 2097
979735df
KO
2098 /* got to the end of the btree? */
2099 if (!btree_path_node(path, path->level + 1)) {
2100 btree_node_unlock(path, path->level);
2101 path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
2102 path->level++;
7071878b 2103 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
979735df
KO
2104 return NULL;
2105 }
1c6fdbd8 2106
979735df
KO
2107 if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
2108 __bch2_btree_path_unlock(path);
2109 path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS;
2110 path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS;
7071878b 2111 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
bc82d08b
KO
2112 trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_,
2113 path->btree_id, &path->pos);
979735df
KO
2114 btree_trans_restart(trans);
2115 ret = -EINTR;
d355c6f4 2116 goto err;
979735df 2117 }
1c6fdbd8 2118
979735df 2119 b = btree_path_node(path, path->level + 1);
1c6fdbd8 2120
979735df
KO
2121 if (!bpos_cmp(iter->pos, b->key.k.p)) {
2122 btree_node_unlock(path, path->level);
2123 path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
2124 path->level++;
2125 } else {
1c6fdbd8
KO
2126 /*
2127 * Haven't gotten to the end of the parent node: go back down to
2128 * the next child node
2129 */
67e0dd8f 2130 path = iter->path =
ce91abd6 2131 bch2_btree_path_set_pos(trans, path, bpos_successor(iter->pos),
67e0dd8f 2132 iter->flags & BTREE_ITER_INTENT);
1c6fdbd8 2133
67e0dd8f 2134 path->level = iter->min_depth;
979735df
KO
2135
2136 for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
2137 if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
2138 btree_node_unlock(path, l);
2139
67e0dd8f 2140 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
345ca825
KO
2141 bch2_btree_iter_verify(iter);
2142
67e0dd8f 2143 ret = bch2_btree_path_traverse(trans, path, iter->flags);
d355c6f4
KO
2144 if (ret)
2145 goto err;
1c6fdbd8 2146
67e0dd8f 2147 b = path->l[path->level].b;
1c6fdbd8
KO
2148 }
2149
f21566f1 2150 bkey_init(&iter->k);
67e0dd8f 2151 iter->k.p = iter->pos = b->key.k.p;
502027a8 2152
ce91abd6 2153 iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
502027a8 2154 iter->flags & BTREE_ITER_INTENT);
67e0dd8f 2155 iter->path->should_be_locked = true;
502027a8 2156 BUG_ON(iter->path->uptodate);
f21566f1
KO
2157out:
2158 bch2_btree_iter_verify_entry_exit(iter);
2159 bch2_btree_iter_verify(iter);
2dac0eae 2160
1c6fdbd8 2161 return b;
d355c6f4
KO
2162err:
2163 b = ERR_PTR(ret);
2164 goto out;
1c6fdbd8
KO
2165}
2166
2167/* Iterate across keys (in leaf nodes only) */
2168
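/*
 * Illustrative sketch (not from the upstream source): iterating keys with the
 * interface below; the btree id and start position are arbitrary examples and
 * -EINTR restarts are not handled here:
 *
 *	struct btree_iter iter;
 *	struct bkey_s_c k;
 *	int ret = 0;
 *
 *	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS_MIN, 0);
 *	while ((k = bch2_btree_iter_peek(&iter)).k &&
 *	       !(ret = bkey_err(k))) {
 *		// use k
 *		bch2_btree_iter_advance(&iter);
 *	}
 *	bch2_trans_iter_exit(trans, &iter);
 */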
e0ba3b64 2169inline bool bch2_btree_iter_advance(struct btree_iter *iter)
434094be 2170{
3d495595 2171 struct bpos pos = iter->k.p;
6caf0578
KO
2172 bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
2173 ? bpos_cmp(pos, SPOS_MAX)
2174 : bkey_cmp(pos, SPOS_MAX)) != 0;
3d495595 2175
7e1a3aa9 2176 if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
e751c01a 2177 pos = bkey_successor(iter, pos);
3d495595 2178 bch2_btree_iter_set_pos(iter, pos);
7e1a3aa9 2179 return ret;
3d495595
KO
2180}
2181
e0ba3b64 2182inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
3d495595
KO
2183{
2184 struct bpos pos = bkey_start_pos(&iter->k);
71f892a4
KO
2185 bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
2186 ? bpos_cmp(pos, POS_MIN)
2187 : bkey_cmp(pos, POS_MIN)) != 0;
3d495595 2188
7e1a3aa9 2189 if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
e751c01a 2190 pos = bkey_predecessor(iter, pos);
3d495595 2191 bch2_btree_iter_set_pos(iter, pos);
7e1a3aa9 2192 return ret;
434094be
KO
2193}
2194
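/*
 * Scan the transaction's pending updates for the earliest update to this
 * iterator's btree at or after the current path position, skipping updates
 * whose key cache entry has already been flushed; used to overlay uncommitted
 * updates on top of keys read from the btree.
 */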
3763cb95 2195static noinline
67e0dd8f 2196struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
818664f5
KO
2197{
2198 struct btree_insert_entry *i;
5288e66a 2199 struct bkey_i *ret = NULL;
818664f5 2200
8e6bbc41 2201 trans_for_each_update(iter->trans, i) {
5288e66a
KO
2202 if (i->btree_id < iter->btree_id)
2203 continue;
2204 if (i->btree_id > iter->btree_id)
818664f5 2205 break;
67e0dd8f 2206 if (bpos_cmp(i->k->k.p, iter->path->pos) < 0)
5288e66a 2207 continue;
12ce5b7d
KO
2208 if (i->key_cache_already_flushed)
2209 continue;
5288e66a
KO
2210 if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0)
2211 ret = i->k;
2212 }
818664f5 2213
5288e66a
KO
2214 return ret;
2215}
2216
3763cb95
KO
2217static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
2218{
2219 return iter->flags & BTREE_ITER_WITH_UPDATES
2220 ? __bch2_btree_trans_peek_updates(iter)
2221 : NULL;
2222}
2223
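/*
 * While journal replay has not yet finished, keys that so far exist only in
 * the journal are overlaid on top of the btree: the helpers below return the
 * journal key at the iterator's position (for the slot variant) or the next
 * journal key up to the end of the current leaf node (for peek).
 */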
5222a460
KO
2224static noinline
2225struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
2226 struct btree_iter *iter)
2227{
5650bb46 2228 struct bkey_i *k = bch2_journal_keys_peek_slot(trans->c, iter->btree_id,
2ae4573e
KO
2229 iter->path->level,
2230 iter->path->pos);
5222a460 2231
5650bb46 2232 if (k) {
5222a460
KO
2233 iter->k = k->k;
2234 return bkey_i_to_s_c(k);
2235 } else {
2236 return bkey_s_c_null;
2237 }
2238}
2239
2240static noinline
2241struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
2242 struct btree_iter *iter,
2243 struct bkey_s_c k)
2244{
2245 struct bkey_i *next_journal =
5650bb46
KO
2246 bch2_journal_keys_peek_upto(trans->c, iter->btree_id, 0,
2247 iter->path->pos,
2248 k.k ? k.k->p : iter->path->l[0].b->key.k.p);
5222a460 2249
5650bb46 2250 if (next_journal) {
5222a460
KO
2251 iter->k = next_journal->k;
2252 k = bkey_i_to_s_c(next_journal);
2253 }
2254
2255 return k;
2256}
2257
f7b6ca23
KO
2258/*
2259 * Checks btree key cache for key at iter->pos and returns it if present, or
2260 * bkey_s_c_null:
2261 */
2262static noinline
2263struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
2264{
2265 struct btree_trans *trans = iter->trans;
2266 struct bch_fs *c = trans->c;
2267 struct bkey u;
2268 int ret;
2269
2270 if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
2271 return bkey_s_c_null;
2272
2273 if (!iter->key_cache_path)
2274 iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
2275 iter->flags & BTREE_ITER_INTENT, 0,
2276 iter->flags|BTREE_ITER_CACHED);
2277
2278 iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
2279 iter->flags & BTREE_ITER_INTENT);
2280
2281 ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED);
2282 if (unlikely(ret))
2283 return bkey_s_c_err(ret);
2284
2285 iter->key_cache_path->should_be_locked = true;
2286
2287 return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
2288}
2289
a1e82d35 2290static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
1c6fdbd8 2291{
67e0dd8f 2292 struct btree_trans *trans = iter->trans;
360746bf 2293 struct bkey_i *next_update;
f7b6ca23 2294 struct bkey_s_c k, k2;
8e432d98 2295 int ret;
1c6fdbd8 2296
67e0dd8f 2297 EBUG_ON(iter->path->cached || iter->path->level);
7e1a3aa9 2298 bch2_btree_iter_verify(iter);
1c6fdbd8 2299
1c6fdbd8 2300 while (1) {
ce91abd6
KO
2301 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
2302 iter->flags & BTREE_ITER_INTENT);
c8476a4e 2303
67e0dd8f 2304 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
e6e024e9
KO
2305 if (unlikely(ret)) {
2306 /* ensure that iter->k is consistent with iter->pos: */
2307 bch2_btree_iter_set_pos(iter, iter->pos);
2308 k = bkey_s_c_err(ret);
2309 goto out;
2310 }
1c6fdbd8 2311
f7b6ca23
KO
2312 iter->path->should_be_locked = true;
2313
67e0dd8f 2314 k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
e6e024e9 2315
f7b6ca23
KO
2316 if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
2317 k.k &&
2318 (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
2319 ret = bkey_err(k2);
2320 if (ret) {
2321 k = k2;
2322 bch2_btree_iter_set_pos(iter, iter->pos);
2323 goto out;
2324 }
2325
2326 k = k2;
2327 iter->k = *k.k;
2328 }
2329
5222a460
KO
2330 if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
2331 k = btree_trans_peek_journal(trans, iter, k);
2332
2333 next_update = btree_trans_peek_updates(iter);
e6e024e9 2334
818664f5 2335 if (next_update &&
e6e024e9 2336 bpos_cmp(next_update->k.p,
67e0dd8f 2337 k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
5288e66a 2338 iter->k = next_update->k;
818664f5 2339 k = bkey_i_to_s_c(next_update);
5288e66a 2340 }
818664f5 2341
5222a460
KO
2342 if (k.k && bkey_deleted(k.k)) {
2343 /*
2344 * If we've got a whiteout, and it's after the search
2345 * key, advance the search key to the whiteout instead
2346 * of just after the whiteout - it might be a btree
2347 * whiteout, with a real key at the same position, since
2348 * in the btree, deleted keys sort before non-deleted keys.
2349 */
2350 search_key = bpos_cmp(search_key, k.k->p)
2351 ? k.k->p
2352 : bpos_successor(k.k->p);
2353 continue;
2354 }
2355
818664f5 2356 if (likely(k.k)) {
c075ff70 2357 break;
67e0dd8f 2358 } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
e6e024e9 2359 /* Advance to next leaf node: */
67e0dd8f 2360 search_key = bpos_successor(iter->path->l[0].b->key.k.p);
e6e024e9
KO
2361 } else {
2362 /* End of btree: */
c8476a4e
KO
2363 bch2_btree_iter_set_pos(iter, SPOS_MAX);
2364 k = bkey_s_c_null;
2365 goto out;
2366 }
1c6fdbd8 2367 }
a1e82d35 2368out:
a1e82d35
KO
2369 bch2_btree_iter_verify(iter);
2370
2371 return k;
2372}
2373
2374/**
2375 * bch2_btree_iter_peek_upto: returns first key greater than or equal to
2376 * iterator's current position, up to and including @end
2377 */
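/*
 * Illustrative sketch (not from the upstream source): bounding the iteration
 * to a single inode with @end; "inum" is a hypothetical inode number:
 *
 *	struct btree_iter iter;
 *	struct bkey_s_c k;
 *	int ret = 0;
 *
 *	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum, 0), 0);
 *	while ((k = bch2_btree_iter_peek_upto(&iter, POS(inum, U64_MAX))).k &&
 *	       !(ret = bkey_err(k))) {
 *		// use k
 *		bch2_btree_iter_advance(&iter);
 *	}
 *	bch2_trans_iter_exit(trans, &iter);
 */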
85d8cf16 2378struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
a1e82d35
KO
2379{
2380 struct btree_trans *trans = iter->trans;
2381 struct bpos search_key = btree_iter_search_key(iter);
2382 struct bkey_s_c k;
85d8cf16 2383 struct bpos iter_pos;
a1e82d35
KO
2384 int ret;
2385
1f2d9192
KO
2386 if (iter->update_path) {
2387 bch2_path_put(trans, iter->update_path,
2388 iter->flags & BTREE_ITER_INTENT);
2389 iter->update_path = NULL;
2390 }
2391
a1e82d35
KO
2392 bch2_btree_iter_verify_entry_exit(iter);
2393
2394 while (1) {
2395 k = __bch2_btree_iter_peek(iter, search_key);
2396 if (!k.k || bkey_err(k))
2397 goto out;
2398
85d8cf16
KO
2399 /*
2400 * iter->pos should be monotonically increasing, and always be
2401 * equal to the key we just returned - except extents can
2402 * straddle iter->pos:
2403 */
2404 if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
2405 iter_pos = k.k->p;
2406 else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
2407 iter_pos = bkey_start_pos(k.k);
2408 else
2409 iter_pos = iter->pos;
2410
2411 if (bkey_cmp(iter_pos, end) > 0) {
2412 bch2_btree_iter_set_pos(iter, end);
2413 k = bkey_s_c_null;
2414 goto out;
2415 }
2416
1f2d9192
KO
2417 if (iter->update_path &&
2418 bkey_cmp(iter->update_path->pos, k.k->p)) {
2419 bch2_path_put(trans, iter->update_path,
2420 iter->flags & BTREE_ITER_INTENT);
2421 iter->update_path = NULL;
2422 }
2423
2424 if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
2425 (iter->flags & BTREE_ITER_INTENT) &&
2426 !(iter->flags & BTREE_ITER_IS_EXTENTS) &&
2427 !iter->update_path) {
2428 struct bpos pos = k.k->p;
2429
2430 if (pos.snapshot < iter->snapshot) {
2431 search_key = bpos_successor(k.k->p);
2432 continue;
2433 }
2434
2435 pos.snapshot = iter->snapshot;
2436
2437 /*
2438 * advance, same as on exit for iter->path, but only up
2439 * to snapshot
2440 */
2441 __btree_path_get(iter->path, iter->flags & BTREE_ITER_INTENT);
2442 iter->update_path = iter->path;
2443
ce91abd6 2444 iter->update_path = bch2_btree_path_set_pos(trans,
1f2d9192
KO
2445 iter->update_path, pos,
2446 iter->flags & BTREE_ITER_INTENT);
1f2d9192
KO
2447 }
2448
a1e82d35
KO
2449 /*
2450 * We can never have a key in a leaf node at POS_MAX, so
2451 * we don't have to check these successor() calls:
2452 */
2453 if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
2454 !bch2_snapshot_is_ancestor(trans->c,
2455 iter->snapshot,
2456 k.k->p.snapshot)) {
2457 search_key = bpos_successor(k.k->p);
2458 continue;
2459 }
2460
2461 if (bkey_whiteout(k.k) &&
2462 !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
2463 search_key = bkey_successor(iter, k.k->p);
2464 continue;
2465 }
2466
2467 break;
2468 }
2469
85d8cf16 2470 iter->pos = iter_pos;
1f2d9192 2471
ce91abd6 2472 iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
1f2d9192
KO
2473 iter->flags & BTREE_ITER_INTENT);
2474 BUG_ON(!iter->path->nodes_locked);
a1e82d35 2475out:
1f2d9192 2476 if (iter->update_path) {
d4d24a65
KO
2477 if (unlikely(!bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_))) {
2478 k = bkey_s_c_err(-EINTR);
2479 } else {
2480 BUG_ON(!(iter->update_path->nodes_locked & 1));
2481 iter->update_path->should_be_locked = true;
2482 }
1f2d9192
KO
2483 }
2484 iter->path->should_be_locked = true;
2485
a1e82d35 2486 if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
c075ff70
KO
2487 iter->pos.snapshot = iter->snapshot;
2488
a1e82d35
KO
2489 ret = bch2_btree_iter_verify_ret(iter, k);
2490 if (unlikely(ret)) {
2491 bch2_btree_iter_set_pos(iter, iter->pos);
2492 k = bkey_s_c_err(ret);
2493 }
67e0dd8f 2494
7e1a3aa9 2495 bch2_btree_iter_verify_entry_exit(iter);
c075ff70 2496
1c6fdbd8
KO
2497 return k;
2498}
2499
f4b61341
KO
2500/**
2501 * bch2_btree_iter_next: returns first key greater than iterator's current
2502 * position
2503 */
1c6fdbd8
KO
2504struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
2505{
e0ba3b64 2506 if (!bch2_btree_iter_advance(iter))
2e70ce56 2507 return bkey_s_c_null;
f4b61341 2508
2dac0eae 2509 return bch2_btree_iter_peek(iter);
1c6fdbd8
KO
2510}
2511
ccf5a109
KO
2512/**
2513 * bch2_btree_iter_peek_prev: returns first key less than or equal to
2514 * iterator's current position
2515 */
2516struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
1c6fdbd8 2517{
67e0dd8f 2518 struct btree_trans *trans = iter->trans;
c8476a4e 2519 struct bpos search_key = iter->pos;
c075ff70 2520 struct btree_path *saved_path = NULL;
1c6fdbd8 2521 struct bkey_s_c k;
c075ff70
KO
2522 struct bkey saved_k;
2523 const struct bch_val *saved_v;
1c6fdbd8
KO
2524 int ret;
2525
67e0dd8f 2526 EBUG_ON(iter->path->cached || iter->path->level);
5288e66a 2527 EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
5222a460
KO
2528
2529 if (iter->flags & BTREE_ITER_WITH_JOURNAL)
2530 return bkey_s_c_err(-EIO);
2531
7e1a3aa9
KO
2532 bch2_btree_iter_verify(iter);
2533 bch2_btree_iter_verify_entry_exit(iter);
2534
c075ff70
KO
2535 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2536 search_key.snapshot = U32_MAX;
2537
1c6fdbd8 2538 while (1) {
ce91abd6 2539 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
67e0dd8f 2540 iter->flags & BTREE_ITER_INTENT);
c8476a4e 2541
67e0dd8f 2542 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
7e1a3aa9 2543 if (unlikely(ret)) {
e6e024e9
KO
2544 /* ensure that iter->k is consistent with iter->pos: */
2545 bch2_btree_iter_set_pos(iter, iter->pos);
7e1a3aa9 2546 k = bkey_s_c_err(ret);
e6e024e9 2547 goto out;
7e1a3aa9 2548 }
1c6fdbd8 2549
67e0dd8f
KO
2550 k = btree_path_level_peek(trans, iter->path,
2551 &iter->path->l[0], &iter->k);
3d495595
KO
2552 if (!k.k ||
2553 ((iter->flags & BTREE_ITER_IS_EXTENTS)
c075ff70
KO
2554 ? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0
2555 : bpos_cmp(k.k->p, search_key) > 0))
67e0dd8f
KO
2556 k = btree_path_level_prev(trans, iter->path,
2557 &iter->path->l[0], &iter->k);
ccf5a109 2558
e6e024e9 2559 if (likely(k.k)) {
c075ff70
KO
2560 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
2561 if (k.k->p.snapshot == iter->snapshot)
2562 goto got_key;
2563
2564 /*
2565 * If we have a saved candidate, and we're no
2566 * longer at the same _key_ (not pos), return
2567 * that candidate
2568 */
2569 if (saved_path && bkey_cmp(k.k->p, saved_k.p)) {
2570 bch2_path_put(trans, iter->path,
2571 iter->flags & BTREE_ITER_INTENT);
2572 iter->path = saved_path;
2573 saved_path = NULL;
2574 iter->k = saved_k;
2575 k.v = saved_v;
2576 goto got_key;
2577 }
2578
2579 if (bch2_snapshot_is_ancestor(iter->trans->c,
2580 iter->snapshot,
2581 k.k->p.snapshot)) {
2582 if (saved_path)
2583 bch2_path_put(trans, saved_path,
2584 iter->flags & BTREE_ITER_INTENT);
2585 saved_path = btree_path_clone(trans, iter->path,
2586 iter->flags & BTREE_ITER_INTENT);
2587 saved_k = *k.k;
2588 saved_v = k.v;
2589 }
2590
2591 search_key = bpos_predecessor(k.k->p);
2592 continue;
2593 }
2594got_key:
2595 if (bkey_whiteout(k.k) &&
2596 !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
2597 search_key = bkey_predecessor(iter, k.k->p);
2598 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2599 search_key.snapshot = U32_MAX;
2600 continue;
2601 }
2602
1c6fdbd8 2603 break;
67e0dd8f 2604 } else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) {
e6e024e9 2605 /* Advance to previous leaf node: */
67e0dd8f 2606 search_key = bpos_predecessor(iter->path->l[0].b->data->min_key);
e6e024e9
KO
2607 } else {
2608 /* Start of btree: */
c8476a4e 2609 bch2_btree_iter_set_pos(iter, POS_MIN);
7e1a3aa9 2610 k = bkey_s_c_null;
e6e024e9 2611 goto out;
7e1a3aa9 2612 }
ccf5a109 2613 }
1c6fdbd8 2614
a045be5a 2615 EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
3d495595
KO
2616
2617 /* Extents can straddle iter->pos: */
a045be5a 2618 if (bkey_cmp(k.k->p, iter->pos) < 0)
3d495595 2619 iter->pos = k.k->p;
c075ff70
KO
2620
2621 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2622 iter->pos.snapshot = iter->snapshot;
7e1a3aa9 2623out:
c075ff70
KO
2624 if (saved_path)
2625 bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
67e0dd8f
KO
2626 iter->path->should_be_locked = true;
2627
7e1a3aa9
KO
2628 bch2_btree_iter_verify_entry_exit(iter);
2629 bch2_btree_iter_verify(iter);
67e0dd8f 2630
1c6fdbd8
KO
2631 return k;
2632}
2633
ccf5a109
KO
2634/**
2635 * bch2_btree_iter_prev: returns first key less than iterator's current
2636 * position
2637 */
2638struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
2639{
e0ba3b64 2640 if (!bch2_btree_iter_rewind(iter))
2e70ce56 2641 return bkey_s_c_null;
ccf5a109 2642
2e70ce56 2643 return bch2_btree_iter_peek_prev(iter);
ccf5a109
KO
2644}
2645
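/*
 * Returns the key at the iterator's current position, synthesizing a zero-size
 * deleted key (or, for extents, a hole extending to the next key) when nothing
 * is present there; pending transaction updates, journal keys and the key
 * cache are consulted first where the relevant flags are set.
 */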
c3801239 2646struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
271a3d3a 2647{
9f6bd307 2648 struct btree_trans *trans = iter->trans;
953ee28a 2649 struct bpos search_key;
271a3d3a 2650 struct bkey_s_c k;
c3801239
KO
2651 int ret;
2652
7e1a3aa9
KO
2653 bch2_btree_iter_verify(iter);
2654 bch2_btree_iter_verify_entry_exit(iter);
2ae4573e 2655 EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE));
7e1a3aa9 2656
1d214eb1
KO
2657 /* extents can't span inode numbers: */
2658 if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
953ee28a 2659 unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
1d214eb1
KO
2660 if (iter->pos.inode == KEY_INODE_MAX)
2661 return bkey_s_c_null;
c3801239 2662
1d214eb1
KO
2663 bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
2664 }
8042b5b7 2665
953ee28a 2666 search_key = btree_iter_search_key(iter);
ce91abd6 2667 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
67e0dd8f 2668 iter->flags & BTREE_ITER_INTENT);
953ee28a 2669
67e0dd8f 2670 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
c3801239
KO
2671 if (unlikely(ret))
2672 return bkey_s_c_err(ret);
1c6fdbd8 2673
c075ff70
KO
2674 if ((iter->flags & BTREE_ITER_CACHED) ||
2675 !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
953ee28a 2676 struct bkey_i *next_update;
1c6fdbd8 2677
5222a460 2678 if ((next_update = btree_trans_peek_updates(iter)) &&
67e0dd8f 2679 !bpos_cmp(next_update->k.p, iter->pos)) {
1d214eb1
KO
2680 iter->k = next_update->k;
2681 k = bkey_i_to_s_c(next_update);
5222a460 2682 goto out;
1d214eb1
KO
2683 }
2684
5222a460
KO
2685 if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
2686 (k = btree_trans_peek_slot_journal(trans, iter)).k)
2687 goto out;
2688
f7b6ca23
KO
2689 if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
2690 (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
2691 if (!bkey_err(k))
2692 iter->k = *k.k;
2693 goto out;
2694 }
2695
5222a460 2696 k = bch2_btree_path_peek_slot(iter->path, &iter->k);
1d214eb1
KO
2697 } else {
2698 struct bpos next;
1d214eb1 2699
2ae4573e
KO
2700 EBUG_ON(iter->path->level);
2701
b1d87f52 2702 if (iter->flags & BTREE_ITER_INTENT) {
67e0dd8f 2703 struct btree_iter iter2;
85d8cf16
KO
2704 struct bpos end = iter->pos;
2705
2706 if (iter->flags & BTREE_ITER_IS_EXTENTS)
2707 end.offset = U64_MAX;
b1d87f52 2708
67e0dd8f 2709 bch2_trans_copy_iter(&iter2, iter);
85d8cf16 2710 k = bch2_btree_iter_peek_upto(&iter2, end);
b1d87f52 2711
67e0dd8f
KO
2712 if (k.k && !bkey_err(k)) {
2713 iter->k = iter2.k;
2714 k.k = &iter->k;
2715 }
2716 bch2_trans_iter_exit(trans, &iter2);
b1d87f52
KO
2717 } else {
2718 struct bpos pos = iter->pos;
2719
2720 k = bch2_btree_iter_peek(iter);
2721 iter->pos = pos;
2722 }
1d214eb1
KO
2723
2724 if (unlikely(bkey_err(k)))
2725 return k;
2726
2727 next = k.k ? bkey_start_pos(k.k) : POS_MAX;
2728
2729 if (bkey_cmp(iter->pos, next) < 0) {
2730 bkey_init(&iter->k);
2731 iter->k.p = iter->pos;
c075ff70
KO
2732
2733 if (iter->flags & BTREE_ITER_IS_EXTENTS) {
2734 bch2_key_resize(&iter->k,
2735 min_t(u64, KEY_SIZE_MAX,
2736 (next.inode == iter->pos.inode
2737 ? next.offset
2738 : KEY_OFFSET_MAX) -
2739 iter->pos.offset));
2740 EBUG_ON(!iter->k.size);
2741 }
1d214eb1
KO
2742
2743 k = (struct bkey_s_c) { &iter->k, NULL };
1d214eb1 2744 }
271a3d3a 2745 }
5222a460 2746out:
67e0dd8f
KO
2747 iter->path->should_be_locked = true;
2748
7e1a3aa9
KO
2749 bch2_btree_iter_verify_entry_exit(iter);
2750 bch2_btree_iter_verify(iter);
c075ff70
KO
2751 ret = bch2_btree_iter_verify_ret(iter, k);
2752 if (unlikely(ret))
2753 return bkey_s_c_err(ret);
66a0a497 2754
63f1a598 2755 return k;
1c6fdbd8
KO
2756}
2757
1c6fdbd8
KO
2758struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
2759{
e0ba3b64 2760 if (!bch2_btree_iter_advance(iter))
2e70ce56 2761 return bkey_s_c_null;
1c6fdbd8 2762
2dac0eae 2763 return bch2_btree_iter_peek_slot(iter);
1c6fdbd8
KO
2764}
2765
18fc6ae5
KO
2766struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
2767{
e0ba3b64 2768 if (!bch2_btree_iter_rewind(iter))
18fc6ae5
KO
2769 return bkey_s_c_null;
2770
2771 return bch2_btree_iter_peek_slot(iter);
2772}
2773
1c6fdbd8
KO
2774/* new transactional stuff: */
2775
0423fb71
KO
2776#ifdef CONFIG_BCACHEFS_DEBUG
2777static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
2778{
67e0dd8f 2779 struct btree_path *path;
0423fb71
KO
2780 unsigned i;
2781
67e0dd8f 2782 BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated));
0423fb71 2783
67e0dd8f
KO
2784 trans_for_each_path(trans, path) {
2785 BUG_ON(path->sorted_idx >= trans->nr_sorted);
2786 BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
0423fb71
KO
2787 }
2788
2789 for (i = 0; i < trans->nr_sorted; i++) {
2790 unsigned idx = trans->sorted[i];
2791
67e0dd8f
KO
2792 EBUG_ON(!(trans->paths_allocated & (1ULL << idx)));
2793 BUG_ON(trans->paths[idx].sorted_idx != i);
0423fb71
KO
2794 }
2795}
0423fb71
KO
2796
2797static void btree_trans_verify_sorted(struct btree_trans *trans)
2798{
67e0dd8f 2799 struct btree_path *path, *prev = NULL;
0423fb71
KO
2800 unsigned i;
2801
67e0dd8f 2802 trans_for_each_path_inorder(trans, path, i) {
eb7bd15f
KO
2803 if (prev && btree_path_cmp(prev, path) > 0) {
2804 bch2_dump_trans_paths_updates(trans);
2805 panic("trans paths out of order!\n");
2806 }
67e0dd8f 2807 prev = path;
0423fb71 2808 }
0423fb71 2809}
068bcaa5
KO
2810#else
2811static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
2812static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
2813#endif
0423fb71 2814
068bcaa5 2815void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
0423fb71
KO
2816{
2817 int i, l = 0, r = trans->nr_sorted, inc = 1;
2818 bool swapped;
2819
068bcaa5
KO
2820 btree_trans_verify_sorted_refs(trans);
2821
2822 if (trans->paths_sorted)
2823 goto out;
2824
0423fb71
KO
2825 /*
2826 * Cocktail shaker sort: this is efficient because the paths will already be
2827 * mostly sorted.
2828 */
2829 do {
2830 swapped = false;
2831
2832 for (i = inc > 0 ? l : r - 2;
2833 i + 1 < r && i >= l;
2834 i += inc) {
67e0dd8f
KO
2835 if (btree_path_cmp(trans->paths + trans->sorted[i],
2836 trans->paths + trans->sorted[i + 1]) > 0) {
0423fb71 2837 swap(trans->sorted[i], trans->sorted[i + 1]);
67e0dd8f
KO
2838 trans->paths[trans->sorted[i]].sorted_idx = i;
2839 trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
0423fb71
KO
2840 swapped = true;
2841 }
2842 }
2843
2844 if (inc > 0)
2845 --r;
2846 else
2847 l++;
2848 inc = -inc;
2849 } while (swapped);
2850
67e0dd8f 2851 trans->paths_sorted = true;
068bcaa5 2852out:
0423fb71
KO
2853 btree_trans_verify_sorted(trans);
2854}
2855
67e0dd8f
KO
2856static inline void btree_path_list_remove(struct btree_trans *trans,
2857 struct btree_path *path)
0423fb71
KO
2858{
2859 unsigned i;
2860
67e0dd8f 2861 EBUG_ON(path->sorted_idx >= trans->nr_sorted);
0423fb71
KO
2862#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
2863 trans->nr_sorted--;
67e0dd8f
KO
2864 memmove_u64s_down_small(trans->sorted + path->sorted_idx,
2865 trans->sorted + path->sorted_idx + 1,
2866 DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
0423fb71 2867#else
67e0dd8f 2868 array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
0423fb71 2869#endif
67e0dd8f
KO
2870 for (i = path->sorted_idx; i < trans->nr_sorted; i++)
2871 trans->paths[trans->sorted[i]].sorted_idx = i;
0423fb71 2872
67e0dd8f 2873 path->sorted_idx = U8_MAX;
0423fb71
KO
2874}
2875
67e0dd8f
KO
2876static inline void btree_path_list_add(struct btree_trans *trans,
2877 struct btree_path *pos,
2878 struct btree_path *path)
0423fb71
KO
2879{
2880 unsigned i;
2881
068bcaa5 2882 path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted;
0423fb71
KO
2883
2884#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
67e0dd8f
KO
2885 memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
2886 trans->sorted + path->sorted_idx,
2887 DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
0423fb71 2888 trans->nr_sorted++;
67e0dd8f 2889 trans->sorted[path->sorted_idx] = path->idx;
0423fb71 2890#else
67e0dd8f 2891 array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
0423fb71
KO
2892#endif
2893
67e0dd8f
KO
2894 for (i = path->sorted_idx; i < trans->nr_sorted; i++)
2895 trans->paths[trans->sorted[i]].sorted_idx = i;
64bc0011 2896
0423fb71 2897 btree_trans_verify_sorted_refs(trans);
7c26ecae
KO
2898}
2899
67e0dd8f 2900void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
64bc0011 2901{
67e0dd8f
KO
2902 if (iter->path)
2903 bch2_path_put(trans, iter->path,
2904 iter->flags & BTREE_ITER_INTENT);
1f2d9192
KO
2905 if (iter->update_path)
2906 bch2_path_put(trans, iter->update_path,
2907 iter->flags & BTREE_ITER_INTENT);
f7b6ca23
KO
2908 if (iter->key_cache_path)
2909 bch2_path_put(trans, iter->key_cache_path,
2910 iter->flags & BTREE_ITER_INTENT);
67e0dd8f 2911 iter->path = NULL;
1f2d9192 2912 iter->update_path = NULL;
f7b6ca23 2913 iter->key_cache_path = NULL;
64bc0011
KO
2914}
2915
67e0dd8f
KO
2916static void __bch2_trans_iter_init(struct btree_trans *trans,
2917 struct btree_iter *iter,
2918 enum btree_id btree_id, struct bpos pos,
2919 unsigned locks_want,
2920 unsigned depth,
2921 unsigned flags)
1c6fdbd8 2922{
e5af273f
KO
2923 EBUG_ON(trans->restarted);
2924
f21566f1
KO
2925 if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
2926 btree_node_type_is_extents(btree_id))
3dea728c 2927 flags |= BTREE_ITER_IS_EXTENTS;
1c6fdbd8 2928
a861c722
KO
2929 if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
2930 !btree_type_has_snapshots(btree_id))
e751c01a
KO
2931 flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
2932
a861c722
KO
2933 if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) &&
2934 btree_type_has_snapshots(btree_id))
2935 flags |= BTREE_ITER_FILTER_SNAPSHOTS;
e751c01a 2936
5222a460
KO
2937 if (trans->journal_replay_not_finished)
2938 flags |= BTREE_ITER_WITH_JOURNAL;
2939
67e0dd8f
KO
2940 iter->trans = trans;
2941 iter->path = NULL;
1f2d9192 2942 iter->update_path = NULL;
f7b6ca23 2943 iter->key_cache_path = NULL;
67e0dd8f
KO
2944 iter->btree_id = btree_id;
2945 iter->min_depth = depth;
f21566f1
KO
2946 iter->flags = flags;
2947 iter->snapshot = pos.snapshot;
67e0dd8f
KO
2948 iter->pos = pos;
2949 iter->k.type = KEY_TYPE_deleted;
2950 iter->k.p = pos;
2951 iter->k.size = 0;
e751c01a 2952
f3e1f444
KO
2953 iter->path = bch2_path_get(trans, btree_id, iter->pos,
2954 locks_want, depth, flags);
1c6fdbd8
KO
2955}
2956
67e0dd8f
KO
2957void bch2_trans_iter_init(struct btree_trans *trans,
2958 struct btree_iter *iter,
2959 unsigned btree_id, struct bpos pos,
2960 unsigned flags)
424eb881 2961{
2ae4573e
KO
2962 if (!btree_id_cached(trans->c, btree_id)) {
2963 flags &= ~BTREE_ITER_CACHED;
2964 flags &= ~BTREE_ITER_WITH_KEY_CACHE;
2965 } else if (!(flags & BTREE_ITER_CACHED))
2966 flags |= BTREE_ITER_WITH_KEY_CACHE;
2967
67e0dd8f
KO
2968 __bch2_trans_iter_init(trans, iter, btree_id, pos,
2969 0, 0, flags);
424eb881
KO
2970}
2971
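/*
 * Initialize an iterator for walking btree nodes at a given depth rather than
 * individual keys: BTREE_ITER_NOT_EXTENTS and BTREE_ITER_ALL_SNAPSHOTS are
 * forced, and the resulting path is verified to be at the requested level.
 */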
67e0dd8f
KO
2972void bch2_trans_node_iter_init(struct btree_trans *trans,
2973 struct btree_iter *iter,
2974 enum btree_id btree_id,
2975 struct bpos pos,
2976 unsigned locks_want,
2977 unsigned depth,
2978 unsigned flags)
1c6fdbd8 2979{
67e0dd8f
KO
2980 __bch2_trans_iter_init(trans, iter, btree_id, pos, locks_want, depth,
2981 BTREE_ITER_NOT_EXTENTS|
2982 __BTREE_ITER_ALL_SNAPSHOTS|
2983 BTREE_ITER_ALL_SNAPSHOTS|
2984 flags);
2985 BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH));
2986 BUG_ON(iter->path->level != depth);
2987 BUG_ON(iter->min_depth != depth);
2988}
7c26ecae 2989
67e0dd8f
KO
2990void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
2991{
2992 *dst = *src;
2993 if (src->path)
2994 __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
1f2d9192
KO
2995 if (src->update_path)
2996 __btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
f7b6ca23 2997 dst->key_cache_path = NULL;
1c6fdbd8
KO
2998}
2999
73a117d2 3000void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
1c6fdbd8 3001{
73a117d2
KO
3002 size_t new_top = trans->mem_top + size;
3003 void *p;
3004
3005 if (new_top > trans->mem_bytes) {
1c6fdbd8 3006 size_t old_bytes = trans->mem_bytes;
73a117d2 3007 size_t new_bytes = roundup_pow_of_two(new_top);
e131b6aa
KO
3008 void *new_mem;
3009
3010 WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
3011
3012 new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
3013 if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
3014 new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
3015 new_bytes = BTREE_TRANS_MEM_MAX;
3016 kfree(trans->mem);
3017 }
1c6fdbd8
KO
3018
3019 if (!new_mem)
73a117d2 3020 return ERR_PTR(-ENOMEM);
1c6fdbd8
KO
3021
3022 trans->mem = new_mem;
3023 trans->mem_bytes = new_bytes;
3024
1c7a0adf 3025 if (old_bytes) {
669f87a5 3026 trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes);
e5af273f 3027 btree_trans_restart(trans);
73a117d2 3028 return ERR_PTR(-EINTR);
1c7a0adf 3029 }
1c6fdbd8
KO
3030 }
3031
20bceecb 3032 p = trans->mem + trans->mem_top;
1c6fdbd8 3033 trans->mem_top += size;
7ed158f2 3034 memset(p, 0, size);
20bceecb 3035 return p;
1c6fdbd8
KO
3036}
3037
d38494c4 3038/**
955af634 3039 * bch2_trans_begin() - reset a transaction after an interrupted attempt
d38494c4 3040 * @trans: transaction to reset
d38494c4
DR
3041 *
3042 * While iterating over nodes or updating nodes, an attempt to lock a btree
3043 * node may return EINTR when the trylock fails. When this occurs
955af634 3044 * bch2_trans_begin() should be called and the transaction retried.
d38494c4 3045 */
955af634 3046void bch2_trans_begin(struct btree_trans *trans)
1c6fdbd8 3047{
67e0dd8f
KO
3048 struct btree_insert_entry *i;
3049 struct btree_path *path;
a8e00bd4 3050
67e0dd8f
KO
3051 trans_for_each_update(trans, i)
3052 __btree_path_put(i->path, true);
1c6fdbd8 3053
904823de 3054 memset(&trans->journal_res, 0, sizeof(trans->journal_res));
a49e9a05 3055 trans->extra_journal_res = 0;
5df4be3f 3056 trans->nr_updates = 0;
163e885a 3057 trans->mem_top = 0;
bf7b87a4 3058
43d00243 3059 trans->hooks = NULL;
2a6870ad 3060 trans->extra_journal_entries.nr = 0;
96e2aa1b 3061
f21539a5
KO
3062 if (trans->fs_usage_deltas) {
3063 trans->fs_usage_deltas->used = 0;
3064 memset((void *) trans->fs_usage_deltas +
3065 offsetof(struct replicas_delta_list, memset_start), 0,
3066 (void *) &trans->fs_usage_deltas->memset_end -
3067 (void *) &trans->fs_usage_deltas->memset_start);
3068 }
3069
67e0dd8f 3070 trans_for_each_path(trans, path) {
25a77231
KO
3071 path->should_be_locked = false;
3072
e1f7fa06
KO
3073 /*
3074 * If the transaction wasn't restarted, we're presumably
3075 * doing something new: don't keep iterators except the ones that
3076 * are in use - except for the subvolumes btree:
3077 */
3078 if (!trans->restarted && path->btree_id != BTREE_ID_subvolumes)
3079 path->preserve = false;
3080
67e0dd8f
KO
3081 /*
3082 * XXX: we probably shouldn't be doing this if the transaction
3083 * was restarted, but currently we still overflow transaction
3084 * iterators if we do that
3085 */
3086 if (!path->ref && !path->preserve)
3087 __bch2_path_free(trans, path);
3088 else
25a77231 3089 path->preserve = false;
67e0dd8f
KO
3090 }
3091
955af634 3092 bch2_trans_cond_resched(trans);
50dc0f69 3093
955af634 3094 if (trans->restarted)
67e0dd8f 3095 bch2_btree_path_traverse_all(trans);
e5af273f
KO
3096
3097 trans->restarted = false;
1c6fdbd8
KO
3098}
3099
67e0dd8f 3100static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
1a21bf98 3101{
67e0dd8f 3102 size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX;
3eb26d01 3103 size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX;
dbd1e825 3104 void *p = NULL;
1a21bf98
KO
3105
3106 BUG_ON(trans->used_mempool);
3107
dbd1e825 3108#ifdef __KERNEL__
67e0dd8f 3109 p = this_cpu_xchg(c->btree_paths_bufs->path , NULL);
dbd1e825
KO
3110#endif
3111 if (!p)
67e0dd8f 3112 p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
1a21bf98 3113
67e0dd8f 3114 trans->paths = p; p += paths_bytes;
1a21bf98 3115 trans->updates = p; p += updates_bytes;
1a21bf98
KO
3116}
3117
669f87a5
KO
3118void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
3119 unsigned expected_nr_iters,
3120 size_t expected_mem_bytes,
3121 const char *fn)
c0ebe3e4 3122 __acquires(&c->btree_trans_barrier)
1c6fdbd8 3123{
ae1ede58 3124 memset(trans, 0, sizeof(*trans));
1c6fdbd8 3125 trans->c = c;
669f87a5 3126 trans->fn = fn;
5222a460
KO
3127 trans->journal_replay_not_finished =
3128 !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
20bceecb 3129
67e0dd8f 3130 bch2_trans_alloc_paths(trans, c);
20bceecb 3131
0b5c9f59
KO
3132 if (expected_mem_bytes) {
3133 expected_mem_bytes = roundup_pow_of_two(expected_mem_bytes);
3134 trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL);
e131b6aa
KO
3135
3136 if (unlikely(!trans->mem)) {
3137 trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
3138 trans->mem_bytes = BTREE_TRANS_MEM_MAX;
3139 } else {
0b5c9f59 3140 trans->mem_bytes = expected_mem_bytes;
e131b6aa 3141 }
0b5c9f59 3142 }
495aabed 3143
876c7af3
KO
3144 trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
3145
b84d42c3
KO
3146 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
3147 trans->pid = current->pid;
3148 mutex_lock(&c->btree_trans_lock);
3149 list_add(&trans->list, &c->btree_trans_list);
3150 mutex_unlock(&c->btree_trans_lock);
3151 }
1c6fdbd8
KO
3152}
3153
67e0dd8f
KO
3154static void check_btree_paths_leaked(struct btree_trans *trans)
3155{
3156#ifdef CONFIG_BCACHEFS_DEBUG
3157 struct bch_fs *c = trans->c;
3158 struct btree_path *path;
3159
3160 trans_for_each_path(trans, path)
3161 if (path->ref)
3162 goto leaked;
3163 return;
3164leaked:
669f87a5 3165 bch_err(c, "btree paths leaked from %s!", trans->fn);
67e0dd8f
KO
3166 trans_for_each_path(trans, path)
3167 if (path->ref)
3168 printk(KERN_ERR " btree %s %pS\n",
3169 bch2_btree_ids[path->btree_id],
3170 (void *) path->ip_allocated);
3171 /* Be noisy about this: */
3172 bch2_fatal_error(c);
3173#endif
3174}
3175
9a796fdb 3176void bch2_trans_exit(struct btree_trans *trans)
c0ebe3e4 3177 __releases(&c->btree_trans_barrier)
1c6fdbd8 3178{
67e0dd8f 3179 struct btree_insert_entry *i;
1a21bf98
KO
3180 struct bch_fs *c = trans->c;
3181
ece254b2 3182 bch2_trans_unlock(trans);
1c6fdbd8 3183
67e0dd8f
KO
3184 trans_for_each_update(trans, i)
3185 __btree_path_put(i->path, true);
3186 trans->nr_updates = 0;
509d3e0a 3187
67e0dd8f 3188 check_btree_paths_leaked(trans);
50dc0f69 3189
b84d42c3
KO
3190 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
3191 mutex_lock(&c->btree_trans_lock);
3192 list_del(&trans->list);
3193 mutex_unlock(&c->btree_trans_lock);
3194 }
495aabed 3195
876c7af3
KO
3196 srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
3197
67e0dd8f 3198 bch2_journal_preres_put(&c->journal, &trans->journal_preres);
2ca88e5a 3199
2a6870ad
KO
3200 kfree(trans->extra_journal_entries.data);
3201
9620c3ec
KO
3202 if (trans->fs_usage_deltas) {
3203 if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
3204 REPLICAS_DELTA_LIST_MAX)
3205 mempool_free(trans->fs_usage_deltas,
67e0dd8f 3206 &c->replicas_delta_pool);
9620c3ec
KO
3207 else
3208 kfree(trans->fs_usage_deltas);
3209 }
e131b6aa
KO
3210
3211 if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
67e0dd8f 3212 mempool_free(trans->mem, &c->btree_trans_mem_pool);
e131b6aa
KO
3213 else
3214 kfree(trans->mem);
1a21bf98 3215
dbd1e825
KO
3216#ifdef __KERNEL__
3217 /*
3218 * Userspace doesn't have a real percpu implementation:
3219 */
67e0dd8f 3220 trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths);
dbd1e825 3221#endif
e131b6aa 3222
67e0dd8f
KO
3223 if (trans->paths)
3224 mempool_free(trans->paths, &c->btree_paths_pool);
1a21bf98 3225
1c6fdbd8 3226 trans->mem = (void *) 0x1;
67e0dd8f 3227 trans->paths = (void *) 0x1;
1c6fdbd8 3228}
36e9d698 3229
b3d1e6ca 3230static void __maybe_unused
67e0dd8f 3231bch2_btree_path_node_to_text(struct printbuf *out,
b3d1e6ca 3232 struct btree_bkey_cached_common *_b,
f21566f1 3233 bool cached)
d211b408 3234{
5c1d808a
KO
3235 pr_buf(out, " l=%u %s:",
3236 _b->level, bch2_btree_ids[_b->btree_id]);
f21566f1 3237 bch2_bpos_to_text(out, btree_node_pos(_b, cached));
d211b408
KO
3238}
3239
b84d42c3 3240#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
f21566f1 3241static bool trans_has_locks(struct btree_trans *trans)
5c1d808a 3242{
67e0dd8f 3243 struct btree_path *path;
5c1d808a 3244
67e0dd8f
KO
3245 trans_for_each_path(trans, path)
3246 if (path->nodes_locked)
5c1d808a
KO
3247 return true;
3248 return false;
3249}
2d587674 3250#endif
5c1d808a 3251
495aabed
KO
3252void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
3253{
b84d42c3 3254#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
495aabed 3255 struct btree_trans *trans;
67e0dd8f 3256 struct btree_path *path;
495aabed 3257 struct btree *b;
c7ce2732 3258 static char lock_types[] = { 'r', 'i', 'w' };
495aabed
KO
3259 unsigned l;
3260
3261 mutex_lock(&c->btree_trans_lock);
3262 list_for_each_entry(trans, &c->btree_trans_list, list) {
f21566f1 3263 if (!trans_has_locks(trans))
5c1d808a
KO
3264 continue;
3265
669f87a5 3266 pr_buf(out, "%i %s\n", trans->pid, trans->fn);
495aabed 3267
67e0dd8f
KO
3268 trans_for_each_path(trans, path) {
3269 if (!path->nodes_locked)
495aabed
KO
3270 continue;
3271
1d3ecd7e 3272 pr_buf(out, " path %u %c l=%u %s:",
67e0dd8f
KO
3273 path->idx,
3274 path->cached ? 'c' : 'b',
1d3ecd7e 3275 path->level,
67e0dd8f
KO
3276 bch2_btree_ids[path->btree_id]);
3277 bch2_bpos_to_text(out, path->pos);
495aabed
KO
3278 pr_buf(out, "\n");
3279
3280 for (l = 0; l < BTREE_MAX_DEPTH; l++) {
67e0dd8f 3281 if (btree_node_locked(path, l)) {
d211b408 3282 pr_buf(out, " %s l=%u ",
67e0dd8f
KO
3283 btree_node_intent_locked(path, l) ? "i" : "r", l);
3284 bch2_btree_path_node_to_text(out,
3285 (void *) path->l[l].b,
3286 path->cached);
495aabed
KO
3287 pr_buf(out, "\n");
3288 }
3289 }
3290 }
3291
3292 b = READ_ONCE(trans->locking);
3293 if (b) {
67e0dd8f 3294 path = &trans->paths[trans->locking_path_idx];
c7ce2732 3295 pr_buf(out, " locking path %u %c l=%u %c %s:",
67e0dd8f
KO
3296 trans->locking_path_idx,
3297 path->cached ? 'c' : 'b',
515282ac 3298 trans->locking_level,
c7ce2732 3299 lock_types[trans->locking_lock_type],
515282ac
KO
3300 bch2_btree_ids[trans->locking_btree_id]);
3301 bch2_bpos_to_text(out, trans->locking_pos);
3302
d211b408 3303 pr_buf(out, " node ");
67e0dd8f
KO
3304 bch2_btree_path_node_to_text(out,
3305 (void *) b, path->cached);
495aabed
KO
3306 pr_buf(out, "\n");
3307 }
3308 }
3309 mutex_unlock(&c->btree_trans_lock);
3310#endif
3311}
3312
36e9d698
KO
3313void bch2_fs_btree_iter_exit(struct bch_fs *c)
3314{
99fafb04
KO
3315 if (c->btree_trans_barrier_initialized)
3316 cleanup_srcu_struct(&c->btree_trans_barrier);
e131b6aa 3317 mempool_exit(&c->btree_trans_mem_pool);
67e0dd8f 3318 mempool_exit(&c->btree_paths_pool);
36e9d698
KO
3319}
3320
3321int bch2_fs_btree_iter_init(struct bch_fs *c)
3322{
3323 unsigned nr = BTREE_ITER_MAX;
99fafb04 3324 int ret;
36e9d698 3325
495aabed
KO
3326 INIT_LIST_HEAD(&c->btree_trans_list);
3327 mutex_init(&c->btree_trans_lock);
3328
99fafb04 3329 ret = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
67e0dd8f 3330 sizeof(struct btree_path) * nr +
e131b6aa
KO
3331 sizeof(struct btree_insert_entry) * nr) ?:
3332 mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
99fafb04
KO
3333 BTREE_TRANS_MEM_MAX) ?:
3334 init_srcu_struct(&c->btree_trans_barrier);
3335 if (!ret)
3336 c->btree_trans_barrier_initialized = true;
3337 return ret;
36e9d698 3338}