bcachefs: Add missing btree_path_set_dirty() calls
fs/bcachefs/btree_iter.c
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "recovery.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/prefetch.h>

static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
				       struct btree_path *);

static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);

/*
 * Unlocks before scheduling
 * Note: does not revalidate iterator
 */
static inline int bch2_trans_cond_resched(struct btree_trans *trans)
{
	if (need_resched() || race_fault()) {
		bch2_trans_unlock(trans);
		schedule();
		return bch2_trans_relock(trans) ? 0 : -EINTR;
	} else {
		return 0;
	}
}

static inline int __btree_path_cmp(const struct btree_path *l,
				   enum btree_id r_btree_id,
				   bool r_cached,
				   struct bpos r_pos,
				   unsigned r_level)
{
	/*
	 * Must match lock ordering as defined by __bch2_btree_node_lock:
	 */
	return   cmp_int(l->btree_id, r_btree_id) ?:
		 cmp_int((int) l->cached, (int) r_cached) ?:
		 bpos_cmp(l->pos, r_pos) ?:
		-cmp_int(l->level, r_level);
}

static inline int btree_path_cmp(const struct btree_path *l,
				 const struct btree_path *r)
{
	return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
}
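/*
 * Example of the resulting sort (and hence lock) order: within one btree, all
 * non-cached (btree node) paths come before key cache paths, and only within
 * each group are paths ordered by position - so a btree node path at pos 10
 * still sorts before a key cache path at pos 3.
 */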

static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_successor(p);
	} else {
		p = bpos_nosnap_successor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_predecessor(p);
	} else {
		p = bpos_nosnap_predecessor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

static inline bool is_btree_node(struct btree_path *path, unsigned l)
{
	return l < BTREE_MAX_DEPTH &&
		(unsigned long) path->l[l].b >= 128;
}

static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
{
	struct bpos pos = iter->pos;

	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	    bkey_cmp(pos, POS_MAX))
		pos = bkey_successor(iter, pos);
	return pos;
}

static inline bool btree_path_pos_before_node(struct btree_path *path,
					      struct btree *b)
{
	return bpos_cmp(path->pos, b->data->min_key) < 0;
}

static inline bool btree_path_pos_after_node(struct btree_path *path,
					     struct btree *b)
{
	return bpos_cmp(b->key.k.p, path->pos) < 0;
}

static inline bool btree_path_pos_in_node(struct btree_path *path,
					  struct btree *b)
{
	return path->btree_id == b->c.btree_id &&
		!btree_path_pos_before_node(path, b) &&
		!btree_path_pos_after_node(path, b);
}

/* Btree node locking: */

void bch2_btree_node_unlock_write(struct btree_trans *trans,
				  struct btree_path *path, struct btree *b)
{
	bch2_btree_node_unlock_write_inlined(trans, path, b);
}

void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *linked;
	unsigned readers = 0;

	trans_for_each_path(trans, linked)
		if (linked->l[b->c.level].b == b &&
		    btree_node_read_locked(linked, b->c.level))
			readers++;

	/*
	 * Must drop our read locks before calling six_lock_write() -
	 * six_unlock() won't do wakeups until the reader count
	 * goes to 0, and it's safe because we have the node intent
	 * locked:
	 */
	if (!b->c.lock.readers)
		atomic64_sub(__SIX_VAL(read_lock, readers),
			     &b->c.lock.state.counter);
	else
		this_cpu_sub(*b->c.lock.readers, readers);

	six_lock_write(&b->c.lock, NULL, NULL);

	if (!b->c.lock.readers)
		atomic64_add(__SIX_VAL(read_lock, readers),
			     &b->c.lock.state.counter);
	else
		this_cpu_add(*b->c.lock.readers, readers);
}

bool __bch2_btree_node_relock(struct btree_trans *trans,
			      struct btree_path *path, unsigned level)
{
	struct btree *b = btree_path_node(path, level);
	int want = __btree_lock_want(path, level);

	if (!is_btree_node(path, level))
		goto fail;

	if (race_fault())
		goto fail;

	if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
	    (btree_node_lock_seq_matches(path, b, level) &&
	     btree_node_lock_increment(trans, b, level, want))) {
		mark_btree_node_locked(path, level, want);
		return true;
	}
fail:
	trace_btree_node_relock_fail(trans->fn, _RET_IP_,
				     path->btree_id,
				     &path->pos,
				     (unsigned long) b,
				     path->l[level].lock_seq,
				     is_btree_node(path, level) ? b->c.lock.state.seq : 0);
	return false;
}

bool bch2_btree_node_upgrade(struct btree_trans *trans,
			     struct btree_path *path, unsigned level)
{
	struct btree *b = path->l[level].b;

	if (!is_btree_node(path, level))
		return false;

	switch (btree_lock_want(path, level)) {
	case BTREE_NODE_UNLOCKED:
		BUG_ON(btree_node_locked(path, level));
		return true;
	case BTREE_NODE_READ_LOCKED:
		BUG_ON(btree_node_intent_locked(path, level));
		return bch2_btree_node_relock(trans, path, level);
	case BTREE_NODE_INTENT_LOCKED:
		break;
	}

	if (btree_node_intent_locked(path, level))
		return true;

	if (race_fault())
		return false;

	if (btree_node_locked(path, level)
	    ? six_lock_tryupgrade(&b->c.lock)
	    : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
		goto success;

	if (btree_node_lock_seq_matches(path, b, level) &&
	    btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
		btree_node_unlock(path, level);
		goto success;
	}

	return false;
success:
	mark_btree_node_intent_locked(path, level);
	return true;
}

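/*
 * Relock (or, if @upgrade, upgrade to intent locks) the nodes from
 * path->level up to path->locks_want. On failure the path is unlocked and
 * marked as needing traverse, so that the next traverse walks back up past
 * the node we couldn't relock.
 */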
static inline bool btree_path_get_locks(struct btree_trans *trans,
					struct btree_path *path,
					bool upgrade)
{
	unsigned l = path->level;
	int fail_idx = -1;

	do {
		if (!btree_path_node(path, l))
			break;

		if (!(upgrade
		      ? bch2_btree_node_upgrade(trans, path, l)
		      : bch2_btree_node_relock(trans, path, l)))
			fail_idx = l;

		l++;
	} while (l < path->locks_want);

	/*
	 * When we fail to get a lock, we have to ensure that any child nodes
	 * can't be relocked so bch2_btree_path_traverse has to walk back up to
	 * the node that we failed to relock:
	 */
	if (fail_idx >= 0) {
		__bch2_btree_path_unlock(path);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);

		do {
			path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
			--fail_idx;
		} while (fail_idx >= 0);
	}

	if (path->uptodate == BTREE_ITER_NEED_RELOCK)
		path->uptodate = BTREE_ITER_UPTODATE;

	bch2_trans_verify_locks(trans);

	return path->uptodate < BTREE_ITER_NEED_RELOCK;
}

static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
				  bool cached)
{
	return !cached
		? container_of(_b, struct btree, c)->key.k.p
		: container_of(_b, struct bkey_cached, c)->key.pos;
}

/* Slowpath: */
bool __bch2_btree_node_lock(struct btree_trans *trans,
			    struct btree_path *path,
			    struct btree *b,
			    struct bpos pos, unsigned level,
			    enum six_lock_type type,
			    six_lock_should_sleep_fn should_sleep_fn, void *p,
			    unsigned long ip)
{
	struct btree_path *linked;
	unsigned reason;

	/* Check if it's safe to block: */
	trans_for_each_path(trans, linked) {
		if (!linked->nodes_locked)
			continue;

		/*
		 * Can't block taking an intent lock if we have _any_ nodes read
		 * locked:
		 *
		 * - Our read lock blocks another thread with an intent lock on
		 *   the same node from getting a write lock, and thus from
		 *   dropping its intent lock
		 *
		 * - And the other thread may have multiple nodes intent locked:
		 *   both the node we want to intent lock, and the node we
		 *   already have read locked - deadlock:
		 */
		if (type == SIX_LOCK_intent &&
		    linked->nodes_locked != linked->nodes_intent_locked) {
			reason = 1;
			goto deadlock;
		}

		if (linked->btree_id != path->btree_id) {
			if (linked->btree_id < path->btree_id)
				continue;

			reason = 3;
			goto deadlock;
		}

		/*
		 * Within the same btree, non-cached paths come before cached
		 * paths:
		 */
		if (linked->cached != path->cached) {
			if (!linked->cached)
				continue;

			reason = 4;
			goto deadlock;
		}

		/*
		 * Interior nodes must be locked before their descendants: if
		 * another path has possible descendants locked of the node
		 * we're about to lock, it must have the ancestors locked too:
		 */
		if (level > __fls(linked->nodes_locked)) {
			reason = 5;
			goto deadlock;
		}

		/* Must lock btree nodes in key order: */
		if (btree_node_locked(linked, level) &&
		    bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
						 linked->cached)) <= 0) {
			reason = 7;
			goto deadlock;
		}
	}

	return btree_node_lock_type(trans, path, b, pos, level,
				    type, should_sleep_fn, p);
deadlock:
	trace_trans_restart_would_deadlock(trans->fn, ip,
			trans->in_traverse_all, reason,
			linked->btree_id,
			linked->cached,
			&linked->pos,
			path->btree_id,
			path->cached,
			&pos);
	btree_trans_restart(trans);
	return false;
}

/* Btree iterator locking: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_locks(struct btree_path *path)
{
	unsigned l;

	if (!path->nodes_locked) {
		BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
		       btree_path_node(path, path->level));
		return;
	}

	for (l = 0; btree_path_node(path, l); l++)
		BUG_ON(btree_lock_want(path, l) !=
		       btree_node_locked_type(path, l));
}

void bch2_trans_verify_locks(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify_locks(path);
}
#else
static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
#endif

/* Btree path locking: */

/*
 * Only for btree_cache.c - only relocks intent locks
 */
bool bch2_btree_path_relock_intent(struct btree_trans *trans,
				   struct btree_path *path)
{
	unsigned l;

	for (l = path->level;
	     l < path->locks_want && btree_path_node(path, l);
	     l++) {
		if (!bch2_btree_node_relock(trans, path, l)) {
			__bch2_btree_path_unlock(path);
			btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
			trace_trans_restart_relock_path_intent(trans->fn, _RET_IP_,
						path->btree_id, &path->pos);
			btree_trans_restart(trans);
			return false;
		}
	}

	return true;
}

noinline __flatten
static bool __bch2_btree_path_relock(struct btree_trans *trans,
			struct btree_path *path, unsigned long trace_ip)
{
	bool ret = btree_path_get_locks(trans, path, false);

	if (!ret) {
		trace_trans_restart_relock_path(trans->fn, trace_ip,
						path->btree_id, &path->pos);
		btree_trans_restart(trans);
	}
	return ret;
}

static inline bool bch2_btree_path_relock(struct btree_trans *trans,
					  struct btree_path *path, unsigned long trace_ip)
{
	return btree_node_locked(path, path->level)
		? true
		: __bch2_btree_path_relock(trans, path, trace_ip);
}

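/*
 * Raise path->locks_want and try to take the additional intent locks. On
 * failure, other paths on the same btree also get their locks_want raised so
 * that the locks will be taken after a transaction restart (see the comment
 * below about cloned paths).
 */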
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
			       struct btree_path *path,
			       unsigned new_locks_want)
{
	struct btree_path *linked;

	EBUG_ON(path->locks_want >= new_locks_want);

	path->locks_want = new_locks_want;

	if (btree_path_get_locks(trans, path, true))
		return true;

	/*
	 * XXX: this is ugly - we'd prefer to not be mucking with other
	 * iterators in the btree_trans here.
	 *
	 * On failure to upgrade the iterator, setting iter->locks_want and
	 * calling get_locks() is sufficient to make bch2_btree_path_traverse()
	 * get the locks we want on transaction restart.
	 *
	 * But if this iterator was a clone, on transaction restart what we did
	 * to this iterator isn't going to be preserved.
	 *
	 * Possibly we could add an iterator field for the parent iterator when
	 * an iterator is a copy - for now, we'll just upgrade any other
	 * iterators with the same btree id.
	 *
	 * The code below used to be needed to ensure ancestor nodes get locked
	 * before interior nodes - now that's handled by
	 * bch2_btree_path_traverse_all().
	 */
	if (!path->cached && !trans->in_traverse_all)
		trans_for_each_path(trans, linked)
			if (linked != path &&
			    linked->cached == path->cached &&
			    linked->btree_id == path->btree_id &&
			    linked->locks_want < new_locks_want) {
				linked->locks_want = new_locks_want;
				btree_path_get_locks(trans, linked, true);
			}

	return false;
}

void __bch2_btree_path_downgrade(struct btree_path *path,
				 unsigned new_locks_want)
{
	unsigned l;

	EBUG_ON(path->locks_want < new_locks_want);

	path->locks_want = new_locks_want;

	while (path->nodes_locked &&
	       (l = __fls(path->nodes_locked)) >= path->locks_want) {
		if (l > path->level) {
			btree_node_unlock(path, l);
		} else {
			if (btree_node_intent_locked(path, l)) {
				six_lock_downgrade(&path->l[l].b->c.lock);
				path->nodes_intent_locked ^= 1 << l;
			}
			break;
		}
	}

	bch2_btree_path_verify_locks(path);
}

void bch2_trans_downgrade(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_downgrade(path);
}

/* Btree transaction locking: */

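/*
 * Relock every path that is marked should_be_locked; returns false (with the
 * transaction marked restarted) if any of them can't be relocked.
 */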
bool bch2_trans_relock(struct btree_trans *trans)
{
	struct btree_path *path;

	if (unlikely(trans->restarted))
		return false;

	trans_for_each_path(trans, path)
		if (path->should_be_locked &&
		    !bch2_btree_path_relock(trans, path, _RET_IP_)) {
			trace_trans_restart_relock(trans->fn, _RET_IP_,
					path->btree_id, &path->pos);
			BUG_ON(!trans->restarted);
			return false;
		}
	return true;
}

void bch2_trans_unlock(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		__bch2_btree_path_unlock(path);
}

/* Btree iterator: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_cached(struct btree_trans *trans,
					  struct btree_path *path)
{
	struct bkey_cached *ck;
	bool locked = btree_node_locked(path, 0);

	if (!bch2_btree_node_relock(trans, path, 0))
		return;

	ck = (void *) path->l[0].b;
	BUG_ON(ck->key.btree_id != path->btree_id ||
	       bkey_cmp(ck->key.pos, path->pos));

	if (!locked)
		btree_node_unlock(path, 0);
}

static void bch2_btree_path_verify_level(struct btree_trans *trans,
				struct btree_path *path, unsigned level)
{
	struct btree_path_level *l;
	struct btree_node_iter tmp;
	bool locked;
	struct bkey_packed *p, *k;
	struct printbuf buf1 = PRINTBUF;
	struct printbuf buf2 = PRINTBUF;
	struct printbuf buf3 = PRINTBUF;
	const char *msg;

	if (!bch2_debug_check_iterators)
		return;

	l	= &path->l[level];
	tmp	= l->iter;
	locked	= btree_node_locked(path, level);

	if (path->cached) {
		if (!level)
			bch2_btree_path_verify_cached(trans, path);
		return;
	}

	if (!btree_path_node(path, level))
		return;

	if (!bch2_btree_node_relock(trans, path, level))
		return;

	BUG_ON(!btree_path_pos_in_node(path, l->b));

	bch2_btree_node_iter_verify(&l->iter, l->b);

	/*
	 * For interior nodes, the iterator will have skipped past deleted keys:
	 */
	p = level
		? bch2_btree_node_iter_prev(&tmp, l->b)
		: bch2_btree_node_iter_prev_all(&tmp, l->b);
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);

	if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
		msg = "before";
		goto err;
	}

	if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		msg = "after";
		goto err;
	}

	if (!locked)
		btree_node_unlock(path, level);
	return;
err:
	bch2_bpos_to_text(&buf1, path->pos);

	if (p) {
		struct bkey uk = bkey_unpack_key(l->b, p);
		bch2_bkey_to_text(&buf2, &uk);
	} else {
		pr_buf(&buf2, "(none)");
	}

	if (k) {
		struct bkey uk = bkey_unpack_key(l->b, k);
		bch2_bkey_to_text(&buf3, &uk);
	} else {
		pr_buf(&buf3, "(none)");
	}

	panic("path should be %s key at level %u:\n"
	      "path pos %s\n"
	      "prev key %s\n"
	      "cur key %s\n",
	      msg, level, buf1.buf, buf2.buf, buf3.buf);
}

static void bch2_btree_path_verify(struct btree_trans *trans,
				   struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	unsigned i;

	EBUG_ON(path->btree_id >= BTREE_ID_NR);

	for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
		if (!path->l[i].b) {
			BUG_ON(!path->cached &&
			       c->btree_roots[path->btree_id].b->c.level > i);
			break;
		}

		bch2_btree_path_verify_level(trans, path, i);
	}

	bch2_btree_path_verify_locks(path);
}

void bch2_trans_verify_paths(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify(trans, path);
}

static void bch2_btree_iter_verify(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;

	BUG_ON(iter->btree_id >= BTREE_ID_NR);

	BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached);

	BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));

	BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       !btree_type_has_snapshots(iter->btree_id));

	if (iter->update_path)
		bch2_btree_path_verify(trans, iter->update_path);
	bch2_btree_path_verify(trans, iter->path);
}

static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
	BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
	       !iter->pos.snapshot);

	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       iter->pos.snapshot != iter->snapshot);

	BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
	       bkey_cmp(iter->pos, iter->k.p) > 0);
}

static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
{
	struct btree_trans *trans = iter->trans;
	struct btree_iter copy;
	struct bkey_s_c prev;
	int ret = 0;

	if (!bch2_debug_check_iterators)
		return 0;

	if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
		return 0;

	if (bkey_err(k) || !k.k)
		return 0;

	BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
					  iter->snapshot,
					  k.k->p.snapshot));

	bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
			     BTREE_ITER_NOPRESERVE|
			     BTREE_ITER_ALL_SNAPSHOTS);
	prev = bch2_btree_iter_prev(&copy);
	if (!prev.k)
		goto out;

	ret = bkey_err(prev);
	if (ret)
		goto out;

	if (!bkey_cmp(prev.k->p, k.k->p) &&
	    bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
				      prev.k->p.snapshot) > 0) {
		struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;

		bch2_bkey_to_text(&buf1, k.k);
		bch2_bkey_to_text(&buf2, prev.k);

		panic("iter snap %u\n"
		      "k %s\n"
		      "prev %s\n",
		      iter->snapshot,
		      buf1.buf, buf2.buf);
	}
out:
	bch2_trans_iter_exit(trans, &copy);
	return ret;
}

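/*
 * Assert that some path in the transaction holds a lock covering @pos in
 * btree @id (a key cache path if @key_cache); panics and dumps the
 * transaction's paths otherwise.
 */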
void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
			    struct bpos pos, bool key_cache)
{
	struct btree_path *path;
	unsigned idx;
	struct printbuf buf = PRINTBUF;

	trans_for_each_path_inorder(trans, path, idx) {
		int cmp = cmp_int(path->btree_id, id) ?:
			cmp_int(path->cached, key_cache);

		if (cmp > 0)
			break;
		if (cmp < 0)
			continue;

		if (!(path->nodes_locked & 1) ||
		    !path->should_be_locked)
			continue;

		if (!key_cache) {
			if (bkey_cmp(pos, path->l[0].b->data->min_key) >= 0 &&
			    bkey_cmp(pos, path->l[0].b->key.k.p) <= 0)
				return;
		} else {
			if (!bkey_cmp(pos, path->pos))
				return;
		}
	}

	bch2_dump_trans_paths_updates(trans);
	bch2_bpos_to_text(&buf, pos);

	panic("not locked: %s %s%s\n",
	      bch2_btree_ids[id], buf.buf,
	      key_cache ? " cached" : "");
}

#else

static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
						struct btree_path *path, unsigned l) {}
static inline void bch2_btree_path_verify(struct btree_trans *trans,
					  struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }

#endif

/* Btree path: fixups after btree updates */

static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
					struct btree *b,
					struct bset_tree *t,
					struct bkey_packed *k)
{
	struct btree_node_iter_set *set;

	btree_node_iter_for_each(iter, set)
		if (set->end == t->end_offset) {
			set->k = __btree_node_key_to_offset(b, k);
			bch2_btree_node_iter_sort(iter, b);
			return;
		}

	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
}

static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
					       struct btree *b,
					       struct bkey_packed *where)
{
	struct btree_path_level *l = &path->l[b->c.level];

	if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
		return;

	if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
		bch2_btree_node_iter_advance(&l->iter, l->b);
}

void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
				      struct btree *b,
				      struct bkey_packed *where)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path) {
		__bch2_btree_path_fix_key_modified(path, b, where);
		bch2_btree_path_verify_level(trans, path, b->c.level);
	}
}

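/*
 * A key was inserted or removed at @where: @clobber_u64s is the size of the
 * region that was overwritten, @new_u64s the size of what replaced it. Adjust
 * the node iterator's offsets so it still points at the same position.
 */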
static void __bch2_btree_node_iter_fix(struct btree_path *path,
				       struct btree *b,
				       struct btree_node_iter *node_iter,
				       struct bset_tree *t,
				       struct bkey_packed *where,
				       unsigned clobber_u64s,
				       unsigned new_u64s)
{
	const struct bkey_packed *end = btree_bkey_last(b, t);
	struct btree_node_iter_set *set;
	unsigned offset = __btree_node_key_to_offset(b, where);
	int shift = new_u64s - clobber_u64s;
	unsigned old_end = t->end_offset - shift;
	unsigned orig_iter_pos = node_iter->data[0].k;
	bool iter_current_key_modified =
		orig_iter_pos >= offset &&
		orig_iter_pos <= offset + clobber_u64s;

	btree_node_iter_for_each(node_iter, set)
		if (set->end == old_end)
			goto found;

	/* didn't find the bset in the iterator - might have to readd it: */
	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		bch2_btree_node_iter_push(node_iter, b, where, end);
		goto fixup_done;
	} else {
		/* Iterator is after key that changed */
		return;
	}
found:
	set->end = t->end_offset;

	/* Iterator hasn't gotten to the key that changed yet: */
	if (set->k < offset)
		return;

	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		set->k = offset;
	} else if (set->k < offset + clobber_u64s) {
		set->k = offset + new_u64s;
		if (set->k == set->end)
			bch2_btree_node_iter_set_drop(node_iter, set);
	} else {
		/* Iterator is after key that changed */
		set->k = (int) set->k + shift;
		return;
	}

	bch2_btree_node_iter_sort(node_iter, b);
fixup_done:
	if (node_iter->data[0].k != orig_iter_pos)
		iter_current_key_modified = true;

	/*
	 * When a new key is added, and the node iterator now points to that
	 * key, the iterator might have skipped past deleted keys that should
	 * come after the key the iterator now points to. We have to rewind to
	 * before those deleted keys - otherwise
	 * bch2_btree_node_iter_prev_all() breaks:
	 */
	if (!bch2_btree_node_iter_end(node_iter) &&
	    iter_current_key_modified &&
	    b->c.level) {
		struct bset_tree *t;
		struct bkey_packed *k, *k2, *p;

		k = bch2_btree_node_iter_peek_all(node_iter, b);

		for_each_bset(b, t) {
			bool set_pos = false;

			if (node_iter->data[0].end == t->end_offset)
				continue;

			k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);

			while ((p = bch2_bkey_prev_all(b, t, k2)) &&
			       bkey_iter_cmp(b, k, p) < 0) {
				k2 = p;
				set_pos = true;
			}

			if (set_pos)
				btree_node_iter_set_set_pos(node_iter,
							    b, t, k2);
		}
	}
}

void bch2_btree_node_iter_fix(struct btree_trans *trans,
			      struct btree_path *path,
			      struct btree *b,
			      struct btree_node_iter *node_iter,
			      struct bkey_packed *where,
			      unsigned clobber_u64s,
			      unsigned new_u64s)
{
	struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
	struct btree_path *linked;

	if (node_iter != &path->l[b->c.level].iter) {
		__bch2_btree_node_iter_fix(path, b, node_iter, t,
					   where, clobber_u64s, new_u64s);

		if (bch2_debug_check_iterators)
			bch2_btree_node_iter_verify(node_iter, b);
	}

	trans_for_each_path_with_node(trans, b, linked) {
		__bch2_btree_node_iter_fix(linked, b,
					   &linked->l[b->c.level].iter, t,
					   where, clobber_u64s, new_u64s);
		bch2_btree_path_verify_level(trans, linked, b->c.level);
	}
}

/* Btree path level: pointer to a particular btree node and node iter */

static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
						  struct btree_path_level *l,
						  struct bkey *u,
						  struct bkey_packed *k)
{
	if (unlikely(!k)) {
		/*
		 * signal to bch2_btree_iter_peek_slot() that we're currently at
		 * a hole
		 */
		u->type = KEY_TYPE_deleted;
		return bkey_s_c_null;
	}

	return bkey_disassemble(l->b, k, u);
}

static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
							struct btree_path_level *l,
							struct bkey *u)
{
	return __btree_iter_unpack(c, l, u,
			bch2_btree_node_iter_peek_all(&l->iter, l->b));
}

static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_peek(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->key.k.p;
	trans->paths_sorted = false;
	return k;
}

static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_prev(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->data->min_key;
	trans->paths_sorted = false;
	return k;
}

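/*
 * Advance the node iterator to path->pos; if @max_advance is positive and
 * more than that many steps would be needed, give up and return false so the
 * caller can reinitialize the iterator instead.
 */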
static inline bool btree_path_advance_to_pos(struct btree_path *path,
					     struct btree_path_level *l,
					     int max_advance)
{
	struct bkey_packed *k;
	int nr_advanced = 0;

	while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
	       bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		if (max_advance > 0 && nr_advanced >= max_advance)
			return false;

		bch2_btree_node_iter_advance(&l->iter, l->b);
		nr_advanced++;
	}

	return true;
}

/*
 * Verify that iterator for parent node points to child node:
 */
static void btree_path_verify_new_node(struct btree_trans *trans,
				       struct btree_path *path, struct btree *b)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l;
	unsigned plevel;
	bool parent_locked;
	struct bkey_packed *k;

	if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
		return;

	if (trans->journal_replay_not_finished)
		return;

	plevel = b->c.level + 1;
	if (!btree_path_node(path, plevel))
		return;

	parent_locked = btree_node_locked(path, plevel);

	if (!bch2_btree_node_relock(trans, path, plevel))
		return;

	l = &path->l[plevel];
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
	if (!k ||
	    bkey_deleted(k) ||
	    bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
		struct printbuf buf1 = PRINTBUF;
		struct printbuf buf2 = PRINTBUF;
		struct printbuf buf3 = PRINTBUF;
		struct printbuf buf4 = PRINTBUF;
		struct bkey uk = bkey_unpack_key(b, k);

		bch2_dump_btree_node(c, l->b);
		bch2_bpos_to_text(&buf1, path->pos);
		bch2_bkey_to_text(&buf2, &uk);
		bch2_bpos_to_text(&buf3, b->data->min_key);
		bch2_bpos_to_text(&buf4, b->data->max_key);
		panic("parent iter doesn't point to new node:\n"
		      "iter pos %s %s\n"
		      "iter key %s\n"
		      "new node %s-%s\n",
		      bch2_btree_ids[path->btree_id],
		      buf1.buf, buf2.buf, buf3.buf, buf4.buf);
	}

	if (!parent_locked)
		btree_node_unlock(path, plevel);
}

static inline void __btree_path_level_init(struct btree_path *path,
					   unsigned level)
{
	struct btree_path_level *l = &path->l[level];

	bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

	/*
	 * Iterators to interior nodes should always be pointed at the first non
	 * whiteout:
	 */
	if (level)
		bch2_btree_node_iter_peek(&l->iter, l->b);
}

static inline void btree_path_level_init(struct btree_trans *trans,
					 struct btree_path *path,
					 struct btree *b)
{
	BUG_ON(path->cached);

	btree_path_verify_new_node(trans, path, b);

	EBUG_ON(!btree_path_pos_in_node(path, b));
	EBUG_ON(b->c.lock.state.seq & 1);

	path->l[b->c.level].lock_seq = b->c.lock.state.seq;
	path->l[b->c.level].b = b;
	__btree_path_level_init(path, b->c.level);
}

/* Btree path: fixups after btree node updates: */

/*
 * A btree node is being replaced - update the iterator to point to the new
 * node:
 */
void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		if (!path->cached &&
		    btree_path_pos_in_node(path, b)) {
			enum btree_node_locked_type t =
				btree_lock_want(path, b->c.level);

			if (path->nodes_locked &&
			    t != BTREE_NODE_UNLOCKED) {
				btree_node_unlock(path, b->c.level);
				six_lock_increment(&b->c.lock, (enum six_lock_type) t);
				mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t);
			}

			btree_path_level_init(trans, path, b);
		}
}

/*
 * A btree node has been modified in such a way as to invalidate iterators - fix
 * them:
 */
void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path)
		__btree_path_level_init(path, b->c.level);
}

1181
bd2bb273
KO
1182static int lock_root_check_fn(struct six_lock *lock, void *p)
1183{
1184 struct btree *b = container_of(lock, struct btree, c.lock);
1185 struct btree **rootp = p;
1186
1187 return b == *rootp ? 0 : -1;
1188}
1189
67e0dd8f
KO
1190static inline int btree_path_lock_root(struct btree_trans *trans,
1191 struct btree_path *path,
a301dc38
KO
1192 unsigned depth_want,
1193 unsigned long trace_ip)
1c6fdbd8 1194{
e5af273f 1195 struct bch_fs *c = trans->c;
67e0dd8f 1196 struct btree *b, **rootp = &c->btree_roots[path->btree_id].b;
1c6fdbd8
KO
1197 enum six_lock_type lock_type;
1198 unsigned i;
1199
67e0dd8f 1200 EBUG_ON(path->nodes_locked);
1c6fdbd8
KO
1201
1202 while (1) {
bd2bb273 1203 b = READ_ONCE(*rootp);
67e0dd8f 1204 path->level = READ_ONCE(b->c.level);
1c6fdbd8 1205
67e0dd8f 1206 if (unlikely(path->level < depth_want)) {
1c6fdbd8
KO
1207 /*
1208 * the root is at a lower depth than the depth we want:
1209 * got to the end of the btree, or we're walking nodes
1210 * greater than some depth and there are no nodes >=
1211 * that depth
1212 */
67e0dd8f
KO
1213 path->level = depth_want;
1214 for (i = path->level; i < BTREE_MAX_DEPTH; i++)
1215 path->l[i].b = NULL;
8812600c 1216 return 1;
1c6fdbd8
KO
1217 }
1218
67e0dd8f
KO
1219 lock_type = __btree_lock_want(path, path->level);
1220 if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX,
1221 path->level, lock_type,
a301dc38 1222 lock_root_check_fn, rootp,
e5af273f
KO
1223 trace_ip))) {
1224 if (trans->restarted)
1225 return -EINTR;
1226 continue;
1227 }
1c6fdbd8 1228
bd2bb273 1229 if (likely(b == READ_ONCE(*rootp) &&
67e0dd8f 1230 b->c.level == path->level &&
1c6fdbd8 1231 !race_fault())) {
67e0dd8f
KO
1232 for (i = 0; i < path->level; i++)
1233 path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
1234 path->l[path->level].b = b;
1235 for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
1236 path->l[i].b = NULL;
1237
3074bc0f 1238 mark_btree_node_locked(path, path->level, lock_type);
67e0dd8f 1239 btree_path_level_init(trans, path, b);
1c6fdbd8 1240 return 0;
1c6fdbd8
KO
1241 }
1242
c43a6ef9 1243 six_unlock_type(&b->c.lock, lock_type);
1c6fdbd8
KO
1244 }
1245}
1246
1247noinline
67e0dd8f 1248static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
1c6fdbd8 1249{
9f6bd307 1250 struct bch_fs *c = trans->c;
67e0dd8f 1251 struct btree_path_level *l = path_l(path);
1c6fdbd8
KO
1252 struct btree_node_iter node_iter = l->iter;
1253 struct bkey_packed *k;
07a1006a 1254 struct bkey_buf tmp;
9e5e5b9e 1255 unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
67e0dd8f
KO
1256 ? (path->level > 1 ? 0 : 2)
1257 : (path->level > 1 ? 1 : 16);
1258 bool was_locked = btree_node_locked(path, path->level);
8b3e9bd6 1259 int ret = 0;
1c6fdbd8 1260
07a1006a
KO
1261 bch2_bkey_buf_init(&tmp);
1262
8b3e9bd6 1263 while (nr && !ret) {
67e0dd8f 1264 if (!bch2_btree_node_relock(trans, path, path->level))
07a1006a 1265 break;
1c6fdbd8
KO
1266
1267 bch2_btree_node_iter_advance(&node_iter, l->b);
1268 k = bch2_btree_node_iter_peek(&node_iter, l->b);
1269 if (!k)
1270 break;
1271
07a1006a 1272 bch2_bkey_buf_unpack(&tmp, c, l->b, k);
67e0dd8f
KO
1273 ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
1274 path->level - 1);
1c6fdbd8
KO
1275 }
1276
1277 if (!was_locked)
67e0dd8f 1278 btree_node_unlock(path, path->level);
07a1006a
KO
1279
1280 bch2_bkey_buf_exit(&tmp, c);
8b3e9bd6 1281 return ret;
1c6fdbd8
KO
1282}
1283
5222a460
KO
1284static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
1285 struct btree_and_journal_iter *jiter)
1286{
1287 struct bch_fs *c = trans->c;
1288 struct bkey_s_c k;
1289 struct bkey_buf tmp;
1290 unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
1291 ? (path->level > 1 ? 0 : 2)
1292 : (path->level > 1 ? 1 : 16);
1293 bool was_locked = btree_node_locked(path, path->level);
1294 int ret = 0;
1295
1296 bch2_bkey_buf_init(&tmp);
1297
1298 while (nr && !ret) {
1299 if (!bch2_btree_node_relock(trans, path, path->level))
1300 break;
1301
1302 bch2_btree_and_journal_iter_advance(jiter);
1303 k = bch2_btree_and_journal_iter_peek(jiter);
1304 if (!k.k)
1305 break;
1306
1307 bch2_bkey_buf_reassemble(&tmp, c, k);
1308 ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
1309 path->level - 1);
1310 }
1311
1312 if (!was_locked)
1313 btree_node_unlock(path, path->level);
1314
1315 bch2_bkey_buf_exit(&tmp, c);
1316 return ret;
1317}
1318
78cf784e 1319static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
67e0dd8f 1320 struct btree_path *path,
72141e1f
KO
1321 unsigned plevel, struct btree *b)
1322{
67e0dd8f
KO
1323 struct btree_path_level *l = &path->l[plevel];
1324 bool locked = btree_node_locked(path, plevel);
72141e1f
KO
1325 struct bkey_packed *k;
1326 struct bch_btree_ptr_v2 *bp;
1327
67e0dd8f 1328 if (!bch2_btree_node_relock(trans, path, plevel))
72141e1f
KO
1329 return;
1330
1331 k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
1332 BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);
1333
1334 bp = (void *) bkeyp_val(&l->b->format, k);
1335 bp->mem_ptr = (unsigned long)b;
1336
1337 if (!locked)
67e0dd8f 1338 btree_node_unlock(path, plevel);
72141e1f
KO
1339}
1340
5222a460
KO
1341static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
1342 struct btree_path *path,
1343 unsigned flags,
1344 struct bkey_buf *out)
1345{
1346 struct bch_fs *c = trans->c;
1347 struct btree_path_level *l = path_l(path);
1348 struct btree_and_journal_iter jiter;
1349 struct bkey_s_c k;
1350 int ret = 0;
1351
1352 __bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
1353
1354 k = bch2_btree_and_journal_iter_peek(&jiter);
1355
1356 bch2_bkey_buf_reassemble(out, c, k);
1357
1358 if (flags & BTREE_ITER_PREFETCH)
1359 ret = btree_path_prefetch_j(trans, path, &jiter);
1360
1361 bch2_btree_and_journal_iter_exit(&jiter);
1362 return ret;
1363}
1364
67e0dd8f
KO
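/*
 * Descend one level: look up the child node that path->pos points at in the
 * node at path->level, lock it, initialize its node iterator and make it the
 * new path->level. With BTREE_ITER_PREFETCH, the next few children of the
 * parent node are also prefetched.
 */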
static __always_inline int btree_path_down(struct btree_trans *trans,
					   struct btree_path *path,
					   unsigned flags,
					   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree *b;
	unsigned level = path->level - 1;
	enum six_lock_type lock_type = __btree_lock_want(path, level);
	struct bkey_buf tmp;
	int ret;

	EBUG_ON(!btree_node_locked(path, path->level));

	bch2_bkey_buf_init(&tmp);

	if (unlikely(trans->journal_replay_not_finished)) {
		ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
		if (ret)
			goto err;
	} else {
		bch2_bkey_buf_unpack(&tmp, c, l->b,
				     bch2_btree_node_iter_peek(&l->iter, l->b));

		if (flags & BTREE_ITER_PREFETCH) {
			ret = btree_path_prefetch(trans, path);
			if (ret)
				goto err;
		}
	}

	b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
	ret = PTR_ERR_OR_ZERO(b);
	if (unlikely(ret))
		goto err;

	mark_btree_node_locked(path, level, lock_type);
	btree_path_level_init(trans, path, b);

	if (likely(!trans->journal_replay_not_finished &&
		   tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
	    unlikely(b != btree_node_mem_ptr(tmp.k)))
		btree_node_mem_ptr_set(trans, path, level + 1, b);

	if (btree_node_read_locked(path, level + 1))
		btree_node_unlock(path, level + 1);
	path->level = level;

	bch2_btree_path_verify_locks(path);
err:
	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
				   unsigned, unsigned long);

8f9ad91a 1423static int bch2_btree_path_traverse_all(struct btree_trans *trans)
1c6fdbd8 1424{
e542029e 1425 struct bch_fs *c = trans->c;
eb7bd15f 1426 struct btree_path *path, *prev;
8f9ad91a
KO
1427 unsigned long trace_ip = _RET_IP_;
1428 int i, ret = 0;
e542029e 1429
2ca88e5a
KO
1430 if (trans->in_traverse_all)
1431 return -EINTR;
1432
1433 trans->in_traverse_all = true;
1434retry_all:
eb7bd15f 1435 prev = NULL;
e5af273f
KO
1436 trans->restarted = false;
1437
67e0dd8f
KO
1438 trans_for_each_path(trans, path)
1439 path->should_be_locked = false;
e542029e 1440
67e0dd8f 1441 btree_trans_sort_paths(trans);
2527dd91 1442
67e0dd8f 1443 trans_for_each_path_inorder_reverse(trans, path, i) {
0423fb71 1444 if (prev) {
67e0dd8f
KO
1445 if (path->btree_id == prev->btree_id &&
1446 path->locks_want < prev->locks_want)
1447 __bch2_btree_path_upgrade(trans, path, prev->locks_want);
1448 else if (!path->locks_want && prev->locks_want)
1449 __bch2_btree_path_upgrade(trans, path, 1);
0423fb71 1450 }
2527dd91 1451
67e0dd8f 1452 prev = path;
2527dd91
KO
1453 }
1454
58fbf808 1455 bch2_trans_unlock(trans);
a301dc38 1456 cond_resched();
1c6fdbd8 1457
8f9ad91a 1458 if (unlikely(trans->memory_allocation_failure)) {
1c6fdbd8
KO
1459 struct closure cl;
1460
1461 closure_init_stack(&cl);
1462
1463 do {
1464 ret = bch2_btree_cache_cannibalize_lock(c, &cl);
1465 closure_sync(&cl);
1466 } while (ret);
1467 }
1468
1c6fdbd8 1469 /* Now, redo traversals in correct order: */
0423fb71
KO
1470 i = 0;
1471 while (i < trans->nr_sorted) {
67e0dd8f 1472 path = trans->paths + trans->sorted[i];
2ca88e5a 1473
33aa419d
KO
1474 /*
1475 * Traversing a path can cause another path to be added at about
1476 * the same position:
1477 */
1478 if (path->uptodate) {
1479 ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
a897ef68 1480 if (ret == -EINTR || ret == -ENOMEM)
33aa419d 1481 goto retry_all;
a897ef68
KO
1482 if (ret)
1483 goto err;
33aa419d 1484 } else {
0423fb71 1485 i++;
33aa419d 1486 }
e542029e 1487 }
0423fb71
KO
1488
1489 /*
1490 * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock()
67e0dd8f 1491 * and relock(), relock() won't relock since path->should_be_locked
0423fb71
KO
1492 * isn't set yet, which is all fine
1493 */
67e0dd8f
KO
1494 trans_for_each_path(trans, path)
1495 BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
a897ef68 1496err:
1c6fdbd8 1497 bch2_btree_cache_cannibalize_unlock(c);
2ca88e5a 1498
669f87a5
KO
1499 trans->in_traverse_all = false;
1500
1501 trace_trans_traverse_all(trans->fn, trace_ip);
1c6fdbd8 1502 return ret;
bf7b87a4 1503}
1c6fdbd8 1504
67e0dd8f
KO
1505static inline bool btree_path_good_node(struct btree_trans *trans,
1506 struct btree_path *path,
f4b61341
KO
1507 unsigned l, int check_pos)
1508{
67e0dd8f
KO
1509 if (!is_btree_node(path, l) ||
1510 !bch2_btree_node_relock(trans, path, l))
f4b61341
KO
1511 return false;
1512
67e0dd8f 1513 if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
f4b61341 1514 return false;
67e0dd8f 1515 if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
f4b61341
KO
1516 return false;
1517 return true;
1518}
1519
67e0dd8f
KO
1520static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
1521 struct btree_path *path,
f4b61341 1522 int check_pos)
1c6fdbd8 1523{
8ee0134e 1524 unsigned i, l = path->level;
1d3ecd7e 1525
67e0dd8f
KO
1526 while (btree_path_node(path, l) &&
1527 !btree_path_good_node(trans, path, l, check_pos)) {
1528 btree_node_unlock(path, l);
1529 path->l[l].b = BTREE_ITER_NO_NODE_UP;
1c6fdbd8
KO
1530 l++;
1531 }
1532
8ee0134e
KO
1533 /* If we need intent locks, take them too: */
1534 for (i = l + 1;
1535 i < path->locks_want && btree_path_node(path, i);
1536 i++)
1537 if (!bch2_btree_node_relock(trans, path, i))
1538 while (l <= i) {
1539 btree_node_unlock(path, l);
1540 path->l[l].b = BTREE_ITER_NO_NODE_UP;
1541 l++;
1542 }
1543
1c6fdbd8
KO
1544 return l;
1545}
1546
1547/*
1548 * This is the main state machine for walking down the btree - walks down to a
1549 * specified depth
1550 *
1551 * Returns 0 on success, -EIO on error (error reading in a btree node).
1552 *
1553 * On error, caller (peek_node()/peek_key()) must return NULL; the error is
b7607ce9 1554 * stashed in the iterator and returned from bch2_trans_exit().
1c6fdbd8 1555 */
67e0dd8f
KO
1556static int btree_path_traverse_one(struct btree_trans *trans,
1557 struct btree_path *path,
1558 unsigned flags,
a301dc38 1559 unsigned long trace_ip)
1c6fdbd8 1560{
8ee0134e 1561 unsigned depth_want = path->level;
531a0095 1562 int ret = 0;
1c6fdbd8 1563
979735df
KO
1564 if (unlikely(trans->restarted)) {
1565 ret = -EINTR;
1566 goto out;
1567 }
1568
e829b717 1569 /*
67e0dd8f
KO
1570 * Ensure we obey path->should_be_locked: if it's set, we can't unlock
1571 * and re-traverse the path without a transaction restart:
e829b717 1572 */
67e0dd8f
KO
1573 if (path->should_be_locked) {
1574 ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR;
e829b717
KO
1575 goto out;
1576 }
1577
67e0dd8f
KO
1578 if (path->cached) {
1579 ret = bch2_btree_path_traverse_cached(trans, path, flags);
531a0095
KO
1580 goto out;
1581 }
2ca88e5a 1582
67e0dd8f 1583 if (unlikely(path->level >= BTREE_MAX_DEPTH))
531a0095 1584 goto out;
2ca88e5a 1585
67e0dd8f 1586 path->level = btree_path_up_until_good_node(trans, path, 0);
1c6fdbd8 1587
1c6fdbd8 1588 /*
67e0dd8f 1589 * Note: path->nodes[path->level] may be temporarily NULL here - that
1c6fdbd8
KO
1590 * would indicate to other code that we got to the end of the btree,
1591 * here it indicates that relocking the root failed - it's critical that
67e0dd8f 1592 * btree_path_lock_root() comes next and that it can't fail
1c6fdbd8 1593 */
67e0dd8f
KO
1594 while (path->level > depth_want) {
1595 ret = btree_path_node(path, path->level)
1596 ? btree_path_down(trans, path, flags, trace_ip)
1597 : btree_path_lock_root(trans, path, depth_want, trace_ip);
1c6fdbd8 1598 if (unlikely(ret)) {
531a0095
KO
1599 if (ret == 1) {
1600 /*
f21566f1
KO
1601 * No nodes at this level - got to the end of
1602 * the btree:
531a0095
KO
1603 */
1604 ret = 0;
1605 goto out;
1606 }
8812600c 1607
67e0dd8f
KO
1608 __bch2_btree_path_unlock(path);
1609 path->level = depth_want;
2ca88e5a 1610
67e0dd8f
KO
1611 if (ret == -EIO)
1612 path->l[path->level].b =
2ca88e5a 1613 BTREE_ITER_NO_NODE_ERROR;
67e0dd8f
KO
1614 else
1615 path->l[path->level].b =
2ca88e5a 1616 BTREE_ITER_NO_NODE_DOWN;
531a0095 1617 goto out;
1c6fdbd8
KO
1618 }
1619 }
1620
67e0dd8f 1621 path->uptodate = BTREE_ITER_UPTODATE;
531a0095 1622out:
e5af273f 1623 BUG_ON((ret == -EINTR) != !!trans->restarted);
67e0dd8f 1624 bch2_btree_path_verify(trans, path);
531a0095 1625 return ret;
1c6fdbd8
KO
1626}

int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
					  struct btree_path *path, unsigned flags)
{
	if (path->uptodate < BTREE_ITER_NEED_RELOCK)
		return 0;

	return  bch2_trans_cond_resched(trans) ?:
		btree_path_traverse_one(trans, path, flags, _RET_IP_);
}

static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
			    struct btree_path *src)
{
	unsigned i, offset = offsetof(struct btree_path, pos);

	memcpy((void *) dst + offset,
	       (void *) src + offset,
	       sizeof(struct btree_path) - offset);

	for (i = 0; i < BTREE_MAX_DEPTH; i++)
		if (btree_node_locked(dst, i))
			six_lock_increment(&dst->l[i].b->c.lock,
					   __btree_lock_want(dst, i));

	trans->paths_sorted = false;
}

static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
					   bool intent)
{
	struct btree_path *new = btree_path_alloc(trans, src);

	btree_path_copy(trans, new, src);
	__btree_path_get(new, intent);
	return new;
}

struct btree_path * __must_check
__bch2_btree_path_make_mut(struct btree_trans *trans,
			   struct btree_path *path, bool intent)
{
	__btree_path_put(path, intent);
	path = btree_path_clone(trans, path, intent);
	path->preserve = false;
#ifdef CONFIG_BCACHEFS_DEBUG
	path->ip_allocated = _RET_IP_;
#endif
	return path;
}

struct btree_path * __must_check
__bch2_btree_path_set_pos(struct btree_trans *trans,
			  struct btree_path *path, struct bpos new_pos,
			  bool intent, int cmp)
{
	unsigned l = path->level;

	EBUG_ON(trans->restarted);
	EBUG_ON(!path->ref);

	path = bch2_btree_path_make_mut(trans, path, intent);

	path->pos		= new_pos;
	path->should_be_locked	= false;
	trans->paths_sorted	= false;

	if (unlikely(path->cached)) {
		btree_node_unlock(path, 0);
		path->l[0].b = BTREE_ITER_NO_NODE_CACHED;
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		goto out;
	}

	l = btree_path_up_until_good_node(trans, path, cmp);

	if (btree_path_node(path, l)) {
		BUG_ON(!btree_node_locked(path, l));
		/*
		 * We might have to skip over many keys, or just a few: try
		 * advancing the node iterator, and if we have to skip over too
		 * many keys just reinit it (or if we're rewinding, since that
		 * is expensive).
		 */
		if (cmp < 0 ||
		    !btree_path_advance_to_pos(path, &path->l[l], 8))
			__btree_path_level_init(path, l);
	}

	if (l != path->level) {
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		__bch2_btree_path_unlock(path);
	}
out:
	bch2_btree_path_verify(trans, path);
	return path;
}

/* Btree path: main interface: */

static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
{
	struct btree_path *next;

	next = prev_btree_path(trans, path);
	if (next && !btree_path_cmp(next, path))
		return next;

	next = next_btree_path(trans, path);
	if (next && !btree_path_cmp(next, path))
		return next;

	return NULL;
}

static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
{
	struct btree_path *next;

	next = prev_btree_path(trans, path);
	if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
		return next;

	next = next_btree_path(trans, path);
	if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
		return next;

	return NULL;
}

static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
{
	__bch2_btree_path_unlock(path);
	btree_path_list_remove(trans, path);
	trans->paths_allocated &= ~(1ULL << path->idx);
}

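/*
 * Drop a reference on @path: when the last reference goes away, the path is
 * freed if there is a duplicate path (same position, or same node) to hand
 * its preserve/should_be_locked flags over to.
 */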
void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
{
	struct btree_path *dup;

	EBUG_ON(trans->paths + path->idx != path);
	EBUG_ON(!path->ref);

	if (!__btree_path_put(path, intent))
		return;

	/*
	 * Perhaps instead we should check for duplicate paths in traverse_all:
	 */
	if (path->preserve &&
	    (dup = have_path_at_pos(trans, path))) {
		dup->preserve = true;
		path->preserve = false;
		goto free;
	}

	if (!path->preserve &&
	    (dup = have_node_at_pos(trans, path)))
		goto free;
	return;
free:
	if (path->should_be_locked &&
	    !btree_node_locked(dup, path->level))
		return;

	dup->should_be_locked |= path->should_be_locked;
	__bch2_path_free(trans, path);
}

void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
{
	struct btree_insert_entry *i;

	pr_buf(buf, "transaction updates for %s journal seq %llu",
	       trans->fn, trans->journal_res.seq);
	pr_newline(buf);
	pr_indent_push(buf, 2);

	trans_for_each_update(trans, i) {
		struct bkey_s_c old = { &i->old_k, i->old_v };

		pr_buf(buf, "update: btree=%s cached=%u %pS",
		       bch2_btree_ids[i->btree_id],
		       i->cached,
		       (void *) i->ip_allocated);
		pr_newline(buf);

		pr_buf(buf, " old ");
		bch2_bkey_val_to_text(buf, trans->c, old);
		pr_newline(buf);

		pr_buf(buf, " new ");
		bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
		pr_newline(buf);
	}

	pr_indent_pop(buf, 2);
}

noinline __cold
void bch2_dump_trans_updates(struct btree_trans *trans)
{
	struct printbuf buf = PRINTBUF;

	bch2_trans_updates_to_text(&buf, trans);
	bch_err(trans->c, "%s", buf.buf);
	printbuf_exit(&buf);
}

noinline __cold
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
{
	struct btree_path *path;
	struct printbuf buf = PRINTBUF;
	unsigned idx;

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder(trans, path, idx) {
		printbuf_reset(&buf);

		bch2_bpos_to_text(&buf, path->pos);

		printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n",
		       path->idx, path->ref, path->intent_ref,
		       path->should_be_locked ? " S" : "",
		       path->preserve ? " P" : "",
		       bch2_btree_ids[path->btree_id],
		       path->level,
		       buf.buf,
		       path->nodes_locked,
#ifdef CONFIG_BCACHEFS_DEBUG
		       (void *) path->ip_allocated
#else
		       NULL
#endif
		       );
	}

	printbuf_exit(&buf);

	bch2_dump_trans_updates(trans);
}

static struct btree_path *btree_path_alloc(struct btree_trans *trans,
					   struct btree_path *pos)
{
	struct btree_path *path;
	unsigned idx;

	if (unlikely(trans->paths_allocated ==
		     ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) {
		bch2_dump_trans_paths_updates(trans);
1881 panic("trans path oveflow\n");
1882 }
1883
1884 idx = __ffs64(~trans->paths_allocated);
1885 trans->paths_allocated |= 1ULL << idx;
1886
1887 path = &trans->paths[idx];
1888
1889 path->idx = idx;
1890 path->ref = 0;
1891 path->intent_ref = 0;
1892 path->nodes_locked = 0;
1893 path->nodes_intent_locked = 0;
1894
1895 btree_path_list_add(trans, pos, path);
1896 return path;
1897}
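
/*
 * Illustrative note, not part of this file: trans->paths_allocated is a
 * 64-bit bitmap with one bit per slot in trans->paths, so the lowest clear
 * bit of the bitmap is the first free slot. A hypothetical worked example:
 *
 *	paths_allocated = 0b0111;		// slots 0-2 in use
 *	idx = __ffs64(~paths_allocated);	// finds bit 3, the first free slot
 */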
1898
f3e1f444 1899struct btree_path *bch2_path_get(struct btree_trans *trans,
67e0dd8f
KO
1900 enum btree_id btree_id, struct bpos pos,
1901 unsigned locks_want, unsigned level,
f3e1f444 1902 unsigned flags)
67e0dd8f 1903{
807dda8c 1904 struct btree_path *path, *path_pos = NULL;
f3e1f444
KO
1905 bool cached = flags & BTREE_ITER_CACHED;
1906 bool intent = flags & BTREE_ITER_INTENT;
67e0dd8f
KO
1907 int i;
1908
1909 BUG_ON(trans->restarted);
7071878b
KO
1910 bch2_trans_verify_locks(trans);
1911
eb7bd15f 1912 btree_trans_sort_paths(trans);
67e0dd8f 1913
807dda8c 1914 btree_trans_sort_paths(trans);
67e0dd8f 1915
807dda8c
KO
1916 trans_for_each_path_inorder(trans, path, i) {
1917 if (__btree_path_cmp(path,
1918 btree_id,
1919 cached,
1920 pos,
1921 level) > 0)
1922 break;
67e0dd8f 1923
807dda8c 1924 path_pos = path;
67e0dd8f
KO
1925 }
1926
807dda8c
KO
1927 if (path_pos &&
1928 path_pos->cached == cached &&
1929 path_pos->btree_id == btree_id &&
1930 path_pos->level == level) {
1931 __btree_path_get(path_pos, intent);
ce91abd6 1932 path = bch2_btree_path_set_pos(trans, path_pos, pos, intent);
67e0dd8f 1933 } else {
807dda8c
KO
1934 path = btree_path_alloc(trans, path_pos);
1935 path_pos = NULL;
67e0dd8f
KO
1936
1937 __btree_path_get(path, intent);
1938 path->pos = pos;
1939 path->btree_id = btree_id;
1940 path->cached = cached;
67e0dd8f
KO
1941 path->uptodate = BTREE_ITER_NEED_TRAVERSE;
1942 path->should_be_locked = false;
1943 path->level = level;
1944 path->locks_want = locks_want;
1945 path->nodes_locked = 0;
1946 path->nodes_intent_locked = 0;
1947 for (i = 0; i < ARRAY_SIZE(path->l); i++)
1948 path->l[i].b = BTREE_ITER_NO_NODE_INIT;
1949#ifdef CONFIG_BCACHEFS_DEBUG
1950 path->ip_allocated = _RET_IP_;
1951#endif
1952 trans->paths_sorted = false;
1953 }
1954
f3e1f444
KO
1955 if (!(flags & BTREE_ITER_NOPRESERVE))
1956 path->preserve = true;
1957
67e0dd8f
KO
1958 if (path->intent_ref)
1959 locks_want = max(locks_want, level + 1);
1960
1961 /*
1962 * If the path has locks_want greater than requested, we don't downgrade
1963 * it here - on transaction restart because btree node split needs to
1964 * upgrade locks, we might be putting/getting the iterator again.
1965 * Downgrading iterators only happens via bch2_trans_downgrade(), after
1966 * a successful transaction commit.
1967 */
1968
1969 locks_want = min(locks_want, BTREE_MAX_DEPTH);
1970 if (locks_want > path->locks_want) {
1971 path->locks_want = locks_want;
bc82d08b 1972 btree_path_get_locks(trans, path, true);
67e0dd8f
KO
1973 }
1974
67e0dd8f
KO
1975 return path;
1976}
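
/*
 * Illustrative sketch, not part of this file: a path returned by
 * bch2_path_get() holds a reference and must eventually be dropped with
 * bch2_path_put(), passing the same intent setting. Error handling and
 * transaction restart logic are elided:
 *
 *	struct btree_path *path;
 *	int ret;
 *
 *	path = bch2_path_get(trans, BTREE_ID_extents, pos, 0, 0, 0);
 *	ret = bch2_btree_path_traverse(trans, path, 0);
 *	if (!ret) {
 *		// use path->l[path->level] ...
 *	}
 *	bch2_path_put(trans, path, false);	// false: no intent ref was taken
 */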
1977
1978inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
1979{
1980
1981 struct bkey_s_c k;
1982
67e0dd8f
KO
1983 if (!path->cached) {
1984 struct btree_path_level *l = path_l(path);
f7b6ca23
KO
1985 struct bkey_packed *_k;
1986
1987 EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
67e0dd8f 1988
f7b6ca23 1989 _k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
67e0dd8f
KO
1990 k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
1991
1992 EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
1993
1994 if (!k.k || bpos_cmp(path->pos, k.k->p))
1995 goto hole;
1996 } else {
1997 struct bkey_cached *ck = (void *) path->l[0].b;
1998
f7b6ca23
KO
1999 EBUG_ON(ck &&
2000 (path->btree_id != ck->key.btree_id ||
2001 bkey_cmp(path->pos, ck->key.pos)));
67e0dd8f 2002
f7b6ca23
KO
2003 /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
2004 if (unlikely(!ck || !ck->valid))
2005 return bkey_s_c_null;
2006
2007 EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
67e0dd8f 2008
2e63e180 2009 *u = ck->k->k;
67e0dd8f
KO
2010 k = bkey_i_to_s_c(ck->k);
2011 }
2012
2013 return k;
2014hole:
2015 bkey_init(u);
2016 u->p = path->pos;
2017 return (struct bkey_s_c) { u, NULL };
2018}
2019
2020/* Btree iterators: */
2021
db92f2ea
KO
2022int __must_check
2023__bch2_btree_iter_traverse(struct btree_iter *iter)
2024{
2025 return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
2026}
2027
08070cba
KO
2028int __must_check
2029bch2_btree_iter_traverse(struct btree_iter *iter)
2030{
66a0a497
KO
2031 int ret;
2032
ce91abd6 2033 iter->path = bch2_btree_path_set_pos(iter->trans, iter->path,
67e0dd8f
KO
2034 btree_iter_search_key(iter),
2035 iter->flags & BTREE_ITER_INTENT);
08070cba 2036
67e0dd8f 2037 ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
66a0a497
KO
2038 if (ret)
2039 return ret;
2040
67e0dd8f 2041 iter->path->should_be_locked = true;
66a0a497 2042 return 0;
08070cba
KO
2043}
2044
1c6fdbd8
KO
2045/* Iterate across nodes (leaf and interior nodes) */
2046
2047struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
2048{
502027a8 2049 struct btree_trans *trans = iter->trans;
f21566f1 2050 struct btree *b = NULL;
1c6fdbd8
KO
2051 int ret;
2052
67e0dd8f 2053 EBUG_ON(iter->path->cached);
7e1a3aa9 2054 bch2_btree_iter_verify(iter);
1c6fdbd8 2055
502027a8 2056 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
1c6fdbd8 2057 if (ret)
d355c6f4 2058 goto err;
1c6fdbd8 2059
67e0dd8f 2060 b = btree_path_node(iter->path, iter->path->level);
1c6fdbd8 2061 if (!b)
f21566f1 2062 goto out;
1c6fdbd8 2063
4cf91b02 2064 BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0);
1c6fdbd8 2065
f21566f1 2066 bkey_init(&iter->k);
67e0dd8f 2067 iter->k.p = iter->pos = b->key.k.p;
502027a8 2068
ce91abd6 2069 iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
502027a8 2070 iter->flags & BTREE_ITER_INTENT);
67e0dd8f 2071 iter->path->should_be_locked = true;
502027a8 2072 BUG_ON(iter->path->uptodate);
f21566f1
KO
2073out:
2074 bch2_btree_iter_verify_entry_exit(iter);
2075 bch2_btree_iter_verify(iter);
2dac0eae 2076
1c6fdbd8 2077 return b;
d355c6f4
KO
2078err:
2079 b = ERR_PTR(ret);
2080 goto out;
1c6fdbd8
KO
2081}
2082
2dac0eae 2083struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
1c6fdbd8 2084{
67e0dd8f
KO
2085 struct btree_trans *trans = iter->trans;
2086 struct btree_path *path = iter->path;
f21566f1 2087 struct btree *b = NULL;
979735df 2088 unsigned l;
1c6fdbd8
KO
2089 int ret;
2090
979735df 2091 BUG_ON(trans->restarted);
67e0dd8f 2092 EBUG_ON(iter->path->cached);
7e1a3aa9 2093 bch2_btree_iter_verify(iter);
1c6fdbd8 2094
979735df 2095 /* already at end? */
67e0dd8f 2096 if (!btree_path_node(path, path->level))
979735df 2097 return NULL;
1c6fdbd8 2098
979735df
KO
2099 /* got to end? */
2100 if (!btree_path_node(path, path->level + 1)) {
2101 btree_node_unlock(path, path->level);
2102 path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
2103 path->level++;
7071878b 2104 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
979735df
KO
2105 return NULL;
2106 }
1c6fdbd8 2107
979735df
KO
2108 if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
2109 __bch2_btree_path_unlock(path);
2110 path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS;
2111 path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS;
7071878b 2112 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
bc82d08b
KO
2113 trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_,
2114 path->btree_id, &path->pos);
979735df
KO
2115 btree_trans_restart(trans);
2116 ret = -EINTR;
d355c6f4 2117 goto err;
979735df 2118 }
1c6fdbd8 2119
979735df 2120 b = btree_path_node(path, path->level + 1);
1c6fdbd8 2121
979735df
KO
2122 if (!bpos_cmp(iter->pos, b->key.k.p)) {
2123 btree_node_unlock(path, path->level);
2124 path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
2125 path->level++;
2126 } else {
1c6fdbd8
KO
2127 /*
2128 * Haven't gotten to the end of the parent node: go back down to
2129 * the next child node
2130 */
67e0dd8f 2131 path = iter->path =
ce91abd6 2132 bch2_btree_path_set_pos(trans, path, bpos_successor(iter->pos),
67e0dd8f 2133 iter->flags & BTREE_ITER_INTENT);
1c6fdbd8 2134
67e0dd8f 2135 path->level = iter->min_depth;
979735df
KO
2136
2137 for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
2138 if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
2139 btree_node_unlock(path, l);
2140
67e0dd8f 2141 btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
345ca825
KO
2142 bch2_btree_iter_verify(iter);
2143
67e0dd8f 2144 ret = bch2_btree_path_traverse(trans, path, iter->flags);
d355c6f4
KO
2145 if (ret)
2146 goto err;
1c6fdbd8 2147
67e0dd8f 2148 b = path->l[path->level].b;
1c6fdbd8
KO
2149 }
2150
f21566f1 2151 bkey_init(&iter->k);
67e0dd8f 2152 iter->k.p = iter->pos = b->key.k.p;
502027a8 2153
ce91abd6 2154 iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
502027a8 2155 iter->flags & BTREE_ITER_INTENT);
67e0dd8f 2156 iter->path->should_be_locked = true;
502027a8 2157 BUG_ON(iter->path->uptodate);
f21566f1
KO
2158out:
2159 bch2_btree_iter_verify_entry_exit(iter);
2160 bch2_btree_iter_verify(iter);
2dac0eae 2161
1c6fdbd8 2162 return b;
d355c6f4
KO
2163err:
2164 b = ERR_PTR(ret);
2165 goto out;
1c6fdbd8
KO
2166}
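
/*
 * Illustrative sketch, not part of this file: walking every node of a btree
 * with the two helpers above. Transaction restart handling is elided and
 * do_node() is a hypothetical callback:
 *
 *	struct btree_iter iter;
 *	struct btree *b;
 *
 *	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, 0, 0);
 *	for (b = bch2_btree_iter_peek_node(&iter);
 *	     b && !IS_ERR(b);
 *	     b = bch2_btree_iter_next_node(&iter))
 *		do_node(b);
 *	bch2_trans_iter_exit(trans, &iter);
 */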
2167
2168/* Iterate across keys (in leaf nodes only) */
2169
e0ba3b64 2170inline bool bch2_btree_iter_advance(struct btree_iter *iter)
434094be 2171{
3d495595 2172 struct bpos pos = iter->k.p;
6caf0578
KO
2173 bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
2174 ? bpos_cmp(pos, SPOS_MAX)
2175 : bkey_cmp(pos, SPOS_MAX)) != 0;
3d495595 2176
7e1a3aa9 2177 if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
e751c01a 2178 pos = bkey_successor(iter, pos);
3d495595 2179 bch2_btree_iter_set_pos(iter, pos);
7e1a3aa9 2180 return ret;
3d495595
KO
2181}
2182
e0ba3b64 2183inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
3d495595
KO
2184{
2185 struct bpos pos = bkey_start_pos(&iter->k);
71f892a4
KO
2186 bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
2187 ? bpos_cmp(pos, POS_MIN)
2188 : bkey_cmp(pos, POS_MIN)) != 0;
3d495595 2189
7e1a3aa9 2190 if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
e751c01a 2191 pos = bkey_predecessor(iter, pos);
3d495595 2192 bch2_btree_iter_set_pos(iter, pos);
7e1a3aa9 2193 return ret;
434094be
KO
2194}
2195
3763cb95 2196static noinline
67e0dd8f 2197struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
818664f5
KO
2198{
2199 struct btree_insert_entry *i;
5288e66a 2200 struct bkey_i *ret = NULL;
818664f5 2201
8e6bbc41 2202 trans_for_each_update(iter->trans, i) {
5288e66a
KO
2203 if (i->btree_id < iter->btree_id)
2204 continue;
2205 if (i->btree_id > iter->btree_id)
818664f5 2206 break;
67e0dd8f 2207 if (bpos_cmp(i->k->k.p, iter->path->pos) < 0)
5288e66a 2208 continue;
12ce5b7d
KO
2209 if (i->key_cache_already_flushed)
2210 continue;
5288e66a
KO
2211 if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0)
2212 ret = i->k;
2213 }
818664f5 2214
5288e66a
KO
2215 return ret;
2216}
2217
3763cb95
KO
2218static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
2219{
2220 return iter->flags & BTREE_ITER_WITH_UPDATES
2221 ? __bch2_btree_trans_peek_updates(iter)
2222 : NULL;
2223}
2224
5222a460
KO
2225static noinline
2226struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
2227 struct btree_iter *iter)
2228{
45e4cd9e
KO
2229 struct bkey_i *k = bch2_journal_keys_peek(trans->c, iter->btree_id, 0,
2230 iter->path->pos);
5222a460 2231
45e4cd9e 2232 if (k && !bpos_cmp(k->k.p, iter->path->pos)) {
5222a460
KO
2233 iter->k = k->k;
2234 return bkey_i_to_s_c(k);
2235 } else {
2236 return bkey_s_c_null;
2237 }
2238}
2239
2240static noinline
2241struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
2242 struct btree_iter *iter,
2243 struct bkey_s_c k)
2244{
2245 struct bkey_i *next_journal =
45e4cd9e
KO
2246 bch2_journal_keys_peek(trans->c, iter->btree_id, 0,
2247 iter->path->pos);
5222a460
KO
2248
2249 if (next_journal &&
2250 bpos_cmp(next_journal->k.p,
2251 k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
2252 iter->k = next_journal->k;
2253 k = bkey_i_to_s_c(next_journal);
2254 }
2255
2256 return k;
2257}
2258
f7b6ca23
KO
2259/*
2260 * Checks btree key cache for key at iter->pos and returns it if present, or
2261 * bkey_s_c_null:
2262 */
2263static noinline
2264struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
2265{
2266 struct btree_trans *trans = iter->trans;
2267 struct bch_fs *c = trans->c;
2268 struct bkey u;
2269 int ret;
2270
2271 if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
2272 return bkey_s_c_null;
2273
2274 if (!iter->key_cache_path)
2275 iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
2276 iter->flags & BTREE_ITER_INTENT, 0,
2277 iter->flags|BTREE_ITER_CACHED);
2278
2279 iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
2280 iter->flags & BTREE_ITER_INTENT);
2281
2282 ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED);
2283 if (unlikely(ret))
2284 return bkey_s_c_err(ret);
2285
2286 iter->key_cache_path->should_be_locked = true;
2287
2288 return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
2289}
2290
a1e82d35 2291static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
1c6fdbd8 2292{
67e0dd8f 2293 struct btree_trans *trans = iter->trans;
360746bf 2294 struct bkey_i *next_update;
f7b6ca23 2295 struct bkey_s_c k, k2;
8e432d98 2296 int ret;
1c6fdbd8 2297
67e0dd8f 2298 EBUG_ON(iter->path->cached || iter->path->level);
7e1a3aa9 2299 bch2_btree_iter_verify(iter);
1c6fdbd8 2300
1c6fdbd8 2301 while (1) {
ce91abd6
KO
2302 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
2303 iter->flags & BTREE_ITER_INTENT);
c8476a4e 2304
67e0dd8f 2305 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
e6e024e9
KO
2306 if (unlikely(ret)) {
2307 /* ensure that iter->k is consistent with iter->pos: */
2308 bch2_btree_iter_set_pos(iter, iter->pos);
2309 k = bkey_s_c_err(ret);
2310 goto out;
2311 }
1c6fdbd8 2312
f7b6ca23
KO
2313 iter->path->should_be_locked = true;
2314
67e0dd8f 2315 k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
e6e024e9 2316
f7b6ca23
KO
2317 if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
2318 k.k &&
2319 (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
2320 ret = bkey_err(k2);
2321 if (ret) {
2322 k = k2;
2323 bch2_btree_iter_set_pos(iter, iter->pos);
2324 goto out;
2325 }
2326
2327 k = k2;
2328 iter->k = *k.k;
2329 }
2330
5222a460
KO
2331 if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
2332 k = btree_trans_peek_journal(trans, iter, k);
2333
2334 next_update = btree_trans_peek_updates(iter);
e6e024e9 2335
818664f5 2336 if (next_update &&
e6e024e9 2337 bpos_cmp(next_update->k.p,
67e0dd8f 2338 k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
5288e66a 2339 iter->k = next_update->k;
818664f5 2340 k = bkey_i_to_s_c(next_update);
5288e66a 2341 }
818664f5 2342
5222a460
KO
2343 if (k.k && bkey_deleted(k.k)) {
2344 /*
2345 * If we've got a whiteout, and it's after the search
2346 * key, advance the search key to the whiteout instead
2347 * of just after the whiteout - it might be a btree
2348 * whiteout, with a real key at the same position, since
 2349 * in the btree, deleted keys sort before non-deleted keys.
2350 */
2351 search_key = bpos_cmp(search_key, k.k->p)
2352 ? k.k->p
2353 : bpos_successor(k.k->p);
2354 continue;
2355 }
2356
818664f5 2357 if (likely(k.k)) {
c075ff70 2358 break;
67e0dd8f 2359 } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
e6e024e9 2360 /* Advance to next leaf node: */
67e0dd8f 2361 search_key = bpos_successor(iter->path->l[0].b->key.k.p);
e6e024e9
KO
2362 } else {
2363 /* End of btree: */
c8476a4e
KO
2364 bch2_btree_iter_set_pos(iter, SPOS_MAX);
2365 k = bkey_s_c_null;
2366 goto out;
2367 }
1c6fdbd8 2368 }
a1e82d35 2369out:
a1e82d35
KO
2370 bch2_btree_iter_verify(iter);
2371
2372 return k;
2373}
2374
2375/**
 2376 * bch2_btree_iter_peek_upto: returns first key greater than or equal to
 2377 * iterator's current position, up to and including @end
2378 */
85d8cf16 2379struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
a1e82d35
KO
2380{
2381 struct btree_trans *trans = iter->trans;
2382 struct bpos search_key = btree_iter_search_key(iter);
2383 struct bkey_s_c k;
85d8cf16 2384 struct bpos iter_pos;
a1e82d35
KO
2385 int ret;
2386
1f2d9192
KO
2387 if (iter->update_path) {
2388 bch2_path_put(trans, iter->update_path,
2389 iter->flags & BTREE_ITER_INTENT);
2390 iter->update_path = NULL;
2391 }
2392
a1e82d35
KO
2393 bch2_btree_iter_verify_entry_exit(iter);
2394
2395 while (1) {
2396 k = __bch2_btree_iter_peek(iter, search_key);
2397 if (!k.k || bkey_err(k))
2398 goto out;
2399
85d8cf16
KO
2400 /*
 2401 * iter->pos should be monotonically increasing, and always be
2402 * equal to the key we just returned - except extents can
2403 * straddle iter->pos:
2404 */
2405 if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
2406 iter_pos = k.k->p;
2407 else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
2408 iter_pos = bkey_start_pos(k.k);
2409 else
2410 iter_pos = iter->pos;
2411
2412 if (bkey_cmp(iter_pos, end) > 0) {
2413 bch2_btree_iter_set_pos(iter, end);
2414 k = bkey_s_c_null;
2415 goto out;
2416 }
2417
1f2d9192
KO
2418 if (iter->update_path &&
2419 bkey_cmp(iter->update_path->pos, k.k->p)) {
2420 bch2_path_put(trans, iter->update_path,
2421 iter->flags & BTREE_ITER_INTENT);
2422 iter->update_path = NULL;
2423 }
2424
2425 if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
2426 (iter->flags & BTREE_ITER_INTENT) &&
2427 !(iter->flags & BTREE_ITER_IS_EXTENTS) &&
2428 !iter->update_path) {
2429 struct bpos pos = k.k->p;
2430
2431 if (pos.snapshot < iter->snapshot) {
2432 search_key = bpos_successor(k.k->p);
2433 continue;
2434 }
2435
2436 pos.snapshot = iter->snapshot;
2437
2438 /*
2439 * advance, same as on exit for iter->path, but only up
2440 * to snapshot
2441 */
2442 __btree_path_get(iter->path, iter->flags & BTREE_ITER_INTENT);
2443 iter->update_path = iter->path;
2444
ce91abd6 2445 iter->update_path = bch2_btree_path_set_pos(trans,
1f2d9192
KO
2446 iter->update_path, pos,
2447 iter->flags & BTREE_ITER_INTENT);
1f2d9192
KO
2448 }
2449
a1e82d35
KO
2450 /*
2451 * We can never have a key in a leaf node at POS_MAX, so
2452 * we don't have to check these successor() calls:
2453 */
2454 if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
2455 !bch2_snapshot_is_ancestor(trans->c,
2456 iter->snapshot,
2457 k.k->p.snapshot)) {
2458 search_key = bpos_successor(k.k->p);
2459 continue;
2460 }
2461
2462 if (bkey_whiteout(k.k) &&
2463 !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
2464 search_key = bkey_successor(iter, k.k->p);
2465 continue;
2466 }
2467
2468 break;
2469 }
2470
85d8cf16 2471 iter->pos = iter_pos;
1f2d9192 2472
ce91abd6 2473 iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
1f2d9192
KO
2474 iter->flags & BTREE_ITER_INTENT);
2475 BUG_ON(!iter->path->nodes_locked);
a1e82d35 2476out:
1f2d9192 2477 if (iter->update_path) {
d4d24a65
KO
2478 if (unlikely(!bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_))) {
2479 k = bkey_s_c_err(-EINTR);
2480 } else {
2481 BUG_ON(!(iter->update_path->nodes_locked & 1));
2482 iter->update_path->should_be_locked = true;
2483 }
1f2d9192
KO
2484 }
2485 iter->path->should_be_locked = true;
2486
a1e82d35 2487 if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
c075ff70
KO
2488 iter->pos.snapshot = iter->snapshot;
2489
a1e82d35
KO
2490 ret = bch2_btree_iter_verify_ret(iter, k);
2491 if (unlikely(ret)) {
2492 bch2_btree_iter_set_pos(iter, iter->pos);
2493 k = bkey_s_c_err(ret);
2494 }
67e0dd8f 2495
7e1a3aa9 2496 bch2_btree_iter_verify_entry_exit(iter);
c075ff70 2497
1c6fdbd8
KO
2498 return k;
2499}
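
/*
 * Illustrative sketch, not part of this file: scanning a key range with
 * bch2_btree_iter_peek_upto(). -EINTR/transaction restarts are not handled
 * here; process_key() is a hypothetical callback:
 *
 *	struct btree_iter iter;
 *	struct bkey_s_c k;
 *	int ret = 0;
 *
 *	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, start, 0);
 *	while ((k = bch2_btree_iter_peek_upto(&iter, end)).k &&
 *	       !(ret = bkey_err(k))) {
 *		process_key(k);
 *		bch2_btree_iter_advance(&iter);
 *	}
 *	bch2_trans_iter_exit(trans, &iter);
 */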
2500
f4b61341
KO
2501/**
2502 * bch2_btree_iter_next: returns first key greater than iterator's current
2503 * position
2504 */
1c6fdbd8
KO
2505struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
2506{
e0ba3b64 2507 if (!bch2_btree_iter_advance(iter))
2e70ce56 2508 return bkey_s_c_null;
f4b61341 2509
2dac0eae 2510 return bch2_btree_iter_peek(iter);
1c6fdbd8
KO
2511}
2512
ccf5a109
KO
2513/**
2514 * bch2_btree_iter_peek_prev: returns first key less than or equal to
2515 * iterator's current position
2516 */
2517struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
1c6fdbd8 2518{
67e0dd8f 2519 struct btree_trans *trans = iter->trans;
c8476a4e 2520 struct bpos search_key = iter->pos;
c075ff70 2521 struct btree_path *saved_path = NULL;
1c6fdbd8 2522 struct bkey_s_c k;
c075ff70
KO
2523 struct bkey saved_k;
2524 const struct bch_val *saved_v;
1c6fdbd8
KO
2525 int ret;
2526
67e0dd8f 2527 EBUG_ON(iter->path->cached || iter->path->level);
5288e66a 2528 EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
5222a460
KO
2529
2530 if (iter->flags & BTREE_ITER_WITH_JOURNAL)
2531 return bkey_s_c_err(-EIO);
2532
7e1a3aa9
KO
2533 bch2_btree_iter_verify(iter);
2534 bch2_btree_iter_verify_entry_exit(iter);
2535
c075ff70
KO
2536 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2537 search_key.snapshot = U32_MAX;
2538
1c6fdbd8 2539 while (1) {
ce91abd6 2540 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
67e0dd8f 2541 iter->flags & BTREE_ITER_INTENT);
c8476a4e 2542
67e0dd8f 2543 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
7e1a3aa9 2544 if (unlikely(ret)) {
e6e024e9
KO
2545 /* ensure that iter->k is consistent with iter->pos: */
2546 bch2_btree_iter_set_pos(iter, iter->pos);
7e1a3aa9 2547 k = bkey_s_c_err(ret);
e6e024e9 2548 goto out;
7e1a3aa9 2549 }
1c6fdbd8 2550
67e0dd8f
KO
2551 k = btree_path_level_peek(trans, iter->path,
2552 &iter->path->l[0], &iter->k);
3d495595
KO
2553 if (!k.k ||
2554 ((iter->flags & BTREE_ITER_IS_EXTENTS)
c075ff70
KO
2555 ? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0
2556 : bpos_cmp(k.k->p, search_key) > 0))
67e0dd8f
KO
2557 k = btree_path_level_prev(trans, iter->path,
2558 &iter->path->l[0], &iter->k);
ccf5a109 2559
e6e024e9 2560 if (likely(k.k)) {
c075ff70
KO
2561 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
2562 if (k.k->p.snapshot == iter->snapshot)
2563 goto got_key;
2564
2565 /*
2566 * If we have a saved candidate, and we're no
2567 * longer at the same _key_ (not pos), return
2568 * that candidate
2569 */
2570 if (saved_path && bkey_cmp(k.k->p, saved_k.p)) {
2571 bch2_path_put(trans, iter->path,
2572 iter->flags & BTREE_ITER_INTENT);
2573 iter->path = saved_path;
2574 saved_path = NULL;
2575 iter->k = saved_k;
2576 k.v = saved_v;
2577 goto got_key;
2578 }
2579
2580 if (bch2_snapshot_is_ancestor(iter->trans->c,
2581 iter->snapshot,
2582 k.k->p.snapshot)) {
2583 if (saved_path)
2584 bch2_path_put(trans, saved_path,
2585 iter->flags & BTREE_ITER_INTENT);
2586 saved_path = btree_path_clone(trans, iter->path,
2587 iter->flags & BTREE_ITER_INTENT);
2588 saved_k = *k.k;
2589 saved_v = k.v;
2590 }
2591
2592 search_key = bpos_predecessor(k.k->p);
2593 continue;
2594 }
2595got_key:
2596 if (bkey_whiteout(k.k) &&
2597 !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
2598 search_key = bkey_predecessor(iter, k.k->p);
2599 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2600 search_key.snapshot = U32_MAX;
2601 continue;
2602 }
2603
1c6fdbd8 2604 break;
67e0dd8f 2605 } else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) {
e6e024e9 2606 /* Advance to previous leaf node: */
67e0dd8f 2607 search_key = bpos_predecessor(iter->path->l[0].b->data->min_key);
e6e024e9
KO
2608 } else {
2609 /* Start of btree: */
c8476a4e 2610 bch2_btree_iter_set_pos(iter, POS_MIN);
7e1a3aa9 2611 k = bkey_s_c_null;
e6e024e9 2612 goto out;
7e1a3aa9 2613 }
ccf5a109 2614 }
1c6fdbd8 2615
a045be5a 2616 EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
3d495595
KO
2617
2618 /* Extents can straddle iter->pos: */
a045be5a 2619 if (bkey_cmp(k.k->p, iter->pos) < 0)
3d495595 2620 iter->pos = k.k->p;
c075ff70
KO
2621
2622 if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2623 iter->pos.snapshot = iter->snapshot;
7e1a3aa9 2624out:
c075ff70
KO
2625 if (saved_path)
2626 bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
67e0dd8f
KO
2627 iter->path->should_be_locked = true;
2628
7e1a3aa9
KO
2629 bch2_btree_iter_verify_entry_exit(iter);
2630 bch2_btree_iter_verify(iter);
67e0dd8f 2631
1c6fdbd8
KO
2632 return k;
2633}
2634
ccf5a109
KO
2635/**
2636 * bch2_btree_iter_prev: returns first key less than iterator's current
2637 * position
2638 */
2639struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
2640{
e0ba3b64 2641 if (!bch2_btree_iter_rewind(iter))
2e70ce56 2642 return bkey_s_c_null;
ccf5a109 2643
2e70ce56 2644 return bch2_btree_iter_peek_prev(iter);
ccf5a109
KO
2645}
2646
c3801239 2647struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
271a3d3a 2648{
9f6bd307 2649 struct btree_trans *trans = iter->trans;
953ee28a 2650 struct bpos search_key;
271a3d3a 2651 struct bkey_s_c k;
c3801239
KO
2652 int ret;
2653
67e0dd8f 2654 EBUG_ON(iter->path->level);
7e1a3aa9
KO
2655 bch2_btree_iter_verify(iter);
2656 bch2_btree_iter_verify_entry_exit(iter);
2657
1d214eb1
KO
2658 /* extents can't span inode numbers: */
2659 if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
953ee28a 2660 unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
1d214eb1
KO
2661 if (iter->pos.inode == KEY_INODE_MAX)
2662 return bkey_s_c_null;
c3801239 2663
1d214eb1
KO
2664 bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
2665 }
8042b5b7 2666
953ee28a 2667 search_key = btree_iter_search_key(iter);
ce91abd6 2668 iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
67e0dd8f 2669 iter->flags & BTREE_ITER_INTENT);
953ee28a 2670
67e0dd8f 2671 ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
c3801239
KO
2672 if (unlikely(ret))
2673 return bkey_s_c_err(ret);
1c6fdbd8 2674
c075ff70
KO
2675 if ((iter->flags & BTREE_ITER_CACHED) ||
2676 !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
953ee28a 2677 struct bkey_i *next_update;
1c6fdbd8 2678
5222a460 2679 if ((next_update = btree_trans_peek_updates(iter)) &&
67e0dd8f 2680 !bpos_cmp(next_update->k.p, iter->pos)) {
1d214eb1
KO
2681 iter->k = next_update->k;
2682 k = bkey_i_to_s_c(next_update);
5222a460 2683 goto out;
1d214eb1
KO
2684 }
2685
5222a460
KO
2686 if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
2687 (k = btree_trans_peek_slot_journal(trans, iter)).k)
2688 goto out;
2689
f7b6ca23
KO
2690 if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
2691 (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
2692 if (!bkey_err(k))
2693 iter->k = *k.k;
2694 goto out;
2695 }
2696
5222a460 2697 k = bch2_btree_path_peek_slot(iter->path, &iter->k);
1d214eb1
KO
2698 } else {
2699 struct bpos next;
1d214eb1 2700
b1d87f52 2701 if (iter->flags & BTREE_ITER_INTENT) {
67e0dd8f 2702 struct btree_iter iter2;
85d8cf16
KO
2703 struct bpos end = iter->pos;
2704
2705 if (iter->flags & BTREE_ITER_IS_EXTENTS)
2706 end.offset = U64_MAX;
b1d87f52 2707
67e0dd8f 2708 bch2_trans_copy_iter(&iter2, iter);
85d8cf16 2709 k = bch2_btree_iter_peek_upto(&iter2, end);
b1d87f52 2710
67e0dd8f
KO
2711 if (k.k && !bkey_err(k)) {
2712 iter->k = iter2.k;
2713 k.k = &iter->k;
2714 }
2715 bch2_trans_iter_exit(trans, &iter2);
b1d87f52
KO
2716 } else {
2717 struct bpos pos = iter->pos;
2718
2719 k = bch2_btree_iter_peek(iter);
2720 iter->pos = pos;
2721 }
1d214eb1
KO
2722
2723 if (unlikely(bkey_err(k)))
2724 return k;
2725
2726 next = k.k ? bkey_start_pos(k.k) : POS_MAX;
2727
2728 if (bkey_cmp(iter->pos, next) < 0) {
2729 bkey_init(&iter->k);
2730 iter->k.p = iter->pos;
c075ff70
KO
2731
2732 if (iter->flags & BTREE_ITER_IS_EXTENTS) {
2733 bch2_key_resize(&iter->k,
2734 min_t(u64, KEY_SIZE_MAX,
2735 (next.inode == iter->pos.inode
2736 ? next.offset
2737 : KEY_OFFSET_MAX) -
2738 iter->pos.offset));
2739 EBUG_ON(!iter->k.size);
2740 }
1d214eb1
KO
2741
2742 k = (struct bkey_s_c) { &iter->k, NULL };
1d214eb1 2743 }
271a3d3a 2744 }
5222a460 2745out:
67e0dd8f
KO
2746 iter->path->should_be_locked = true;
2747
7e1a3aa9
KO
2748 bch2_btree_iter_verify_entry_exit(iter);
2749 bch2_btree_iter_verify(iter);
c075ff70
KO
2750 ret = bch2_btree_iter_verify_ret(iter, k);
2751 if (unlikely(ret))
2752 return bkey_s_c_err(ret);
66a0a497 2753
63f1a598 2754 return k;
1c6fdbd8
KO
2755}
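
/*
 * Illustrative sketch, not part of this file: a point lookup with
 * bch2_btree_iter_peek_slot(), which returns a key for the iterator position
 * even if nothing is stored there (a zero-size deleted key). Callers check
 * bkey_err() first and then the key type:
 *
 *	struct btree_iter iter;
 *	struct bkey_s_c k;
 *	int ret;
 *
 *	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos, 0);
 *	k = bch2_btree_iter_peek_slot(&iter);
 *	ret = bkey_err(k);
 *	if (!ret && k.k->type == KEY_TYPE_inode) {
 *		// found a live inode key at pos
 *	}
 *	bch2_trans_iter_exit(trans, &iter);
 */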
2756
1c6fdbd8
KO
2757struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
2758{
e0ba3b64 2759 if (!bch2_btree_iter_advance(iter))
2e70ce56 2760 return bkey_s_c_null;
1c6fdbd8 2761
2dac0eae 2762 return bch2_btree_iter_peek_slot(iter);
1c6fdbd8
KO
2763}
2764
18fc6ae5
KO
2765struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
2766{
e0ba3b64 2767 if (!bch2_btree_iter_rewind(iter))
18fc6ae5
KO
2768 return bkey_s_c_null;
2769
2770 return bch2_btree_iter_peek_slot(iter);
2771}
2772
1c6fdbd8
KO
2773/* new transactional stuff: */
2774
0423fb71
KO
2775#ifdef CONFIG_BCACHEFS_DEBUG
2776static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
2777{
67e0dd8f 2778 struct btree_path *path;
0423fb71
KO
2779 unsigned i;
2780
67e0dd8f 2781 BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated));
0423fb71 2782
67e0dd8f
KO
2783 trans_for_each_path(trans, path) {
2784 BUG_ON(path->sorted_idx >= trans->nr_sorted);
2785 BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
0423fb71
KO
2786 }
2787
2788 for (i = 0; i < trans->nr_sorted; i++) {
2789 unsigned idx = trans->sorted[i];
2790
67e0dd8f
KO
2791 EBUG_ON(!(trans->paths_allocated & (1ULL << idx)));
2792 BUG_ON(trans->paths[idx].sorted_idx != i);
0423fb71
KO
2793 }
2794}
0423fb71
KO
2795
2796static void btree_trans_verify_sorted(struct btree_trans *trans)
2797{
67e0dd8f 2798 struct btree_path *path, *prev = NULL;
0423fb71
KO
2799 unsigned i;
2800
67e0dd8f 2801 trans_for_each_path_inorder(trans, path, i) {
eb7bd15f
KO
2802 if (prev && btree_path_cmp(prev, path) > 0) {
2803 bch2_dump_trans_paths_updates(trans);
2804 panic("trans paths out of order!\n");
2805 }
67e0dd8f 2806 prev = path;
0423fb71 2807 }
0423fb71 2808}
068bcaa5
KO
2809#else
2810static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
2811static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
2812#endif
0423fb71 2813
068bcaa5 2814void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
0423fb71
KO
2815{
2816 int i, l = 0, r = trans->nr_sorted, inc = 1;
2817 bool swapped;
2818
068bcaa5
KO
2819 btree_trans_verify_sorted_refs(trans);
2820
2821 if (trans->paths_sorted)
2822 goto out;
2823
0423fb71
KO
2824 /*
2825 * Cocktail shaker sort: this is efficient because iterators will be
 2826 * mostly sorted.
2827 */
2828 do {
2829 swapped = false;
2830
2831 for (i = inc > 0 ? l : r - 2;
2832 i + 1 < r && i >= l;
2833 i += inc) {
67e0dd8f
KO
2834 if (btree_path_cmp(trans->paths + trans->sorted[i],
2835 trans->paths + trans->sorted[i + 1]) > 0) {
0423fb71 2836 swap(trans->sorted[i], trans->sorted[i + 1]);
67e0dd8f
KO
2837 trans->paths[trans->sorted[i]].sorted_idx = i;
2838 trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
0423fb71
KO
2839 swapped = true;
2840 }
2841 }
2842
2843 if (inc > 0)
2844 --r;
2845 else
2846 l++;
2847 inc = -inc;
2848 } while (swapped);
2849
67e0dd8f 2850 trans->paths_sorted = true;
068bcaa5 2851out:
0423fb71
KO
2852 btree_trans_verify_sorted(trans);
2853}
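
/*
 * Illustrative sketch, not part of this file: the same cocktail shaker
 * (bidirectional bubble) sort on a plain int array, to show why it is cheap
 * on mostly-sorted input - each pass only moves the few out-of-place
 * entries, and the pass direction alternates:
 *
 *	static void shaker_sort(int *a, int n)
 *	{
 *		int i, l = 0, r = n, inc = 1;
 *		bool swapped;
 *
 *		do {
 *			swapped = false;
 *
 *			for (i = inc > 0 ? l : r - 2;
 *			     i + 1 < r && i >= l;
 *			     i += inc)
 *				if (a[i] > a[i + 1]) {
 *					swap(a[i], a[i + 1]);
 *					swapped = true;
 *				}
 *
 *			if (inc > 0)
 *				--r;
 *			else
 *				l++;
 *			inc = -inc;
 *		} while (swapped);
 *	}
 */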
2854
67e0dd8f
KO
2855static inline void btree_path_list_remove(struct btree_trans *trans,
2856 struct btree_path *path)
0423fb71
KO
2857{
2858 unsigned i;
2859
67e0dd8f 2860 EBUG_ON(path->sorted_idx >= trans->nr_sorted);
0423fb71
KO
2861#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
2862 trans->nr_sorted--;
67e0dd8f
KO
2863 memmove_u64s_down_small(trans->sorted + path->sorted_idx,
2864 trans->sorted + path->sorted_idx + 1,
2865 DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
0423fb71 2866#else
67e0dd8f 2867 array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
0423fb71 2868#endif
67e0dd8f
KO
2869 for (i = path->sorted_idx; i < trans->nr_sorted; i++)
2870 trans->paths[trans->sorted[i]].sorted_idx = i;
0423fb71 2871
67e0dd8f 2872 path->sorted_idx = U8_MAX;
0423fb71
KO
2873}
2874
67e0dd8f
KO
2875static inline void btree_path_list_add(struct btree_trans *trans,
2876 struct btree_path *pos,
2877 struct btree_path *path)
0423fb71
KO
2878{
2879 unsigned i;
2880
068bcaa5 2881 path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted;
0423fb71
KO
2882
2883#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
67e0dd8f
KO
2884 memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
2885 trans->sorted + path->sorted_idx,
2886 DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
0423fb71 2887 trans->nr_sorted++;
67e0dd8f 2888 trans->sorted[path->sorted_idx] = path->idx;
0423fb71 2889#else
67e0dd8f 2890 array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
0423fb71
KO
2891#endif
2892
67e0dd8f
KO
2893 for (i = path->sorted_idx; i < trans->nr_sorted; i++)
2894 trans->paths[trans->sorted[i]].sorted_idx = i;
64bc0011 2895
0423fb71 2896 btree_trans_verify_sorted_refs(trans);
7c26ecae
KO
2897}
2898
67e0dd8f 2899void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
64bc0011 2900{
67e0dd8f
KO
2901 if (iter->path)
2902 bch2_path_put(trans, iter->path,
2903 iter->flags & BTREE_ITER_INTENT);
1f2d9192
KO
2904 if (iter->update_path)
2905 bch2_path_put(trans, iter->update_path,
2906 iter->flags & BTREE_ITER_INTENT);
f7b6ca23
KO
2907 if (iter->key_cache_path)
2908 bch2_path_put(trans, iter->key_cache_path,
2909 iter->flags & BTREE_ITER_INTENT);
67e0dd8f 2910 iter->path = NULL;
1f2d9192 2911 iter->update_path = NULL;
f7b6ca23 2912 iter->key_cache_path = NULL;
64bc0011
KO
2913}
2914
67e0dd8f
KO
2915static void __bch2_trans_iter_init(struct btree_trans *trans,
2916 struct btree_iter *iter,
2917 enum btree_id btree_id, struct bpos pos,
2918 unsigned locks_want,
2919 unsigned depth,
2920 unsigned flags)
1c6fdbd8 2921{
e5af273f
KO
2922 EBUG_ON(trans->restarted);
2923
f21566f1
KO
2924 if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
2925 btree_node_type_is_extents(btree_id))
3dea728c 2926 flags |= BTREE_ITER_IS_EXTENTS;
1c6fdbd8 2927
a861c722
KO
2928 if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
2929 !btree_type_has_snapshots(btree_id))
e751c01a
KO
2930 flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
2931
a861c722
KO
2932 if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) &&
2933 btree_type_has_snapshots(btree_id))
2934 flags |= BTREE_ITER_FILTER_SNAPSHOTS;
e751c01a 2935
5222a460
KO
2936 if (trans->journal_replay_not_finished)
2937 flags |= BTREE_ITER_WITH_JOURNAL;
2938
f7b6ca23 2939 if (!btree_id_cached(trans->c, btree_id)) {
7c8f6f98 2940 flags &= ~BTREE_ITER_CACHED;
f7b6ca23
KO
2941 flags &= ~BTREE_ITER_WITH_KEY_CACHE;
2942 } else if (!(flags & BTREE_ITER_CACHED))
2943 flags |= BTREE_ITER_WITH_KEY_CACHE;
7c8f6f98 2944
67e0dd8f
KO
2945 iter->trans = trans;
2946 iter->path = NULL;
1f2d9192 2947 iter->update_path = NULL;
f7b6ca23 2948 iter->key_cache_path = NULL;
67e0dd8f
KO
2949 iter->btree_id = btree_id;
2950 iter->min_depth = depth;
f21566f1
KO
2951 iter->flags = flags;
2952 iter->snapshot = pos.snapshot;
67e0dd8f
KO
2953 iter->pos = pos;
2954 iter->k.type = KEY_TYPE_deleted;
2955 iter->k.p = pos;
2956 iter->k.size = 0;
e751c01a 2957
f3e1f444
KO
2958 iter->path = bch2_path_get(trans, btree_id, iter->pos,
2959 locks_want, depth, flags);
1c6fdbd8
KO
2960}
2961
67e0dd8f
KO
2962void bch2_trans_iter_init(struct btree_trans *trans,
2963 struct btree_iter *iter,
2964 unsigned btree_id, struct bpos pos,
2965 unsigned flags)
424eb881 2966{
67e0dd8f
KO
2967 __bch2_trans_iter_init(trans, iter, btree_id, pos,
2968 0, 0, flags);
424eb881
KO
2969}
2970
67e0dd8f
KO
2971void bch2_trans_node_iter_init(struct btree_trans *trans,
2972 struct btree_iter *iter,
2973 enum btree_id btree_id,
2974 struct bpos pos,
2975 unsigned locks_want,
2976 unsigned depth,
2977 unsigned flags)
1c6fdbd8 2978{
67e0dd8f
KO
2979 __bch2_trans_iter_init(trans, iter, btree_id, pos, locks_want, depth,
2980 BTREE_ITER_NOT_EXTENTS|
2981 __BTREE_ITER_ALL_SNAPSHOTS|
2982 BTREE_ITER_ALL_SNAPSHOTS|
2983 flags);
2984 BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH));
2985 BUG_ON(iter->path->level != depth);
2986 BUG_ON(iter->min_depth != depth);
2987}
7c26ecae 2988
67e0dd8f
KO
2989void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
2990{
2991 *dst = *src;
2992 if (src->path)
2993 __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
1f2d9192
KO
2994 if (src->update_path)
2995 __btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
f7b6ca23 2996 dst->key_cache_path = NULL;
1c6fdbd8
KO
2997}
2998
73a117d2 2999void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
1c6fdbd8 3000{
73a117d2
KO
3001 size_t new_top = trans->mem_top + size;
3002 void *p;
3003
3004 if (new_top > trans->mem_bytes) {
1c6fdbd8 3005 size_t old_bytes = trans->mem_bytes;
73a117d2 3006 size_t new_bytes = roundup_pow_of_two(new_top);
e131b6aa
KO
3007 void *new_mem;
3008
3009 WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
3010
3011 new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS);
3012 if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
3013 new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
3014 new_bytes = BTREE_TRANS_MEM_MAX;
3015 kfree(trans->mem);
3016 }
1c6fdbd8
KO
3017
3018 if (!new_mem)
73a117d2 3019 return ERR_PTR(-ENOMEM);
1c6fdbd8
KO
3020
3021 trans->mem = new_mem;
3022 trans->mem_bytes = new_bytes;
3023
1c7a0adf 3024 if (old_bytes) {
669f87a5 3025 trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes);
e5af273f 3026 btree_trans_restart(trans);
73a117d2 3027 return ERR_PTR(-EINTR);
1c7a0adf 3028 }
1c6fdbd8
KO
3029 }
3030
20bceecb 3031 p = trans->mem + trans->mem_top;
1c6fdbd8 3032 trans->mem_top += size;
7ed158f2 3033 memset(p, 0, size);
20bceecb 3034 return p;
1c6fdbd8
KO
3035}
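
/*
 * Illustrative sketch, not part of this file: trans->mem is a bump allocator,
 * so memory from bch2_trans_kmalloc() lives until the transaction is reset or
 * exited and is never freed individually. Callers must handle -EINTR, since
 * growing the buffer forces a transaction restart:
 *
 *	struct bkey_i *new_key = bch2_trans_kmalloc(trans, sizeof(*new_key));
 *
 *	if (IS_ERR(new_key))
 *		return PTR_ERR(new_key);	// possibly -EINTR: retry the transaction
 *	bkey_init(&new_key->k);
 */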
3036
d38494c4 3037/**
955af634 3038 * bch2_trans_begin() - reset a transaction after an interrupted attempt
d38494c4 3039 * @trans: transaction to reset
d38494c4
DR
3040 *
 3041 * While iterating over nodes or updating nodes, an attempt to lock a btree
 3042 * node may return EINTR when the trylock fails. When this occurs,
955af634 3043 * bch2_trans_begin() should be called and the transaction retried.
d38494c4 3044 */
955af634 3045void bch2_trans_begin(struct btree_trans *trans)
1c6fdbd8 3046{
67e0dd8f
KO
3047 struct btree_insert_entry *i;
3048 struct btree_path *path;
a8e00bd4 3049
67e0dd8f
KO
3050 trans_for_each_update(trans, i)
3051 __btree_path_put(i->path, true);
1c6fdbd8 3052
904823de 3053 memset(&trans->journal_res, 0, sizeof(trans->journal_res));
a49e9a05 3054 trans->extra_journal_res = 0;
5df4be3f 3055 trans->nr_updates = 0;
163e885a 3056 trans->mem_top = 0;
bf7b87a4 3057
43d00243 3058 trans->hooks = NULL;
96e2aa1b
KO
3059 trans->extra_journal_entries = NULL;
3060 trans->extra_journal_entry_u64s = 0;
3061
f21539a5
KO
3062 if (trans->fs_usage_deltas) {
3063 trans->fs_usage_deltas->used = 0;
3064 memset((void *) trans->fs_usage_deltas +
3065 offsetof(struct replicas_delta_list, memset_start), 0,
3066 (void *) &trans->fs_usage_deltas->memset_end -
3067 (void *) &trans->fs_usage_deltas->memset_start);
3068 }
3069
67e0dd8f 3070 trans_for_each_path(trans, path) {
25a77231
KO
3071 path->should_be_locked = false;
3072
e1f7fa06
KO
3073 /*
 3074 * If the transaction wasn't restarted, we presume we're
 3075 * doing something new: don't keep iterators other than the ones that
3076 * are in use - except for the subvolumes btree:
3077 */
3078 if (!trans->restarted && path->btree_id != BTREE_ID_subvolumes)
3079 path->preserve = false;
3080
67e0dd8f
KO
3081 /*
3082 * XXX: we probably shouldn't be doing this if the transaction
3083 * was restarted, but currently we still overflow transaction
3084 * iterators if we do that
3085 */
3086 if (!path->ref && !path->preserve)
3087 __bch2_path_free(trans, path);
3088 else
25a77231 3089 path->preserve = false;
67e0dd8f
KO
3090 }
3091
955af634 3092 bch2_trans_cond_resched(trans);
50dc0f69 3093
955af634 3094 if (trans->restarted)
67e0dd8f 3095 bch2_btree_path_traverse_all(trans);
e5af273f
KO
3096
3097 trans->restarted = false;
1c6fdbd8
KO
3098}
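
/*
 * Illustrative sketch, not part of this file: the retry loop described in the
 * comment above bch2_trans_begin(). do_work() stands in for any sequence of
 * btree operations that can fail with -EINTR:
 *
 *	struct btree_trans trans;
 *	int ret;
 *
 *	bch2_trans_init(&trans, c, 0, 0);
 *	do {
 *		bch2_trans_begin(&trans);
 *		ret = do_work(&trans);
 *	} while (ret == -EINTR);
 *	bch2_trans_exit(&trans);
 */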
3099
67e0dd8f 3100static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
1a21bf98 3101{
67e0dd8f 3102 size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX;
3eb26d01 3103 size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX;
dbd1e825 3104 void *p = NULL;
1a21bf98
KO
3105
3106 BUG_ON(trans->used_mempool);
3107
dbd1e825 3108#ifdef __KERNEL__
67e0dd8f 3109 p = this_cpu_xchg(c->btree_paths_bufs->path , NULL);
dbd1e825
KO
3110#endif
3111 if (!p)
67e0dd8f 3112 p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
1a21bf98 3113
67e0dd8f 3114 trans->paths = p; p += paths_bytes;
1a21bf98 3115 trans->updates = p; p += updates_bytes;
1a21bf98
KO
3116}
3117
669f87a5
KO
3118void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
3119 unsigned expected_nr_iters,
3120 size_t expected_mem_bytes,
3121 const char *fn)
c0ebe3e4 3122 __acquires(&c->btree_trans_barrier)
1c6fdbd8 3123{
ae1ede58 3124 memset(trans, 0, sizeof(*trans));
1c6fdbd8 3125 trans->c = c;
669f87a5 3126 trans->fn = fn;
5222a460
KO
3127 trans->journal_replay_not_finished =
3128 !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
20bceecb 3129
67e0dd8f 3130 bch2_trans_alloc_paths(trans, c);
20bceecb 3131
0b5c9f59
KO
3132 if (expected_mem_bytes) {
3133 expected_mem_bytes = roundup_pow_of_two(expected_mem_bytes);
3134 trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL);
e131b6aa
KO
3135
 3136 if (unlikely(!trans->mem)) {
3137 trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
3138 trans->mem_bytes = BTREE_TRANS_MEM_MAX;
3139 } else {
0b5c9f59 3140 trans->mem_bytes = expected_mem_bytes;
e131b6aa 3141 }
0b5c9f59 3142 }
495aabed 3143
876c7af3
KO
3144 trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
3145
b84d42c3
KO
3146 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
3147 trans->pid = current->pid;
3148 mutex_lock(&c->btree_trans_lock);
3149 list_add(&trans->list, &c->btree_trans_list);
3150 mutex_unlock(&c->btree_trans_lock);
3151 }
1c6fdbd8
KO
3152}
3153
67e0dd8f
KO
3154static void check_btree_paths_leaked(struct btree_trans *trans)
3155{
3156#ifdef CONFIG_BCACHEFS_DEBUG
3157 struct bch_fs *c = trans->c;
3158 struct btree_path *path;
3159
3160 trans_for_each_path(trans, path)
3161 if (path->ref)
3162 goto leaked;
3163 return;
3164leaked:
669f87a5 3165 bch_err(c, "btree paths leaked from %s!", trans->fn);
67e0dd8f
KO
3166 trans_for_each_path(trans, path)
3167 if (path->ref)
3168 printk(KERN_ERR " btree %s %pS\n",
3169 bch2_btree_ids[path->btree_id],
3170 (void *) path->ip_allocated);
3171 /* Be noisy about this: */
3172 bch2_fatal_error(c);
3173#endif
3174}
3175
9a796fdb 3176void bch2_trans_exit(struct btree_trans *trans)
c0ebe3e4 3177 __releases(&c->btree_trans_barrier)
1c6fdbd8 3178{
67e0dd8f 3179 struct btree_insert_entry *i;
1a21bf98
KO
3180 struct bch_fs *c = trans->c;
3181
ece254b2 3182 bch2_trans_unlock(trans);
1c6fdbd8 3183
67e0dd8f
KO
3184 trans_for_each_update(trans, i)
3185 __btree_path_put(i->path, true);
3186 trans->nr_updates = 0;
509d3e0a 3187
67e0dd8f 3188 check_btree_paths_leaked(trans);
50dc0f69 3189
b84d42c3
KO
3190 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
3191 mutex_lock(&c->btree_trans_lock);
3192 list_del(&trans->list);
3193 mutex_unlock(&c->btree_trans_lock);
3194 }
495aabed 3195
876c7af3
KO
3196 srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
3197
67e0dd8f 3198 bch2_journal_preres_put(&c->journal, &trans->journal_preres);
2ca88e5a 3199
9620c3ec
KO
3200 if (trans->fs_usage_deltas) {
3201 if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
3202 REPLICAS_DELTA_LIST_MAX)
3203 mempool_free(trans->fs_usage_deltas,
67e0dd8f 3204 &c->replicas_delta_pool);
9620c3ec
KO
3205 else
3206 kfree(trans->fs_usage_deltas);
3207 }
e131b6aa
KO
3208
3209 if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
67e0dd8f 3210 mempool_free(trans->mem, &c->btree_trans_mem_pool);
e131b6aa
KO
3211 else
3212 kfree(trans->mem);
1a21bf98 3213
dbd1e825
KO
3214#ifdef __KERNEL__
3215 /*
3216 * Userspace doesn't have a real percpu implementation:
3217 */
67e0dd8f 3218 trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths);
dbd1e825 3219#endif
e131b6aa 3220
67e0dd8f
KO
3221 if (trans->paths)
3222 mempool_free(trans->paths, &c->btree_paths_pool);
1a21bf98 3223
1c6fdbd8 3224 trans->mem = (void *) 0x1;
67e0dd8f 3225 trans->paths = (void *) 0x1;
1c6fdbd8 3226}
36e9d698 3227
b3d1e6ca 3228static void __maybe_unused
67e0dd8f 3229bch2_btree_path_node_to_text(struct printbuf *out,
b3d1e6ca 3230 struct btree_bkey_cached_common *_b,
f21566f1 3231 bool cached)
d211b408 3232{
5c1d808a
KO
3233 pr_buf(out, " l=%u %s:",
3234 _b->level, bch2_btree_ids[_b->btree_id]);
f21566f1 3235 bch2_bpos_to_text(out, btree_node_pos(_b, cached));
d211b408
KO
3236}
3237
b84d42c3 3238#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
f21566f1 3239static bool trans_has_locks(struct btree_trans *trans)
5c1d808a 3240{
67e0dd8f 3241 struct btree_path *path;
5c1d808a 3242
67e0dd8f
KO
3243 trans_for_each_path(trans, path)
3244 if (path->nodes_locked)
5c1d808a
KO
3245 return true;
3246 return false;
3247}
2d587674 3248#endif
5c1d808a 3249
495aabed
KO
3250void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
3251{
b84d42c3 3252#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
495aabed 3253 struct btree_trans *trans;
67e0dd8f 3254 struct btree_path *path;
495aabed 3255 struct btree *b;
c7ce2732 3256 static char lock_types[] = { 'r', 'i', 'w' };
495aabed
KO
3257 unsigned l;
3258
3259 mutex_lock(&c->btree_trans_lock);
3260 list_for_each_entry(trans, &c->btree_trans_list, list) {
f21566f1 3261 if (!trans_has_locks(trans))
5c1d808a
KO
3262 continue;
3263
669f87a5 3264 pr_buf(out, "%i %s\n", trans->pid, trans->fn);
495aabed 3265
67e0dd8f
KO
3266 trans_for_each_path(trans, path) {
3267 if (!path->nodes_locked)
495aabed
KO
3268 continue;
3269
1d3ecd7e 3270 pr_buf(out, " path %u %c l=%u %s:",
67e0dd8f
KO
3271 path->idx,
3272 path->cached ? 'c' : 'b',
1d3ecd7e 3273 path->level,
67e0dd8f
KO
3274 bch2_btree_ids[path->btree_id]);
3275 bch2_bpos_to_text(out, path->pos);
495aabed
KO
3276 pr_buf(out, "\n");
3277
3278 for (l = 0; l < BTREE_MAX_DEPTH; l++) {
67e0dd8f 3279 if (btree_node_locked(path, l)) {
d211b408 3280 pr_buf(out, " %s l=%u ",
67e0dd8f
KO
3281 btree_node_intent_locked(path, l) ? "i" : "r", l);
3282 bch2_btree_path_node_to_text(out,
3283 (void *) path->l[l].b,
3284 path->cached);
495aabed
KO
3285 pr_buf(out, "\n");
3286 }
3287 }
3288 }
3289
3290 b = READ_ONCE(trans->locking);
3291 if (b) {
67e0dd8f 3292 path = &trans->paths[trans->locking_path_idx];
c7ce2732 3293 pr_buf(out, " locking path %u %c l=%u %c %s:",
67e0dd8f
KO
3294 trans->locking_path_idx,
3295 path->cached ? 'c' : 'b',
515282ac 3296 trans->locking_level,
c7ce2732 3297 lock_types[trans->locking_lock_type],
515282ac
KO
3298 bch2_btree_ids[trans->locking_btree_id]);
3299 bch2_bpos_to_text(out, trans->locking_pos);
3300
d211b408 3301 pr_buf(out, " node ");
67e0dd8f
KO
3302 bch2_btree_path_node_to_text(out,
3303 (void *) b, path->cached);
495aabed
KO
3304 pr_buf(out, "\n");
3305 }
3306 }
3307 mutex_unlock(&c->btree_trans_lock);
3308#endif
3309}
3310
36e9d698
KO
3311void bch2_fs_btree_iter_exit(struct bch_fs *c)
3312{
99fafb04
KO
3313 if (c->btree_trans_barrier_initialized)
3314 cleanup_srcu_struct(&c->btree_trans_barrier);
e131b6aa 3315 mempool_exit(&c->btree_trans_mem_pool);
67e0dd8f 3316 mempool_exit(&c->btree_paths_pool);
36e9d698
KO
3317}
3318
3319int bch2_fs_btree_iter_init(struct bch_fs *c)
3320{
3321 unsigned nr = BTREE_ITER_MAX;
99fafb04 3322 int ret;
36e9d698 3323
495aabed
KO
3324 INIT_LIST_HEAD(&c->btree_trans_list);
3325 mutex_init(&c->btree_trans_lock);
3326
99fafb04 3327 ret = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
67e0dd8f 3328 sizeof(struct btree_path) * nr +
e131b6aa
KO
3329 sizeof(struct btree_insert_entry) * nr) ?:
3330 mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
99fafb04
KO
3331 BTREE_TRANS_MEM_MAX) ?:
3332 init_srcu_struct(&c->btree_trans_barrier);
3333 if (!ret)
3334 c->btree_trans_barrier_initialized = true;
3335 return ret;
36e9d698 3336}