Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
4 | #include "bkey_methods.h" | |
07a1006a | 5 | #include "bkey_buf.h" |
1c6fdbd8 KO |
6 | #include "btree_cache.h" |
7 | #include "btree_iter.h" | |
2ca88e5a | 8 | #include "btree_key_cache.h" |
1c6fdbd8 | 9 | #include "btree_locking.h" |
57b0b3db | 10 | #include "btree_update.h" |
1c6fdbd8 | 11 | #include "debug.h" |
50dc0f69 | 12 | #include "error.h" |
1c6fdbd8 | 13 | #include "extents.h" |
2ca88e5a | 14 | #include "journal.h" |
5222a460 | 15 | #include "recovery.h" |
35d5aff2 | 16 | #include "replicas.h" |
c075ff70 | 17 | #include "subvolume.h" |
1c6fdbd8 KO |
18 | #include "trace.h" |
19 | ||
20 | #include <linux/prefetch.h> | |
21 | ||
67e0dd8f KO |
22 | static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *); |
23 | static inline void btree_path_list_add(struct btree_trans *, struct btree_path *, | |
24 | struct btree_path *); | |
25 | ||
26 | static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *); | |
27 | ||
b0d1b70a KO |
28 | /* |
29 | * Unlocks before scheduling | |
30 | * Note: does not revalidate iterator | |
31 | */ | |
32 | static inline int bch2_trans_cond_resched(struct btree_trans *trans) | |
33 | { | |
34 | if (need_resched() || race_fault()) { | |
35 | bch2_trans_unlock(trans); | |
36 | schedule(); | |
37 | return bch2_trans_relock(trans) ? 0 : -EINTR; | |
38 | } else { | |
39 | return 0; | |
40 | } | |
41 | } | |
42 | ||
67e0dd8f KO |
43 | static inline int __btree_path_cmp(const struct btree_path *l, |
44 | enum btree_id r_btree_id, | |
45 | bool r_cached, | |
46 | struct bpos r_pos, | |
47 | unsigned r_level) | |
0423fb71 | 48 | { |
7abda8c1 KO |
49 | /* |
50 | * Must match lock ordering as defined by __bch2_btree_node_lock: | |
51 | */ | |
67e0dd8f | 52 | return cmp_int(l->btree_id, r_btree_id) ?: |
32b26e8c | 53 | cmp_int((int) l->cached, (int) r_cached) ?: |
67e0dd8f KO |
54 | bpos_cmp(l->pos, r_pos) ?: |
55 | -cmp_int(l->level, r_level); | |
56 | } | |
57 | ||
58 | static inline int btree_path_cmp(const struct btree_path *l, | |
59 | const struct btree_path *r) | |
60 | { | |
61 | return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level); | |
0423fb71 KO |
62 | } |
63 | ||
e751c01a KO |
64 | static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p) |
65 | { | |
e751c01a KO |
66 | /* Are we iterating over keys in all snapshots? */ |
67 | if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) { | |
68 | p = bpos_successor(p); | |
69 | } else { | |
70 | p = bpos_nosnap_successor(p); | |
71 | p.snapshot = iter->snapshot; | |
72 | } | |
73 | ||
74 | return p; | |
75 | } | |
76 | ||
77 | static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p) | |
78 | { | |
e751c01a KO |
79 | /* Are we iterating over keys in all snapshots? */ |
80 | if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) { | |
81 | p = bpos_predecessor(p); | |
82 | } else { | |
83 | p = bpos_nosnap_predecessor(p); | |
84 | p.snapshot = iter->snapshot; | |
85 | } | |
86 | ||
87 | return p; | |
88 | } | |
89 | ||
67e0dd8f | 90 | static inline bool is_btree_node(struct btree_path *path, unsigned l) |
1c6fdbd8 KO |
91 | { |
92 | return l < BTREE_MAX_DEPTH && | |
67e0dd8f | 93 | (unsigned long) path->l[l].b >= 128; |
1c6fdbd8 KO |
94 | } |
95 | ||
9626aeb1 | 96 | static inline struct bpos btree_iter_search_key(struct btree_iter *iter) |
a00fd8c5 | 97 | { |
9626aeb1 | 98 | struct bpos pos = iter->pos; |
cbdf24ce | 99 | |
9626aeb1 KO |
100 | if ((iter->flags & BTREE_ITER_IS_EXTENTS) && |
101 | bkey_cmp(pos, POS_MAX)) | |
e751c01a | 102 | pos = bkey_successor(iter, pos); |
9626aeb1 | 103 | return pos; |
a00fd8c5 KO |
104 | } |
105 | ||
67e0dd8f | 106 | static inline bool btree_path_pos_before_node(struct btree_path *path, |
e3ecf4f5 KO |
107 | struct btree *b) |
108 | { | |
67e0dd8f | 109 | return bpos_cmp(path->pos, b->data->min_key) < 0; |
e3ecf4f5 KO |
110 | } |
111 | ||
67e0dd8f | 112 | static inline bool btree_path_pos_after_node(struct btree_path *path, |
e3ecf4f5 KO |
113 | struct btree *b) |
114 | { | |
67e0dd8f | 115 | return bpos_cmp(b->key.k.p, path->pos) < 0; |
e3ecf4f5 KO |
116 | } |
117 | ||
67e0dd8f | 118 | static inline bool btree_path_pos_in_node(struct btree_path *path, |
e3ecf4f5 KO |
119 | struct btree *b) |
120 | { | |
67e0dd8f KO |
121 | return path->btree_id == b->c.btree_id && |
122 | !btree_path_pos_before_node(path, b) && | |
123 | !btree_path_pos_after_node(path, b); | |
e3ecf4f5 KO |
124 | } |
125 | ||
1c6fdbd8 KO |
126 | /* Btree node locking: */ |
127 | ||
9f6bd307 | 128 | void bch2_btree_node_unlock_write(struct btree_trans *trans, |
67e0dd8f | 129 | struct btree_path *path, struct btree *b) |
1c6fdbd8 | 130 | { |
67e0dd8f | 131 | bch2_btree_node_unlock_write_inlined(trans, path, b); |
1c6fdbd8 KO |
132 | } |
133 | ||
78cf784e | 134 | void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) |
1c6fdbd8 | 135 | { |
67e0dd8f | 136 | struct btree_path *linked; |
1c6fdbd8 KO |
137 | unsigned readers = 0; |
138 | ||
67e0dd8f KO |
139 | trans_for_each_path(trans, linked) |
140 | if (linked->l[b->c.level].b == b && | |
141 | btree_node_read_locked(linked, b->c.level)) | |
1c6fdbd8 KO |
142 | readers++; |
143 | ||
144 | /* | |
145 | * Must drop our read locks before calling six_lock_write() - | |
146 | * six_unlock() won't do wakeups until the reader count | |
147 | * goes to 0, and it's safe because we have the node intent | |
148 | * locked: | |
149 | */ | |
a9d79c6e KO |
150 | if (!b->c.lock.readers) |
151 | atomic64_sub(__SIX_VAL(read_lock, readers), | |
152 | &b->c.lock.state.counter); | |
153 | else | |
154 | this_cpu_sub(*b->c.lock.readers, readers); | |
155 | ||
c7ce2732 | 156 | six_lock_write(&b->c.lock, NULL, NULL); |
a9d79c6e KO |
157 | |
158 | if (!b->c.lock.readers) | |
159 | atomic64_add(__SIX_VAL(read_lock, readers), | |
160 | &b->c.lock.state.counter); | |
161 | else | |
162 | this_cpu_add(*b->c.lock.readers, readers); | |
1c6fdbd8 KO |
163 | } |
164 | ||
78cf784e | 165 | bool __bch2_btree_node_relock(struct btree_trans *trans, |
67e0dd8f | 166 | struct btree_path *path, unsigned level) |
1c6fdbd8 | 167 | { |
67e0dd8f KO |
168 | struct btree *b = btree_path_node(path, level); |
169 | int want = __btree_lock_want(path, level); | |
1c6fdbd8 | 170 | |
67e0dd8f | 171 | if (!is_btree_node(path, level)) |
bc82d08b | 172 | goto fail; |
1c6fdbd8 KO |
173 | |
174 | if (race_fault()) | |
bc82d08b | 175 | goto fail; |
1c6fdbd8 | 176 | |
67e0dd8f KO |
177 | if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) || |
178 | (btree_node_lock_seq_matches(path, b, level) && | |
78cf784e | 179 | btree_node_lock_increment(trans, b, level, want))) { |
3074bc0f | 180 | mark_btree_node_locked(path, level, want); |
ed8413fd | 181 | return true; |
ed8413fd | 182 | } |
bc82d08b KO |
183 | fail: |
184 | trace_btree_node_relock_fail(trans->fn, _RET_IP_, | |
185 | path->btree_id, | |
186 | &path->pos, | |
187 | (unsigned long) b, | |
188 | path->l[level].lock_seq, | |
189 | is_btree_node(path, level) ? b->c.lock.state.seq : 0); | |
190 | return false; | |
1c6fdbd8 KO |
191 | } |
192 | ||
f527afea KO |
193 | bool bch2_btree_node_upgrade(struct btree_trans *trans, |
194 | struct btree_path *path, unsigned level) | |
1c6fdbd8 | 195 | { |
67e0dd8f | 196 | struct btree *b = path->l[level].b; |
1c6fdbd8 | 197 | |
67e0dd8f | 198 | if (!is_btree_node(path, level)) |
1c6fdbd8 KO |
199 | return false; |
200 | ||
d697b9ab KO |
201 | switch (btree_lock_want(path, level)) { |
202 | case BTREE_NODE_UNLOCKED: | |
203 | BUG_ON(btree_node_locked(path, level)); | |
204 | return true; | |
205 | case BTREE_NODE_READ_LOCKED: | |
206 | BUG_ON(btree_node_intent_locked(path, level)); | |
207 | return bch2_btree_node_relock(trans, path, level); | |
208 | case BTREE_NODE_INTENT_LOCKED: | |
209 | break; | |
210 | } | |
211 | ||
67e0dd8f | 212 | if (btree_node_intent_locked(path, level)) |
1c6fdbd8 KO |
213 | return true; |
214 | ||
215 | if (race_fault()) | |
216 | return false; | |
217 | ||
67e0dd8f | 218 | if (btree_node_locked(path, level) |
c43a6ef9 | 219 | ? six_lock_tryupgrade(&b->c.lock) |
67e0dd8f | 220 | : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) |
1c6fdbd8 KO |
221 | goto success; |
222 | ||
67e0dd8f | 223 | if (btree_node_lock_seq_matches(path, b, level) && |
78cf784e | 224 | btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) { |
67e0dd8f | 225 | btree_node_unlock(path, level); |
1c6fdbd8 KO |
226 | goto success; |
227 | } | |
228 | ||
229 | return false; | |
230 | success: | |
3074bc0f | 231 | mark_btree_node_intent_locked(path, level); |
1c6fdbd8 KO |
232 | return true; |
233 | } | |
234 | ||
67e0dd8f KO |
235 | static inline bool btree_path_get_locks(struct btree_trans *trans, |
236 | struct btree_path *path, | |
bc82d08b | 237 | bool upgrade) |
1c6fdbd8 | 238 | { |
67e0dd8f | 239 | unsigned l = path->level; |
1c6fdbd8 KO |
240 | int fail_idx = -1; |
241 | ||
242 | do { | |
67e0dd8f | 243 | if (!btree_path_node(path, l)) |
1c6fdbd8 KO |
244 | break; |
245 | ||
246 | if (!(upgrade | |
67e0dd8f | 247 | ? bch2_btree_node_upgrade(trans, path, l) |
f48361b0 | 248 | : bch2_btree_node_relock(trans, path, l))) |
1c6fdbd8 | 249 | fail_idx = l; |
1c6fdbd8 KO |
250 | |
251 | l++; | |
67e0dd8f | 252 | } while (l < path->locks_want); |
1c6fdbd8 KO |
253 | |
254 | /* | |
255 | * When we fail to get a lock, we have to ensure that any child nodes | |
67e0dd8f | 256 | * can't be relocked so bch2_btree_path_traverse has to walk back up to |
1c6fdbd8 KO |
257 | * the node that we failed to relock: |
258 | */ | |
1d3ecd7e KO |
259 | if (fail_idx >= 0) { |
260 | __bch2_btree_path_unlock(path); | |
261 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); | |
262 | ||
263 | do { | |
264 | path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; | |
265 | --fail_idx; | |
266 | } while (fail_idx >= 0); | |
1c6fdbd8 KO |
267 | } |
268 | ||
67e0dd8f KO |
269 | if (path->uptodate == BTREE_ITER_NEED_RELOCK) |
270 | path->uptodate = BTREE_ITER_UPTODATE; | |
1c6fdbd8 | 271 | |
a0a56879 | 272 | bch2_trans_verify_locks(trans); |
0f238367 | 273 | |
67e0dd8f | 274 | return path->uptodate < BTREE_ITER_NEED_RELOCK; |
1c6fdbd8 KO |
275 | } |
276 | ||
d211b408 | 277 | static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, |
f21566f1 | 278 | bool cached) |
d211b408 | 279 | { |
f21566f1 | 280 | return !cached |
d211b408 KO |
281 | ? container_of(_b, struct btree, c)->key.k.p |
282 | : container_of(_b, struct bkey_cached, c)->key.pos; | |
283 | } | |
284 | ||
1c6fdbd8 | 285 | /* Slowpath: */ |
78cf784e | 286 | bool __bch2_btree_node_lock(struct btree_trans *trans, |
67e0dd8f KO |
287 | struct btree_path *path, |
288 | struct btree *b, | |
289 | struct bpos pos, unsigned level, | |
bd2bb273 | 290 | enum six_lock_type type, |
a301dc38 KO |
291 | six_lock_should_sleep_fn should_sleep_fn, void *p, |
292 | unsigned long ip) | |
1c6fdbd8 | 293 | { |
7abda8c1 KO |
294 | struct btree_path *linked; |
295 | unsigned reason; | |
1c6fdbd8 | 296 | |
647d7b60 | 297 | /* Check if it's safe to block: */ |
67e0dd8f | 298 | trans_for_each_path(trans, linked) { |
1c6fdbd8 KO |
299 | if (!linked->nodes_locked) |
300 | continue; | |
301 | ||
1c6fdbd8 KO |
302 | /* |
303 | * Can't block taking an intent lock if we have _any_ nodes read | |
304 | * locked: | |
305 | * | |
306 | * - Our read lock blocks another thread with an intent lock on | |
307 | * the same node from getting a write lock, and thus from | |
308 | * dropping its intent lock | |
309 | * | |
310 | * - And the other thread may have multiple nodes intent locked: | |
311 | * both the node we want to intent lock, and the node we | |
312 | * already have read locked - deadlock: | |
313 | */ | |
314 | if (type == SIX_LOCK_intent && | |
315 | linked->nodes_locked != linked->nodes_intent_locked) { | |
2527dd91 | 316 | reason = 1; |
7abda8c1 | 317 | goto deadlock; |
1c6fdbd8 KO |
318 | } |
319 | ||
67e0dd8f | 320 | if (linked->btree_id != path->btree_id) { |
7abda8c1 KO |
321 | if (linked->btree_id < path->btree_id) |
322 | continue; | |
323 | ||
324 | reason = 3; | |
325 | goto deadlock; | |
7e7ae6ca KO |
326 | } |
327 | ||
328 | /* | |
7abda8c1 KO |
329 | * Within the same btree, non-cached paths come before cached |
330 | * paths: | |
7e7ae6ca | 331 | */ |
67e0dd8f | 332 | if (linked->cached != path->cached) { |
7abda8c1 KO |
333 | if (!linked->cached) |
334 | continue; | |
335 | ||
336 | reason = 4; | |
337 | goto deadlock; | |
7e7ae6ca KO |
338 | } |
339 | ||
1c6fdbd8 KO |
340 | /* |
341 | * Interior nodes must be locked before their descendants: if | |
67e0dd8f | 342 | * another path has possible descendants locked of the node |
1c6fdbd8 KO |
343 | * we're about to lock, it must have the ancestors locked too: |
344 | */ | |
7e7ae6ca | 345 | if (level > __fls(linked->nodes_locked)) { |
2527dd91 | 346 | reason = 5; |
7abda8c1 | 347 | goto deadlock; |
515282ac KO |
348 | } |
349 | ||
350 | /* Must lock btree nodes in key order: */ | |
7e7ae6ca | 351 | if (btree_node_locked(linked, level) && |
4cf91b02 | 352 | bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b, |
f21566f1 | 353 | linked->cached)) <= 0) { |
7e7ae6ca | 354 | reason = 7; |
7abda8c1 | 355 | goto deadlock; |
a301dc38 | 356 | } |
1c6fdbd8 KO |
357 | } |
358 | ||
c7ce2732 KO |
359 | return btree_node_lock_type(trans, path, b, pos, level, |
360 | type, should_sleep_fn, p); | |
7abda8c1 KO |
361 | deadlock: |
362 | trace_trans_restart_would_deadlock(trans->fn, ip, | |
363 | trans->in_traverse_all, reason, | |
364 | linked->btree_id, | |
365 | linked->cached, | |
366 | &linked->pos, | |
367 | path->btree_id, | |
368 | path->cached, | |
369 | &pos); | |
370 | btree_trans_restart(trans); | |
371 | return false; | |
1c6fdbd8 KO |
372 | } |
373 | ||
374 | /* Btree iterator locking: */ | |
375 | ||
376 | #ifdef CONFIG_BCACHEFS_DEBUG | |
67e0dd8f KO |
377 | |
378 | static void bch2_btree_path_verify_locks(struct btree_path *path) | |
1c6fdbd8 KO |
379 | { |
380 | unsigned l; | |
381 | ||
1d3ecd7e | 382 | if (!path->nodes_locked) { |
d697b9ab KO |
383 | BUG_ON(path->uptodate == BTREE_ITER_UPTODATE && |
384 | btree_path_node(path, path->level)); | |
1d3ecd7e KO |
385 | return; |
386 | } | |
1c6fdbd8 | 387 | |
1d3ecd7e | 388 | for (l = 0; btree_path_node(path, l); l++) |
67e0dd8f KO |
389 | BUG_ON(btree_lock_want(path, l) != |
390 | btree_node_locked_type(path, l)); | |
1c6fdbd8 | 391 | } |
ad7ae8d6 | 392 | |
a0a56879 | 393 | void bch2_trans_verify_locks(struct btree_trans *trans) |
ad7ae8d6 | 394 | { |
67e0dd8f | 395 | struct btree_path *path; |
ad7ae8d6 | 396 | |
67e0dd8f KO |
397 | trans_for_each_path(trans, path) |
398 | bch2_btree_path_verify_locks(path); | |
ad7ae8d6 | 399 | } |
7d6f9b64 | 400 | #else |
67e0dd8f | 401 | static inline void bch2_btree_path_verify_locks(struct btree_path *path) {} |
1c6fdbd8 KO |
402 | #endif |
403 | ||
67e0dd8f KO |
404 | /* Btree path locking: */ |
405 | ||
6e075b54 KO |
406 | /* |
407 | * Only for btree_cache.c - only relocks intent locks | |
408 | */ | |
67e0dd8f KO |
409 | bool bch2_btree_path_relock_intent(struct btree_trans *trans, |
410 | struct btree_path *path) | |
6e075b54 KO |
411 | { |
412 | unsigned l; | |
413 | ||
67e0dd8f KO |
414 | for (l = path->level; |
415 | l < path->locks_want && btree_path_node(path, l); | |
6e075b54 | 416 | l++) { |
67e0dd8f | 417 | if (!bch2_btree_node_relock(trans, path, l)) { |
1d3ecd7e | 418 | __bch2_btree_path_unlock(path); |
67e0dd8f | 419 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); |
bc82d08b KO |
420 | trace_trans_restart_relock_path_intent(trans->fn, _RET_IP_, |
421 | path->btree_id, &path->pos); | |
9f6bd307 | 422 | btree_trans_restart(trans); |
6e075b54 KO |
423 | return false; |
424 | } | |
425 | } | |
426 | ||
427 | return true; | |
428 | } | |
429 | ||
d4d24a65 KO |
430 | noinline __flatten |
431 | static bool __bch2_btree_path_relock(struct btree_trans *trans, | |
67e0dd8f | 432 | struct btree_path *path, unsigned long trace_ip) |
1c6fdbd8 | 433 | { |
bc82d08b | 434 | bool ret = btree_path_get_locks(trans, path, false); |
e5af273f | 435 | |
bc82d08b KO |
436 | if (!ret) { |
437 | trace_trans_restart_relock_path(trans->fn, trace_ip, | |
438 | path->btree_id, &path->pos); | |
9f6bd307 | 439 | btree_trans_restart(trans); |
bc82d08b | 440 | } |
e5af273f | 441 | return ret; |
1c6fdbd8 KO |
442 | } |
443 | ||
d4d24a65 KO |
444 | static inline bool bch2_btree_path_relock(struct btree_trans *trans, |
445 | struct btree_path *path, unsigned long trace_ip) | |
446 | { | |
447 | return btree_node_locked(path, path->level) | |
448 | ? true | |
449 | : __bch2_btree_path_relock(trans, path, trace_ip); | |
450 | } | |
451 | ||
67e0dd8f KO |
452 | bool __bch2_btree_path_upgrade(struct btree_trans *trans, |
453 | struct btree_path *path, | |
1c6fdbd8 KO |
454 | unsigned new_locks_want) |
455 | { | |
67e0dd8f | 456 | struct btree_path *linked; |
5e6a668b | 457 | |
67e0dd8f | 458 | EBUG_ON(path->locks_want >= new_locks_want); |
1c6fdbd8 | 459 | |
67e0dd8f | 460 | path->locks_want = new_locks_want; |
1c6fdbd8 | 461 | |
bc82d08b | 462 | if (btree_path_get_locks(trans, path, true)) |
5e6a668b KO |
463 | return true; |
464 | ||
465 | /* | |
466 | * XXX: this is ugly - we'd prefer to not be mucking with other | |
467 | * iterators in the btree_trans here. | |
468 | * | |
469 | * On failure to upgrade the iterator, setting iter->locks_want and | |
67e0dd8f | 470 | * calling get_locks() is sufficient to make bch2_btree_path_traverse() |
5e6a668b KO |
471 | * get the locks we want on transaction restart. |
472 | * | |
473 | * But if this iterator was a clone, on transaction restart what we did | |
474 | * to this iterator isn't going to be preserved. | |
475 | * | |
476 | * Possibly we could add an iterator field for the parent iterator when | |
477 | * an iterator is a copy - for now, we'll just upgrade any other | |
478 | * iterators with the same btree id. | |
479 | * | |
480 | * The code below used to be needed to ensure ancestor nodes get locked | |
481 | * before interior nodes - now that's handled by | |
67e0dd8f | 482 | * bch2_btree_path_traverse_all(). |
5e6a668b | 483 | */ |
61a66469 KO |
484 | if (!path->cached && !trans->in_traverse_all) |
485 | trans_for_each_path(trans, linked) | |
486 | if (linked != path && | |
487 | linked->cached == path->cached && | |
488 | linked->btree_id == path->btree_id && | |
489 | linked->locks_want < new_locks_want) { | |
490 | linked->locks_want = new_locks_want; | |
491 | btree_path_get_locks(trans, linked, true); | |
492 | } | |
5e6a668b KO |
493 | |
494 | return false; | |
1c6fdbd8 KO |
495 | } |
496 | ||
67e0dd8f | 497 | void __bch2_btree_path_downgrade(struct btree_path *path, |
2d587674 | 498 | unsigned new_locks_want) |
1c6fdbd8 | 499 | { |
2d587674 | 500 | unsigned l; |
1c6fdbd8 | 501 | |
67e0dd8f | 502 | EBUG_ON(path->locks_want < new_locks_want); |
1c6fdbd8 | 503 | |
67e0dd8f | 504 | path->locks_want = new_locks_want; |
2d587674 | 505 | |
67e0dd8f KO |
506 | while (path->nodes_locked && |
507 | (l = __fls(path->nodes_locked)) >= path->locks_want) { | |
508 | if (l > path->level) { | |
509 | btree_node_unlock(path, l); | |
2d587674 | 510 | } else { |
67e0dd8f KO |
511 | if (btree_node_intent_locked(path, l)) { |
512 | six_lock_downgrade(&path->l[l].b->c.lock); | |
513 | path->nodes_intent_locked ^= 1 << l; | |
1c6fdbd8 | 514 | } |
2d587674 | 515 | break; |
1c6fdbd8 | 516 | } |
1c6fdbd8 | 517 | } |
ad7ae8d6 | 518 | |
67e0dd8f | 519 | bch2_btree_path_verify_locks(path); |
1c6fdbd8 KO |
520 | } |
521 | ||
72545b5e KO |
522 | void bch2_trans_downgrade(struct btree_trans *trans) |
523 | { | |
67e0dd8f | 524 | struct btree_path *path; |
72545b5e | 525 | |
67e0dd8f KO |
526 | trans_for_each_path(trans, path) |
527 | bch2_btree_path_downgrade(path); | |
72545b5e KO |
528 | } |
529 | ||
58fbf808 KO |
530 | /* Btree transaction locking: */ |
531 | ||
532 | bool bch2_trans_relock(struct btree_trans *trans) | |
0f238367 | 533 | { |
67e0dd8f | 534 | struct btree_path *path; |
0f238367 | 535 | |
e5af273f KO |
536 | if (unlikely(trans->restarted)) |
537 | return false; | |
538 | ||
67e0dd8f KO |
539 | trans_for_each_path(trans, path) |
540 | if (path->should_be_locked && | |
541 | !bch2_btree_path_relock(trans, path, _RET_IP_)) { | |
669f87a5 | 542 | trace_trans_restart_relock(trans->fn, _RET_IP_, |
67e0dd8f | 543 | path->btree_id, &path->pos); |
e5af273f | 544 | BUG_ON(!trans->restarted); |
d5a43661 | 545 | return false; |
241e2636 | 546 | } |
d5a43661 | 547 | return true; |
0f238367 KO |
548 | } |
549 | ||
58fbf808 | 550 | void bch2_trans_unlock(struct btree_trans *trans) |
0f238367 | 551 | { |
67e0dd8f | 552 | struct btree_path *path; |
0f238367 | 553 | |
67e0dd8f KO |
554 | trans_for_each_path(trans, path) |
555 | __bch2_btree_path_unlock(path); | |
0f238367 KO |
556 | } |
557 | ||
1c6fdbd8 KO |
558 | /* Btree iterator: */ |
559 | ||
560 | #ifdef CONFIG_BCACHEFS_DEBUG | |
561 | ||
67e0dd8f KO |
562 | static void bch2_btree_path_verify_cached(struct btree_trans *trans, |
563 | struct btree_path *path) | |
d211b408 KO |
564 | { |
565 | struct bkey_cached *ck; | |
67e0dd8f | 566 | bool locked = btree_node_locked(path, 0); |
d211b408 | 567 | |
67e0dd8f | 568 | if (!bch2_btree_node_relock(trans, path, 0)) |
d211b408 KO |
569 | return; |
570 | ||
67e0dd8f KO |
571 | ck = (void *) path->l[0].b; |
572 | BUG_ON(ck->key.btree_id != path->btree_id || | |
573 | bkey_cmp(ck->key.pos, path->pos)); | |
d211b408 KO |
574 | |
575 | if (!locked) | |
67e0dd8f | 576 | btree_node_unlock(path, 0); |
d211b408 KO |
577 | } |
578 | ||
67e0dd8f KO |
579 | static void bch2_btree_path_verify_level(struct btree_trans *trans, |
580 | struct btree_path *path, unsigned level) | |
1c6fdbd8 | 581 | { |
67e0dd8f | 582 | struct btree_path_level *l; |
4ce41957 KO |
583 | struct btree_node_iter tmp; |
584 | bool locked; | |
2dac0eae | 585 | struct bkey_packed *p, *k; |
fa8e94fa KO |
586 | struct printbuf buf1 = PRINTBUF; |
587 | struct printbuf buf2 = PRINTBUF; | |
588 | struct printbuf buf3 = PRINTBUF; | |
2dac0eae | 589 | const char *msg; |
1c6fdbd8 | 590 | |
29364f34 | 591 | if (!bch2_debug_check_iterators) |
f13f5a8c KO |
592 | return; |
593 | ||
67e0dd8f | 594 | l = &path->l[level]; |
4ce41957 | 595 | tmp = l->iter; |
67e0dd8f | 596 | locked = btree_node_locked(path, level); |
4ce41957 | 597 | |
67e0dd8f | 598 | if (path->cached) { |
d211b408 | 599 | if (!level) |
67e0dd8f | 600 | bch2_btree_path_verify_cached(trans, path); |
d211b408 KO |
601 | return; |
602 | } | |
603 | ||
67e0dd8f | 604 | if (!btree_path_node(path, level)) |
2dac0eae KO |
605 | return; |
606 | ||
67e0dd8f | 607 | if (!bch2_btree_node_relock(trans, path, level)) |
271a3d3a KO |
608 | return; |
609 | ||
67e0dd8f | 610 | BUG_ON(!btree_path_pos_in_node(path, l->b)); |
2dac0eae | 611 | |
2dac0eae | 612 | bch2_btree_node_iter_verify(&l->iter, l->b); |
1c6fdbd8 KO |
613 | |
614 | /* | |
1ae29c1f | 615 | * For interior nodes, the iterator will have skipped past deleted keys: |
1c6fdbd8 | 616 | */ |
1ae29c1f | 617 | p = level |
c052cf82 | 618 | ? bch2_btree_node_iter_prev(&tmp, l->b) |
2dac0eae KO |
619 | : bch2_btree_node_iter_prev_all(&tmp, l->b); |
620 | k = bch2_btree_node_iter_peek_all(&l->iter, l->b); | |
1c6fdbd8 | 621 | |
67e0dd8f | 622 | if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) { |
2dac0eae KO |
623 | msg = "before"; |
624 | goto err; | |
1c6fdbd8 KO |
625 | } |
626 | ||
67e0dd8f | 627 | if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) { |
2dac0eae KO |
628 | msg = "after"; |
629 | goto err; | |
630 | } | |
f21566f1 | 631 | |
2dac0eae | 632 | if (!locked) |
67e0dd8f | 633 | btree_node_unlock(path, level); |
2dac0eae KO |
634 | return; |
635 | err: | |
fa8e94fa | 636 | bch2_bpos_to_text(&buf1, path->pos); |
2dac0eae KO |
637 | |
638 | if (p) { | |
639 | struct bkey uk = bkey_unpack_key(l->b, p); | |
fa8e94fa KO |
640 | bch2_bkey_to_text(&buf2, &uk); |
641 | } else { | |
642 | pr_buf(&buf2, "(none)"); | |
2dac0eae | 643 | } |
1c6fdbd8 | 644 | |
2dac0eae KO |
645 | if (k) { |
646 | struct bkey uk = bkey_unpack_key(l->b, k); | |
fa8e94fa KO |
647 | bch2_bkey_to_text(&buf3, &uk); |
648 | } else { | |
649 | pr_buf(&buf3, "(none)"); | |
1c6fdbd8 | 650 | } |
2dac0eae | 651 | |
67e0dd8f KO |
652 | panic("path should be %s key at level %u:\n" |
653 | "path pos %s\n" | |
2dac0eae KO |
654 | "prev key %s\n" |
655 | "cur key %s\n", | |
fa8e94fa | 656 | msg, level, buf1.buf, buf2.buf, buf3.buf); |
1c6fdbd8 KO |
657 | } |
658 | ||
67e0dd8f KO |
659 | static void bch2_btree_path_verify(struct btree_trans *trans, |
660 | struct btree_path *path) | |
1c6fdbd8 | 661 | { |
5aab6635 | 662 | struct bch_fs *c = trans->c; |
2dac0eae | 663 | unsigned i; |
1c6fdbd8 | 664 | |
67e0dd8f KO |
665 | EBUG_ON(path->btree_id >= BTREE_ID_NR); |
666 | ||
667 | for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) { | |
668 | if (!path->l[i].b) { | |
d7407292 KO |
669 | BUG_ON(!path->cached && |
670 | c->btree_roots[path->btree_id].b->c.level > i); | |
67e0dd8f KO |
671 | break; |
672 | } | |
673 | ||
674 | bch2_btree_path_verify_level(trans, path, i); | |
675 | } | |
676 | ||
677 | bch2_btree_path_verify_locks(path); | |
678 | } | |
679 | ||
680 | void bch2_trans_verify_paths(struct btree_trans *trans) | |
681 | { | |
682 | struct btree_path *path; | |
683 | ||
67e0dd8f KO |
684 | trans_for_each_path(trans, path) |
685 | bch2_btree_path_verify(trans, path); | |
686 | } | |
687 | ||
688 | static void bch2_btree_iter_verify(struct btree_iter *iter) | |
689 | { | |
690 | struct btree_trans *trans = iter->trans; | |
691 | ||
692 | BUG_ON(iter->btree_id >= BTREE_ID_NR); | |
693 | ||
694 | BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached); | |
7e1a3aa9 | 695 | |
e751c01a KO |
696 | BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) && |
697 | (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); | |
698 | ||
f21566f1 | 699 | BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) && |
e751c01a KO |
700 | (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && |
701 | !btree_type_has_snapshots(iter->btree_id)); | |
702 | ||
1f2d9192 KO |
703 | if (iter->update_path) |
704 | bch2_btree_path_verify(trans, iter->update_path); | |
67e0dd8f | 705 | bch2_btree_path_verify(trans, iter->path); |
2dac0eae KO |
706 | } |
707 | ||
7e1a3aa9 KO |
708 | static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) |
709 | { | |
a861c722 KO |
710 | BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && |
711 | !iter->pos.snapshot); | |
712 | ||
e751c01a KO |
713 | BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && |
714 | iter->pos.snapshot != iter->snapshot); | |
715 | ||
f21566f1 KO |
716 | BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || |
717 | bkey_cmp(iter->pos, iter->k.p) > 0); | |
7e1a3aa9 KO |
718 | } |
719 | ||
c075ff70 KO |
720 | static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) |
721 | { | |
722 | struct btree_trans *trans = iter->trans; | |
723 | struct btree_iter copy; | |
724 | struct bkey_s_c prev; | |
725 | int ret = 0; | |
726 | ||
727 | if (!bch2_debug_check_iterators) | |
728 | return 0; | |
729 | ||
730 | if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) | |
731 | return 0; | |
732 | ||
733 | if (bkey_err(k) || !k.k) | |
734 | return 0; | |
735 | ||
736 | BUG_ON(!bch2_snapshot_is_ancestor(trans->c, | |
737 | iter->snapshot, | |
738 | k.k->p.snapshot)); | |
739 | ||
740 | bch2_trans_iter_init(trans, ©, iter->btree_id, iter->pos, | |
ffa7d262 | 741 | BTREE_ITER_NOPRESERVE| |
c075ff70 KO |
742 | BTREE_ITER_ALL_SNAPSHOTS); |
743 | prev = bch2_btree_iter_prev(©); | |
744 | if (!prev.k) | |
745 | goto out; | |
746 | ||
747 | ret = bkey_err(prev); | |
748 | if (ret) | |
749 | goto out; | |
750 | ||
751 | if (!bkey_cmp(prev.k->p, k.k->p) && | |
752 | bch2_snapshot_is_ancestor(trans->c, iter->snapshot, | |
753 | prev.k->p.snapshot) > 0) { | |
fa8e94fa | 754 | struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; |
c075ff70 | 755 | |
fa8e94fa KO |
756 | bch2_bkey_to_text(&buf1, k.k); |
757 | bch2_bkey_to_text(&buf2, prev.k); | |
c075ff70 KO |
758 | |
759 | panic("iter snap %u\n" | |
760 | "k %s\n" | |
761 | "prev %s\n", | |
762 | iter->snapshot, | |
fa8e94fa | 763 | buf1.buf, buf2.buf); |
c075ff70 KO |
764 | } |
765 | out: | |
766 | bch2_trans_iter_exit(trans, ©); | |
767 | return ret; | |
768 | } | |
769 | ||
32b26e8c KO |
770 | void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, |
771 | struct bpos pos, bool key_cache) | |
772 | { | |
773 | struct btree_path *path; | |
774 | unsigned idx; | |
fa8e94fa | 775 | struct printbuf buf = PRINTBUF; |
32b26e8c KO |
776 | |
777 | trans_for_each_path_inorder(trans, path, idx) { | |
778 | int cmp = cmp_int(path->btree_id, id) ?: | |
779 | cmp_int(path->cached, key_cache); | |
780 | ||
781 | if (cmp > 0) | |
782 | break; | |
783 | if (cmp < 0) | |
784 | continue; | |
785 | ||
786 | if (!(path->nodes_locked & 1) || | |
787 | !path->should_be_locked) | |
788 | continue; | |
789 | ||
790 | if (!key_cache) { | |
791 | if (bkey_cmp(pos, path->l[0].b->data->min_key) >= 0 && | |
792 | bkey_cmp(pos, path->l[0].b->key.k.p) <= 0) | |
793 | return; | |
794 | } else { | |
795 | if (!bkey_cmp(pos, path->pos)) | |
796 | return; | |
797 | } | |
798 | } | |
799 | ||
800 | bch2_dump_trans_paths_updates(trans); | |
fa8e94fa KO |
801 | bch2_bpos_to_text(&buf, pos); |
802 | ||
32b26e8c | 803 | panic("not locked: %s %s%s\n", |
fa8e94fa | 804 | bch2_btree_ids[id], buf.buf, |
32b26e8c KO |
805 | key_cache ? " cached" : ""); |
806 | } | |
807 | ||
271a3d3a KO |
808 | #else |
809 | ||
67e0dd8f KO |
810 | static inline void bch2_btree_path_verify_level(struct btree_trans *trans, |
811 | struct btree_path *path, unsigned l) {} | |
812 | static inline void bch2_btree_path_verify(struct btree_trans *trans, | |
813 | struct btree_path *path) {} | |
7d6f9b64 | 814 | static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} |
7e1a3aa9 | 815 | static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {} |
c075ff70 | 816 | static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; } |
271a3d3a | 817 | |
1c6fdbd8 KO |
818 | #endif |
819 | ||
67e0dd8f KO |
820 | /* Btree path: fixups after btree updates */ |
821 | ||
23bbd2bb KO |
822 | static void btree_node_iter_set_set_pos(struct btree_node_iter *iter, |
823 | struct btree *b, | |
824 | struct bset_tree *t, | |
825 | struct bkey_packed *k) | |
826 | { | |
827 | struct btree_node_iter_set *set; | |
828 | ||
829 | btree_node_iter_for_each(iter, set) | |
830 | if (set->end == t->end_offset) { | |
831 | set->k = __btree_node_key_to_offset(b, k); | |
832 | bch2_btree_node_iter_sort(iter, b); | |
833 | return; | |
834 | } | |
835 | ||
836 | bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t)); | |
837 | } | |
838 | ||
67e0dd8f | 839 | static void __bch2_btree_path_fix_key_modified(struct btree_path *path, |
9626aeb1 KO |
840 | struct btree *b, |
841 | struct bkey_packed *where) | |
887c2a4e | 842 | { |
67e0dd8f | 843 | struct btree_path_level *l = &path->l[b->c.level]; |
887c2a4e | 844 | |
9626aeb1 KO |
845 | if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b)) |
846 | return; | |
847 | ||
67e0dd8f | 848 | if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0) |
9626aeb1 | 849 | bch2_btree_node_iter_advance(&l->iter, l->b); |
887c2a4e KO |
850 | } |
851 | ||
67e0dd8f | 852 | void bch2_btree_path_fix_key_modified(struct btree_trans *trans, |
887c2a4e KO |
853 | struct btree *b, |
854 | struct bkey_packed *where) | |
855 | { | |
67e0dd8f | 856 | struct btree_path *path; |
887c2a4e | 857 | |
67e0dd8f KO |
858 | trans_for_each_path_with_node(trans, b, path) { |
859 | __bch2_btree_path_fix_key_modified(path, b, where); | |
860 | bch2_btree_path_verify_level(trans, path, b->c.level); | |
887c2a4e KO |
861 | } |
862 | } | |
863 | ||
67e0dd8f KO |
864 | static void __bch2_btree_node_iter_fix(struct btree_path *path, |
865 | struct btree *b, | |
866 | struct btree_node_iter *node_iter, | |
867 | struct bset_tree *t, | |
868 | struct bkey_packed *where, | |
869 | unsigned clobber_u64s, | |
870 | unsigned new_u64s) | |
1c6fdbd8 KO |
871 | { |
872 | const struct bkey_packed *end = btree_bkey_last(b, t); | |
873 | struct btree_node_iter_set *set; | |
874 | unsigned offset = __btree_node_key_to_offset(b, where); | |
875 | int shift = new_u64s - clobber_u64s; | |
271a3d3a | 876 | unsigned old_end = t->end_offset - shift; |
c0fc30da KO |
877 | unsigned orig_iter_pos = node_iter->data[0].k; |
878 | bool iter_current_key_modified = | |
879 | orig_iter_pos >= offset && | |
880 | orig_iter_pos <= offset + clobber_u64s; | |
1c6fdbd8 KO |
881 | |
882 | btree_node_iter_for_each(node_iter, set) | |
883 | if (set->end == old_end) | |
884 | goto found; | |
885 | ||
886 | /* didn't find the bset in the iterator - might have to readd it: */ | |
887 | if (new_u64s && | |
67e0dd8f | 888 | bkey_iter_pos_cmp(b, where, &path->pos) >= 0) { |
1c6fdbd8 | 889 | bch2_btree_node_iter_push(node_iter, b, where, end); |
c0fc30da KO |
890 | goto fixup_done; |
891 | } else { | |
892 | /* Iterator is after key that changed */ | |
059e4134 | 893 | return; |
1c6fdbd8 | 894 | } |
1c6fdbd8 | 895 | found: |
271a3d3a | 896 | set->end = t->end_offset; |
1c6fdbd8 KO |
897 | |
898 | /* Iterator hasn't gotten to the key that changed yet: */ | |
899 | if (set->k < offset) | |
059e4134 | 900 | return; |
1c6fdbd8 KO |
901 | |
902 | if (new_u64s && | |
67e0dd8f | 903 | bkey_iter_pos_cmp(b, where, &path->pos) >= 0) { |
1c6fdbd8 KO |
904 | set->k = offset; |
905 | } else if (set->k < offset + clobber_u64s) { | |
906 | set->k = offset + new_u64s; | |
907 | if (set->k == set->end) | |
908 | bch2_btree_node_iter_set_drop(node_iter, set); | |
909 | } else { | |
c0fc30da | 910 | /* Iterator is after key that changed */ |
1c6fdbd8 | 911 | set->k = (int) set->k + shift; |
059e4134 | 912 | return; |
1c6fdbd8 KO |
913 | } |
914 | ||
1c6fdbd8 | 915 | bch2_btree_node_iter_sort(node_iter, b); |
c0fc30da KO |
916 | fixup_done: |
917 | if (node_iter->data[0].k != orig_iter_pos) | |
918 | iter_current_key_modified = true; | |
56e0e7c7 | 919 | |
1c6fdbd8 | 920 | /* |
23bbd2bb KO |
921 | * When a new key is added, and the node iterator now points to that |
922 | * key, the iterator might have skipped past deleted keys that should | |
923 | * come after the key the iterator now points to. We have to rewind to | |
c0fc30da KO |
924 | * before those deleted keys - otherwise |
925 | * bch2_btree_node_iter_prev_all() breaks: | |
1c6fdbd8 | 926 | */ |
23bbd2bb | 927 | if (!bch2_btree_node_iter_end(node_iter) && |
c0fc30da | 928 | iter_current_key_modified && |
1ae29c1f | 929 | b->c.level) { |
23bbd2bb KO |
930 | struct bset_tree *t; |
931 | struct bkey_packed *k, *k2, *p; | |
932 | ||
933 | k = bch2_btree_node_iter_peek_all(node_iter, b); | |
1c6fdbd8 KO |
934 | |
935 | for_each_bset(b, t) { | |
23bbd2bb KO |
936 | bool set_pos = false; |
937 | ||
938 | if (node_iter->data[0].end == t->end_offset) | |
1c6fdbd8 KO |
939 | continue; |
940 | ||
23bbd2bb KO |
941 | k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t); |
942 | ||
943 | while ((p = bch2_bkey_prev_all(b, t, k2)) && | |
944 | bkey_iter_cmp(b, k, p) < 0) { | |
945 | k2 = p; | |
946 | set_pos = true; | |
1c6fdbd8 | 947 | } |
23bbd2bb KO |
948 | |
949 | if (set_pos) | |
950 | btree_node_iter_set_set_pos(node_iter, | |
951 | b, t, k2); | |
1c6fdbd8 KO |
952 | } |
953 | } | |
954 | } | |
955 | ||
9f6bd307 | 956 | void bch2_btree_node_iter_fix(struct btree_trans *trans, |
67e0dd8f | 957 | struct btree_path *path, |
216c9fac KO |
958 | struct btree *b, |
959 | struct btree_node_iter *node_iter, | |
960 | struct bkey_packed *where, | |
961 | unsigned clobber_u64s, | |
962 | unsigned new_u64s) | |
1c6fdbd8 | 963 | { |
216c9fac | 964 | struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where); |
67e0dd8f | 965 | struct btree_path *linked; |
1c6fdbd8 | 966 | |
67e0dd8f KO |
967 | if (node_iter != &path->l[b->c.level].iter) { |
968 | __bch2_btree_node_iter_fix(path, b, node_iter, t, | |
059e4134 | 969 | where, clobber_u64s, new_u64s); |
2dac0eae | 970 | |
29364f34 | 971 | if (bch2_debug_check_iterators) |
2dac0eae | 972 | bch2_btree_node_iter_verify(node_iter, b); |
059e4134 | 973 | } |
1c6fdbd8 | 974 | |
67e0dd8f | 975 | trans_for_each_path_with_node(trans, b, linked) { |
1c6fdbd8 | 976 | __bch2_btree_node_iter_fix(linked, b, |
059e4134 KO |
977 | &linked->l[b->c.level].iter, t, |
978 | where, clobber_u64s, new_u64s); | |
67e0dd8f | 979 | bch2_btree_path_verify_level(trans, linked, b->c.level); |
059e4134 | 980 | } |
1c6fdbd8 KO |
981 | } |
982 | ||
67e0dd8f KO |
983 | /* Btree path level: pointer to a particular btree node and node iter */ |
984 | ||
985 | static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c, | |
986 | struct btree_path_level *l, | |
1c6fdbd8 KO |
987 | struct bkey *u, |
988 | struct bkey_packed *k) | |
989 | { | |
1c6fdbd8 KO |
990 | if (unlikely(!k)) { |
991 | /* | |
992 | * signal to bch2_btree_iter_peek_slot() that we're currently at | |
993 | * a hole | |
994 | */ | |
26609b61 | 995 | u->type = KEY_TYPE_deleted; |
1c6fdbd8 KO |
996 | return bkey_s_c_null; |
997 | } | |
998 | ||
2ce8fbd9 | 999 | return bkey_disassemble(l->b, k, u); |
1c6fdbd8 KO |
1000 | } |
1001 | ||
67e0dd8f KO |
1002 | static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c, |
1003 | struct btree_path_level *l, | |
1004 | struct bkey *u) | |
1c6fdbd8 | 1005 | { |
67e0dd8f | 1006 | return __btree_iter_unpack(c, l, u, |
1c6fdbd8 KO |
1007 | bch2_btree_node_iter_peek_all(&l->iter, l->b)); |
1008 | } | |
1009 | ||
67e0dd8f KO |
1010 | static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans, |
1011 | struct btree_path *path, | |
1012 | struct btree_path_level *l, | |
1013 | struct bkey *u) | |
1c6fdbd8 | 1014 | { |
67e0dd8f | 1015 | struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u, |
1c6fdbd8 | 1016 | bch2_btree_node_iter_peek(&l->iter, l->b)); |
ca58cbd4 | 1017 | |
67e0dd8f KO |
1018 | path->pos = k.k ? k.k->p : l->b->key.k.p; |
1019 | trans->paths_sorted = false; | |
ca58cbd4 | 1020 | return k; |
1c6fdbd8 KO |
1021 | } |
1022 | ||
67e0dd8f KO |
1023 | static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans, |
1024 | struct btree_path *path, | |
1025 | struct btree_path_level *l, | |
1026 | struct bkey *u) | |
ccf5a109 | 1027 | { |
67e0dd8f | 1028 | struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u, |
ccf5a109 | 1029 | bch2_btree_node_iter_prev(&l->iter, l->b)); |
ca58cbd4 | 1030 | |
67e0dd8f KO |
1031 | path->pos = k.k ? k.k->p : l->b->data->min_key; |
1032 | trans->paths_sorted = false; | |
ca58cbd4 | 1033 | return k; |
ccf5a109 KO |
1034 | } |
1035 | ||
67e0dd8f KO |
1036 | static inline bool btree_path_advance_to_pos(struct btree_path *path, |
1037 | struct btree_path_level *l, | |
a00fd8c5 | 1038 | int max_advance) |
1c6fdbd8 | 1039 | { |
a00fd8c5 KO |
1040 | struct bkey_packed *k; |
1041 | int nr_advanced = 0; | |
1042 | ||
1043 | while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) && | |
67e0dd8f | 1044 | bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) { |
a00fd8c5 KO |
1045 | if (max_advance > 0 && nr_advanced >= max_advance) |
1046 | return false; | |
1047 | ||
1048 | bch2_btree_node_iter_advance(&l->iter, l->b); | |
1049 | nr_advanced++; | |
1050 | } | |
1051 | ||
1052 | return true; | |
1c6fdbd8 KO |
1053 | } |
1054 | ||
1055 | /* | |
1056 | * Verify that iterator for parent node points to child node: | |
1057 | */ | |
67e0dd8f KO |
1058 | static void btree_path_verify_new_node(struct btree_trans *trans, |
1059 | struct btree_path *path, struct btree *b) | |
1c6fdbd8 | 1060 | { |
5222a460 | 1061 | struct bch_fs *c = trans->c; |
67e0dd8f | 1062 | struct btree_path_level *l; |
1c6fdbd8 KO |
1063 | unsigned plevel; |
1064 | bool parent_locked; | |
1065 | struct bkey_packed *k; | |
1066 | ||
1067 | if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) | |
1068 | return; | |
1069 | ||
5222a460 KO |
1070 | if (trans->journal_replay_not_finished) |
1071 | return; | |
1072 | ||
c43a6ef9 | 1073 | plevel = b->c.level + 1; |
67e0dd8f | 1074 | if (!btree_path_node(path, plevel)) |
1c6fdbd8 KO |
1075 | return; |
1076 | ||
67e0dd8f | 1077 | parent_locked = btree_node_locked(path, plevel); |
1c6fdbd8 | 1078 | |
67e0dd8f | 1079 | if (!bch2_btree_node_relock(trans, path, plevel)) |
1c6fdbd8 KO |
1080 | return; |
1081 | ||
67e0dd8f | 1082 | l = &path->l[plevel]; |
1c6fdbd8 KO |
1083 | k = bch2_btree_node_iter_peek_all(&l->iter, l->b); |
1084 | if (!k || | |
1085 | bkey_deleted(k) || | |
1086 | bkey_cmp_left_packed(l->b, k, &b->key.k.p)) { | |
fa8e94fa KO |
1087 | struct printbuf buf1 = PRINTBUF; |
1088 | struct printbuf buf2 = PRINTBUF; | |
1089 | struct printbuf buf3 = PRINTBUF; | |
1090 | struct printbuf buf4 = PRINTBUF; | |
1c6fdbd8 KO |
1091 | struct bkey uk = bkey_unpack_key(b, k); |
1092 | ||
5222a460 | 1093 | bch2_dump_btree_node(c, l->b); |
fa8e94fa KO |
1094 | bch2_bpos_to_text(&buf1, path->pos); |
1095 | bch2_bkey_to_text(&buf2, &uk); | |
1096 | bch2_bpos_to_text(&buf3, b->data->min_key); | |
1097 | bch2_bpos_to_text(&buf3, b->data->max_key); | |
a2bfc841 | 1098 | panic("parent iter doesn't point to new node:\n" |
f020bfcd | 1099 | "iter pos %s %s\n" |
a2bfc841 | 1100 | "iter key %s\n" |
f020bfcd | 1101 | "new node %s-%s\n", |
fa8e94fa KO |
1102 | bch2_btree_ids[path->btree_id], |
1103 | buf1.buf, buf2.buf, buf3.buf, buf4.buf); | |
1c6fdbd8 KO |
1104 | } |
1105 | ||
1106 | if (!parent_locked) | |
1d3ecd7e | 1107 | btree_node_unlock(path, plevel); |
1c6fdbd8 KO |
1108 | } |
1109 | ||
67e0dd8f | 1110 | static inline void __btree_path_level_init(struct btree_path *path, |
78cf784e | 1111 | unsigned level) |
1c6fdbd8 | 1112 | { |
67e0dd8f | 1113 | struct btree_path_level *l = &path->l[level]; |
a00fd8c5 | 1114 | |
67e0dd8f | 1115 | bch2_btree_node_iter_init(&l->iter, l->b, &path->pos); |
1c6fdbd8 | 1116 | |
537c49d6 KO |
1117 | /* |
1118 | * Iterators to interior nodes should always be pointed at the first non | |
1119 | * whiteout: | |
1120 | */ | |
1121 | if (level) | |
1122 | bch2_btree_node_iter_peek(&l->iter, l->b); | |
1c6fdbd8 KO |
1123 | } |
1124 | ||
67e0dd8f KO |
1125 | static inline void btree_path_level_init(struct btree_trans *trans, |
1126 | struct btree_path *path, | |
78cf784e | 1127 | struct btree *b) |
1c6fdbd8 | 1128 | { |
67e0dd8f | 1129 | BUG_ON(path->cached); |
2ca88e5a | 1130 | |
67e0dd8f | 1131 | btree_path_verify_new_node(trans, path, b); |
1c6fdbd8 | 1132 | |
67e0dd8f | 1133 | EBUG_ON(!btree_path_pos_in_node(path, b)); |
c43a6ef9 | 1134 | EBUG_ON(b->c.lock.state.seq & 1); |
1c6fdbd8 | 1135 | |
67e0dd8f KO |
1136 | path->l[b->c.level].lock_seq = b->c.lock.state.seq; |
1137 | path->l[b->c.level].b = b; | |
1138 | __btree_path_level_init(path, b->c.level); | |
1c6fdbd8 KO |
1139 | } |
1140 | ||
67e0dd8f KO |
1141 | /* Btree path: fixups after btree node updates: */ |
1142 | ||
1c6fdbd8 KO |
1143 | /* |
1144 | * A btree node is being replaced - update the iterator to point to the new | |
1145 | * node: | |
1146 | */ | |
f7a966a3 | 1147 | void bch2_trans_node_add(struct btree_trans *trans, struct btree *b) |
1c6fdbd8 | 1148 | { |
67e0dd8f | 1149 | struct btree_path *path; |
1c6fdbd8 | 1150 | |
67e0dd8f KO |
1151 | trans_for_each_path(trans, path) |
1152 | if (!path->cached && | |
1153 | btree_path_pos_in_node(path, b)) { | |
1d3ecd7e KO |
1154 | enum btree_node_locked_type t = |
1155 | btree_lock_want(path, b->c.level); | |
1c6fdbd8 | 1156 | |
1d3ecd7e KO |
1157 | if (path->nodes_locked && |
1158 | t != BTREE_NODE_UNLOCKED) { | |
1159 | btree_node_unlock(path, b->c.level); | |
c43a6ef9 | 1160 | six_lock_increment(&b->c.lock, (enum six_lock_type) t); |
3074bc0f | 1161 | mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t); |
1c6fdbd8 KO |
1162 | } |
1163 | ||
67e0dd8f | 1164 | btree_path_level_init(trans, path, b); |
1c6fdbd8 | 1165 | } |
1c6fdbd8 KO |
1166 | } |
1167 | ||
1c6fdbd8 KO |
1168 | /* |
1169 | * A btree node has been modified in such a way as to invalidate iterators - fix | |
1170 | * them: | |
1171 | */ | |
f7a966a3 | 1172 | void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b) |
1c6fdbd8 | 1173 | { |
67e0dd8f | 1174 | struct btree_path *path; |
1c6fdbd8 | 1175 | |
67e0dd8f KO |
1176 | trans_for_each_path_with_node(trans, b, path) |
1177 | __btree_path_level_init(path, b->c.level); | |
1c6fdbd8 KO |
1178 | } |
1179 | ||
67e0dd8f KO |
1180 | /* Btree path: traverse, set_pos: */ |
1181 | ||
bd2bb273 KO |
1182 | static int lock_root_check_fn(struct six_lock *lock, void *p) |
1183 | { | |
1184 | struct btree *b = container_of(lock, struct btree, c.lock); | |
1185 | struct btree **rootp = p; | |
1186 | ||
1187 | return b == *rootp ? 0 : -1; | |
1188 | } | |
1189 | ||
67e0dd8f KO |
1190 | static inline int btree_path_lock_root(struct btree_trans *trans, |
1191 | struct btree_path *path, | |
a301dc38 KO |
1192 | unsigned depth_want, |
1193 | unsigned long trace_ip) | |
1c6fdbd8 | 1194 | { |
e5af273f | 1195 | struct bch_fs *c = trans->c; |
67e0dd8f | 1196 | struct btree *b, **rootp = &c->btree_roots[path->btree_id].b; |
1c6fdbd8 KO |
1197 | enum six_lock_type lock_type; |
1198 | unsigned i; | |
1199 | ||
67e0dd8f | 1200 | EBUG_ON(path->nodes_locked); |
1c6fdbd8 KO |
1201 | |
1202 | while (1) { | |
bd2bb273 | 1203 | b = READ_ONCE(*rootp); |
67e0dd8f | 1204 | path->level = READ_ONCE(b->c.level); |
1c6fdbd8 | 1205 | |
67e0dd8f | 1206 | if (unlikely(path->level < depth_want)) { |
1c6fdbd8 KO |
1207 | /* |
1208 | * the root is at a lower depth than the depth we want: | |
1209 | * got to the end of the btree, or we're walking nodes | |
1210 | * greater than some depth and there are no nodes >= | |
1211 | * that depth | |
1212 | */ | |
67e0dd8f KO |
1213 | path->level = depth_want; |
1214 | for (i = path->level; i < BTREE_MAX_DEPTH; i++) | |
1215 | path->l[i].b = NULL; | |
8812600c | 1216 | return 1; |
1c6fdbd8 KO |
1217 | } |
1218 | ||
67e0dd8f KO |
1219 | lock_type = __btree_lock_want(path, path->level); |
1220 | if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX, | |
1221 | path->level, lock_type, | |
a301dc38 | 1222 | lock_root_check_fn, rootp, |
e5af273f KO |
1223 | trace_ip))) { |
1224 | if (trans->restarted) | |
1225 | return -EINTR; | |
1226 | continue; | |
1227 | } | |
1c6fdbd8 | 1228 | |
bd2bb273 | 1229 | if (likely(b == READ_ONCE(*rootp) && |
67e0dd8f | 1230 | b->c.level == path->level && |
1c6fdbd8 | 1231 | !race_fault())) { |
67e0dd8f KO |
1232 | for (i = 0; i < path->level; i++) |
1233 | path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; | |
1234 | path->l[path->level].b = b; | |
1235 | for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++) | |
1236 | path->l[i].b = NULL; | |
1237 | ||
3074bc0f | 1238 | mark_btree_node_locked(path, path->level, lock_type); |
67e0dd8f | 1239 | btree_path_level_init(trans, path, b); |
1c6fdbd8 | 1240 | return 0; |
1c6fdbd8 KO |
1241 | } |
1242 | ||
c43a6ef9 | 1243 | six_unlock_type(&b->c.lock, lock_type); |
1c6fdbd8 KO |
1244 | } |
1245 | } | |
1246 | ||
1247 | noinline | |
67e0dd8f | 1248 | static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path) |
1c6fdbd8 | 1249 | { |
9f6bd307 | 1250 | struct bch_fs *c = trans->c; |
67e0dd8f | 1251 | struct btree_path_level *l = path_l(path); |
1c6fdbd8 KO |
1252 | struct btree_node_iter node_iter = l->iter; |
1253 | struct bkey_packed *k; | |
07a1006a | 1254 | struct bkey_buf tmp; |
9e5e5b9e | 1255 | unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) |
67e0dd8f KO |
1256 | ? (path->level > 1 ? 0 : 2) |
1257 | : (path->level > 1 ? 1 : 16); | |
1258 | bool was_locked = btree_node_locked(path, path->level); | |
8b3e9bd6 | 1259 | int ret = 0; |
1c6fdbd8 | 1260 | |
07a1006a KO |
1261 | bch2_bkey_buf_init(&tmp); |
1262 | ||
8b3e9bd6 | 1263 | while (nr && !ret) { |
67e0dd8f | 1264 | if (!bch2_btree_node_relock(trans, path, path->level)) |
07a1006a | 1265 | break; |
1c6fdbd8 KO |
1266 | |
1267 | bch2_btree_node_iter_advance(&node_iter, l->b); | |
1268 | k = bch2_btree_node_iter_peek(&node_iter, l->b); | |
1269 | if (!k) | |
1270 | break; | |
1271 | ||
07a1006a | 1272 | bch2_bkey_buf_unpack(&tmp, c, l->b, k); |
67e0dd8f KO |
1273 | ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id, |
1274 | path->level - 1); | |
1c6fdbd8 KO |
1275 | } |
1276 | ||
1277 | if (!was_locked) | |
67e0dd8f | 1278 | btree_node_unlock(path, path->level); |
07a1006a KO |
1279 | |
1280 | bch2_bkey_buf_exit(&tmp, c); | |
8b3e9bd6 | 1281 | return ret; |
1c6fdbd8 KO |
1282 | } |
1283 | ||
5222a460 KO |
1284 | static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path, |
1285 | struct btree_and_journal_iter *jiter) | |
1286 | { | |
1287 | struct bch_fs *c = trans->c; | |
1288 | struct bkey_s_c k; | |
1289 | struct bkey_buf tmp; | |
1290 | unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) | |
1291 | ? (path->level > 1 ? 0 : 2) | |
1292 | : (path->level > 1 ? 1 : 16); | |
1293 | bool was_locked = btree_node_locked(path, path->level); | |
1294 | int ret = 0; | |
1295 | ||
1296 | bch2_bkey_buf_init(&tmp); | |
1297 | ||
1298 | while (nr && !ret) { | |
1299 | if (!bch2_btree_node_relock(trans, path, path->level)) | |
1300 | break; | |
1301 | ||
1302 | bch2_btree_and_journal_iter_advance(jiter); | |
1303 | k = bch2_btree_and_journal_iter_peek(jiter); | |
1304 | if (!k.k) | |
1305 | break; | |
1306 | ||
1307 | bch2_bkey_buf_reassemble(&tmp, c, k); | |
1308 | ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id, | |
1309 | path->level - 1); | |
1310 | } | |
1311 | ||
1312 | if (!was_locked) | |
1313 | btree_node_unlock(path, path->level); | |
1314 | ||
1315 | bch2_bkey_buf_exit(&tmp, c); | |
1316 | return ret; | |
1317 | } | |
1318 | ||
78cf784e | 1319 | static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, |
67e0dd8f | 1320 | struct btree_path *path, |
72141e1f KO |
1321 | unsigned plevel, struct btree *b) |
1322 | { | |
67e0dd8f KO |
1323 | struct btree_path_level *l = &path->l[plevel]; |
1324 | bool locked = btree_node_locked(path, plevel); | |
72141e1f KO |
1325 | struct bkey_packed *k; |
1326 | struct bch_btree_ptr_v2 *bp; | |
1327 | ||
67e0dd8f | 1328 | if (!bch2_btree_node_relock(trans, path, plevel)) |
72141e1f KO |
1329 | return; |
1330 | ||
1331 | k = bch2_btree_node_iter_peek_all(&l->iter, l->b); | |
1332 | BUG_ON(k->type != KEY_TYPE_btree_ptr_v2); | |
1333 | ||
1334 | bp = (void *) bkeyp_val(&l->b->format, k); | |
1335 | bp->mem_ptr = (unsigned long)b; | |
1336 | ||
1337 | if (!locked) | |
67e0dd8f | 1338 | btree_node_unlock(path, plevel); |
72141e1f KO |
1339 | } |
1340 | ||
5222a460 KO |
1341 | static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, |
1342 | struct btree_path *path, | |
1343 | unsigned flags, | |
1344 | struct bkey_buf *out) | |
1345 | { | |
1346 | struct bch_fs *c = trans->c; | |
1347 | struct btree_path_level *l = path_l(path); | |
1348 | struct btree_and_journal_iter jiter; | |
1349 | struct bkey_s_c k; | |
1350 | int ret = 0; | |
1351 | ||
1352 | __bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos); | |
1353 | ||
1354 | k = bch2_btree_and_journal_iter_peek(&jiter); | |
1355 | ||
1356 | bch2_bkey_buf_reassemble(out, c, k); | |
1357 | ||
1358 | if (flags & BTREE_ITER_PREFETCH) | |
1359 | ret = btree_path_prefetch_j(trans, path, &jiter); | |
1360 | ||
1361 | bch2_btree_and_journal_iter_exit(&jiter); | |
1362 | return ret; | |
1363 | } | |
1364 | ||
67e0dd8f KO |
1365 | static __always_inline int btree_path_down(struct btree_trans *trans, |
1366 | struct btree_path *path, | |
1367 | unsigned flags, | |
a301dc38 | 1368 | unsigned long trace_ip) |
1c6fdbd8 | 1369 | { |
6e075b54 | 1370 | struct bch_fs *c = trans->c; |
67e0dd8f | 1371 | struct btree_path_level *l = path_l(path); |
1c6fdbd8 | 1372 | struct btree *b; |
67e0dd8f KO |
1373 | unsigned level = path->level - 1; |
1374 | enum six_lock_type lock_type = __btree_lock_want(path, level); | |
07a1006a KO |
1375 | struct bkey_buf tmp; |
1376 | int ret; | |
1c6fdbd8 | 1377 | |
67e0dd8f | 1378 | EBUG_ON(!btree_node_locked(path, path->level)); |
1c6fdbd8 | 1379 | |
07a1006a | 1380 | bch2_bkey_buf_init(&tmp); |
5222a460 KO |
1381 | |
1382 | if (unlikely(trans->journal_replay_not_finished)) { | |
1383 | ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp); | |
1384 | if (ret) | |
1385 | goto err; | |
1386 | } else { | |
1387 | bch2_bkey_buf_unpack(&tmp, c, l->b, | |
1388 | bch2_btree_node_iter_peek(&l->iter, l->b)); | |
1389 | ||
1390 | if (flags & BTREE_ITER_PREFETCH) { | |
1391 | ret = btree_path_prefetch(trans, path); | |
1392 | if (ret) | |
1393 | goto err; | |
1394 | } | |
1395 | } | |
1c6fdbd8 | 1396 | |
67e0dd8f | 1397 | b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip); |
07a1006a KO |
1398 | ret = PTR_ERR_OR_ZERO(b); |
1399 | if (unlikely(ret)) | |
1400 | goto err; | |
1c6fdbd8 | 1401 | |
3074bc0f | 1402 | mark_btree_node_locked(path, level, lock_type); |
67e0dd8f | 1403 | btree_path_level_init(trans, path, b); |
1c6fdbd8 | 1404 | |
5222a460 KO |
1405 | if (likely(!trans->journal_replay_not_finished && |
1406 | tmp.k->k.type == KEY_TYPE_btree_ptr_v2) && | |
07a1006a | 1407 | unlikely(b != btree_node_mem_ptr(tmp.k))) |
67e0dd8f | 1408 | btree_node_mem_ptr_set(trans, path, level + 1, b); |
72141e1f | 1409 | |
67e0dd8f KO |
1410 | if (btree_node_read_locked(path, level + 1)) |
1411 | btree_node_unlock(path, level + 1); | |
1412 | path->level = level; | |
c205321b | 1413 | |
67e0dd8f | 1414 | bch2_btree_path_verify_locks(path); |
07a1006a KO |
1415 | err: |
1416 | bch2_bkey_buf_exit(&tmp, c); | |
1417 | return ret; | |
1c6fdbd8 KO |
1418 | } |
1419 | ||
67e0dd8f KO |
1420 | static int btree_path_traverse_one(struct btree_trans *, struct btree_path *, |
1421 | unsigned, unsigned long); | |
1c6fdbd8 | 1422 | |
8f9ad91a | 1423 | static int bch2_btree_path_traverse_all(struct btree_trans *trans) |
1c6fdbd8 | 1424 | { |
e542029e | 1425 | struct bch_fs *c = trans->c; |
eb7bd15f | 1426 | struct btree_path *path, *prev; |
8f9ad91a KO |
1427 | unsigned long trace_ip = _RET_IP_; |
1428 | int i, ret = 0; | |
e542029e | 1429 | |
2ca88e5a KO |
1430 | if (trans->in_traverse_all) |
1431 | return -EINTR; | |
1432 | ||
1433 | trans->in_traverse_all = true; | |
1434 | retry_all: | |
eb7bd15f | 1435 | prev = NULL; |
e5af273f KO |
1436 | trans->restarted = false; |
1437 | ||
67e0dd8f KO |
1438 | trans_for_each_path(trans, path) |
1439 | path->should_be_locked = false; | |
e542029e | 1440 | |
67e0dd8f | 1441 | btree_trans_sort_paths(trans); |
2527dd91 | 1442 | |
67e0dd8f | 1443 | trans_for_each_path_inorder_reverse(trans, path, i) { |
0423fb71 | 1444 | if (prev) { |
67e0dd8f KO |
1445 | if (path->btree_id == prev->btree_id && |
1446 | path->locks_want < prev->locks_want) | |
1447 | __bch2_btree_path_upgrade(trans, path, prev->locks_want); | |
1448 | else if (!path->locks_want && prev->locks_want) | |
1449 | __bch2_btree_path_upgrade(trans, path, 1); | |
0423fb71 | 1450 | } |
2527dd91 | 1451 | |
67e0dd8f | 1452 | prev = path; |
2527dd91 KO |
1453 | } |
1454 | ||
58fbf808 | 1455 | bch2_trans_unlock(trans); |
a301dc38 | 1456 | cond_resched(); |
1c6fdbd8 | 1457 | |
8f9ad91a | 1458 | if (unlikely(trans->memory_allocation_failure)) { |
1c6fdbd8 KO |
1459 | struct closure cl; |
1460 | ||
1461 | closure_init_stack(&cl); | |
1462 | ||
1463 | do { | |
1464 | ret = bch2_btree_cache_cannibalize_lock(c, &cl); | |
1465 | closure_sync(&cl); | |
1466 | } while (ret); | |
1467 | } | |
1468 | ||
1c6fdbd8 | 1469 | /* Now, redo traversals in correct order: */ |
0423fb71 KO |
1470 | i = 0; |
1471 | while (i < trans->nr_sorted) { | |
67e0dd8f | 1472 | path = trans->paths + trans->sorted[i]; |
2ca88e5a | 1473 | |
33aa419d KO |
1474 | /* |
1475 | * Traversing a path can cause another path to be added at about | |
1476 | * the same position: | |
1477 | */ | |
1478 | if (path->uptodate) { | |
1479 | ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_); | |
a897ef68 | 1480 | if (ret == -EINTR || ret == -ENOMEM) |
33aa419d | 1481 | goto retry_all; |
a897ef68 KO |
1482 | if (ret) |
1483 | goto err; | |
33aa419d | 1484 | } else { |
0423fb71 | 1485 | i++; |
33aa419d | 1486 | } |
e542029e | 1487 | } |
0423fb71 KO |
1488 | |
1489 | /* | |
1490 | * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock() | |
67e0dd8f | 1491 | * and relock(), relock() won't relock since path->should_be_locked |
0423fb71 KO |
1492 | * isn't set yet, which is all fine |
1493 | */ | |
67e0dd8f KO |
1494 | trans_for_each_path(trans, path) |
1495 | BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE); | |
a897ef68 | 1496 | err: |
1c6fdbd8 | 1497 | bch2_btree_cache_cannibalize_unlock(c); |
2ca88e5a | 1498 | |
669f87a5 KO |
1499 | trans->in_traverse_all = false; |
1500 | ||
1501 | trace_trans_traverse_all(trans->fn, trace_ip); | |
1c6fdbd8 | 1502 | return ret; |
bf7b87a4 | 1503 | } |
1c6fdbd8 | 1504 | |
67e0dd8f KO |
1505 | static inline bool btree_path_good_node(struct btree_trans *trans, |
1506 | struct btree_path *path, | |
f4b61341 KO |
1507 | unsigned l, int check_pos) |
1508 | { | |
67e0dd8f KO |
1509 | if (!is_btree_node(path, l) || |
1510 | !bch2_btree_node_relock(trans, path, l)) | |
f4b61341 KO |
1511 | return false; |
1512 | ||
67e0dd8f | 1513 | if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b)) |
f4b61341 | 1514 | return false; |
67e0dd8f | 1515 | if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b)) |
f4b61341 KO |
1516 | return false; |
1517 | return true; | |
1518 | } | |
1519 | ||
67e0dd8f KO |
1520 | static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans, |
1521 | struct btree_path *path, | |
f4b61341 | 1522 | int check_pos) |
1c6fdbd8 | 1523 | { |
8ee0134e | 1524 | unsigned i, l = path->level; |
1d3ecd7e | 1525 | |
67e0dd8f KO |
1526 | while (btree_path_node(path, l) && |
1527 | !btree_path_good_node(trans, path, l, check_pos)) { | |
1528 | btree_node_unlock(path, l); | |
1529 | path->l[l].b = BTREE_ITER_NO_NODE_UP; | |
1c6fdbd8 KO |
1530 | l++; |
1531 | } | |
1532 | ||
8ee0134e KO |
1533 | /* If we need intent locks, take them too: */ |
1534 | for (i = l + 1; | |
1535 | i < path->locks_want && btree_path_node(path, i); | |
1536 | i++) | |
1537 | if (!bch2_btree_node_relock(trans, path, i)) | |
1538 | while (l <= i) { | |
1539 | btree_node_unlock(path, l); | |
1540 | path->l[l].b = BTREE_ITER_NO_NODE_UP; | |
1541 | l++; | |
1542 | } | |
1543 | ||
1c6fdbd8 KO |
1544 | return l; |
1545 | } | |
1546 | ||
1547 | /* | |
1548 | * This is the main state machine for walking down the btree - walks down to a | |
1549 | * specified depth | |
1550 | * | |
1551 | * Returns 0 on success, -EIO on error (error reading in a btree node). | |
1552 | * | |
1553 | * On error, caller (peek_node()/peek_key()) must return NULL; the error is | |
b7607ce9 | 1554 | * stashed in the iterator and returned from bch2_trans_exit(). |
1c6fdbd8 | 1555 | */ |
67e0dd8f KO |
1556 | static int btree_path_traverse_one(struct btree_trans *trans, |
1557 | struct btree_path *path, | |
1558 | unsigned flags, | |
a301dc38 | 1559 | unsigned long trace_ip) |
1c6fdbd8 | 1560 | { |
8ee0134e | 1561 | unsigned depth_want = path->level; |
531a0095 | 1562 | int ret = 0; |
1c6fdbd8 | 1563 | |
979735df KO |
1564 | if (unlikely(trans->restarted)) { |
1565 | ret = -EINTR; | |
1566 | goto out; | |
1567 | } | |
1568 | ||
e829b717 | 1569 | /* |
67e0dd8f KO |
1570 | * Ensure we obey path->should_be_locked: if it's set, we can't unlock |
1571 | * and re-traverse the path without a transaction restart: | |
e829b717 | 1572 | */ |
67e0dd8f KO |
1573 | if (path->should_be_locked) { |
1574 | ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR; | |
e829b717 KO |
1575 | goto out; |
1576 | } | |
1577 | ||
67e0dd8f KO |
1578 | if (path->cached) { |
1579 | ret = bch2_btree_path_traverse_cached(trans, path, flags); | |
531a0095 KO |
1580 | goto out; |
1581 | } | |
2ca88e5a | 1582 | |
67e0dd8f | 1583 | if (unlikely(path->level >= BTREE_MAX_DEPTH)) |
531a0095 | 1584 | goto out; |
2ca88e5a | 1585 | |
67e0dd8f | 1586 | path->level = btree_path_up_until_good_node(trans, path, 0); |
1c6fdbd8 | 1587 | |
1c6fdbd8 | 1588 | /* |
67e0dd8f | 1589 | * Note: path->nodes[path->level] may be temporarily NULL here - that |
1c6fdbd8 KO |
1590 | * would indicate to other code that we got to the end of the btree, |
1591 | * here it indicates that relocking the root failed - it's critical that | |
67e0dd8f | 1592 | * btree_path_lock_root() comes next and that it can't fail |
1c6fdbd8 | 1593 | */ |
67e0dd8f KO |
1594 | while (path->level > depth_want) { |
1595 | ret = btree_path_node(path, path->level) | |
1596 | ? btree_path_down(trans, path, flags, trace_ip) | |
1597 | : btree_path_lock_root(trans, path, depth_want, trace_ip); | |
1c6fdbd8 | 1598 | if (unlikely(ret)) { |
531a0095 KO |
1599 | if (ret == 1) { |
1600 | /* | |
f21566f1 KO |
1601 | * No nodes at this level - got to the end of |
1602 | * the btree: | |
531a0095 KO |
1603 | */ |
1604 | ret = 0; | |
1605 | goto out; | |
1606 | } | |
8812600c | 1607 | |
67e0dd8f KO |
1608 | __bch2_btree_path_unlock(path); |
1609 | path->level = depth_want; | |
2ca88e5a | 1610 | |
67e0dd8f KO |
1611 | if (ret == -EIO) |
1612 | path->l[path->level].b = | |
2ca88e5a | 1613 | BTREE_ITER_NO_NODE_ERROR; |
67e0dd8f KO |
1614 | else |
1615 | path->l[path->level].b = | |
2ca88e5a | 1616 | BTREE_ITER_NO_NODE_DOWN; |
531a0095 | 1617 | goto out; |
1c6fdbd8 KO |
1618 | } |
1619 | } | |
1620 | ||
67e0dd8f | 1621 | path->uptodate = BTREE_ITER_UPTODATE; |
531a0095 | 1622 | out: |
e5af273f | 1623 | BUG_ON((ret == -EINTR) != !!trans->restarted); |
67e0dd8f | 1624 | bch2_btree_path_verify(trans, path); |
531a0095 | 1625 | return ret; |
1c6fdbd8 KO |
1626 | } |
1627 | ||
67e0dd8f KO |
1628 | int __must_check bch2_btree_path_traverse(struct btree_trans *trans, |
1629 | struct btree_path *path, unsigned flags) | |
1c6fdbd8 | 1630 | { |
67e0dd8f KO |
1631 | if (path->uptodate < BTREE_ITER_NEED_RELOCK) |
1632 | return 0; | |
1633 | ||
e5fa91d7 | 1634 | return bch2_trans_cond_resched(trans) ?: |
67e0dd8f | 1635 | btree_path_traverse_one(trans, path, flags, _RET_IP_); |
1c6fdbd8 KO |
1636 | } |
1637 | ||
67e0dd8f KO |
1638 | static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst, |
1639 | struct btree_path *src) | |
1640 | { | |
1641 | unsigned i, offset = offsetof(struct btree_path, pos); | |
1642 | ||
1643 | memcpy((void *) dst + offset, | |
1644 | (void *) src + offset, | |
1645 | sizeof(struct btree_path) - offset); | |
1646 | ||
1647 | for (i = 0; i < BTREE_MAX_DEPTH; i++) | |
1648 | if (btree_node_locked(dst, i)) | |
1649 | six_lock_increment(&dst->l[i].b->c.lock, | |
1650 | __btree_lock_want(dst, i)); | |
1651 | ||
1652 | trans->paths_sorted = false; | |
1653 | } | |
1654 | ||
c404f203 KO |
1655 | static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src, |
1656 | bool intent) | |
67e0dd8f | 1657 | { |
c404f203 | 1658 | struct btree_path *new = btree_path_alloc(trans, src); |
67e0dd8f | 1659 | |
c404f203 | 1660 | btree_path_copy(trans, new, src); |
67e0dd8f | 1661 | __btree_path_get(new, intent); |
c404f203 KO |
1662 | return new; |
1663 | } | |
1664 | ||
d8648425 KO |
1665 | struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans, |
1666 | struct btree_path *path, bool intent) | |
c404f203 | 1667 | { |
67e0dd8f | 1668 | __btree_path_put(path, intent); |
c404f203 | 1669 | path = btree_path_clone(trans, path, intent); |
67e0dd8f KO |
1670 | path->preserve = false; |
1671 | #ifdef CONFIG_BCACHEFS_DEBUG | |
1672 | path->ip_allocated = _RET_IP_; | |
1673 | #endif | |
d8648425 | 1674 | path->should_be_locked = false; |
67e0dd8f KO |
1675 | return path; |
1676 | } | |
1677 | ||
ce91abd6 | 1678 | struct btree_path * __must_check |
67e0dd8f KO |
1679 | __bch2_btree_path_set_pos(struct btree_trans *trans, |
1680 | struct btree_path *path, struct bpos new_pos, | |
1681 | bool intent, int cmp) | |
1682 | { | |
67e0dd8f KO |
1683 | unsigned l = path->level; |
1684 | ||
1685 | EBUG_ON(trans->restarted); | |
1686 | EBUG_ON(!path->ref); | |
1687 | ||
1688 | path = bch2_btree_path_make_mut(trans, path, intent); | |
1689 | ||
1690 | path->pos = new_pos; | |
67e0dd8f KO |
1691 | trans->paths_sorted = false; |
1692 | ||
1693 | if (unlikely(path->cached)) { | |
1694 | btree_node_unlock(path, 0); | |
1695 | path->l[0].b = BTREE_ITER_NO_NODE_CACHED; | |
1696 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); | |
1697 | goto out; | |
1698 | } | |
1699 | ||
1700 | l = btree_path_up_until_good_node(trans, path, cmp); | |
1701 | ||
1702 | if (btree_path_node(path, l)) { | |
7071878b | 1703 | BUG_ON(!btree_node_locked(path, l)); |
67e0dd8f KO |
1704 | /* |
1705 | * We might have to skip over many keys, or just a few: try | |
1706 | * advancing the node iterator, and if we have to skip over too | |
1707 | * many keys just reinit it (or if we're rewinding, since that | |
1708 | * is expensive). | |
1709 | */ | |
1710 | if (cmp < 0 || | |
1711 | !btree_path_advance_to_pos(path, &path->l[l], 8)) | |
1712 | __btree_path_level_init(path, l); | |
67e0dd8f KO |
1713 | } |
1714 | ||
1d3ecd7e | 1715 | if (l != path->level) { |
67e0dd8f | 1716 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); |
1d3ecd7e KO |
1717 | __bch2_btree_path_unlock(path); |
1718 | } | |
67e0dd8f KO |
1719 | out: |
1720 | bch2_btree_path_verify(trans, path); | |
67e0dd8f KO |
1721 | return path; |
1722 | } | |
1723 | ||
67e0dd8f KO |
1724 | /* Btree path: main interface: */ |
1725 | ||
1726 | static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path) | |
1727 | { | |
1728 | struct btree_path *next; | |
1729 | ||
1730 | next = prev_btree_path(trans, path); | |
1731 | if (next && !btree_path_cmp(next, path)) | |
1732 | return next; | |
1733 | ||
1734 | next = next_btree_path(trans, path); | |
1735 | if (next && !btree_path_cmp(next, path)) | |
1736 | return next; | |
1737 | ||
1738 | return NULL; | |
1739 | } | |
1740 | ||
9a74f63c | 1741 | static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path) |
67e0dd8f KO |
1742 | { |
1743 | struct btree_path *next; | |
1744 | ||
1745 | next = prev_btree_path(trans, path); | |
9a74f63c KO |
1746 | if (next && next->level == path->level && path_l(next)->b == path_l(path)->b) |
1747 | return next; | |
67e0dd8f KO |
1748 | |
1749 | next = next_btree_path(trans, path); | |
9a74f63c KO |
1750 | if (next && next->level == path->level && path_l(next)->b == path_l(path)->b) |
1751 | return next; | |
67e0dd8f | 1752 | |
9a74f63c | 1753 | return NULL; |
67e0dd8f KO |
1754 | } |
1755 | ||
1756 | static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path) | |
08070cba | 1757 | { |
67e0dd8f KO |
1758 | __bch2_btree_path_unlock(path); |
1759 | btree_path_list_remove(trans, path); | |
1760 | trans->paths_allocated &= ~(1ULL << path->idx); | |
08070cba KO |
1761 | } |
1762 | ||
67e0dd8f KO |
1763 | void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) |
1764 | { | |
1765 | struct btree_path *dup; | |
1766 | ||
1767 | EBUG_ON(trans->paths + path->idx != path); | |
1768 | EBUG_ON(!path->ref); | |
1769 | ||
1770 | if (!__btree_path_put(path, intent)) | |
1771 | return; | |
1772 | ||
1773 | /* | |
1774 | * Perhaps instead we should check for duplicate paths in traverse_all: | |
1775 | */ | |
1776 | if (path->preserve && | |
1777 | (dup = have_path_at_pos(trans, path))) { | |
1778 | dup->preserve = true; | |
1779 | path->preserve = false; | |
9a74f63c | 1780 | goto free; |
67e0dd8f KO |
1781 | } |
1782 | ||
1783 | if (!path->preserve && | |
9a74f63c KO |
1784 | (dup = have_node_at_pos(trans, path))) |
1785 | goto free; | |
1786 | return; | |
1787 | free: | |
1788 | if (path->should_be_locked && | |
1789 | !btree_node_locked(dup, path->level)) | |
1790 | return; | |
1791 | ||
1792 | dup->should_be_locked |= path->should_be_locked; | |
1793 | __bch2_path_free(trans, path); | |
67e0dd8f KO |
1794 | } |
1795 | ||
8570d775 | 1796 | void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) |
67e0dd8f | 1797 | { |
67e0dd8f | 1798 | struct btree_insert_entry *i; |
2158fe46 | 1799 | |
8570d775 KO |
1800 | pr_buf(buf, "transaction updates for %s journal seq %llu", |
1801 | trans->fn, trans->journal_res.seq); | |
1802 | pr_newline(buf); | |
1803 | pr_indent_push(buf, 2); | |
2158fe46 KO |
1804 | |
1805 | trans_for_each_update(trans, i) { | |
1806 | struct bkey_s_c old = { &i->old_k, i->old_v }; | |
1807 | ||
8570d775 | 1808 | pr_buf(buf, "update: btree=%s cached=%u %pS", |
2158fe46 | 1809 | bch2_btree_ids[i->btree_id], |
8570d775 KO |
1810 | i->cached, |
1811 | (void *) i->ip_allocated); | |
1812 | pr_newline(buf); | |
1813 | ||
1814 | pr_buf(buf, " old "); | |
1815 | bch2_bkey_val_to_text(buf, trans->c, old); | |
1816 | pr_newline(buf); | |
1817 | ||
1818 | pr_buf(buf, " new "); | |
1819 | bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k)); | |
1820 | pr_newline(buf); | |
2158fe46 KO |
1821 | } |
1822 | ||
8570d775 KO |
1823 | pr_indent_pop(buf, 2); |
1824 | } | |
1825 | ||
1826 | noinline __cold | |
1827 | void bch2_dump_trans_updates(struct btree_trans *trans) | |
1828 | { | |
1829 | struct printbuf buf = PRINTBUF; | |
1830 | ||
1831 | bch2_trans_updates_to_text(&buf, trans); | |
1832 | bch_err(trans->c, "%s", buf.buf); | |
1833 | printbuf_exit(&buf); | |
2158fe46 KO |
1834 | } |
1835 | ||
1836 | noinline __cold | |
1837 | void bch2_dump_trans_paths_updates(struct btree_trans *trans) | |
1838 | { | |
1839 | struct btree_path *path; | |
1840 | struct printbuf buf = PRINTBUF; | |
67e0dd8f | 1841 | unsigned idx; |
67e0dd8f KO |
1842 | |
1843 | btree_trans_sort_paths(trans); | |
1844 | ||
fa8e94fa | 1845 | trans_for_each_path_inorder(trans, path, idx) { |
2158fe46 | 1846 | printbuf_reset(&buf); |
fa8e94fa | 1847 | |
2158fe46 | 1848 | bch2_bpos_to_text(&buf, path->pos); |
fa8e94fa | 1849 | |
a0a07c59 | 1850 | printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n", |
67e0dd8f | 1851 | path->idx, path->ref, path->intent_ref, |
32b26e8c KO |
1852 | path->should_be_locked ? " S" : "", |
1853 | path->preserve ? " P" : "", | |
67e0dd8f | 1854 | bch2_btree_ids[path->btree_id], |
a0a07c59 | 1855 | path->level, |
2158fe46 | 1856 | buf.buf, |
32b26e8c | 1857 | path->nodes_locked, |
67e0dd8f KO |
1858 | #ifdef CONFIG_BCACHEFS_DEBUG |
1859 | (void *) path->ip_allocated | |
1860 | #else | |
1861 | NULL | |
1862 | #endif | |
1863 | ); | |
fa8e94fa | 1864 | } |
67e0dd8f | 1865 | |
2158fe46 | 1866 | printbuf_exit(&buf); |
60816d9b | 1867 | |
2158fe46 | 1868 | bch2_dump_trans_updates(trans); |
67e0dd8f KO |
1869 | } |
1870 | ||
1871 | static struct btree_path *btree_path_alloc(struct btree_trans *trans, | |
1872 | struct btree_path *pos) | |
1873 | { | |
1874 | struct btree_path *path; | |
1875 | unsigned idx; | |
1876 | ||
1877 | if (unlikely(trans->paths_allocated == | |
1878 | ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) { | |
1879 | bch2_dump_trans_paths_updates(trans); | |
1880 | panic("trans path oveflow\n"); | |
1881 | } | |
1882 | ||
1883 | idx = __ffs64(~trans->paths_allocated); | |
1884 | trans->paths_allocated |= 1ULL << idx; | |
1885 | ||
1886 | path = &trans->paths[idx]; | |
1887 | ||
1888 | path->idx = idx; | |
1889 | path->ref = 0; | |
1890 | path->intent_ref = 0; | |
1891 | path->nodes_locked = 0; | |
1892 | path->nodes_intent_locked = 0; | |
1893 | ||
1894 | btree_path_list_add(trans, pos, path); | |
1895 | return path; | |
1896 | } | |
1897 | ||
f3e1f444 | 1898 | struct btree_path *bch2_path_get(struct btree_trans *trans, |
67e0dd8f KO |
1899 | enum btree_id btree_id, struct bpos pos, |
1900 | unsigned locks_want, unsigned level, | |
f3e1f444 | 1901 | unsigned flags) |
67e0dd8f | 1902 | { |
807dda8c | 1903 | struct btree_path *path, *path_pos = NULL; |
f3e1f444 KO |
1904 | bool cached = flags & BTREE_ITER_CACHED; |
1905 | bool intent = flags & BTREE_ITER_INTENT; | |
67e0dd8f KO |
1906 | int i; |
1907 | ||
1908 | BUG_ON(trans->restarted); | |
7071878b KO |
1909 | bch2_trans_verify_locks(trans); |
1910 | ||
eb7bd15f | 1911 | btree_trans_sort_paths(trans); |
67e0dd8f | 1912 | |
807dda8c | 1913 | btree_trans_sort_paths(trans); |
67e0dd8f | 1914 | |
807dda8c KO |
1915 | trans_for_each_path_inorder(trans, path, i) { |
1916 | if (__btree_path_cmp(path, | |
1917 | btree_id, | |
1918 | cached, | |
1919 | pos, | |
1920 | level) > 0) | |
1921 | break; | |
67e0dd8f | 1922 | |
807dda8c | 1923 | path_pos = path; |
67e0dd8f KO |
1924 | } |
1925 | ||
807dda8c KO |
1926 | if (path_pos && |
1927 | path_pos->cached == cached && | |
1928 | path_pos->btree_id == btree_id && | |
1929 | path_pos->level == level) { | |
1930 | __btree_path_get(path_pos, intent); | |
ce91abd6 | 1931 | path = bch2_btree_path_set_pos(trans, path_pos, pos, intent); |
67e0dd8f | 1932 | } else { |
807dda8c KO |
1933 | path = btree_path_alloc(trans, path_pos); |
1934 | path_pos = NULL; | |
67e0dd8f KO |
1935 | |
1936 | __btree_path_get(path, intent); | |
1937 | path->pos = pos; | |
1938 | path->btree_id = btree_id; | |
1939 | path->cached = cached; | |
67e0dd8f KO |
1940 | path->uptodate = BTREE_ITER_NEED_TRAVERSE; |
1941 | path->should_be_locked = false; | |
1942 | path->level = level; | |
1943 | path->locks_want = locks_want; | |
1944 | path->nodes_locked = 0; | |
1945 | path->nodes_intent_locked = 0; | |
1946 | for (i = 0; i < ARRAY_SIZE(path->l); i++) | |
1947 | path->l[i].b = BTREE_ITER_NO_NODE_INIT; | |
1948 | #ifdef CONFIG_BCACHEFS_DEBUG | |
1949 | path->ip_allocated = _RET_IP_; | |
1950 | #endif | |
1951 | trans->paths_sorted = false; | |
1952 | } | |
1953 | ||
f3e1f444 KO |
1954 | if (!(flags & BTREE_ITER_NOPRESERVE)) |
1955 | path->preserve = true; | |
1956 | ||
67e0dd8f KO |
1957 | if (path->intent_ref) |
1958 | locks_want = max(locks_want, level + 1); | |
1959 | ||
1960 | /* | |
1961 | * If the path has locks_want greater than requested, we don't downgrade | |
1962 | * it here - on transaction restart because btree node split needs to | |
1963 | * upgrade locks, we might be putting/getting the iterator again. | |
1964 | * Downgrading iterators only happens via bch2_trans_downgrade(), after | |
1965 | * a successful transaction commit. | |
1966 | */ | |
1967 | ||
1968 | locks_want = min(locks_want, BTREE_MAX_DEPTH); | |
1969 | if (locks_want > path->locks_want) { | |
1970 | path->locks_want = locks_want; | |
bc82d08b | 1971 | btree_path_get_locks(trans, path, true); |
67e0dd8f KO |
1972 | } |
1973 | ||
67e0dd8f KO |
1974 | return path; |
1975 | } | |
1976 | ||
1977 | inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u) | |
1978 | { | |
1979 | ||
1980 | struct bkey_s_c k; | |
1981 | ||
67e0dd8f KO |
1982 | if (!path->cached) { |
1983 | struct btree_path_level *l = path_l(path); | |
f7b6ca23 KO |
1984 | struct bkey_packed *_k; |
1985 | ||
1986 | EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); | |
67e0dd8f | 1987 | |
f7b6ca23 | 1988 | _k = bch2_btree_node_iter_peek_all(&l->iter, l->b); |
67e0dd8f KO |
1989 | k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null; |
1990 | ||
1991 | EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0); | |
1992 | ||
1993 | if (!k.k || bpos_cmp(path->pos, k.k->p)) | |
1994 | goto hole; | |
1995 | } else { | |
1996 | struct bkey_cached *ck = (void *) path->l[0].b; | |
1997 | ||
f7b6ca23 KO |
1998 | EBUG_ON(ck && |
1999 | (path->btree_id != ck->key.btree_id || | |
2000 | bkey_cmp(path->pos, ck->key.pos))); | |
67e0dd8f | 2001 | |
f7b6ca23 KO |
2002 | /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */ |
2003 | if (unlikely(!ck || !ck->valid)) | |
2004 | return bkey_s_c_null; | |
2005 | ||
2006 | EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); | |
67e0dd8f | 2007 | |
2e63e180 | 2008 | *u = ck->k->k; |
67e0dd8f KO |
2009 | k = bkey_i_to_s_c(ck->k); |
2010 | } | |
2011 | ||
2012 | return k; | |
2013 | hole: | |
2014 | bkey_init(u); | |
2015 | u->p = path->pos; | |
2016 | return (struct bkey_s_c) { u, NULL }; | |
2017 | } | |
2018 | ||
2019 | /* Btree iterators: */ | |
2020 | ||
db92f2ea KO |
2021 | int __must_check |
2022 | __bch2_btree_iter_traverse(struct btree_iter *iter) | |
2023 | { | |
2024 | return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); | |
2025 | } | |
2026 | ||
08070cba KO |
2027 | int __must_check |
2028 | bch2_btree_iter_traverse(struct btree_iter *iter) | |
2029 | { | |
66a0a497 KO |
2030 | int ret; |
2031 | ||
ce91abd6 | 2032 | iter->path = bch2_btree_path_set_pos(iter->trans, iter->path, |
67e0dd8f KO |
2033 | btree_iter_search_key(iter), |
2034 | iter->flags & BTREE_ITER_INTENT); | |
08070cba | 2035 | |
67e0dd8f | 2036 | ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); |
66a0a497 KO |
2037 | if (ret) |
2038 | return ret; | |
2039 | ||
67e0dd8f | 2040 | iter->path->should_be_locked = true; |
66a0a497 | 2041 | return 0; |
08070cba KO |
2042 | } |
2043 | ||
1c6fdbd8 KO |
2044 | /* Iterate across nodes (leaf and interior nodes) */ |
2045 | ||
2046 | struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) | |
2047 | { | |
502027a8 | 2048 | struct btree_trans *trans = iter->trans; |
f21566f1 | 2049 | struct btree *b = NULL; |
1c6fdbd8 KO |
2050 | int ret; |
2051 | ||
67e0dd8f | 2052 | EBUG_ON(iter->path->cached); |
7e1a3aa9 | 2053 | bch2_btree_iter_verify(iter); |
1c6fdbd8 | 2054 | |
502027a8 | 2055 | ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); |
1c6fdbd8 | 2056 | if (ret) |
d355c6f4 | 2057 | goto err; |
1c6fdbd8 | 2058 | |
67e0dd8f | 2059 | b = btree_path_node(iter->path, iter->path->level); |
1c6fdbd8 | 2060 | if (!b) |
f21566f1 | 2061 | goto out; |
1c6fdbd8 | 2062 | |
4cf91b02 | 2063 | BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0); |
1c6fdbd8 | 2064 | |
f21566f1 | 2065 | bkey_init(&iter->k); |
67e0dd8f | 2066 | iter->k.p = iter->pos = b->key.k.p; |
502027a8 | 2067 | |
ce91abd6 | 2068 | iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, |
502027a8 | 2069 | iter->flags & BTREE_ITER_INTENT); |
67e0dd8f | 2070 | iter->path->should_be_locked = true; |
502027a8 | 2071 | BUG_ON(iter->path->uptodate); |
f21566f1 KO |
2072 | out: |
2073 | bch2_btree_iter_verify_entry_exit(iter); | |
2074 | bch2_btree_iter_verify(iter); | |
2dac0eae | 2075 | |
1c6fdbd8 | 2076 | return b; |
d355c6f4 KO |
2077 | err: |
2078 | b = ERR_PTR(ret); | |
2079 | goto out; | |
1c6fdbd8 KO |
2080 | } |
2081 | ||
2dac0eae | 2082 | struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) |
1c6fdbd8 | 2083 | { |
67e0dd8f KO |
2084 | struct btree_trans *trans = iter->trans; |
2085 | struct btree_path *path = iter->path; | |
f21566f1 | 2086 | struct btree *b = NULL; |
979735df | 2087 | unsigned l; |
1c6fdbd8 KO |
2088 | int ret; |
2089 | ||
979735df | 2090 | BUG_ON(trans->restarted); |
67e0dd8f | 2091 | EBUG_ON(iter->path->cached); |
7e1a3aa9 | 2092 | bch2_btree_iter_verify(iter); |
1c6fdbd8 | 2093 | |
979735df | 2094 | /* already at end? */ |
67e0dd8f | 2095 | if (!btree_path_node(path, path->level)) |
979735df | 2096 | return NULL; |
1c6fdbd8 | 2097 | |
979735df KO |
2098 | /* got to end? */ |
2099 | if (!btree_path_node(path, path->level + 1)) { | |
2100 | btree_node_unlock(path, path->level); | |
2101 | path->l[path->level].b = BTREE_ITER_NO_NODE_UP; | |
2102 | path->level++; | |
7071878b | 2103 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); |
979735df KO |
2104 | return NULL; |
2105 | } | |
1c6fdbd8 | 2106 | |
979735df KO |
2107 | if (!bch2_btree_node_relock(trans, path, path->level + 1)) { |
2108 | __bch2_btree_path_unlock(path); | |
2109 | path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS; | |
2110 | path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS; | |
7071878b | 2111 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); |
bc82d08b KO |
2112 | trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_, |
2113 | path->btree_id, &path->pos); | |
979735df KO |
2114 | btree_trans_restart(trans); |
2115 | ret = -EINTR; | |
d355c6f4 | 2116 | goto err; |
979735df | 2117 | } |
1c6fdbd8 | 2118 | |
979735df | 2119 | b = btree_path_node(path, path->level + 1); |
1c6fdbd8 | 2120 | |
979735df KO |
2121 | if (!bpos_cmp(iter->pos, b->key.k.p)) { |
2122 | btree_node_unlock(path, path->level); | |
2123 | path->l[path->level].b = BTREE_ITER_NO_NODE_UP; | |
2124 | path->level++; | |
2125 | } else { | |
1c6fdbd8 KO |
2126 | /* |
2127 | * Haven't gotten to the end of the parent node: go back down to | |
2128 | * the next child node | |
2129 | */ | |
67e0dd8f | 2130 | path = iter->path = |
ce91abd6 | 2131 | bch2_btree_path_set_pos(trans, path, bpos_successor(iter->pos), |
67e0dd8f | 2132 | iter->flags & BTREE_ITER_INTENT); |
1c6fdbd8 | 2133 | |
67e0dd8f | 2134 | path->level = iter->min_depth; |
979735df KO |
2135 | |
2136 | for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++) | |
2137 | if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED) | |
2138 | btree_node_unlock(path, l); | |
2139 | ||
67e0dd8f | 2140 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); |
345ca825 KO |
2141 | bch2_btree_iter_verify(iter); |
2142 | ||
67e0dd8f | 2143 | ret = bch2_btree_path_traverse(trans, path, iter->flags); |
d355c6f4 KO |
2144 | if (ret) |
2145 | goto err; | |
1c6fdbd8 | 2146 | |
67e0dd8f | 2147 | b = path->l[path->level].b; |
1c6fdbd8 KO |
2148 | } |
2149 | ||
f21566f1 | 2150 | bkey_init(&iter->k); |
67e0dd8f | 2151 | iter->k.p = iter->pos = b->key.k.p; |
502027a8 | 2152 | |
ce91abd6 | 2153 | iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p, |
502027a8 | 2154 | iter->flags & BTREE_ITER_INTENT); |
67e0dd8f | 2155 | iter->path->should_be_locked = true; |
502027a8 | 2156 | BUG_ON(iter->path->uptodate); |
f21566f1 KO |
2157 | out: |
2158 | bch2_btree_iter_verify_entry_exit(iter); | |
2159 | bch2_btree_iter_verify(iter); | |
2dac0eae | 2160 | |
1c6fdbd8 | 2161 | return b; |
d355c6f4 KO |
2162 | err: |
2163 | b = ERR_PTR(ret); | |
2164 | goto out; | |
1c6fdbd8 KO |
2165 | } |
2166 | ||
2167 | /* Iterate across keys (in leaf nodes only) */ | |
2168 | ||
e0ba3b64 | 2169 | inline bool bch2_btree_iter_advance(struct btree_iter *iter) |
434094be | 2170 | { |
3d495595 | 2171 | struct bpos pos = iter->k.p; |
6caf0578 KO |
2172 | bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS |
2173 | ? bpos_cmp(pos, SPOS_MAX) | |
2174 | : bkey_cmp(pos, SPOS_MAX)) != 0; | |
3d495595 | 2175 | |
7e1a3aa9 | 2176 | if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) |
e751c01a | 2177 | pos = bkey_successor(iter, pos); |
3d495595 | 2178 | bch2_btree_iter_set_pos(iter, pos); |
7e1a3aa9 | 2179 | return ret; |
3d495595 KO |
2180 | } |
2181 | ||
e0ba3b64 | 2182 | inline bool bch2_btree_iter_rewind(struct btree_iter *iter) |
3d495595 KO |
2183 | { |
2184 | struct bpos pos = bkey_start_pos(&iter->k); | |
71f892a4 KO |
2185 | bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS |
2186 | ? bpos_cmp(pos, POS_MIN) | |
2187 | : bkey_cmp(pos, POS_MIN)) != 0; | |
3d495595 | 2188 | |
7e1a3aa9 | 2189 | if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) |
e751c01a | 2190 | pos = bkey_predecessor(iter, pos); |
3d495595 | 2191 | bch2_btree_iter_set_pos(iter, pos); |
7e1a3aa9 | 2192 | return ret; |
434094be KO |
2193 | } |
2194 | ||
3763cb95 | 2195 | static noinline |
67e0dd8f | 2196 | struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter) |
818664f5 KO |
2197 | { |
2198 | struct btree_insert_entry *i; | |
5288e66a | 2199 | struct bkey_i *ret = NULL; |
818664f5 | 2200 | |
8e6bbc41 | 2201 | trans_for_each_update(iter->trans, i) { |
5288e66a KO |
2202 | if (i->btree_id < iter->btree_id) |
2203 | continue; | |
2204 | if (i->btree_id > iter->btree_id) | |
818664f5 | 2205 | break; |
67e0dd8f | 2206 | if (bpos_cmp(i->k->k.p, iter->path->pos) < 0) |
5288e66a | 2207 | continue; |
12ce5b7d KO |
2208 | if (i->key_cache_already_flushed) |
2209 | continue; | |
5288e66a KO |
2210 | if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0) |
2211 | ret = i->k; | |
2212 | } | |
818664f5 | 2213 | |
5288e66a KO |
2214 | return ret; |
2215 | } | |
2216 | ||
3763cb95 KO |
2217 | static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter) |
2218 | { | |
2219 | return iter->flags & BTREE_ITER_WITH_UPDATES | |
2220 | ? __bch2_btree_trans_peek_updates(iter) | |
2221 | : NULL; | |
2222 | } | |
2223 | ||
5222a460 KO |
2224 | static noinline |
2225 | struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans, | |
2226 | struct btree_iter *iter) | |
2227 | { | |
5650bb46 | 2228 | struct bkey_i *k = bch2_journal_keys_peek_slot(trans->c, iter->btree_id, |
2ae4573e KO |
2229 | iter->path->level, |
2230 | iter->path->pos); | |
5222a460 | 2231 | |
5650bb46 | 2232 | if (k) { |
5222a460 KO |
2233 | iter->k = k->k; |
2234 | return bkey_i_to_s_c(k); | |
2235 | } else { | |
2236 | return bkey_s_c_null; | |
2237 | } | |
2238 | } | |
2239 | ||
2240 | static noinline | |
2241 | struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, | |
2242 | struct btree_iter *iter, | |
2243 | struct bkey_s_c k) | |
2244 | { | |
2245 | struct bkey_i *next_journal = | |
5650bb46 KO |
2246 | bch2_journal_keys_peek_upto(trans->c, iter->btree_id, 0, |
2247 | iter->path->pos, | |
2248 | k.k ? k.k->p : iter->path->l[0].b->key.k.p); | |
5222a460 | 2249 | |
5650bb46 | 2250 | if (next_journal) { |
5222a460 KO |
2251 | iter->k = next_journal->k; |
2252 | k = bkey_i_to_s_c(next_journal); | |
2253 | } | |
2254 | ||
2255 | return k; | |
2256 | } | |
2257 | ||
f7b6ca23 KO |
2258 | /* |
2259 | * Checks btree key cache for key at iter->pos and returns it if present, or | |
2260 | * bkey_s_c_null: | |
2261 | */ | |
2262 | static noinline | |
2263 | struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) | |
2264 | { | |
2265 | struct btree_trans *trans = iter->trans; | |
2266 | struct bch_fs *c = trans->c; | |
2267 | struct bkey u; | |
2268 | int ret; | |
2269 | ||
2270 | if (!bch2_btree_key_cache_find(c, iter->btree_id, pos)) | |
2271 | return bkey_s_c_null; | |
2272 | ||
2273 | if (!iter->key_cache_path) | |
2274 | iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos, | |
2275 | iter->flags & BTREE_ITER_INTENT, 0, | |
2276 | iter->flags|BTREE_ITER_CACHED); | |
2277 | ||
2278 | iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos, | |
2279 | iter->flags & BTREE_ITER_INTENT); | |
2280 | ||
2281 | ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED); | |
2282 | if (unlikely(ret)) | |
2283 | return bkey_s_c_err(ret); | |
2284 | ||
2285 | iter->key_cache_path->should_be_locked = true; | |
2286 | ||
2287 | return bch2_btree_path_peek_slot(iter->key_cache_path, &u); | |
2288 | } | |
2289 | ||
a1e82d35 | 2290 | static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) |
1c6fdbd8 | 2291 | { |
67e0dd8f | 2292 | struct btree_trans *trans = iter->trans; |
360746bf | 2293 | struct bkey_i *next_update; |
f7b6ca23 | 2294 | struct bkey_s_c k, k2; |
8e432d98 | 2295 | int ret; |
1c6fdbd8 | 2296 | |
67e0dd8f | 2297 | EBUG_ON(iter->path->cached || iter->path->level); |
7e1a3aa9 | 2298 | bch2_btree_iter_verify(iter); |
1c6fdbd8 | 2299 | |
1c6fdbd8 | 2300 | while (1) { |
ce91abd6 KO |
2301 | iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, |
2302 | iter->flags & BTREE_ITER_INTENT); | |
c8476a4e | 2303 | |
67e0dd8f | 2304 | ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); |
e6e024e9 KO |
2305 | if (unlikely(ret)) { |
2306 | /* ensure that iter->k is consistent with iter->pos: */ | |
2307 | bch2_btree_iter_set_pos(iter, iter->pos); | |
2308 | k = bkey_s_c_err(ret); | |
2309 | goto out; | |
2310 | } | |
1c6fdbd8 | 2311 | |
f7b6ca23 KO |
2312 | iter->path->should_be_locked = true; |
2313 | ||
67e0dd8f | 2314 | k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k); |
e6e024e9 | 2315 | |
f7b6ca23 KO |
2316 | if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && |
2317 | k.k && | |
2318 | (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { | |
2319 | ret = bkey_err(k2); | |
2320 | if (ret) { | |
2321 | k = k2; | |
2322 | bch2_btree_iter_set_pos(iter, iter->pos); | |
2323 | goto out; | |
2324 | } | |
2325 | ||
2326 | k = k2; | |
2327 | iter->k = *k.k; | |
2328 | } | |
2329 | ||
5222a460 KO |
2330 | if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL)) |
2331 | k = btree_trans_peek_journal(trans, iter, k); | |
2332 | ||
2333 | next_update = btree_trans_peek_updates(iter); | |
e6e024e9 | 2334 | |
818664f5 | 2335 | if (next_update && |
e6e024e9 | 2336 | bpos_cmp(next_update->k.p, |
67e0dd8f | 2337 | k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) { |
5288e66a | 2338 | iter->k = next_update->k; |
818664f5 | 2339 | k = bkey_i_to_s_c(next_update); |
5288e66a | 2340 | } |
818664f5 | 2341 | |
5222a460 KO |
2342 | if (k.k && bkey_deleted(k.k)) { |
2343 | /* | |
2344 | * If we've got a whiteout, and it's after the search | |
2345 | * key, advance the search key to the whiteout instead | |
2346 | * of just after the whiteout - it might be a btree | |
2347 | * whiteout, with a real key at the same position, since | |
2348 | * in the btree deleted keys sort before non deleted. | |
2349 | */ | |
2350 | search_key = bpos_cmp(search_key, k.k->p) | |
2351 | ? k.k->p | |
2352 | : bpos_successor(k.k->p); | |
2353 | continue; | |
2354 | } | |
2355 | ||
818664f5 | 2356 | if (likely(k.k)) { |
c075ff70 | 2357 | break; |
67e0dd8f | 2358 | } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) { |
e6e024e9 | 2359 | /* Advance to next leaf node: */ |
67e0dd8f | 2360 | search_key = bpos_successor(iter->path->l[0].b->key.k.p); |
e6e024e9 KO |
2361 | } else { |
2362 | /* End of btree: */ | |
c8476a4e KO |
2363 | bch2_btree_iter_set_pos(iter, SPOS_MAX); |
2364 | k = bkey_s_c_null; | |
2365 | goto out; | |
2366 | } | |
1c6fdbd8 | 2367 | } |
a1e82d35 | 2368 | out: |
a1e82d35 KO |
2369 | bch2_btree_iter_verify(iter); |
2370 | ||
2371 | return k; | |
2372 | } | |
2373 | ||
2374 | /** | |
2375 | * bch2_btree_iter_peek: returns first key greater than or equal to iterator's | |
2376 | * current position | |
2377 | */ | |
85d8cf16 | 2378 | struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end) |
a1e82d35 KO |
2379 | { |
2380 | struct btree_trans *trans = iter->trans; | |
2381 | struct bpos search_key = btree_iter_search_key(iter); | |
2382 | struct bkey_s_c k; | |
85d8cf16 | 2383 | struct bpos iter_pos; |
a1e82d35 KO |
2384 | int ret; |
2385 | ||
1f2d9192 KO |
2386 | if (iter->update_path) { |
2387 | bch2_path_put(trans, iter->update_path, | |
2388 | iter->flags & BTREE_ITER_INTENT); | |
2389 | iter->update_path = NULL; | |
2390 | } | |
2391 | ||
a1e82d35 KO |
2392 | bch2_btree_iter_verify_entry_exit(iter); |
2393 | ||
2394 | while (1) { | |
2395 | k = __bch2_btree_iter_peek(iter, search_key); | |
2396 | if (!k.k || bkey_err(k)) | |
2397 | goto out; | |
2398 | ||
85d8cf16 KO |
2399 | /* |
2400 | * iter->pos should be mononotically increasing, and always be | |
2401 | * equal to the key we just returned - except extents can | |
2402 | * straddle iter->pos: | |
2403 | */ | |
2404 | if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) | |
2405 | iter_pos = k.k->p; | |
2406 | else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) | |
2407 | iter_pos = bkey_start_pos(k.k); | |
2408 | else | |
2409 | iter_pos = iter->pos; | |
2410 | ||
2411 | if (bkey_cmp(iter_pos, end) > 0) { | |
2412 | bch2_btree_iter_set_pos(iter, end); | |
2413 | k = bkey_s_c_null; | |
2414 | goto out; | |
2415 | } | |
2416 | ||
1f2d9192 KO |
2417 | if (iter->update_path && |
2418 | bkey_cmp(iter->update_path->pos, k.k->p)) { | |
2419 | bch2_path_put(trans, iter->update_path, | |
2420 | iter->flags & BTREE_ITER_INTENT); | |
2421 | iter->update_path = NULL; | |
2422 | } | |
2423 | ||
2424 | if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && | |
2425 | (iter->flags & BTREE_ITER_INTENT) && | |
2426 | !(iter->flags & BTREE_ITER_IS_EXTENTS) && | |
2427 | !iter->update_path) { | |
2428 | struct bpos pos = k.k->p; | |
2429 | ||
2430 | if (pos.snapshot < iter->snapshot) { | |
2431 | search_key = bpos_successor(k.k->p); | |
2432 | continue; | |
2433 | } | |
2434 | ||
2435 | pos.snapshot = iter->snapshot; | |
2436 | ||
2437 | /* | |
2438 | * advance, same as on exit for iter->path, but only up | |
2439 | * to snapshot | |
2440 | */ | |
2441 | __btree_path_get(iter->path, iter->flags & BTREE_ITER_INTENT); | |
2442 | iter->update_path = iter->path; | |
2443 | ||
ce91abd6 | 2444 | iter->update_path = bch2_btree_path_set_pos(trans, |
1f2d9192 KO |
2445 | iter->update_path, pos, |
2446 | iter->flags & BTREE_ITER_INTENT); | |
1f2d9192 KO |
2447 | } |
2448 | ||
a1e82d35 KO |
2449 | /* |
2450 | * We can never have a key in a leaf node at POS_MAX, so | |
2451 | * we don't have to check these successor() calls: | |
2452 | */ | |
2453 | if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && | |
2454 | !bch2_snapshot_is_ancestor(trans->c, | |
2455 | iter->snapshot, | |
2456 | k.k->p.snapshot)) { | |
2457 | search_key = bpos_successor(k.k->p); | |
2458 | continue; | |
2459 | } | |
2460 | ||
2461 | if (bkey_whiteout(k.k) && | |
2462 | !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) { | |
2463 | search_key = bkey_successor(iter, k.k->p); | |
2464 | continue; | |
2465 | } | |
2466 | ||
2467 | break; | |
2468 | } | |
2469 | ||
85d8cf16 | 2470 | iter->pos = iter_pos; |
1f2d9192 | 2471 | |
ce91abd6 | 2472 | iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p, |
1f2d9192 KO |
2473 | iter->flags & BTREE_ITER_INTENT); |
2474 | BUG_ON(!iter->path->nodes_locked); | |
a1e82d35 | 2475 | out: |
1f2d9192 | 2476 | if (iter->update_path) { |
d4d24a65 KO |
2477 | if (unlikely(!bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_))) { |
2478 | k = bkey_s_c_err(-EINTR); | |
2479 | } else { | |
2480 | BUG_ON(!(iter->update_path->nodes_locked & 1)); | |
2481 | iter->update_path->should_be_locked = true; | |
2482 | } | |
1f2d9192 KO |
2483 | } |
2484 | iter->path->should_be_locked = true; | |
2485 | ||
a1e82d35 | 2486 | if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) |
c075ff70 KO |
2487 | iter->pos.snapshot = iter->snapshot; |
2488 | ||
a1e82d35 KO |
2489 | ret = bch2_btree_iter_verify_ret(iter, k); |
2490 | if (unlikely(ret)) { | |
2491 | bch2_btree_iter_set_pos(iter, iter->pos); | |
2492 | k = bkey_s_c_err(ret); | |
2493 | } | |
67e0dd8f | 2494 | |
7e1a3aa9 | 2495 | bch2_btree_iter_verify_entry_exit(iter); |
c075ff70 | 2496 | |
1c6fdbd8 KO |
2497 | return k; |
2498 | } | |
2499 | ||
f4b61341 KO |
2500 | /** |
2501 | * bch2_btree_iter_next: returns first key greater than iterator's current | |
2502 | * position | |
2503 | */ | |
1c6fdbd8 KO |
2504 | struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) |
2505 | { | |
e0ba3b64 | 2506 | if (!bch2_btree_iter_advance(iter)) |
2e70ce56 | 2507 | return bkey_s_c_null; |
f4b61341 | 2508 | |
2dac0eae | 2509 | return bch2_btree_iter_peek(iter); |
1c6fdbd8 KO |
2510 | } |
2511 | ||
ccf5a109 KO |
2512 | /** |
2513 | * bch2_btree_iter_peek_prev: returns first key less than or equal to | |
2514 | * iterator's current position | |
2515 | */ | |
2516 | struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) | |
1c6fdbd8 | 2517 | { |
67e0dd8f | 2518 | struct btree_trans *trans = iter->trans; |
c8476a4e | 2519 | struct bpos search_key = iter->pos; |
c075ff70 | 2520 | struct btree_path *saved_path = NULL; |
1c6fdbd8 | 2521 | struct bkey_s_c k; |
c075ff70 KO |
2522 | struct bkey saved_k; |
2523 | const struct bch_val *saved_v; | |
1c6fdbd8 KO |
2524 | int ret; |
2525 | ||
67e0dd8f | 2526 | EBUG_ON(iter->path->cached || iter->path->level); |
5288e66a | 2527 | EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES); |
5222a460 KO |
2528 | |
2529 | if (iter->flags & BTREE_ITER_WITH_JOURNAL) | |
2530 | return bkey_s_c_err(-EIO); | |
2531 | ||
7e1a3aa9 KO |
2532 | bch2_btree_iter_verify(iter); |
2533 | bch2_btree_iter_verify_entry_exit(iter); | |
2534 | ||
c075ff70 KO |
2535 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) |
2536 | search_key.snapshot = U32_MAX; | |
2537 | ||
1c6fdbd8 | 2538 | while (1) { |
ce91abd6 | 2539 | iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, |
67e0dd8f | 2540 | iter->flags & BTREE_ITER_INTENT); |
c8476a4e | 2541 | |
67e0dd8f | 2542 | ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); |
7e1a3aa9 | 2543 | if (unlikely(ret)) { |
e6e024e9 KO |
2544 | /* ensure that iter->k is consistent with iter->pos: */ |
2545 | bch2_btree_iter_set_pos(iter, iter->pos); | |
7e1a3aa9 | 2546 | k = bkey_s_c_err(ret); |
e6e024e9 | 2547 | goto out; |
7e1a3aa9 | 2548 | } |
1c6fdbd8 | 2549 | |
67e0dd8f KO |
2550 | k = btree_path_level_peek(trans, iter->path, |
2551 | &iter->path->l[0], &iter->k); | |
3d495595 KO |
2552 | if (!k.k || |
2553 | ((iter->flags & BTREE_ITER_IS_EXTENTS) | |
c075ff70 KO |
2554 | ? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0 |
2555 | : bpos_cmp(k.k->p, search_key) > 0)) | |
67e0dd8f KO |
2556 | k = btree_path_level_prev(trans, iter->path, |
2557 | &iter->path->l[0], &iter->k); | |
ccf5a109 | 2558 | |
e6e024e9 | 2559 | if (likely(k.k)) { |
c075ff70 KO |
2560 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) { |
2561 | if (k.k->p.snapshot == iter->snapshot) | |
2562 | goto got_key; | |
2563 | ||
2564 | /* | |
2565 | * If we have a saved candidate, and we're no | |
2566 | * longer at the same _key_ (not pos), return | |
2567 | * that candidate | |
2568 | */ | |
2569 | if (saved_path && bkey_cmp(k.k->p, saved_k.p)) { | |
2570 | bch2_path_put(trans, iter->path, | |
2571 | iter->flags & BTREE_ITER_INTENT); | |
2572 | iter->path = saved_path; | |
2573 | saved_path = NULL; | |
2574 | iter->k = saved_k; | |
2575 | k.v = saved_v; | |
2576 | goto got_key; | |
2577 | } | |
2578 | ||
2579 | if (bch2_snapshot_is_ancestor(iter->trans->c, | |
2580 | iter->snapshot, | |
2581 | k.k->p.snapshot)) { | |
2582 | if (saved_path) | |
2583 | bch2_path_put(trans, saved_path, | |
2584 | iter->flags & BTREE_ITER_INTENT); | |
2585 | saved_path = btree_path_clone(trans, iter->path, | |
2586 | iter->flags & BTREE_ITER_INTENT); | |
2587 | saved_k = *k.k; | |
2588 | saved_v = k.v; | |
2589 | } | |
2590 | ||
2591 | search_key = bpos_predecessor(k.k->p); | |
2592 | continue; | |
2593 | } | |
2594 | got_key: | |
2595 | if (bkey_whiteout(k.k) && | |
2596 | !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) { | |
2597 | search_key = bkey_predecessor(iter, k.k->p); | |
2598 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) | |
2599 | search_key.snapshot = U32_MAX; | |
2600 | continue; | |
2601 | } | |
2602 | ||
1c6fdbd8 | 2603 | break; |
67e0dd8f | 2604 | } else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) { |
e6e024e9 | 2605 | /* Advance to previous leaf node: */ |
67e0dd8f | 2606 | search_key = bpos_predecessor(iter->path->l[0].b->data->min_key); |
e6e024e9 KO |
2607 | } else { |
2608 | /* Start of btree: */ | |
c8476a4e | 2609 | bch2_btree_iter_set_pos(iter, POS_MIN); |
7e1a3aa9 | 2610 | k = bkey_s_c_null; |
e6e024e9 | 2611 | goto out; |
7e1a3aa9 | 2612 | } |
ccf5a109 | 2613 | } |
1c6fdbd8 | 2614 | |
a045be5a | 2615 | EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0); |
3d495595 KO |
2616 | |
2617 | /* Extents can straddle iter->pos: */ | |
a045be5a | 2618 | if (bkey_cmp(k.k->p, iter->pos) < 0) |
3d495595 | 2619 | iter->pos = k.k->p; |
c075ff70 KO |
2620 | |
2621 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) | |
2622 | iter->pos.snapshot = iter->snapshot; | |
7e1a3aa9 | 2623 | out: |
c075ff70 KO |
2624 | if (saved_path) |
2625 | bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT); | |
67e0dd8f KO |
2626 | iter->path->should_be_locked = true; |
2627 | ||
7e1a3aa9 KO |
2628 | bch2_btree_iter_verify_entry_exit(iter); |
2629 | bch2_btree_iter_verify(iter); | |
67e0dd8f | 2630 | |
1c6fdbd8 KO |
2631 | return k; |
2632 | } | |
2633 | ||
ccf5a109 KO |
2634 | /** |
2635 | * bch2_btree_iter_prev: returns first key less than iterator's current | |
2636 | * position | |
2637 | */ | |
2638 | struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) | |
2639 | { | |
e0ba3b64 | 2640 | if (!bch2_btree_iter_rewind(iter)) |
2e70ce56 | 2641 | return bkey_s_c_null; |
ccf5a109 | 2642 | |
2e70ce56 | 2643 | return bch2_btree_iter_peek_prev(iter); |
ccf5a109 KO |
2644 | } |
2645 | ||
c3801239 | 2646 | struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) |
271a3d3a | 2647 | { |
9f6bd307 | 2648 | struct btree_trans *trans = iter->trans; |
953ee28a | 2649 | struct bpos search_key; |
271a3d3a | 2650 | struct bkey_s_c k; |
c3801239 KO |
2651 | int ret; |
2652 | ||
7e1a3aa9 KO |
2653 | bch2_btree_iter_verify(iter); |
2654 | bch2_btree_iter_verify_entry_exit(iter); | |
2ae4573e | 2655 | EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE)); |
7e1a3aa9 | 2656 | |
1d214eb1 KO |
2657 | /* extents can't span inode numbers: */ |
2658 | if ((iter->flags & BTREE_ITER_IS_EXTENTS) && | |
953ee28a | 2659 | unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { |
1d214eb1 KO |
2660 | if (iter->pos.inode == KEY_INODE_MAX) |
2661 | return bkey_s_c_null; | |
c3801239 | 2662 | |
1d214eb1 KO |
2663 | bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); |
2664 | } | |
8042b5b7 | 2665 | |
953ee28a | 2666 | search_key = btree_iter_search_key(iter); |
ce91abd6 | 2667 | iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key, |
67e0dd8f | 2668 | iter->flags & BTREE_ITER_INTENT); |
953ee28a | 2669 | |
67e0dd8f | 2670 | ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); |
c3801239 KO |
2671 | if (unlikely(ret)) |
2672 | return bkey_s_c_err(ret); | |
1c6fdbd8 | 2673 | |
c075ff70 KO |
2674 | if ((iter->flags & BTREE_ITER_CACHED) || |
2675 | !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) { | |
953ee28a | 2676 | struct bkey_i *next_update; |
1c6fdbd8 | 2677 | |
5222a460 | 2678 | if ((next_update = btree_trans_peek_updates(iter)) && |
67e0dd8f | 2679 | !bpos_cmp(next_update->k.p, iter->pos)) { |
1d214eb1 KO |
2680 | iter->k = next_update->k; |
2681 | k = bkey_i_to_s_c(next_update); | |
5222a460 | 2682 | goto out; |
1d214eb1 KO |
2683 | } |
2684 | ||
5222a460 KO |
2685 | if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) && |
2686 | (k = btree_trans_peek_slot_journal(trans, iter)).k) | |
2687 | goto out; | |
2688 | ||
f7b6ca23 KO |
2689 | if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && |
2690 | (k = btree_trans_peek_key_cache(iter, iter->pos)).k) { | |
2691 | if (!bkey_err(k)) | |
2692 | iter->k = *k.k; | |
2693 | goto out; | |
2694 | } | |
2695 | ||
5222a460 | 2696 | k = bch2_btree_path_peek_slot(iter->path, &iter->k); |
1d214eb1 KO |
2697 | } else { |
2698 | struct bpos next; | |
1d214eb1 | 2699 | |
2ae4573e KO |
2700 | EBUG_ON(iter->path->level); |
2701 | ||
b1d87f52 | 2702 | if (iter->flags & BTREE_ITER_INTENT) { |
67e0dd8f | 2703 | struct btree_iter iter2; |
85d8cf16 KO |
2704 | struct bpos end = iter->pos; |
2705 | ||
2706 | if (iter->flags & BTREE_ITER_IS_EXTENTS) | |
2707 | end.offset = U64_MAX; | |
b1d87f52 | 2708 | |
67e0dd8f | 2709 | bch2_trans_copy_iter(&iter2, iter); |
85d8cf16 | 2710 | k = bch2_btree_iter_peek_upto(&iter2, end); |
b1d87f52 | 2711 | |
67e0dd8f KO |
2712 | if (k.k && !bkey_err(k)) { |
2713 | iter->k = iter2.k; | |
2714 | k.k = &iter->k; | |
2715 | } | |
2716 | bch2_trans_iter_exit(trans, &iter2); | |
b1d87f52 KO |
2717 | } else { |
2718 | struct bpos pos = iter->pos; | |
2719 | ||
2720 | k = bch2_btree_iter_peek(iter); | |
2721 | iter->pos = pos; | |
2722 | } | |
1d214eb1 KO |
2723 | |
2724 | if (unlikely(bkey_err(k))) | |
2725 | return k; | |
2726 | ||
2727 | next = k.k ? bkey_start_pos(k.k) : POS_MAX; | |
2728 | ||
2729 | if (bkey_cmp(iter->pos, next) < 0) { | |
2730 | bkey_init(&iter->k); | |
2731 | iter->k.p = iter->pos; | |
c075ff70 KO |
2732 | |
2733 | if (iter->flags & BTREE_ITER_IS_EXTENTS) { | |
2734 | bch2_key_resize(&iter->k, | |
2735 | min_t(u64, KEY_SIZE_MAX, | |
2736 | (next.inode == iter->pos.inode | |
2737 | ? next.offset | |
2738 | : KEY_OFFSET_MAX) - | |
2739 | iter->pos.offset)); | |
2740 | EBUG_ON(!iter->k.size); | |
2741 | } | |
1d214eb1 KO |
2742 | |
2743 | k = (struct bkey_s_c) { &iter->k, NULL }; | |
1d214eb1 | 2744 | } |
271a3d3a | 2745 | } |
5222a460 | 2746 | out: |
67e0dd8f KO |
2747 | iter->path->should_be_locked = true; |
2748 | ||
7e1a3aa9 KO |
2749 | bch2_btree_iter_verify_entry_exit(iter); |
2750 | bch2_btree_iter_verify(iter); | |
c075ff70 KO |
2751 | ret = bch2_btree_iter_verify_ret(iter, k); |
2752 | if (unlikely(ret)) | |
2753 | return bkey_s_c_err(ret); | |
66a0a497 | 2754 | |
63f1a598 | 2755 | return k; |
1c6fdbd8 KO |
2756 | } |
2757 | ||
1c6fdbd8 KO |
2758 | struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) |
2759 | { | |
e0ba3b64 | 2760 | if (!bch2_btree_iter_advance(iter)) |
2e70ce56 | 2761 | return bkey_s_c_null; |
1c6fdbd8 | 2762 | |
2dac0eae | 2763 | return bch2_btree_iter_peek_slot(iter); |
1c6fdbd8 KO |
2764 | } |
2765 | ||
18fc6ae5 KO |
2766 | struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) |
2767 | { | |
e0ba3b64 | 2768 | if (!bch2_btree_iter_rewind(iter)) |
18fc6ae5 KO |
2769 | return bkey_s_c_null; |
2770 | ||
2771 | return bch2_btree_iter_peek_slot(iter); | |
2772 | } | |
2773 | ||
1c6fdbd8 KO |
2774 | /* new transactional stuff: */ |
2775 | ||
0423fb71 KO |
2776 | #ifdef CONFIG_BCACHEFS_DEBUG |
2777 | static void btree_trans_verify_sorted_refs(struct btree_trans *trans) | |
2778 | { | |
67e0dd8f | 2779 | struct btree_path *path; |
0423fb71 KO |
2780 | unsigned i; |
2781 | ||
67e0dd8f | 2782 | BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated)); |
0423fb71 | 2783 | |
67e0dd8f KO |
2784 | trans_for_each_path(trans, path) { |
2785 | BUG_ON(path->sorted_idx >= trans->nr_sorted); | |
2786 | BUG_ON(trans->sorted[path->sorted_idx] != path->idx); | |
0423fb71 KO |
2787 | } |
2788 | ||
2789 | for (i = 0; i < trans->nr_sorted; i++) { | |
2790 | unsigned idx = trans->sorted[i]; | |
2791 | ||
67e0dd8f KO |
2792 | EBUG_ON(!(trans->paths_allocated & (1ULL << idx))); |
2793 | BUG_ON(trans->paths[idx].sorted_idx != i); | |
0423fb71 KO |
2794 | } |
2795 | } | |
0423fb71 KO |
2796 | |
2797 | static void btree_trans_verify_sorted(struct btree_trans *trans) | |
2798 | { | |
67e0dd8f | 2799 | struct btree_path *path, *prev = NULL; |
0423fb71 KO |
2800 | unsigned i; |
2801 | ||
67e0dd8f | 2802 | trans_for_each_path_inorder(trans, path, i) { |
eb7bd15f KO |
2803 | if (prev && btree_path_cmp(prev, path) > 0) { |
2804 | bch2_dump_trans_paths_updates(trans); | |
2805 | panic("trans paths out of order!\n"); | |
2806 | } | |
67e0dd8f | 2807 | prev = path; |
0423fb71 | 2808 | } |
0423fb71 | 2809 | } |
068bcaa5 KO |
2810 | #else |
2811 | static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {} | |
2812 | static inline void btree_trans_verify_sorted(struct btree_trans *trans) {} | |
2813 | #endif | |
0423fb71 | 2814 | |
068bcaa5 | 2815 | void __bch2_btree_trans_sort_paths(struct btree_trans *trans) |
0423fb71 KO |
2816 | { |
2817 | int i, l = 0, r = trans->nr_sorted, inc = 1; | |
2818 | bool swapped; | |
2819 | ||
068bcaa5 KO |
2820 | btree_trans_verify_sorted_refs(trans); |
2821 | ||
2822 | if (trans->paths_sorted) | |
2823 | goto out; | |
2824 | ||
0423fb71 KO |
2825 | /* |
2826 | * Cocktail shaker sort: this is efficient because iterators will be | |
2827 | * mostly sorteda. | |
2828 | */ | |
2829 | do { | |
2830 | swapped = false; | |
2831 | ||
2832 | for (i = inc > 0 ? l : r - 2; | |
2833 | i + 1 < r && i >= l; | |
2834 | i += inc) { | |
67e0dd8f KO |
2835 | if (btree_path_cmp(trans->paths + trans->sorted[i], |
2836 | trans->paths + trans->sorted[i + 1]) > 0) { | |
0423fb71 | 2837 | swap(trans->sorted[i], trans->sorted[i + 1]); |
67e0dd8f KO |
2838 | trans->paths[trans->sorted[i]].sorted_idx = i; |
2839 | trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1; | |
0423fb71 KO |
2840 | swapped = true; |
2841 | } | |
2842 | } | |
2843 | ||
2844 | if (inc > 0) | |
2845 | --r; | |
2846 | else | |
2847 | l++; | |
2848 | inc = -inc; | |
2849 | } while (swapped); | |
2850 | ||
67e0dd8f | 2851 | trans->paths_sorted = true; |
068bcaa5 | 2852 | out: |
0423fb71 KO |
2853 | btree_trans_verify_sorted(trans); |
2854 | } | |
2855 | ||
67e0dd8f KO |
2856 | static inline void btree_path_list_remove(struct btree_trans *trans, |
2857 | struct btree_path *path) | |
0423fb71 KO |
2858 | { |
2859 | unsigned i; | |
2860 | ||
67e0dd8f | 2861 | EBUG_ON(path->sorted_idx >= trans->nr_sorted); |
0423fb71 KO |
2862 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS |
2863 | trans->nr_sorted--; | |
67e0dd8f KO |
2864 | memmove_u64s_down_small(trans->sorted + path->sorted_idx, |
2865 | trans->sorted + path->sorted_idx + 1, | |
2866 | DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); | |
0423fb71 | 2867 | #else |
67e0dd8f | 2868 | array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx); |
0423fb71 | 2869 | #endif |
67e0dd8f KO |
2870 | for (i = path->sorted_idx; i < trans->nr_sorted; i++) |
2871 | trans->paths[trans->sorted[i]].sorted_idx = i; | |
0423fb71 | 2872 | |
67e0dd8f | 2873 | path->sorted_idx = U8_MAX; |
0423fb71 KO |
2874 | } |
2875 | ||
67e0dd8f KO |
2876 | static inline void btree_path_list_add(struct btree_trans *trans, |
2877 | struct btree_path *pos, | |
2878 | struct btree_path *path) | |
0423fb71 KO |
2879 | { |
2880 | unsigned i; | |
2881 | ||
068bcaa5 | 2882 | path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted; |
0423fb71 KO |
2883 | |
2884 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | |
67e0dd8f KO |
2885 | memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1, |
2886 | trans->sorted + path->sorted_idx, | |
2887 | DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); | |
0423fb71 | 2888 | trans->nr_sorted++; |
67e0dd8f | 2889 | trans->sorted[path->sorted_idx] = path->idx; |
0423fb71 | 2890 | #else |
67e0dd8f | 2891 | array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx); |
0423fb71 KO |
2892 | #endif |
2893 | ||
67e0dd8f KO |
2894 | for (i = path->sorted_idx; i < trans->nr_sorted; i++) |
2895 | trans->paths[trans->sorted[i]].sorted_idx = i; | |
64bc0011 | 2896 | |
0423fb71 | 2897 | btree_trans_verify_sorted_refs(trans); |
7c26ecae KO |
2898 | } |
2899 | ||
67e0dd8f | 2900 | void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) |
64bc0011 | 2901 | { |
67e0dd8f KO |
2902 | if (iter->path) |
2903 | bch2_path_put(trans, iter->path, | |
2904 | iter->flags & BTREE_ITER_INTENT); | |
1f2d9192 KO |
2905 | if (iter->update_path) |
2906 | bch2_path_put(trans, iter->update_path, | |
2907 | iter->flags & BTREE_ITER_INTENT); | |
f7b6ca23 KO |
2908 | if (iter->key_cache_path) |
2909 | bch2_path_put(trans, iter->key_cache_path, | |
2910 | iter->flags & BTREE_ITER_INTENT); | |
67e0dd8f | 2911 | iter->path = NULL; |
1f2d9192 | 2912 | iter->update_path = NULL; |
f7b6ca23 | 2913 | iter->key_cache_path = NULL; |
64bc0011 KO |
2914 | } |
2915 | ||
67e0dd8f KO |
2916 | static void __bch2_trans_iter_init(struct btree_trans *trans, |
2917 | struct btree_iter *iter, | |
2918 | enum btree_id btree_id, struct bpos pos, | |
2919 | unsigned locks_want, | |
2920 | unsigned depth, | |
2921 | unsigned flags) | |
1c6fdbd8 | 2922 | { |
e5af273f KO |
2923 | EBUG_ON(trans->restarted); |
2924 | ||
f21566f1 KO |
2925 | if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && |
2926 | btree_node_type_is_extents(btree_id)) | |
3dea728c | 2927 | flags |= BTREE_ITER_IS_EXTENTS; |
1c6fdbd8 | 2928 | |
a861c722 KO |
2929 | if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) && |
2930 | !btree_type_has_snapshots(btree_id)) | |
e751c01a KO |
2931 | flags &= ~BTREE_ITER_ALL_SNAPSHOTS; |
2932 | ||
a861c722 KO |
2933 | if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) && |
2934 | btree_type_has_snapshots(btree_id)) | |
2935 | flags |= BTREE_ITER_FILTER_SNAPSHOTS; | |
e751c01a | 2936 | |
5222a460 KO |
2937 | if (trans->journal_replay_not_finished) |
2938 | flags |= BTREE_ITER_WITH_JOURNAL; | |
2939 | ||
67e0dd8f KO |
2940 | iter->trans = trans; |
2941 | iter->path = NULL; | |
1f2d9192 | 2942 | iter->update_path = NULL; |
f7b6ca23 | 2943 | iter->key_cache_path = NULL; |
67e0dd8f KO |
2944 | iter->btree_id = btree_id; |
2945 | iter->min_depth = depth; | |
f21566f1 KO |
2946 | iter->flags = flags; |
2947 | iter->snapshot = pos.snapshot; | |
67e0dd8f KO |
2948 | iter->pos = pos; |
2949 | iter->k.type = KEY_TYPE_deleted; | |
2950 | iter->k.p = pos; | |
2951 | iter->k.size = 0; | |
e751c01a | 2952 | |
f3e1f444 KO |
2953 | iter->path = bch2_path_get(trans, btree_id, iter->pos, |
2954 | locks_want, depth, flags); | |
1c6fdbd8 KO |
2955 | } |
2956 | ||
67e0dd8f KO |
2957 | void bch2_trans_iter_init(struct btree_trans *trans, |
2958 | struct btree_iter *iter, | |
2959 | unsigned btree_id, struct bpos pos, | |
2960 | unsigned flags) | |
424eb881 | 2961 | { |
2ae4573e KO |
2962 | if (!btree_id_cached(trans->c, btree_id)) { |
2963 | flags &= ~BTREE_ITER_CACHED; | |
2964 | flags &= ~BTREE_ITER_WITH_KEY_CACHE; | |
2965 | } else if (!(flags & BTREE_ITER_CACHED)) | |
2966 | flags |= BTREE_ITER_WITH_KEY_CACHE; | |
2967 | ||
67e0dd8f KO |
2968 | __bch2_trans_iter_init(trans, iter, btree_id, pos, |
2969 | 0, 0, flags); | |
424eb881 KO |
2970 | } |
2971 | ||
67e0dd8f KO |
2972 | void bch2_trans_node_iter_init(struct btree_trans *trans, |
2973 | struct btree_iter *iter, | |
2974 | enum btree_id btree_id, | |
2975 | struct bpos pos, | |
2976 | unsigned locks_want, | |
2977 | unsigned depth, | |
2978 | unsigned flags) | |
1c6fdbd8 | 2979 | { |
67e0dd8f KO |
2980 | __bch2_trans_iter_init(trans, iter, btree_id, pos, locks_want, depth, |
2981 | BTREE_ITER_NOT_EXTENTS| | |
2982 | __BTREE_ITER_ALL_SNAPSHOTS| | |
2983 | BTREE_ITER_ALL_SNAPSHOTS| | |
2984 | flags); | |
2985 | BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH)); | |
2986 | BUG_ON(iter->path->level != depth); | |
2987 | BUG_ON(iter->min_depth != depth); | |
2988 | } | |
7c26ecae | 2989 | |
67e0dd8f KO |
2990 | void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) |
2991 | { | |
2992 | *dst = *src; | |
2993 | if (src->path) | |
2994 | __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT); | |
1f2d9192 KO |
2995 | if (src->update_path) |
2996 | __btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT); | |
f7b6ca23 | 2997 | dst->key_cache_path = NULL; |
1c6fdbd8 KO |
2998 | } |
2999 | ||
73a117d2 | 3000 | void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) |
1c6fdbd8 | 3001 | { |
73a117d2 KO |
3002 | size_t new_top = trans->mem_top + size; |
3003 | void *p; | |
3004 | ||
3005 | if (new_top > trans->mem_bytes) { | |
1c6fdbd8 | 3006 | size_t old_bytes = trans->mem_bytes; |
73a117d2 | 3007 | size_t new_bytes = roundup_pow_of_two(new_top); |
e131b6aa KO |
3008 | void *new_mem; |
3009 | ||
3010 | WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); | |
3011 | ||
3012 | new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS); | |
3013 | if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { | |
3014 | new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); | |
3015 | new_bytes = BTREE_TRANS_MEM_MAX; | |
3016 | kfree(trans->mem); | |
3017 | } | |
1c6fdbd8 KO |
3018 | |
3019 | if (!new_mem) | |
73a117d2 | 3020 | return ERR_PTR(-ENOMEM); |
1c6fdbd8 KO |
3021 | |
3022 | trans->mem = new_mem; | |
3023 | trans->mem_bytes = new_bytes; | |
3024 | ||
1c7a0adf | 3025 | if (old_bytes) { |
669f87a5 | 3026 | trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes); |
e5af273f | 3027 | btree_trans_restart(trans); |
73a117d2 | 3028 | return ERR_PTR(-EINTR); |
1c7a0adf | 3029 | } |
1c6fdbd8 KO |
3030 | } |
3031 | ||
20bceecb | 3032 | p = trans->mem + trans->mem_top; |
1c6fdbd8 | 3033 | trans->mem_top += size; |
7ed158f2 | 3034 | memset(p, 0, size); |
20bceecb | 3035 | return p; |
1c6fdbd8 KO |
3036 | } |
3037 | ||
d38494c4 | 3038 | /** |
955af634 | 3039 | * bch2_trans_begin() - reset a transaction after a interrupted attempt |
d38494c4 | 3040 | * @trans: transaction to reset |
d38494c4 DR |
3041 | * |
3042 | * While iterating over nodes or updating nodes a attempt to lock a btree | |
3043 | * node may return EINTR when the trylock fails. When this occurs | |
955af634 | 3044 | * bch2_trans_begin() should be called and the transaction retried. |
d38494c4 | 3045 | */ |
955af634 | 3046 | void bch2_trans_begin(struct btree_trans *trans) |
1c6fdbd8 | 3047 | { |
67e0dd8f KO |
3048 | struct btree_insert_entry *i; |
3049 | struct btree_path *path; | |
a8e00bd4 | 3050 | |
67e0dd8f KO |
3051 | trans_for_each_update(trans, i) |
3052 | __btree_path_put(i->path, true); | |
1c6fdbd8 | 3053 | |
904823de | 3054 | memset(&trans->journal_res, 0, sizeof(trans->journal_res)); |
a49e9a05 | 3055 | trans->extra_journal_res = 0; |
5df4be3f | 3056 | trans->nr_updates = 0; |
163e885a | 3057 | trans->mem_top = 0; |
bf7b87a4 | 3058 | |
43d00243 | 3059 | trans->hooks = NULL; |
2a6870ad | 3060 | trans->extra_journal_entries.nr = 0; |
96e2aa1b | 3061 | |
f21539a5 KO |
3062 | if (trans->fs_usage_deltas) { |
3063 | trans->fs_usage_deltas->used = 0; | |
3064 | memset((void *) trans->fs_usage_deltas + | |
3065 | offsetof(struct replicas_delta_list, memset_start), 0, | |
3066 | (void *) &trans->fs_usage_deltas->memset_end - | |
3067 | (void *) &trans->fs_usage_deltas->memset_start); | |
3068 | } | |
3069 | ||
67e0dd8f | 3070 | trans_for_each_path(trans, path) { |
25a77231 KO |
3071 | path->should_be_locked = false; |
3072 | ||
e1f7fa06 KO |
3073 | /* |
3074 | * If the transaction wasn't restarted, we're presuming to be | |
3075 | * doing something new: dont keep iterators excpt the ones that | |
3076 | * are in use - except for the subvolumes btree: | |
3077 | */ | |
3078 | if (!trans->restarted && path->btree_id != BTREE_ID_subvolumes) | |
3079 | path->preserve = false; | |
3080 | ||
67e0dd8f KO |
3081 | /* |
3082 | * XXX: we probably shouldn't be doing this if the transaction | |
3083 | * was restarted, but currently we still overflow transaction | |
3084 | * iterators if we do that | |
3085 | */ | |
3086 | if (!path->ref && !path->preserve) | |
3087 | __bch2_path_free(trans, path); | |
3088 | else | |
25a77231 | 3089 | path->preserve = false; |
67e0dd8f KO |
3090 | } |
3091 | ||
955af634 | 3092 | bch2_trans_cond_resched(trans); |
50dc0f69 | 3093 | |
955af634 | 3094 | if (trans->restarted) |
67e0dd8f | 3095 | bch2_btree_path_traverse_all(trans); |
e5af273f KO |
3096 | |
3097 | trans->restarted = false; | |
1c6fdbd8 KO |
3098 | } |
3099 | ||
67e0dd8f | 3100 | static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) |
1a21bf98 | 3101 | { |
67e0dd8f | 3102 | size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX; |
3eb26d01 | 3103 | size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX; |
dbd1e825 | 3104 | void *p = NULL; |
1a21bf98 KO |
3105 | |
3106 | BUG_ON(trans->used_mempool); | |
3107 | ||
dbd1e825 | 3108 | #ifdef __KERNEL__ |
67e0dd8f | 3109 | p = this_cpu_xchg(c->btree_paths_bufs->path , NULL); |
dbd1e825 KO |
3110 | #endif |
3111 | if (!p) | |
67e0dd8f | 3112 | p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS); |
1a21bf98 | 3113 | |
67e0dd8f | 3114 | trans->paths = p; p += paths_bytes; |
1a21bf98 | 3115 | trans->updates = p; p += updates_bytes; |
1a21bf98 KO |
3116 | } |
3117 | ||
669f87a5 KO |
3118 | void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, |
3119 | unsigned expected_nr_iters, | |
3120 | size_t expected_mem_bytes, | |
3121 | const char *fn) | |
c0ebe3e4 | 3122 | __acquires(&c->btree_trans_barrier) |
1c6fdbd8 | 3123 | { |
ae1ede58 | 3124 | memset(trans, 0, sizeof(*trans)); |
1c6fdbd8 | 3125 | trans->c = c; |
669f87a5 | 3126 | trans->fn = fn; |
5222a460 KO |
3127 | trans->journal_replay_not_finished = |
3128 | !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags); | |
20bceecb | 3129 | |
67e0dd8f | 3130 | bch2_trans_alloc_paths(trans, c); |
20bceecb | 3131 | |
0b5c9f59 KO |
3132 | if (expected_mem_bytes) { |
3133 | expected_mem_bytes = roundup_pow_of_two(expected_mem_bytes); | |
3134 | trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL); | |
e131b6aa KO |
3135 | |
3136 | if (!unlikely(trans->mem)) { | |
3137 | trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); | |
3138 | trans->mem_bytes = BTREE_TRANS_MEM_MAX; | |
3139 | } else { | |
0b5c9f59 | 3140 | trans->mem_bytes = expected_mem_bytes; |
e131b6aa | 3141 | } |
0b5c9f59 | 3142 | } |
495aabed | 3143 | |
876c7af3 KO |
3144 | trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); |
3145 | ||
b84d42c3 KO |
3146 | if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { |
3147 | trans->pid = current->pid; | |
3148 | mutex_lock(&c->btree_trans_lock); | |
3149 | list_add(&trans->list, &c->btree_trans_list); | |
3150 | mutex_unlock(&c->btree_trans_lock); | |
3151 | } | |
1c6fdbd8 KO |
3152 | } |
3153 | ||
67e0dd8f KO |
3154 | static void check_btree_paths_leaked(struct btree_trans *trans) |
3155 | { | |
3156 | #ifdef CONFIG_BCACHEFS_DEBUG | |
3157 | struct bch_fs *c = trans->c; | |
3158 | struct btree_path *path; | |
3159 | ||
3160 | trans_for_each_path(trans, path) | |
3161 | if (path->ref) | |
3162 | goto leaked; | |
3163 | return; | |
3164 | leaked: | |
669f87a5 | 3165 | bch_err(c, "btree paths leaked from %s!", trans->fn); |
67e0dd8f KO |
3166 | trans_for_each_path(trans, path) |
3167 | if (path->ref) | |
3168 | printk(KERN_ERR " btree %s %pS\n", | |
3169 | bch2_btree_ids[path->btree_id], | |
3170 | (void *) path->ip_allocated); | |
3171 | /* Be noisy about this: */ | |
3172 | bch2_fatal_error(c); | |
3173 | #endif | |
3174 | } | |
3175 | ||
9a796fdb | 3176 | void bch2_trans_exit(struct btree_trans *trans) |
c0ebe3e4 | 3177 | __releases(&c->btree_trans_barrier) |
1c6fdbd8 | 3178 | { |
67e0dd8f | 3179 | struct btree_insert_entry *i; |
1a21bf98 KO |
3180 | struct bch_fs *c = trans->c; |
3181 | ||
ece254b2 | 3182 | bch2_trans_unlock(trans); |
1c6fdbd8 | 3183 | |
67e0dd8f KO |
3184 | trans_for_each_update(trans, i) |
3185 | __btree_path_put(i->path, true); | |
3186 | trans->nr_updates = 0; | |
509d3e0a | 3187 | |
67e0dd8f | 3188 | check_btree_paths_leaked(trans); |
50dc0f69 | 3189 | |
b84d42c3 KO |
3190 | if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { |
3191 | mutex_lock(&c->btree_trans_lock); | |
3192 | list_del(&trans->list); | |
3193 | mutex_unlock(&c->btree_trans_lock); | |
3194 | } | |
495aabed | 3195 | |
876c7af3 KO |
3196 | srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); |
3197 | ||
67e0dd8f | 3198 | bch2_journal_preres_put(&c->journal, &trans->journal_preres); |
2ca88e5a | 3199 | |
2a6870ad KO |
3200 | kfree(trans->extra_journal_entries.data); |
3201 | ||
9620c3ec KO |
3202 | if (trans->fs_usage_deltas) { |
3203 | if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) == | |
3204 | REPLICAS_DELTA_LIST_MAX) | |
3205 | mempool_free(trans->fs_usage_deltas, | |
67e0dd8f | 3206 | &c->replicas_delta_pool); |
9620c3ec KO |
3207 | else |
3208 | kfree(trans->fs_usage_deltas); | |
3209 | } | |
e131b6aa KO |
3210 | |
3211 | if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) | |
67e0dd8f | 3212 | mempool_free(trans->mem, &c->btree_trans_mem_pool); |
e131b6aa KO |
3213 | else |
3214 | kfree(trans->mem); | |
1a21bf98 | 3215 | |
dbd1e825 KO |
3216 | #ifdef __KERNEL__ |
3217 | /* | |
3218 | * Userspace doesn't have a real percpu implementation: | |
3219 | */ | |
67e0dd8f | 3220 | trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths); |
dbd1e825 | 3221 | #endif |
e131b6aa | 3222 | |
67e0dd8f KO |
3223 | if (trans->paths) |
3224 | mempool_free(trans->paths, &c->btree_paths_pool); | |
1a21bf98 | 3225 | |
1c6fdbd8 | 3226 | trans->mem = (void *) 0x1; |
67e0dd8f | 3227 | trans->paths = (void *) 0x1; |
1c6fdbd8 | 3228 | } |
36e9d698 | 3229 | |
b3d1e6ca | 3230 | static void __maybe_unused |
67e0dd8f | 3231 | bch2_btree_path_node_to_text(struct printbuf *out, |
b3d1e6ca | 3232 | struct btree_bkey_cached_common *_b, |
f21566f1 | 3233 | bool cached) |
d211b408 | 3234 | { |
5c1d808a KO |
3235 | pr_buf(out, " l=%u %s:", |
3236 | _b->level, bch2_btree_ids[_b->btree_id]); | |
f21566f1 | 3237 | bch2_bpos_to_text(out, btree_node_pos(_b, cached)); |
d211b408 KO |
3238 | } |
3239 | ||
b84d42c3 | 3240 | #ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS |
f21566f1 | 3241 | static bool trans_has_locks(struct btree_trans *trans) |
5c1d808a | 3242 | { |
67e0dd8f | 3243 | struct btree_path *path; |
5c1d808a | 3244 | |
67e0dd8f KO |
3245 | trans_for_each_path(trans, path) |
3246 | if (path->nodes_locked) | |
5c1d808a KO |
3247 | return true; |
3248 | return false; | |
3249 | } | |
2d587674 | 3250 | #endif |
5c1d808a | 3251 | |
495aabed KO |
3252 | void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) |
3253 | { | |
b84d42c3 | 3254 | #ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS |
495aabed | 3255 | struct btree_trans *trans; |
67e0dd8f | 3256 | struct btree_path *path; |
495aabed | 3257 | struct btree *b; |
c7ce2732 | 3258 | static char lock_types[] = { 'r', 'i', 'w' }; |
495aabed KO |
3259 | unsigned l; |
3260 | ||
3261 | mutex_lock(&c->btree_trans_lock); | |
3262 | list_for_each_entry(trans, &c->btree_trans_list, list) { | |
f21566f1 | 3263 | if (!trans_has_locks(trans)) |
5c1d808a KO |
3264 | continue; |
3265 | ||
669f87a5 | 3266 | pr_buf(out, "%i %s\n", trans->pid, trans->fn); |
495aabed | 3267 | |
67e0dd8f KO |
3268 | trans_for_each_path(trans, path) { |
3269 | if (!path->nodes_locked) | |
495aabed KO |
3270 | continue; |
3271 | ||
1d3ecd7e | 3272 | pr_buf(out, " path %u %c l=%u %s:", |
67e0dd8f KO |
3273 | path->idx, |
3274 | path->cached ? 'c' : 'b', | |
1d3ecd7e | 3275 | path->level, |
67e0dd8f KO |
3276 | bch2_btree_ids[path->btree_id]); |
3277 | bch2_bpos_to_text(out, path->pos); | |
495aabed KO |
3278 | pr_buf(out, "\n"); |
3279 | ||
3280 | for (l = 0; l < BTREE_MAX_DEPTH; l++) { | |
67e0dd8f | 3281 | if (btree_node_locked(path, l)) { |
d211b408 | 3282 | pr_buf(out, " %s l=%u ", |
67e0dd8f KO |
3283 | btree_node_intent_locked(path, l) ? "i" : "r", l); |
3284 | bch2_btree_path_node_to_text(out, | |
3285 | (void *) path->l[l].b, | |
3286 | path->cached); | |
495aabed KO |
3287 | pr_buf(out, "\n"); |
3288 | } | |
3289 | } | |
3290 | } | |
3291 | ||
3292 | b = READ_ONCE(trans->locking); | |
3293 | if (b) { | |
67e0dd8f | 3294 | path = &trans->paths[trans->locking_path_idx]; |
c7ce2732 | 3295 | pr_buf(out, " locking path %u %c l=%u %c %s:", |
67e0dd8f KO |
3296 | trans->locking_path_idx, |
3297 | path->cached ? 'c' : 'b', | |
515282ac | 3298 | trans->locking_level, |
c7ce2732 | 3299 | lock_types[trans->locking_lock_type], |
515282ac KO |
3300 | bch2_btree_ids[trans->locking_btree_id]); |
3301 | bch2_bpos_to_text(out, trans->locking_pos); | |
3302 | ||
d211b408 | 3303 | pr_buf(out, " node "); |
67e0dd8f KO |
3304 | bch2_btree_path_node_to_text(out, |
3305 | (void *) b, path->cached); | |
495aabed KO |
3306 | pr_buf(out, "\n"); |
3307 | } | |
3308 | } | |
3309 | mutex_unlock(&c->btree_trans_lock); | |
3310 | #endif | |
3311 | } | |
3312 | ||
36e9d698 KO |
3313 | void bch2_fs_btree_iter_exit(struct bch_fs *c) |
3314 | { | |
99fafb04 KO |
3315 | if (c->btree_trans_barrier_initialized) |
3316 | cleanup_srcu_struct(&c->btree_trans_barrier); | |
e131b6aa | 3317 | mempool_exit(&c->btree_trans_mem_pool); |
67e0dd8f | 3318 | mempool_exit(&c->btree_paths_pool); |
36e9d698 KO |
3319 | } |
3320 | ||
3321 | int bch2_fs_btree_iter_init(struct bch_fs *c) | |
3322 | { | |
3323 | unsigned nr = BTREE_ITER_MAX; | |
99fafb04 | 3324 | int ret; |
36e9d698 | 3325 | |
495aabed KO |
3326 | INIT_LIST_HEAD(&c->btree_trans_list); |
3327 | mutex_init(&c->btree_trans_lock); | |
3328 | ||
99fafb04 | 3329 | ret = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1, |
67e0dd8f | 3330 | sizeof(struct btree_path) * nr + |
e131b6aa KO |
3331 | sizeof(struct btree_insert_entry) * nr) ?: |
3332 | mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, | |
99fafb04 KO |
3333 | BTREE_TRANS_MEM_MAX) ?: |
3334 | init_srcu_struct(&c->btree_trans_barrier); | |
3335 | if (!ret) | |
3336 | c->btree_trans_barrier_initialized = true; | |
3337 | return ret; | |
36e9d698 | 3338 | } |