// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/prefetch.h>
static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
				       struct btree_path *);

static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);

static inline int __btree_path_cmp(const struct btree_path *l,
				   enum btree_id r_btree_id,
				   bool r_cached,
				   struct bpos r_pos,
				   unsigned r_level)
{
	return cmp_int(l->btree_id, r_btree_id) ?:
	       cmp_int(l->cached, r_cached) ?:
	       bpos_cmp(l->pos, r_pos) ?:
	       -cmp_int(l->level, r_level);
}

static inline int btree_path_cmp(const struct btree_path *l,
				 const struct btree_path *r)
{
	return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
}
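/*
 * (Note added for clarity; not in the original source.) Paths sort by
 * btree ID, then by the cached flag, then by position, and finally by
 * *descending* level - the negated cmp_int() means that of two paths at
 * the same position, the one pointing at the higher interior node sorts
 * first.
 */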
static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_successor(p);
	} else {
		p = bpos_nosnap_successor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
{
	/* Are we iterating over keys in all snapshots? */
	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
		p = bpos_predecessor(p);
	} else {
		p = bpos_nosnap_predecessor(p);
		p.snapshot = iter->snapshot;
	}

	return p;
}

static inline bool is_btree_node(struct btree_path *path, unsigned l)
{
	return l < BTREE_MAX_DEPTH &&
		(unsigned long) path->l[l].b >= 128;
}

static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
{
	struct bpos pos = iter->pos;

	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	    bkey_cmp(pos, POS_MAX))
		pos = bkey_successor(iter, pos);
	return pos;
}

static inline bool btree_path_pos_before_node(struct btree_path *path,
					      struct btree *b)
{
	return bpos_cmp(path->pos, b->data->min_key) < 0;
}

static inline bool btree_path_pos_after_node(struct btree_path *path,
					     struct btree *b)
{
	return bpos_cmp(b->key.k.p, path->pos) < 0;
}

static inline bool btree_path_pos_in_node(struct btree_path *path,
					  struct btree *b)
{
	return path->btree_id == b->c.btree_id &&
		!btree_path_pos_before_node(path, b) &&
		!btree_path_pos_after_node(path, b);
}
/* Btree node locking: */

void bch2_btree_node_unlock_write(struct btree_trans *trans,
				  struct btree_path *path, struct btree *b)
{
	bch2_btree_node_unlock_write_inlined(trans, path, b);
}

void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *linked;
	unsigned readers = 0;

	trans_for_each_path(trans, linked)
		if (linked->l[b->c.level].b == b &&
		    btree_node_read_locked(linked, b->c.level))
			readers++;

	/*
	 * Must drop our read locks before calling six_lock_write() -
	 * six_unlock() won't do wakeups until the reader count
	 * goes to 0, and it's safe because we have the node intent
	 * locked:
	 */
	if (!b->c.lock.readers)
		atomic64_sub(__SIX_VAL(read_lock, readers),
			     &b->c.lock.state.counter);
	else
		this_cpu_sub(*b->c.lock.readers, readers);

	btree_node_lock_type(trans->c, b, SIX_LOCK_write);

	if (!b->c.lock.readers)
		atomic64_add(__SIX_VAL(read_lock, readers),
			     &b->c.lock.state.counter);
	else
		this_cpu_add(*b->c.lock.readers, readers);
}
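/*
 * (Note added for clarity; not in the original source.) The sub/add pair
 * above temporarily hides this transaction's own read locks from the SIX
 * lock's reader count: six_lock_write() waits for readers to drain, and
 * without this we would deadlock waiting on read locks we ourselves hold.
 * Other threads' readers are still waited for as usual.
 */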
bool __bch2_btree_node_relock(struct btree_trans *trans,
			      struct btree_path *path, unsigned level)
{
	struct btree *b = btree_path_node(path, level);
	int want = __btree_lock_want(path, level);

	if (!is_btree_node(path, level))
		return false;

	if (race_fault())
		return false;

	if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
	    (btree_node_lock_seq_matches(path, b, level) &&
	     btree_node_lock_increment(trans, b, level, want))) {
		mark_btree_node_locked(path, level, want);
		return true;
	} else {
		return false;
	}
}

static bool bch2_btree_node_upgrade(struct btree_trans *trans,
				    struct btree_path *path, unsigned level)
{
	struct btree *b = path->l[level].b;

	if (!is_btree_node(path, level))
		return false;

	switch (btree_lock_want(path, level)) {
	case BTREE_NODE_UNLOCKED:
		BUG_ON(btree_node_locked(path, level));
		return true;
	case BTREE_NODE_READ_LOCKED:
		BUG_ON(btree_node_intent_locked(path, level));
		return bch2_btree_node_relock(trans, path, level);
	case BTREE_NODE_INTENT_LOCKED:
		break;
	}

	if (btree_node_intent_locked(path, level))
		return true;

	if (race_fault())
		return false;

	if (btree_node_locked(path, level)
	    ? six_lock_tryupgrade(&b->c.lock)
	    : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
		goto success;

	if (btree_node_lock_seq_matches(path, b, level) &&
	    btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
		btree_node_unlock(path, level);
		goto success;
	}

	return false;
success:
	mark_btree_node_intent_locked(path, level);
	return true;
}

static inline bool btree_path_get_locks(struct btree_trans *trans,
					struct btree_path *path,
					bool upgrade, unsigned long trace_ip)
{
	unsigned l = path->level;
	int fail_idx = -1;

	do {
		if (!btree_path_node(path, l))
			break;

		if (!(upgrade
		      ? bch2_btree_node_upgrade(trans, path, l)
		      : bch2_btree_node_relock(trans, path, l)))
			fail_idx = l;

		l++;
	} while (l < path->locks_want);

	/*
	 * When we fail to get a lock, we have to ensure that any child nodes
	 * can't be relocked so bch2_btree_path_traverse has to walk back up to
	 * the node that we failed to relock:
	 */
	if (fail_idx >= 0) {
		__bch2_btree_path_unlock(path);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);

		do {
			path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
			--fail_idx;
		} while (fail_idx >= 0);
	}

	if (path->uptodate == BTREE_ITER_NEED_RELOCK)
		path->uptodate = BTREE_ITER_UPTODATE;

	bch2_trans_verify_locks(trans);

	return path->uptodate < BTREE_ITER_NEED_RELOCK;
}
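/*
 * (Note added for clarity; not in the original source.) Relocking hinges
 * on the SIX lock sequence number: path->l[level].lock_seq records the
 * node's sequence when it was last locked, and six_relock_type() only
 * succeeds if the node hasn't been write locked (i.e. modified) since.
 */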
static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
				  bool cached)
{
	return !cached
		? container_of(_b, struct btree, c)->key.k.p
		: container_of(_b, struct bkey_cached, c)->key.pos;
}

/* Slowpath: */
bool __bch2_btree_node_lock(struct btree_trans *trans,
			    struct btree_path *path,
			    struct btree *b,
			    struct bpos pos, unsigned level,
			    enum six_lock_type type,
			    six_lock_should_sleep_fn should_sleep_fn, void *p,
			    unsigned long ip)
{
	struct btree_path *linked, *deadlock_path = NULL;
	u64 start_time = local_clock();
	unsigned reason = 9;
	bool ret;

	/* Check if it's safe to block: */
	trans_for_each_path(trans, linked) {
		if (!linked->nodes_locked)
			continue;

		/*
		 * Can't block taking an intent lock if we have _any_ nodes read
		 * locked:
		 *
		 * - Our read lock blocks another thread with an intent lock on
		 *   the same node from getting a write lock, and thus from
		 *   dropping its intent lock
		 *
		 * - And the other thread may have multiple nodes intent locked:
		 *   both the node we want to intent lock, and the node we
		 *   already have read locked - deadlock:
		 */
		if (type == SIX_LOCK_intent &&
		    linked->nodes_locked != linked->nodes_intent_locked) {
			deadlock_path = linked;
			reason = 1;
		}

		if (linked->btree_id != path->btree_id) {
			if (linked->btree_id > path->btree_id) {
				deadlock_path = linked;
				reason = 3;
			}
			continue;
		}

		/*
		 * Within the same btree, cached paths come before non
		 * cached paths:
		 */
		if (linked->cached != path->cached) {
			if (path->cached) {
				deadlock_path = linked;
				reason = 4;
			}
			continue;
		}

		/*
		 * Interior nodes must be locked before their descendants: if
		 * another path has possible descendants locked of the node
		 * we're about to lock, it must have the ancestors locked too:
		 */
		if (level > __fls(linked->nodes_locked)) {
			deadlock_path = linked;
			reason = 5;
		}

		/* Must lock btree nodes in key order: */
		if (btree_node_locked(linked, level) &&
		    bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
						 linked->cached)) <= 0) {
			deadlock_path = linked;
			reason = 7;
		}
	}

	if (unlikely(deadlock_path)) {
		trace_trans_restart_would_deadlock(trans->ip, ip,
				trans->in_traverse_all, reason,
				deadlock_path->btree_id,
				deadlock_path->cached,
				&deadlock_path->pos,
				path->btree_id,
				path->cached,
				&pos);
		btree_trans_restart(trans);
		return false;
	}

	if (six_trylock_type(&b->c.lock, type))
		return true;

#ifdef CONFIG_BCACHEFS_DEBUG
	trans->locking_path_idx	= path->idx;
	trans->locking_pos	= pos;
	trans->locking_btree_id	= path->btree_id;
	trans->locking_level	= level;
	trans->locking		= b;
#endif

	ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;

#ifdef CONFIG_BCACHEFS_DEBUG
	trans->locking = NULL;
#endif
	if (ret)
		bch2_time_stats_update(&trans->c->times[lock_to_time_stat(type)],
				       start_time);
	return ret;
}
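/*
 * (Note added for clarity; not in the original source.) Taken together,
 * the checks above encode the global lock ordering: ascending btree ID,
 * cached before non-cached within a btree, interior nodes before their
 * descendants, and ascending key order within a level. A would-be
 * violation restarts the transaction rather than risking deadlock.
 */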
/* Btree iterator locking: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_locks(struct btree_path *path)
{
	unsigned l;

	if (!path->nodes_locked) {
		BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
		       btree_path_node(path, path->level));
		return;
	}

	for (l = 0; btree_path_node(path, l); l++)
		BUG_ON(btree_lock_want(path, l) !=
		       btree_node_locked_type(path, l));
}

void bch2_trans_verify_locks(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify_locks(path);
}
#else
static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
#endif

/* Btree path locking: */

/*
 * Only for btree_cache.c - only relocks intent locks
 */
bool bch2_btree_path_relock_intent(struct btree_trans *trans,
				   struct btree_path *path)
{
	unsigned l;

	for (l = path->level;
	     l < path->locks_want && btree_path_node(path, l);
	     l++) {
		if (!bch2_btree_node_relock(trans, path, l)) {
			__bch2_btree_path_unlock(path);
			btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
			btree_trans_restart(trans);
			return false;
		}
	}

	return true;
}

__flatten
static bool bch2_btree_path_relock(struct btree_trans *trans,
				   struct btree_path *path, unsigned long trace_ip)
{
	bool ret = btree_path_get_locks(trans, path, false, trace_ip);

	if (!ret)
		btree_trans_restart(trans);
	return ret;
}
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
			       struct btree_path *path,
			       unsigned new_locks_want)
{
	struct btree_path *linked;

	EBUG_ON(path->locks_want >= new_locks_want);

	path->locks_want = new_locks_want;

	if (btree_path_get_locks(trans, path, true, _THIS_IP_))
		return true;

	/*
	 * XXX: this is ugly - we'd prefer to not be mucking with other
	 * iterators in the btree_trans here.
	 *
	 * On failure to upgrade the iterator, setting iter->locks_want and
	 * calling get_locks() is sufficient to make bch2_btree_path_traverse()
	 * get the locks we want on transaction restart.
	 *
	 * But if this iterator was a clone, on transaction restart what we did
	 * to this iterator isn't going to be preserved.
	 *
	 * Possibly we could add an iterator field for the parent iterator when
	 * an iterator is a copy - for now, we'll just upgrade any other
	 * iterators with the same btree id.
	 *
	 * The code below used to be needed to ensure ancestor nodes get locked
	 * before interior nodes - now that's handled by
	 * bch2_btree_path_traverse_all().
	 */
	trans_for_each_path(trans, linked)
		if (linked != path &&
		    linked->cached == path->cached &&
		    linked->btree_id == path->btree_id &&
		    linked->locks_want < new_locks_want) {
			linked->locks_want = new_locks_want;
			btree_path_get_locks(trans, linked, true, _THIS_IP_);
		}

	return false;
}

void __bch2_btree_path_downgrade(struct btree_path *path,
				 unsigned new_locks_want)
{
	unsigned l;

	EBUG_ON(path->locks_want < new_locks_want);

	path->locks_want = new_locks_want;

	while (path->nodes_locked &&
	       (l = __fls(path->nodes_locked)) >= path->locks_want) {
		if (l > path->level) {
			btree_node_unlock(path, l);
		} else {
			if (btree_node_intent_locked(path, l)) {
				six_lock_downgrade(&path->l[l].b->c.lock);
				path->nodes_intent_locked ^= 1 << l;
			}
			break;
		}
	}

	bch2_btree_path_verify_locks(path);
}
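/*
 * (Note added for clarity; not in the original source.) Downgrading walks
 * locked levels from the top down: levels above path->level are unlocked
 * outright, while the level the path actually points at keeps its lock,
 * merely dropping intent to read via six_lock_downgrade().
 */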
void bch2_trans_downgrade(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_downgrade(path);
}

/* Btree transaction locking: */

bool bch2_trans_relock(struct btree_trans *trans)
{
	struct btree_path *path;

	if (unlikely(trans->restarted))
		return false;

	trans_for_each_path(trans, path)
		if (path->should_be_locked &&
		    !bch2_btree_path_relock(trans, path, _RET_IP_)) {
			trace_trans_restart_relock(trans->ip, _RET_IP_,
					path->btree_id, &path->pos);
			BUG_ON(!trans->restarted);
			return false;
		}
	return true;
}

void bch2_trans_unlock(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		__bch2_btree_path_unlock(path);
}
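/*
 * (Illustrative sketch added here; not from the original source.) The
 * typical caller pattern is to drop locks around a blocking operation and
 * then relock, treating a failed relock as a transaction restart:
 *
 *	bch2_trans_unlock(trans);
 *	ret = some_blocking_operation();
 *	if (!ret && !bch2_trans_relock(trans))
 *		ret = -EINTR;
 */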
/* Btree iterator: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_cached(struct btree_trans *trans,
					  struct btree_path *path)
{
	struct bkey_cached *ck;
	bool locked = btree_node_locked(path, 0);

	if (!bch2_btree_node_relock(trans, path, 0))
		return;

	ck = (void *) path->l[0].b;
	BUG_ON(ck->key.btree_id != path->btree_id ||
	       bkey_cmp(ck->key.pos, path->pos));

	if (!locked)
		btree_node_unlock(path, 0);
}

static void bch2_btree_path_verify_level(struct btree_trans *trans,
					 struct btree_path *path, unsigned level)
{
	struct btree_path_level *l;
	struct btree_node_iter tmp;
	bool locked;
	struct bkey_packed *p, *k;
	char buf1[100], buf2[100], buf3[100];
	const char *msg;

	if (!bch2_debug_check_iterators)
		return;

	l	= &path->l[level];
	tmp	= l->iter;
	locked	= btree_node_locked(path, level);

	if (path->cached) {
		if (!level)
			bch2_btree_path_verify_cached(trans, path);
		return;
	}

	if (!btree_path_node(path, level))
		return;

	if (!bch2_btree_node_relock(trans, path, level))
		return;

	BUG_ON(!btree_path_pos_in_node(path, l->b));

	bch2_btree_node_iter_verify(&l->iter, l->b);

	/*
	 * For interior nodes, the iterator will have skipped past deleted keys:
	 */
	p = level
		? bch2_btree_node_iter_prev(&tmp, l->b)
		: bch2_btree_node_iter_prev_all(&tmp, l->b);
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);

	if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
		msg = "before";
		goto err;
	}

	if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		msg = "after";
		goto err;
	}

	if (!locked)
		btree_node_unlock(path, level);
	return;
err:
	strcpy(buf2, "(none)");
	strcpy(buf3, "(none)");

	bch2_bpos_to_text(&PBUF(buf1), path->pos);

	if (p) {
		struct bkey uk = bkey_unpack_key(l->b, p);
		bch2_bkey_to_text(&PBUF(buf2), &uk);
	}

	if (k) {
		struct bkey uk = bkey_unpack_key(l->b, k);
		bch2_bkey_to_text(&PBUF(buf3), &uk);
	}

	panic("path should be %s key at level %u:\n"
	      "path pos %s\n"
	      "prev key %s\n"
	      "cur  key %s\n",
	      msg, level, buf1, buf2, buf3);
}
static void bch2_btree_path_verify(struct btree_trans *trans,
				   struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	unsigned i;

	EBUG_ON(path->btree_id >= BTREE_ID_NR);

	for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
		if (!path->l[i].b) {
			BUG_ON(c->btree_roots[path->btree_id].b->c.level > i);
			break;
		}

		bch2_btree_path_verify_level(trans, path, i);
	}

	bch2_btree_path_verify_locks(path);
}

void bch2_trans_verify_paths(struct btree_trans *trans)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		bch2_btree_path_verify(trans, path);
}

static void bch2_btree_iter_verify(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;

	BUG_ON(iter->btree_id >= BTREE_ID_NR);

	BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached);

	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       iter->pos.snapshot != iter->snapshot);

	BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));

	BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       !btree_type_has_snapshots(iter->btree_id));

	bch2_btree_path_verify(trans, iter->path);
}

static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
	BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
	       !iter->pos.snapshot);

	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
	       iter->pos.snapshot != iter->snapshot);

	BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
	       bkey_cmp(iter->pos, iter->k.p) > 0);
}

static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
{
	struct btree_trans *trans = iter->trans;
	struct btree_iter copy;
	struct bkey_s_c prev;
	int ret = 0;

	if (!bch2_debug_check_iterators)
		return 0;

	if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
		return 0;

	if (bkey_err(k) || !k.k)
		return 0;

	BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
					  iter->snapshot,
					  k.k->p.snapshot));

	bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
			     BTREE_ITER_ALL_SNAPSHOTS);
	prev = bch2_btree_iter_prev(&copy);
	if (!prev.k)
		goto out;

	ret = bkey_err(prev);
	if (ret)
		goto out;

	if (!bkey_cmp(prev.k->p, k.k->p) &&
	    bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
				      prev.k->p.snapshot) > 0) {
		char buf1[100], buf2[200];

		bch2_bkey_to_text(&PBUF(buf1), k.k);
		bch2_bkey_to_text(&PBUF(buf2), prev.k);

		panic("iter snap %u\n"
		      "k    %s\n"
		      "prev %s\n",
		      iter->snapshot,
		      buf1, buf2);
	}
out:
	bch2_trans_iter_exit(trans, &copy);
	return ret;
}

#else

static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
						struct btree_path *path, unsigned l) {}
static inline void bch2_btree_path_verify(struct btree_trans *trans,
					  struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }

#endif
/* Btree path: fixups after btree updates */

static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
					struct btree *b,
					struct bset_tree *t,
					struct bkey_packed *k)
{
	struct btree_node_iter_set *set;

	btree_node_iter_for_each(iter, set)
		if (set->end == t->end_offset) {
			set->k = __btree_node_key_to_offset(b, k);
			bch2_btree_node_iter_sort(iter, b);
			return;
		}

	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
}

static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
					       struct btree *b,
					       struct bkey_packed *where)
{
	struct btree_path_level *l = &path->l[b->c.level];

	if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
		return;

	if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
		bch2_btree_node_iter_advance(&l->iter, l->b);
}

void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
				      struct btree *b,
				      struct bkey_packed *where)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path) {
		__bch2_btree_path_fix_key_modified(path, b, where);
		bch2_btree_path_verify_level(trans, path, b->c.level);
	}
}

static void __bch2_btree_node_iter_fix(struct btree_path *path,
				       struct btree *b,
				       struct btree_node_iter *node_iter,
				       struct bset_tree *t,
				       struct bkey_packed *where,
				       unsigned clobber_u64s,
				       unsigned new_u64s)
{
	const struct bkey_packed *end = btree_bkey_last(b, t);
	struct btree_node_iter_set *set;
	unsigned offset = __btree_node_key_to_offset(b, where);
	int shift = new_u64s - clobber_u64s;
	unsigned old_end = t->end_offset - shift;
	unsigned orig_iter_pos = node_iter->data[0].k;
	bool iter_current_key_modified =
		orig_iter_pos >= offset &&
		orig_iter_pos <= offset + clobber_u64s;

	btree_node_iter_for_each(node_iter, set)
		if (set->end == old_end)
			goto found;

	/* didn't find the bset in the iterator - might have to readd it: */
	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		bch2_btree_node_iter_push(node_iter, b, where, end);
		goto fixup_done;
	} else {
		/* Iterator is after key that changed */
		return;
	}
found:
	set->end = t->end_offset;

	/* Iterator hasn't gotten to the key that changed yet: */
	if (set->k < offset)
		return;

	if (new_u64s &&
	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
		set->k = offset;
	} else if (set->k < offset + clobber_u64s) {
		set->k = offset + new_u64s;
		if (set->k == set->end)
			bch2_btree_node_iter_set_drop(node_iter, set);
	} else {
		/* Iterator is after key that changed */
		set->k = (int) set->k + shift;
		return;
	}

	bch2_btree_node_iter_sort(node_iter, b);
fixup_done:
	if (node_iter->data[0].k != orig_iter_pos)
		iter_current_key_modified = true;

	/*
	 * When a new key is added, and the node iterator now points to that
	 * key, the iterator might have skipped past deleted keys that should
	 * come after the key the iterator now points to. We have to rewind to
	 * before those deleted keys - otherwise
	 * bch2_btree_node_iter_prev_all() breaks:
	 */
	if (!bch2_btree_node_iter_end(node_iter) &&
	    iter_current_key_modified &&
	    b->c.level) {
		struct bset_tree *t;
		struct bkey_packed *k, *k2, *p;

		k = bch2_btree_node_iter_peek_all(node_iter, b);

		for_each_bset(b, t) {
			bool set_pos = false;

			if (node_iter->data[0].end == t->end_offset)
				continue;

			k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);

			while ((p = bch2_bkey_prev_all(b, t, k2)) &&
			       bkey_iter_cmp(b, k, p) < 0) {
				k2 = p;
				set_pos = true;
			}

			if (set_pos)
				btree_node_iter_set_set_pos(node_iter,
							    b, t, k2);
		}
	}
}
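/*
 * (Worked example added for clarity; not in the original source.) Suppose
 * a key at offset 10 occupying 3 u64s (clobber_u64s) is replaced by one
 * occupying 5 u64s (new_u64s), so shift = +2: an iterator offset below 10
 * is untouched; one past the clobbered range moves up by 2; one inside it
 * is repositioned to offset 10 if the new key compares >= the path
 * position, or to 15 (just past the new key) otherwise.
 */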
void bch2_btree_node_iter_fix(struct btree_trans *trans,
			      struct btree_path *path,
			      struct btree *b,
			      struct btree_node_iter *node_iter,
			      struct bkey_packed *where,
			      unsigned clobber_u64s,
			      unsigned new_u64s)
{
	struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
	struct btree_path *linked;

	if (node_iter != &path->l[b->c.level].iter) {
		__bch2_btree_node_iter_fix(path, b, node_iter, t,
					   where, clobber_u64s, new_u64s);

		if (bch2_debug_check_iterators)
			bch2_btree_node_iter_verify(node_iter, b);
	}

	trans_for_each_path_with_node(trans, b, linked) {
		__bch2_btree_node_iter_fix(linked, b,
					   &linked->l[b->c.level].iter, t,
					   where, clobber_u64s, new_u64s);
		bch2_btree_path_verify_level(trans, linked, b->c.level);
	}
}

/* Btree path level: pointer to a particular btree node and node iter */

static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
						  struct btree_path_level *l,
						  struct bkey *u,
						  struct bkey_packed *k)
{
	struct bkey_s_c ret;

	if (unlikely(!k)) {
		/*
		 * signal to bch2_btree_iter_peek_slot() that we're currently at
		 * a hole
		 */
		u->type = KEY_TYPE_deleted;
		return bkey_s_c_null;
	}

	ret = bkey_disassemble(l->b, k, u);

	/*
	 * XXX: bch2_btree_bset_insert_key() generates invalid keys when we
	 * overwrite extents - it sets k->type = KEY_TYPE_deleted on the key
	 * being overwritten but doesn't change k->size. But this is ok, because
	 * those keys are never written out, we just have to avoid a spurious
	 * assertion here:
	 */
	if (bch2_debug_check_bkeys && !bkey_deleted(ret.k))
		bch2_bkey_debugcheck(c, l->b, ret);

	return ret;
}

static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
							struct btree_path_level *l,
							struct bkey *u)
{
	return __btree_iter_unpack(c, l, u,
			bch2_btree_node_iter_peek_all(&l->iter, l->b));
}

static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_peek(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->key.k.p;
	trans->paths_sorted = false;
	return k;
}

static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
						    struct btree_path *path,
						    struct btree_path_level *l,
						    struct bkey *u)
{
	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
			bch2_btree_node_iter_prev(&l->iter, l->b));

	path->pos = k.k ? k.k->p : l->b->data->min_key;
	trans->paths_sorted = false;
	return k;
}

static inline bool btree_path_advance_to_pos(struct btree_path *path,
					     struct btree_path_level *l,
					     int max_advance)
{
	struct bkey_packed *k;
	int nr_advanced = 0;

	while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
	       bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
		if (max_advance > 0 && nr_advanced >= max_advance)
			return false;

		bch2_btree_node_iter_advance(&l->iter, l->b);
		nr_advanced++;
	}

	return true;
}
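/*
 * (Note added for clarity; not in the original source.) Callers pass a
 * small positive max_advance when stepping the node iterator forward is
 * likely cheaper than re-seeking it from scratch; a negative value (e.g.
 * -1) means "advance unconditionally", since the cutoff check only fires
 * when max_advance > 0.
 */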
/*
 * Verify that iterator for parent node points to child node:
 */
static void btree_path_verify_new_node(struct btree_trans *trans,
				       struct btree_path *path, struct btree *b)
{
	struct btree_path_level *l;
	unsigned plevel;
	bool parent_locked;
	struct bkey_packed *k;

	if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
		return;

	plevel = b->c.level + 1;
	if (!btree_path_node(path, plevel))
		return;

	parent_locked = btree_node_locked(path, plevel);

	if (!bch2_btree_node_relock(trans, path, plevel))
		return;

	l = &path->l[plevel];
	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
	if (!k ||
	    bkey_deleted(k) ||
	    bkey_cmp_left_packed(l->b, k, &b->key.k.p)) {
		char buf1[100];
		char buf2[100];
		char buf3[100];
		char buf4[100];
		struct bkey uk = bkey_unpack_key(b, k);

		bch2_dump_btree_node(trans->c, l->b);
		bch2_bpos_to_text(&PBUF(buf1), path->pos);
		bch2_bkey_to_text(&PBUF(buf2), &uk);
		bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
		bch2_bpos_to_text(&PBUF(buf4), b->data->max_key);
		panic("parent iter doesn't point to new node:\n"
		      "iter pos %s %s\n"
		      "iter key %s\n"
		      "new node %s-%s\n",
		      bch2_btree_ids[path->btree_id], buf1,
		      buf2, buf3, buf4);
	}

	if (!parent_locked)
		btree_node_unlock(path, plevel);
}

static inline void __btree_path_level_init(struct btree_path *path,
					   unsigned level)
{
	struct btree_path_level *l = &path->l[level];

	bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

	/*
	 * Iterators to interior nodes should always be pointed at the first non
	 * whiteout:
	 */
	if (level)
		bch2_btree_node_iter_peek(&l->iter, l->b);
}

static inline void btree_path_level_init(struct btree_trans *trans,
					 struct btree_path *path,
					 struct btree *b)
{
	BUG_ON(path->cached);

	btree_path_verify_new_node(trans, path, b);

	EBUG_ON(!btree_path_pos_in_node(path, b));
	EBUG_ON(b->c.lock.state.seq & 1);

	path->l[b->c.level].lock_seq = b->c.lock.state.seq;
	path->l[b->c.level].b = b;
	__btree_path_level_init(path, b->c.level);
}
/* Btree path: fixups after btree node updates: */

/*
 * A btree node is being replaced - update the iterator to point to the new
 * node:
 */
void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path(trans, path)
		if (!path->cached &&
		    btree_path_pos_in_node(path, b)) {
			enum btree_node_locked_type t =
				btree_lock_want(path, b->c.level);

			if (path->nodes_locked &&
			    t != BTREE_NODE_UNLOCKED) {
				btree_node_unlock(path, b->c.level);
				six_lock_increment(&b->c.lock, (enum six_lock_type) t);
				mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t);
			}

			btree_path_level_init(trans, path, b);
		}
}

/*
 * A btree node has been modified in such a way as to invalidate iterators - fix
 * them:
 */
void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
{
	struct btree_path *path;

	trans_for_each_path_with_node(trans, b, path)
		__btree_path_level_init(path, b->c.level);
}

/* Btree path: traverse, set_pos: */
static int lock_root_check_fn(struct six_lock *lock, void *p)
{
	struct btree *b = container_of(lock, struct btree, c.lock);
	struct btree **rootp = p;

	return b == *rootp ? 0 : -1;
}

static inline int btree_path_lock_root(struct btree_trans *trans,
				       struct btree_path *path,
				       unsigned depth_want,
				       unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree *b, **rootp = &c->btree_roots[path->btree_id].b;
	enum six_lock_type lock_type;
	unsigned i;

	EBUG_ON(path->nodes_locked);

	while (1) {
		b = READ_ONCE(*rootp);
		path->level = READ_ONCE(b->c.level);

		if (unlikely(path->level < depth_want)) {
			/*
			 * the root is at a lower depth than the depth we want:
			 * got to the end of the btree, or we're walking nodes
			 * greater than some depth and there are no nodes >=
			 * that depth
			 */
			path->level = depth_want;
			for (i = path->level; i < BTREE_MAX_DEPTH; i++)
				path->l[i].b = NULL;
			return 1;
		}

		lock_type = __btree_lock_want(path, path->level);
		if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX,
					      path->level, lock_type,
					      lock_root_check_fn, rootp,
					      trace_ip))) {
			if (trans->restarted)
				return -EINTR;
			continue;
		}

		if (likely(b == READ_ONCE(*rootp) &&
			   b->c.level == path->level &&
			   !race_fault())) {
			for (i = 0; i < path->level; i++)
				path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
			path->l[path->level].b = b;
			for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
				path->l[i].b = NULL;

			mark_btree_node_locked(path, path->level, lock_type);
			btree_path_level_init(trans, path, b);
			return 0;
		}

		six_unlock_type(&b->c.lock, lock_type);
	}
}
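/*
 * (Note added for clarity; not in the original source.) The root can be
 * swapped out by a concurrent split or merge between reading *rootp and
 * acquiring the lock, so lock_root_check_fn() aborts the acquisition if
 * the pointer changed; the loop then re-reads the root and retries until
 * it has locked a node that is still the current root.
 */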
noinline
static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree_node_iter node_iter = l->iter;
	struct bkey_packed *k;
	struct bkey_buf tmp;
	unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
		? (path->level > 1 ? 0 :  2)
		: (path->level > 1 ? 1 : 16);
	bool was_locked = btree_node_locked(path, path->level);
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	/* nr is the budget of child nodes to prefetch: */
	while (nr-- && !ret) {
		if (!bch2_btree_node_relock(trans, path, path->level))
			break;

		bch2_btree_node_iter_advance(&node_iter, l->b);
		k = bch2_btree_node_iter_peek(&node_iter, l->b);
		if (!k)
			break;

		bch2_bkey_buf_unpack(&tmp, c, l->b, k);
		ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
					       path->level - 1);
	}

	if (!was_locked)
		btree_node_unlock(path, path->level);

	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}

static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
					    struct btree_path *path,
					    unsigned plevel, struct btree *b)
{
	struct btree_path_level *l = &path->l[plevel];
	bool locked = btree_node_locked(path, plevel);
	struct bkey_packed *k;
	struct bch_btree_ptr_v2 *bp;

	if (!bch2_btree_node_relock(trans, path, plevel))
		return;

	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
	BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);

	bp = (void *) bkeyp_val(&l->b->format, k);
	bp->mem_ptr = (unsigned long)b;

	if (!locked)
		btree_node_unlock(path, plevel);
}

static __always_inline int btree_path_down(struct btree_trans *trans,
					   struct btree_path *path,
					   unsigned flags,
					   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_path_level *l = path_l(path);
	struct btree *b;
	unsigned level = path->level - 1;
	enum six_lock_type lock_type = __btree_lock_want(path, level);
	struct bkey_buf tmp;
	int ret;

	EBUG_ON(!btree_node_locked(path, path->level));

	bch2_bkey_buf_init(&tmp);
	bch2_bkey_buf_unpack(&tmp, c, l->b,
			 bch2_btree_node_iter_peek(&l->iter, l->b));

	b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
	ret = PTR_ERR_OR_ZERO(b);
	if (unlikely(ret))
		goto err;

	mark_btree_node_locked(path, level, lock_type);
	btree_path_level_init(trans, path, b);

	if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
	    unlikely(b != btree_node_mem_ptr(tmp.k)))
		btree_node_mem_ptr_set(trans, path, level + 1, b);

	if (flags & BTREE_ITER_PREFETCH)
		ret = btree_path_prefetch(trans, path);

	if (btree_node_read_locked(path, level + 1))
		btree_node_unlock(path, level + 1);
	path->level = level;

	bch2_btree_path_verify_locks(path);
err:
	bch2_bkey_buf_exit(&tmp, c);
	return ret;
}
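/*
 * (Note added for clarity; not in the original source.) mem_ptr in a
 * btree_ptr_v2 key caches the in-memory address of the child node so the
 * next traversal can skip the btree node cache hash lookup; it is only a
 * hint, and the node found through it is still verified against the key
 * before being used.
 */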
static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
				   unsigned, unsigned long);

static int __btree_path_traverse_all(struct btree_trans *trans, int ret,
				     unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_path *path, *prev = NULL;
	int i;

	if (trans->in_traverse_all)
		return -EINTR;

	trans->in_traverse_all = true;
retry_all:
	trans->restarted = false;

	trans_for_each_path(trans, path)
		path->should_be_locked = false;

	btree_trans_sort_paths(trans);

	trans_for_each_path_inorder_reverse(trans, path, i) {
		if (prev) {
			if (path->btree_id == prev->btree_id &&
			    path->locks_want < prev->locks_want)
				__bch2_btree_path_upgrade(trans, path, prev->locks_want);
			else if (!path->locks_want && prev->locks_want)
				__bch2_btree_path_upgrade(trans, path, 1);
		}

		prev = path;
	}

	bch2_trans_unlock(trans);
	cond_resched();

	if (unlikely(ret == -ENOMEM)) {
		struct closure cl;

		closure_init_stack(&cl);

		do {
			ret = bch2_btree_cache_cannibalize_lock(c, &cl);
			closure_sync(&cl);
		} while (ret);
	}

	if (unlikely(ret == -EIO)) {
		trans->error = true;
		goto out;
	}

	BUG_ON(ret && ret != -EINTR);

	/* Now, redo traversals in correct order: */
	i = 0;
	while (i < trans->nr_sorted) {
		path = trans->paths + trans->sorted[i];

		EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));

		ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
		if (ret)
			goto retry_all;

		EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));

		if (path->nodes_locked ||
		    !btree_path_node(path, path->level))
			i++;
	}

	/*
	 * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock()
	 * and relock(), relock() won't relock since path->should_be_locked
	 * isn't set yet, which is all fine
	 */
	trans_for_each_path(trans, path)
		BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
out:
	bch2_btree_cache_cannibalize_unlock(c);

	trace_trans_traverse_all(trans->ip, trace_ip);
	return ret;
}

static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
	return __btree_path_traverse_all(trans, 0, _RET_IP_);
}
static inline bool btree_path_good_node(struct btree_trans *trans,
					struct btree_path *path,
					unsigned l, int check_pos)
{
	if (!is_btree_node(path, l) ||
	    !bch2_btree_node_relock(trans, path, l))
		return false;

	if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
		return false;
	if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
		return false;
	return true;
}

static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
						     struct btree_path *path,
						     int check_pos)
{
	unsigned i, l = path->level;

	while (btree_path_node(path, l) &&
	       !btree_path_good_node(trans, path, l, check_pos)) {
		btree_node_unlock(path, l);
		path->l[l].b = BTREE_ITER_NO_NODE_UP;
		l++;
	}

	/* If we need intent locks, take them too: */
	for (i = l + 1;
	     i < path->locks_want && btree_path_node(path, i);
	     i++)
		if (!bch2_btree_node_relock(trans, path, i))
			while (l <= i) {
				btree_node_unlock(path, l);
				path->l[l].b = BTREE_ITER_NO_NODE_UP;
				l++;
			}

	return l;
}
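/*
 * (Note added for clarity; not in the original source.) check_pos encodes
 * which way the path position moved: negative means it may now be before
 * a node, positive that it may be after one, and zero that only a failed
 * relock can invalidate a level. The walk returns the lowest level whose
 * node is still usable.
 */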
/*
 * This is the main state machine for walking down the btree - walks down to a
 * specified depth
 *
 * Returns 0 on success, -EIO on error (error reading in a btree node).
 *
 * On error, caller (peek_node()/peek_key()) must return NULL; the error is
 * stashed in the iterator and returned from bch2_trans_exit().
 */
static int btree_path_traverse_one(struct btree_trans *trans,
				   struct btree_path *path,
				   unsigned flags,
				   unsigned long trace_ip)
{
	unsigned depth_want = path->level;
	int ret = 0;

	/*
	 * Ensure we obey path->should_be_locked: if it's set, we can't unlock
	 * and re-traverse the path without a transaction restart:
	 */
	if (path->should_be_locked) {
		ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR;
		goto out;
	}

	if (path->cached) {
		ret = bch2_btree_path_traverse_cached(trans, path, flags);
		goto out;
	}

	if (unlikely(path->level >= BTREE_MAX_DEPTH))
		goto out;

	path->level = btree_path_up_until_good_node(trans, path, 0);

	/*
	 * Note: path->nodes[path->level] may be temporarily NULL here - that
	 * would indicate to other code that we got to the end of the btree,
	 * here it indicates that relocking the root failed - it's critical that
	 * btree_path_lock_root() comes next and that it can't fail
	 */
	while (path->level > depth_want) {
		ret = btree_path_node(path, path->level)
			? btree_path_down(trans, path, flags, trace_ip)
			: btree_path_lock_root(trans, path, depth_want, trace_ip);
		if (unlikely(ret)) {
			if (ret == 1) {
				/*
				 * No nodes at this level - got to the end of
				 * the btree:
				 */
				ret = 0;
				goto out;
			}

			__bch2_btree_path_unlock(path);
			path->level = depth_want;

			if (ret == -EIO)
				path->l[path->level].b =
					BTREE_ITER_NO_NODE_ERROR;
			else
				path->l[path->level].b =
					BTREE_ITER_NO_NODE_DOWN;
			goto out;
		}
	}

	path->uptodate = BTREE_ITER_UPTODATE;
out:
	BUG_ON((ret == -EINTR) != !!trans->restarted);
	bch2_btree_path_verify(trans, path);
	return ret;
}
67e0dd8f KO |
1511 | static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long); |
1512 | ||
1513 | int __must_check bch2_btree_path_traverse(struct btree_trans *trans, | |
1514 | struct btree_path *path, unsigned flags) | |
1c6fdbd8 KO |
1515 | { |
1516 | int ret; | |
1517 | ||
67e0dd8f KO |
1518 | if (path->uptodate < BTREE_ITER_NEED_RELOCK) |
1519 | return 0; | |
1520 | ||
2ca88e5a | 1521 | ret = bch2_trans_cond_resched(trans) ?: |
67e0dd8f KO |
1522 | btree_path_traverse_one(trans, path, flags, _RET_IP_); |
1523 | if (unlikely(ret) && hweight64(trans->paths_allocated) == 1) { | |
1524 | ret = __btree_path_traverse_all(trans, ret, _RET_IP_); | |
b4e09b35 KO |
1525 | BUG_ON(ret == -EINTR); |
1526 | } | |
1c6fdbd8 | 1527 | |
1c6fdbd8 KO |
1528 | return ret; |
1529 | } | |
1530 | ||
67e0dd8f KO |
1531 | static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst, |
1532 | struct btree_path *src) | |
1533 | { | |
1534 | unsigned i, offset = offsetof(struct btree_path, pos); | |
1535 | ||
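	/*
	 * Copy everything from @pos onwards; the fields before @pos identify
	 * @dst (its slot index and refcounts) and are deliberately preserved:
	 */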
1536 | memcpy((void *) dst + offset, | |
1537 | (void *) src + offset, | |
1538 | sizeof(struct btree_path) - offset); | |
1539 | ||
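	/* @dst now points at the same locked nodes as @src: take extra lock refs: */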
1540 | for (i = 0; i < BTREE_MAX_DEPTH; i++) | |
1541 | if (btree_node_locked(dst, i)) | |
1542 | six_lock_increment(&dst->l[i].b->c.lock, | |
1543 | __btree_lock_want(dst, i)); | |
1544 | ||
1545 | trans->paths_sorted = false; | |
1546 | } | |
1547 | ||
c404f203 KO |
1548 | static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src, |
1549 | bool intent) | |
67e0dd8f | 1550 | { |
c404f203 | 1551 | struct btree_path *new = btree_path_alloc(trans, src); |
67e0dd8f | 1552 | |
c404f203 | 1553 | btree_path_copy(trans, new, src); |
67e0dd8f | 1554 | __btree_path_get(new, intent); |
c404f203 KO |
1555 | return new; |
1556 | } | |
1557 | ||
1558 | struct btree_path * __must_check | |
1559 | __bch2_btree_path_make_mut(struct btree_trans *trans, | |
1560 | struct btree_path *path, bool intent) | |
1561 | { | |
67e0dd8f | 1562 | __btree_path_put(path, intent); |
c404f203 | 1563 | path = btree_path_clone(trans, path, intent); |
67e0dd8f KO |
1564 | path->preserve = false; |
1565 | #ifdef CONFIG_BCACHEFS_DEBUG | |
1566 | path->ip_allocated = _RET_IP_; | |
1567 | #endif | |
1568 | return path; | |
1569 | } | |
1570 | ||
1571 | static struct btree_path * __must_check | |
1572 | __bch2_btree_path_set_pos(struct btree_trans *trans, | |
1573 | struct btree_path *path, struct bpos new_pos, | |
1574 | bool intent, int cmp) | |
1575 | { | |
67e0dd8f KO |
1576 | unsigned l = path->level; |
1577 | ||
1578 | EBUG_ON(trans->restarted); | |
1579 | EBUG_ON(!path->ref); | |
1580 | ||
1581 | path = bch2_btree_path_make_mut(trans, path, intent); | |
1582 | ||
1583 | path->pos = new_pos; | |
1584 | path->should_be_locked = false; | |
1585 | trans->paths_sorted = false; | |
1586 | ||
1587 | if (unlikely(path->cached)) { | |
1588 | btree_node_unlock(path, 0); | |
1589 | path->l[0].b = BTREE_ITER_NO_NODE_CACHED; | |
1590 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); | |
1591 | goto out; | |
1592 | } | |
1593 | ||
1594 | l = btree_path_up_until_good_node(trans, path, cmp); | |
1595 | ||
1596 | if (btree_path_node(path, l)) { | |
1597 | /* | |
1598 | * We might have to skip over many keys, or just a few: try |
1599 | * advancing the node iterator, and if we have to skip over too |
1600 | * many keys just reinit it (likewise if we're rewinding, since |
1601 | * rewinding the node iterator is expensive). |
1602 | */ | |
1603 | if (cmp < 0 || | |
1604 | !btree_path_advance_to_pos(path, &path->l[l], 8)) | |
1605 | __btree_path_level_init(path, l); | |
67e0dd8f KO |
1606 | } |
1607 | ||
1d3ecd7e | 1608 | if (l != path->level) { |
67e0dd8f | 1609 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); |
1d3ecd7e KO |
1610 | __bch2_btree_path_unlock(path); |
1611 | } | |
67e0dd8f KO |
1612 | out: |
1613 | bch2_btree_path_verify(trans, path); | |
67e0dd8f KO |
1614 | return path; |
1615 | } | |
1616 | ||
1617 | static inline struct btree_path * __must_check | |
1618 | btree_path_set_pos(struct btree_trans *trans, | |
1619 | struct btree_path *path, struct bpos new_pos, | |
1620 | bool intent) | |
1621 | { | |
1622 | int cmp = bpos_cmp(new_pos, path->pos); | |
1623 | ||
1624 | return cmp | |
1625 | ? __bch2_btree_path_set_pos(trans, path, new_pos, intent, cmp) | |
1626 | : path; | |
1627 | } | |
1628 | ||
1629 | /* Btree path: main interface: */ | |
1630 | ||
1631 | static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path) | |
1632 | { | |
1633 | struct btree_path *next; | |
1634 | ||
1635 | next = prev_btree_path(trans, path); | |
1636 | if (next && !btree_path_cmp(next, path)) | |
1637 | return next; | |
1638 | ||
1639 | next = next_btree_path(trans, path); | |
1640 | if (next && !btree_path_cmp(next, path)) | |
1641 | return next; | |
1642 | ||
1643 | return NULL; | |
1644 | } | |
1645 | ||
1646 | static bool have_node_at_pos(struct btree_trans *trans, struct btree_path *path) | |
1647 | { | |
1648 | struct btree_path *next; | |
1649 | ||
1650 | next = prev_btree_path(trans, path); | |
1651 | if (next && path_l(next)->b == path_l(path)->b) | |
1652 | return true; | |
1653 | ||
1654 | next = next_btree_path(trans, path); | |
1655 | if (next && path_l(next)->b == path_l(path)->b) | |
1656 | return true; | |
1657 | ||
1658 | return false; | |
1659 | } | |
1660 | ||
1661 | static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path) | |
08070cba | 1662 | { |
67e0dd8f KO |
1663 | __bch2_btree_path_unlock(path); |
1664 | btree_path_list_remove(trans, path); | |
1665 | trans->paths_allocated &= ~(1ULL << path->idx); | |
08070cba KO |
1666 | } |
1667 | ||
67e0dd8f KO |
1668 | void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) |
1669 | { | |
1670 | struct btree_path *dup; | |
1671 | ||
1672 | EBUG_ON(trans->paths + path->idx != path); | |
1673 | EBUG_ON(!path->ref); | |
1674 | ||
1675 | if (!__btree_path_put(path, intent)) | |
1676 | return; | |
1677 | ||
1678 | /* | |
1679 | * Perhaps instead we should check for duplicate paths in traverse_all: | |
1680 | */ | |
1681 | if (path->preserve && | |
1682 | (dup = have_path_at_pos(trans, path))) { | |
1683 | dup->preserve = true; | |
1684 | path->preserve = false; | |
1685 | } | |
1686 | ||
1687 | if (!path->preserve && | |
1688 | have_node_at_pos(trans, path)) | |
1689 | __bch2_path_free(trans, path); | |
1690 | } | |
1691 | ||
1692 | noinline __cold | |
1693 | void bch2_dump_trans_paths_updates(struct btree_trans *trans) | |
1694 | { | |
1695 | struct btree_path *path; | |
1696 | struct btree_insert_entry *i; | |
1697 | unsigned idx; | |
60816d9b | 1698 | char buf1[300], buf2[300]; |
67e0dd8f KO |
1699 | |
1700 | btree_trans_sort_paths(trans); | |
1701 | ||
1702 | trans_for_each_path_inorder(trans, path, idx) | |
1703 | printk(KERN_ERR "path: idx %u ref %u:%u%s btree %s pos %s %pS\n", | |
1704 | path->idx, path->ref, path->intent_ref, | |
1705 | path->preserve ? " preserve" : "", | |
1706 | bch2_btree_ids[path->btree_id], | |
60816d9b | 1707 | (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1), |
67e0dd8f KO |
1708 | #ifdef CONFIG_BCACHEFS_DEBUG |
1709 | (void *) path->ip_allocated | |
1710 | #else | |
1711 | NULL | |
1712 | #endif | |
1713 | ); | |
1714 | ||
60816d9b KO |
1715 | trans_for_each_update(trans, i) { |
1716 | struct bkey u; | |
1717 | struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u); | |
1718 | ||
1719 | printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s\n", |
67e0dd8f | 1720 | bch2_btree_ids[i->btree_id], |
60816d9b KO |
1721 | (void *) i->ip_allocated, |
1722 | (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, old), buf1), | |
1723 | (bch2_bkey_val_to_text(&PBUF(buf2), trans->c, bkey_i_to_s_c(i->k)), buf2)); | |
1724 | } | |
67e0dd8f KO |
1725 | } |
1726 | ||
1727 | static struct btree_path *btree_path_alloc(struct btree_trans *trans, | |
1728 | struct btree_path *pos) | |
1729 | { | |
1730 | struct btree_path *path; | |
1731 | unsigned idx; | |
1732 | ||
1733 | if (unlikely(trans->paths_allocated == | |
1734 | ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) { | |
1735 | bch2_dump_trans_paths_updates(trans); | |
1736 | panic("trans path overflow\n"); |
1737 | } | |
1738 | ||
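	/* Claim the first free slot in the paths_allocated bitmap: */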
1739 | idx = __ffs64(~trans->paths_allocated); | |
1740 | trans->paths_allocated |= 1ULL << idx; | |
1741 | ||
1742 | path = &trans->paths[idx]; | |
1743 | ||
1744 | path->idx = idx; | |
1745 | path->ref = 0; | |
1746 | path->intent_ref = 0; | |
1747 | path->nodes_locked = 0; | |
1748 | path->nodes_intent_locked = 0; | |
1749 | ||
1750 | btree_path_list_add(trans, pos, path); | |
1751 | return path; | |
1752 | } | |
1753 | ||
1754 | struct btree_path *bch2_path_get(struct btree_trans *trans, bool cached, | |
1755 | enum btree_id btree_id, struct bpos pos, | |
1756 | unsigned locks_want, unsigned level, | |
1757 | bool intent) | |
1758 | { | |
807dda8c | 1759 | struct btree_path *path, *path_pos = NULL; |
67e0dd8f KO |
1760 | int i; |
1761 | ||
1762 | BUG_ON(trans->restarted); | |
1763 | ||
807dda8c | 1764 | btree_trans_sort_paths(trans); |
67e0dd8f | 1765 | |
807dda8c KO |
1766 | trans_for_each_path_inorder(trans, path, i) { |
1767 | if (__btree_path_cmp(path, | |
1768 | btree_id, | |
1769 | cached, | |
1770 | pos, | |
1771 | level) > 0) | |
1772 | break; | |
67e0dd8f | 1773 | |
807dda8c | 1774 | path_pos = path; |
67e0dd8f KO |
1775 | } |
1776 | ||
807dda8c KO |
1777 | if (path_pos && |
1778 | path_pos->cached == cached && | |
1779 | path_pos->btree_id == btree_id && | |
1780 | path_pos->level == level) { | |
1781 | __btree_path_get(path_pos, intent); | |
1782 | path = btree_path_set_pos(trans, path_pos, pos, intent); | |
67e0dd8f KO |
1783 | path->preserve = true; |
1784 | } else { | |
807dda8c KO |
1785 | path = btree_path_alloc(trans, path_pos); |
1786 | path_pos = NULL; | |
67e0dd8f KO |
1787 | |
1788 | __btree_path_get(path, intent); | |
1789 | path->pos = pos; | |
1790 | path->btree_id = btree_id; | |
1791 | path->cached = cached; | |
1792 | path->preserve = true; | |
1793 | path->uptodate = BTREE_ITER_NEED_TRAVERSE; | |
1794 | path->should_be_locked = false; | |
1795 | path->level = level; | |
1796 | path->locks_want = locks_want; | |
1797 | path->nodes_locked = 0; | |
1798 | path->nodes_intent_locked = 0; | |
1799 | for (i = 0; i < ARRAY_SIZE(path->l); i++) | |
1800 | path->l[i].b = BTREE_ITER_NO_NODE_INIT; | |
1801 | #ifdef CONFIG_BCACHEFS_DEBUG | |
1802 | path->ip_allocated = _RET_IP_; | |
1803 | #endif | |
1804 | trans->paths_sorted = false; | |
1805 | } | |
1806 | ||
1807 | if (path->intent_ref) | |
1808 | locks_want = max(locks_want, level + 1); | |
1809 | ||
1810 | /* | |
1811 | * If the path already has locks_want greater than requested, we don't |
1812 | * downgrade it here: on a transaction restart (e.g. a btree node split |
1813 | * needing to upgrade locks) we might be putting/getting the iterator again. |
1814 | * Downgrading iterators only happens via bch2_trans_downgrade(), after | |
1815 | * a successful transaction commit. | |
1816 | */ | |
1817 | ||
1818 | locks_want = min(locks_want, BTREE_MAX_DEPTH); | |
1819 | if (locks_want > path->locks_want) { | |
1820 | path->locks_want = locks_want; | |
1821 | btree_path_get_locks(trans, path, true, _THIS_IP_); | |
1822 | } | |
1823 | ||
67e0dd8f KO |
1824 | return path; |
1825 | } | |
1826 | ||
1827 | inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u) | |
1828 | { | |
1830 | struct bkey_s_c k; | |
1831 | ||
1832 | BUG_ON(path->uptodate != BTREE_ITER_UPTODATE); | |
1833 | ||
1834 | if (!path->cached) { | |
1835 | struct btree_path_level *l = path_l(path); | |
1836 | struct bkey_packed *_k = | |
1837 | bch2_btree_node_iter_peek_all(&l->iter, l->b); | |
1838 | ||
1839 | k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null; | |
1840 | ||
1841 | EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0); | |
1842 | ||
1843 | if (!k.k || bpos_cmp(path->pos, k.k->p)) | |
1844 | goto hole; | |
1845 | } else { | |
1846 | struct bkey_cached *ck = (void *) path->l[0].b; | |
1847 | ||
1848 | EBUG_ON(path->btree_id != ck->key.btree_id || | |
1849 | bkey_cmp(path->pos, ck->key.pos)); | |
1850 | ||
1851 | /* BTREE_ITER_CACHED_NOFILL? */ | |
1852 | if (unlikely(!ck->valid)) | |
1853 | goto hole; | |
1854 | ||
1855 | k = bkey_i_to_s_c(ck->k); | |
1856 | } | |
1857 | ||
1858 | return k; | |
1859 | hole: | |
1860 | bkey_init(u); | |
1861 | u->p = path->pos; | |
1862 | return (struct bkey_s_c) { u, NULL }; | |
1863 | } | |
1864 | ||
1865 | /* Btree iterators: */ | |
1866 | ||
db92f2ea KO |
1867 | int __must_check |
1868 | __bch2_btree_iter_traverse(struct btree_iter *iter) | |
1869 | { | |
1870 | return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); | |
1871 | } | |
1872 | ||
08070cba KO |
1873 | int __must_check |
1874 | bch2_btree_iter_traverse(struct btree_iter *iter) | |
1875 | { | |
66a0a497 KO |
1876 | int ret; |
1877 | ||
67e0dd8f KO |
1878 | iter->path = btree_path_set_pos(iter->trans, iter->path, |
1879 | btree_iter_search_key(iter), | |
1880 | iter->flags & BTREE_ITER_INTENT); | |
08070cba | 1881 | |
67e0dd8f | 1882 | ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); |
66a0a497 KO |
1883 | if (ret) |
1884 | return ret; | |
1885 | ||
67e0dd8f | 1886 | iter->path->should_be_locked = true; |
66a0a497 | 1887 | return 0; |
08070cba KO |
1888 | } |
1889 | ||
1c6fdbd8 KO |
1890 | /* Iterate across nodes (leaf and interior nodes) */ |
1891 | ||
1892 | struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) | |
1893 | { | |
502027a8 | 1894 | struct btree_trans *trans = iter->trans; |
f21566f1 | 1895 | struct btree *b = NULL; |
1c6fdbd8 KO |
1896 | int ret; |
1897 | ||
67e0dd8f | 1898 | EBUG_ON(iter->path->cached); |
7e1a3aa9 | 1899 | bch2_btree_iter_verify(iter); |
1c6fdbd8 | 1900 | |
502027a8 | 1901 | ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); |
1c6fdbd8 | 1902 | if (ret) |
d355c6f4 | 1903 | goto err; |
1c6fdbd8 | 1904 | |
67e0dd8f | 1905 | b = btree_path_node(iter->path, iter->path->level); |
1c6fdbd8 | 1906 | if (!b) |
f21566f1 | 1907 | goto out; |
1c6fdbd8 | 1908 | |
4cf91b02 | 1909 | BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0); |
1c6fdbd8 | 1910 | |
f21566f1 | 1911 | bkey_init(&iter->k); |
67e0dd8f | 1912 | iter->k.p = iter->pos = b->key.k.p; |
502027a8 KO |
1913 | |
1914 | iter->path = btree_path_set_pos(trans, iter->path, b->key.k.p, | |
1915 | iter->flags & BTREE_ITER_INTENT); | |
67e0dd8f | 1916 | iter->path->should_be_locked = true; |
502027a8 | 1917 | BUG_ON(iter->path->uptodate); |
f21566f1 KO |
1918 | out: |
1919 | bch2_btree_iter_verify_entry_exit(iter); | |
1920 | bch2_btree_iter_verify(iter); | |
2dac0eae | 1921 | |
1c6fdbd8 | 1922 | return b; |
d355c6f4 KO |
1923 | err: |
1924 | b = ERR_PTR(ret); | |
1925 | goto out; | |
1c6fdbd8 KO |
1926 | } |
1927 | ||
2dac0eae | 1928 | struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) |
1c6fdbd8 | 1929 | { |
67e0dd8f KO |
1930 | struct btree_trans *trans = iter->trans; |
1931 | struct btree_path *path = iter->path; | |
f21566f1 | 1932 | struct btree *b = NULL; |
1c6fdbd8 KO |
1933 | int ret; |
1934 | ||
67e0dd8f | 1935 | EBUG_ON(iter->path->cached); |
7e1a3aa9 | 1936 | bch2_btree_iter_verify(iter); |
1c6fdbd8 KO |
1937 | |
1938 | /* already got to end? */ | |
67e0dd8f | 1939 | if (!btree_path_node(path, path->level)) |
f21566f1 | 1940 | goto out; |
1c6fdbd8 | 1941 | |
d355c6f4 KO |
1942 | ret = bch2_trans_cond_resched(trans); |
1943 | if (ret) | |
1944 | goto err; | |
1dd7f9d9 | 1945 | |
67e0dd8f KO |
1946 | btree_node_unlock(path, path->level); |
1947 | path->l[path->level].b = BTREE_ITER_NO_NODE_UP; | |
1948 | path->level++; | |
1c6fdbd8 | 1949 | |
67e0dd8f KO |
1950 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); |
1951 | ret = bch2_btree_path_traverse(trans, path, iter->flags); | |
1c6fdbd8 | 1952 | if (ret) |
d355c6f4 | 1953 | goto err; |
1c6fdbd8 KO |
1954 | |
1955 | /* got to end? */ | |
67e0dd8f | 1956 | b = btree_path_node(path, path->level); |
1c6fdbd8 | 1957 | if (!b) |
f21566f1 | 1958 | goto out; |
1c6fdbd8 | 1959 | |
4cf91b02 | 1960 | if (bpos_cmp(iter->pos, b->key.k.p) < 0) { |
1c6fdbd8 KO |
1961 | /* |
1962 | * Haven't gotten to the end of the parent node: go back down to | |
1963 | * the next child node | |
1964 | */ | |
67e0dd8f KO |
1965 | path = iter->path = |
1966 | btree_path_set_pos(trans, path, bpos_successor(iter->pos), | |
1967 | iter->flags & BTREE_ITER_INTENT); | |
1c6fdbd8 | 1968 | |
345ca825 | 1969 | /* Unlock to avoid screwing up our lock invariants: */ |
67e0dd8f | 1970 | btree_node_unlock(path, path->level); |
1c6fdbd8 | 1971 | |
67e0dd8f KO |
1972 | path->level = iter->min_depth; |
1973 | btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); | |
345ca825 KO |
1974 | bch2_btree_iter_verify(iter); |
1975 | ||
67e0dd8f | 1976 | ret = bch2_btree_path_traverse(trans, path, iter->flags); |
d355c6f4 KO |
1977 | if (ret) |
1978 | goto err; | |
1c6fdbd8 | 1979 | |
67e0dd8f | 1980 | b = path->l[path->level].b; |
1c6fdbd8 KO |
1981 | } |
1982 | ||
f21566f1 | 1983 | bkey_init(&iter->k); |
67e0dd8f | 1984 | iter->k.p = iter->pos = b->key.k.p; |
502027a8 KO |
1985 | |
1986 | iter->path = btree_path_set_pos(trans, iter->path, b->key.k.p, | |
1987 | iter->flags & BTREE_ITER_INTENT); | |
67e0dd8f | 1988 | iter->path->should_be_locked = true; |
502027a8 | 1989 | BUG_ON(iter->path->uptodate); |
f21566f1 KO |
1990 | out: |
1991 | bch2_btree_iter_verify_entry_exit(iter); | |
1992 | bch2_btree_iter_verify(iter); | |
2dac0eae | 1993 | |
1c6fdbd8 | 1994 | return b; |
d355c6f4 KO |
1995 | err: |
1996 | b = ERR_PTR(ret); | |
1997 | goto out; | |
1c6fdbd8 KO |
1998 | } |
1999 | ||
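Taken together, bch2_btree_iter_peek_node() and bch2_btree_iter_next_node() walk every node in a btree, leaf and interior. A minimal sketch of the calling pattern, assuming a struct bch_fs *c and a btree_id in scope and eliding -EINTR restart handling (in-tree callers normally go through the for_each_btree_node() wrapper):

	struct btree_trans trans;
	struct btree_iter iter;
	struct btree *b;

	bch2_trans_init(&trans, c, 0, 0);
	bch2_trans_node_iter_init(&trans, &iter, btree_id, POS_MIN,
				  0, 0, BTREE_ITER_PREFETCH);

	for (b = bch2_btree_iter_peek_node(&iter);
	     b && !IS_ERR(b);		/* NULL: end of btree; ERR_PTR: error */
	     b = bch2_btree_iter_next_node(&iter)) {
		/* inspect b */
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);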
2000 | /* Iterate across keys (in leaf nodes only) */ | |
2001 | ||
e0ba3b64 | 2002 | inline bool bch2_btree_iter_advance(struct btree_iter *iter) |
434094be | 2003 | { |
3d495595 | 2004 | struct bpos pos = iter->k.p; |
618b1c0e | 2005 | bool ret = bpos_cmp(pos, SPOS_MAX) != 0; |
3d495595 | 2006 | |
7e1a3aa9 | 2007 | if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) |
e751c01a | 2008 | pos = bkey_successor(iter, pos); |
3d495595 | 2009 | bch2_btree_iter_set_pos(iter, pos); |
7e1a3aa9 | 2010 | return ret; |
3d495595 KO |
2011 | } |
2012 | ||
e0ba3b64 | 2013 | inline bool bch2_btree_iter_rewind(struct btree_iter *iter) |
3d495595 KO |
2014 | { |
2015 | struct bpos pos = bkey_start_pos(&iter->k); | |
71f892a4 KO |
2016 | bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS |
2017 | ? bpos_cmp(pos, POS_MIN) | |
2018 | : bkey_cmp(pos, POS_MIN)) != 0; | |
3d495595 | 2019 | |
7e1a3aa9 | 2020 | if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) |
e751c01a | 2021 | pos = bkey_predecessor(iter, pos); |
3d495595 | 2022 | bch2_btree_iter_set_pos(iter, pos); |
7e1a3aa9 | 2023 | return ret; |
434094be KO |
2024 | } |
2025 | ||
67e0dd8f | 2026 | struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter) |
818664f5 KO |
2027 | { |
2028 | struct btree_insert_entry *i; | |
5288e66a | 2029 | struct bkey_i *ret = NULL; |
818664f5 | 2030 | |
8e6bbc41 | 2031 | trans_for_each_update(iter->trans, i) { |
5288e66a KO |
2032 | if (i->btree_id < iter->btree_id) |
2033 | continue; | |
2034 | if (i->btree_id > iter->btree_id) | |
818664f5 | 2035 | break; |
67e0dd8f | 2036 | if (bpos_cmp(i->k->k.p, iter->path->pos) < 0) |
5288e66a KO |
2037 | continue; |
2038 | if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0) | |
2039 | ret = i->k; | |
2040 | } | |
818664f5 | 2041 | |
5288e66a KO |
2042 | return ret; |
2043 | } | |
2044 | ||
5288e66a KO |
2045 | /** |
2046 | * bch2_btree_iter_peek: returns first key greater than or equal to iterator's | |
2047 | * current position | |
2048 | */ | |
2049 | struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) | |
1c6fdbd8 | 2050 | { |
67e0dd8f | 2051 | struct btree_trans *trans = iter->trans; |
818664f5 | 2052 | struct bpos search_key = btree_iter_search_key(iter); |
360746bf | 2053 | struct bkey_i *next_update; |
1c6fdbd8 | 2054 | struct bkey_s_c k; |
67e0dd8f | 2055 | int ret, cmp; |
1c6fdbd8 | 2056 | |
67e0dd8f | 2057 | EBUG_ON(iter->path->cached || iter->path->level); |
7e1a3aa9 KO |
2058 | bch2_btree_iter_verify(iter); |
2059 | bch2_btree_iter_verify_entry_exit(iter); | |
1c6fdbd8 | 2060 | |
1c6fdbd8 | 2061 | while (1) { |
67e0dd8f KO |
2062 | iter->path = btree_path_set_pos(trans, iter->path, search_key, |
2063 | iter->flags & BTREE_ITER_INTENT); | |
c8476a4e | 2064 | |
67e0dd8f | 2065 | ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); |
e6e024e9 KO |
2066 | if (unlikely(ret)) { |
2067 | /* ensure that iter->k is consistent with iter->pos: */ | |
2068 | bch2_btree_iter_set_pos(iter, iter->pos); | |
2069 | k = bkey_s_c_err(ret); | |
2070 | goto out; | |
2071 | } | |
1c6fdbd8 | 2072 | |
d2c50773 | 2073 | next_update = btree_trans_peek_updates(iter); |
67e0dd8f | 2074 | k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k); |
e6e024e9 KO |
2075 | |
2076 | /* In the btree, deleted keys sort before non-deleted: */ |
2077 | if (k.k && bkey_deleted(k.k) && | |
2078 | (!next_update || | |
2079 | bpos_cmp(k.k->p, next_update->k.p) <= 0)) { | |
2080 | search_key = k.k->p; | |
2081 | continue; | |
2082 | } | |
2083 | ||
818664f5 | 2084 | if (next_update && |
e6e024e9 | 2085 | bpos_cmp(next_update->k.p, |
67e0dd8f | 2086 | k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) { |
5288e66a | 2087 | iter->k = next_update->k; |
818664f5 | 2088 | k = bkey_i_to_s_c(next_update); |
5288e66a | 2089 | } |
818664f5 KO |
2090 | |
2091 | if (likely(k.k)) { | |
c075ff70 KO |
2092 | /* |
2093 | * We can never have a key in a leaf node at POS_MAX, so | |
2094 | * we don't have to check these successor() calls: | |
2095 | */ | |
2096 | if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && | |
2097 | !bch2_snapshot_is_ancestor(trans->c, | |
2098 | iter->snapshot, | |
2099 | k.k->p.snapshot)) { | |
2100 | search_key = bpos_successor(k.k->p); | |
2101 | continue; | |
2102 | } | |
e6e024e9 | 2103 | |
c075ff70 KO |
2104 | if (bkey_whiteout(k.k) && |
2105 | !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) { | |
2106 | search_key = bkey_successor(iter, k.k->p); | |
2107 | continue; | |
2108 | } | |
2109 | ||
2110 | break; | |
67e0dd8f | 2111 | } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) { |
e6e024e9 | 2112 | /* Advance to next leaf node: */ |
67e0dd8f | 2113 | search_key = bpos_successor(iter->path->l[0].b->key.k.p); |
e6e024e9 KO |
2114 | } else { |
2115 | /* End of btree: */ | |
c8476a4e KO |
2116 | bch2_btree_iter_set_pos(iter, SPOS_MAX); |
2117 | k = bkey_s_c_null; | |
2118 | goto out; | |
2119 | } | |
1c6fdbd8 KO |
2120 | } |
2121 | ||
2122 | /* | |
818664f5 KO |
2123 | * iter->pos should be monotonically increasing, and always be equal to |
2124 | * the key we just returned - except extents can straddle iter->pos: | |
1c6fdbd8 | 2125 | */ |
d7f35163 KO |
2126 | if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) |
2127 | iter->pos = k.k->p; | |
2128 | else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) | |
1c6fdbd8 | 2129 | iter->pos = bkey_start_pos(k.k); |
67e0dd8f | 2130 | |
c075ff70 KO |
2131 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) |
2132 | iter->pos.snapshot = iter->snapshot; | |
2133 | ||
67e0dd8f KO |
2134 | cmp = bpos_cmp(k.k->p, iter->path->pos); |
2135 | if (cmp) { | |
c404f203 KO |
2136 | iter->path = bch2_btree_path_make_mut(trans, iter->path, |
2137 | iter->flags & BTREE_ITER_INTENT); | |
67e0dd8f KO |
2138 | iter->path->pos = k.k->p; |
2139 | trans->paths_sorted = false; | |
2140 | } | |
c8476a4e | 2141 | out: |
67e0dd8f KO |
2142 | iter->path->should_be_locked = true; |
2143 | ||
7e1a3aa9 KO |
2144 | bch2_btree_iter_verify_entry_exit(iter); |
2145 | bch2_btree_iter_verify(iter); | |
c075ff70 KO |
2146 | ret = bch2_btree_iter_verify_ret(iter, k); |
2147 | if (unlikely(ret)) | |
2148 | return bkey_s_c_err(ret); | |
2149 | ||
1c6fdbd8 KO |
2150 | return k; |
2151 | } | |
2152 | ||
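A minimal sketch of the usual calling pattern for peek: visit every key in a btree in ascending order, assuming a struct bch_fs *c in scope and eliding -EINTR restart handling (the in-tree for_each_btree_key() macro wraps this loop):

	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);
	bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN,
			     BTREE_ITER_PREFETCH);

	while ((k = bch2_btree_iter_peek(&iter)).k &&
	       !(ret = bkey_err(k))) {
		/* use k */
		bch2_btree_iter_advance(&iter);
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);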
f4b61341 KO |
2153 | /** |
2154 | * bch2_btree_iter_next: returns first key greater than iterator's current | |
2155 | * position | |
2156 | */ | |
1c6fdbd8 KO |
2157 | struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) |
2158 | { | |
e0ba3b64 | 2159 | if (!bch2_btree_iter_advance(iter)) |
2e70ce56 | 2160 | return bkey_s_c_null; |
f4b61341 | 2161 | |
2dac0eae | 2162 | return bch2_btree_iter_peek(iter); |
1c6fdbd8 KO |
2163 | } |
2164 | ||
ccf5a109 KO |
2165 | /** |
2166 | * bch2_btree_iter_peek_prev: returns first key less than or equal to | |
2167 | * iterator's current position | |
2168 | */ | |
2169 | struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) | |
1c6fdbd8 | 2170 | { |
67e0dd8f | 2171 | struct btree_trans *trans = iter->trans; |
c8476a4e | 2172 | struct bpos search_key = iter->pos; |
c075ff70 | 2173 | struct btree_path *saved_path = NULL; |
1c6fdbd8 | 2174 | struct bkey_s_c k; |
c075ff70 KO |
2175 | struct bkey saved_k; |
2176 | const struct bch_val *saved_v; | |
1c6fdbd8 KO |
2177 | int ret; |
2178 | ||
67e0dd8f | 2179 | EBUG_ON(iter->path->cached || iter->path->level); |
5288e66a | 2180 | EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES); |
7e1a3aa9 KO |
2181 | bch2_btree_iter_verify(iter); |
2182 | bch2_btree_iter_verify_entry_exit(iter); | |
2183 | ||
c075ff70 KO |
2184 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) |
2185 | search_key.snapshot = U32_MAX; | |
2186 | ||
1c6fdbd8 | 2187 | while (1) { |
67e0dd8f KO |
2188 | iter->path = btree_path_set_pos(trans, iter->path, search_key, |
2189 | iter->flags & BTREE_ITER_INTENT); | |
c8476a4e | 2190 | |
67e0dd8f | 2191 | ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); |
7e1a3aa9 | 2192 | if (unlikely(ret)) { |
e6e024e9 KO |
2193 | /* ensure that iter->k is consistent with iter->pos: */ |
2194 | bch2_btree_iter_set_pos(iter, iter->pos); | |
7e1a3aa9 | 2195 | k = bkey_s_c_err(ret); |
e6e024e9 | 2196 | goto out; |
7e1a3aa9 | 2197 | } |
1c6fdbd8 | 2198 | |
67e0dd8f KO |
2199 | k = btree_path_level_peek(trans, iter->path, |
2200 | &iter->path->l[0], &iter->k); | |
3d495595 KO |
2201 | if (!k.k || |
2202 | ((iter->flags & BTREE_ITER_IS_EXTENTS) | |
c075ff70 KO |
2203 | ? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0 |
2204 | : bpos_cmp(k.k->p, search_key) > 0)) | |
67e0dd8f KO |
2205 | k = btree_path_level_prev(trans, iter->path, |
2206 | &iter->path->l[0], &iter->k); | |
ccf5a109 | 2207 | |
e6e024e9 | 2208 | if (likely(k.k)) { |
c075ff70 KO |
2209 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) { |
2210 | if (k.k->p.snapshot == iter->snapshot) | |
2211 | goto got_key; | |
2212 | ||
2213 | /* | |
2214 | * If we have a saved candidate, and we're no | |
2215 | * longer at the same _key_ (not pos), return | |
2216 | * that candidate | |
2217 | */ | |
2218 | if (saved_path && bkey_cmp(k.k->p, saved_k.p)) { | |
2219 | bch2_path_put(trans, iter->path, | |
2220 | iter->flags & BTREE_ITER_INTENT); | |
2221 | iter->path = saved_path; | |
2222 | saved_path = NULL; | |
2223 | iter->k = saved_k; | |
2224 | k.v = saved_v; | |
2225 | goto got_key; | |
2226 | } | |
2227 | ||
2228 | if (bch2_snapshot_is_ancestor(iter->trans->c, | |
2229 | iter->snapshot, | |
2230 | k.k->p.snapshot)) { | |
2231 | if (saved_path) | |
2232 | bch2_path_put(trans, saved_path, | |
2233 | iter->flags & BTREE_ITER_INTENT); | |
2234 | saved_path = btree_path_clone(trans, iter->path, | |
2235 | iter->flags & BTREE_ITER_INTENT); | |
2236 | saved_k = *k.k; | |
2237 | saved_v = k.v; | |
2238 | } | |
2239 | ||
2240 | search_key = bpos_predecessor(k.k->p); | |
2241 | continue; | |
2242 | } | |
2243 | got_key: | |
2244 | if (bkey_whiteout(k.k) && | |
2245 | !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) { | |
2246 | search_key = bkey_predecessor(iter, k.k->p); | |
2247 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) | |
2248 | search_key.snapshot = U32_MAX; | |
2249 | continue; | |
2250 | } | |
2251 | ||
1c6fdbd8 | 2252 | break; |
67e0dd8f | 2253 | } else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) { |
e6e024e9 | 2254 | /* Advance to previous leaf node: */ |
67e0dd8f | 2255 | search_key = bpos_predecessor(iter->path->l[0].b->data->min_key); |
e6e024e9 KO |
2256 | } else { |
2257 | /* Start of btree: */ | |
c8476a4e | 2258 | bch2_btree_iter_set_pos(iter, POS_MIN); |
7e1a3aa9 | 2259 | k = bkey_s_c_null; |
e6e024e9 | 2260 | goto out; |
7e1a3aa9 | 2261 | } |
ccf5a109 | 2262 | } |
1c6fdbd8 | 2263 | |
a045be5a | 2264 | EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0); |
3d495595 KO |
2265 | |
2266 | /* Extents can straddle iter->pos: */ | |
a045be5a | 2267 | if (bkey_cmp(k.k->p, iter->pos) < 0) |
3d495595 | 2268 | iter->pos = k.k->p; |
c075ff70 KO |
2269 | |
2270 | if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) | |
2271 | iter->pos.snapshot = iter->snapshot; | |
7e1a3aa9 | 2272 | out: |
c075ff70 KO |
2273 | if (saved_path) |
2274 | bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT); | |
67e0dd8f KO |
2275 | iter->path->should_be_locked = true; |
2276 | ||
7e1a3aa9 KO |
2277 | bch2_btree_iter_verify_entry_exit(iter); |
2278 | bch2_btree_iter_verify(iter); | |
67e0dd8f | 2279 | |
1c6fdbd8 KO |
2280 | return k; |
2281 | } | |
2282 | ||
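The mirror-image pattern walks keys in descending order; a brief sketch, with setup and teardown as in the forward example above:

	bch2_btree_iter_set_pos(&iter, POS_MAX);

	while ((k = bch2_btree_iter_peek_prev(&iter)).k &&
	       !(ret = bkey_err(k))) {
		/* use k */
		if (!bch2_btree_iter_rewind(&iter))
			break;	/* already at POS_MIN */
	}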
ccf5a109 KO |
2283 | /** |
2284 | * bch2_btree_iter_prev: returns first key less than iterator's current | |
2285 | * position | |
2286 | */ | |
2287 | struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) | |
2288 | { | |
e0ba3b64 | 2289 | if (!bch2_btree_iter_rewind(iter)) |
2e70ce56 | 2290 | return bkey_s_c_null; |
ccf5a109 | 2291 | |
2e70ce56 | 2292 | return bch2_btree_iter_peek_prev(iter); |
ccf5a109 KO |
2293 | } |
2294 | ||
c3801239 | 2295 | struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) |
271a3d3a | 2296 | { |
9f6bd307 | 2297 | struct btree_trans *trans = iter->trans; |
953ee28a | 2298 | struct bpos search_key; |
271a3d3a | 2299 | struct bkey_s_c k; |
c3801239 KO |
2300 | int ret; |
2301 | ||
67e0dd8f | 2302 | EBUG_ON(iter->path->level); |
7e1a3aa9 KO |
2303 | bch2_btree_iter_verify(iter); |
2304 | bch2_btree_iter_verify_entry_exit(iter); | |
2305 | ||
1d214eb1 KO |
2306 | /* extents can't span inode numbers: */ |
2307 | if ((iter->flags & BTREE_ITER_IS_EXTENTS) && | |
953ee28a | 2308 | unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { |
1d214eb1 KO |
2309 | if (iter->pos.inode == KEY_INODE_MAX) |
2310 | return bkey_s_c_null; | |
c3801239 | 2311 | |
1d214eb1 KO |
2312 | bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); |
2313 | } | |
8042b5b7 | 2314 | |
953ee28a | 2315 | search_key = btree_iter_search_key(iter); |
67e0dd8f KO |
2316 | iter->path = btree_path_set_pos(trans, iter->path, search_key, |
2317 | iter->flags & BTREE_ITER_INTENT); | |
953ee28a | 2318 | |
67e0dd8f | 2319 | ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); |
c3801239 KO |
2320 | if (unlikely(ret)) |
2321 | return bkey_s_c_err(ret); | |
1c6fdbd8 | 2322 | |
c075ff70 KO |
2323 | if ((iter->flags & BTREE_ITER_CACHED) || |
2324 | !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) { | |
953ee28a | 2325 | struct bkey_i *next_update; |
1c6fdbd8 | 2326 | |
d2c50773 | 2327 | next_update = btree_trans_peek_updates(iter); |
1d214eb1 | 2328 | if (next_update && |
67e0dd8f | 2329 | !bpos_cmp(next_update->k.p, iter->pos)) { |
1d214eb1 KO |
2330 | iter->k = next_update->k; |
2331 | k = bkey_i_to_s_c(next_update); | |
67e0dd8f KO |
2332 | } else { |
2333 | k = bch2_btree_path_peek_slot(iter->path, &iter->k); | |
1d214eb1 KO |
2334 | } |
2335 | ||
2336 | if (!k.k || | |
2337 | ((iter->flags & BTREE_ITER_ALL_SNAPSHOTS) | |
2338 | ? bpos_cmp(iter->pos, k.k->p) | |
2339 | : bkey_cmp(iter->pos, k.k->p))) { | |
2340 | bkey_init(&iter->k); | |
2341 | iter->k.p = iter->pos; | |
2342 | k = (struct bkey_s_c) { &iter->k, NULL }; | |
2343 | } | |
2344 | } else { | |
2345 | struct bpos next; | |
1d214eb1 | 2346 | |
b1d87f52 | 2347 | if (iter->flags & BTREE_ITER_INTENT) { |
67e0dd8f | 2348 | struct btree_iter iter2; |
b1d87f52 | 2349 | |
67e0dd8f KO |
2350 | bch2_trans_copy_iter(&iter2, iter); |
2351 | k = bch2_btree_iter_peek(&iter2); | |
b1d87f52 | 2352 | |
67e0dd8f KO |
2353 | if (k.k && !bkey_err(k)) { |
2354 | iter->k = iter2.k; | |
2355 | k.k = &iter->k; | |
2356 | } | |
2357 | bch2_trans_iter_exit(trans, &iter2); | |
b1d87f52 KO |
2358 | } else { |
2359 | struct bpos pos = iter->pos; | |
2360 | ||
2361 | k = bch2_btree_iter_peek(iter); | |
2362 | iter->pos = pos; | |
2363 | } | |
1d214eb1 KO |
2364 | |
2365 | if (unlikely(bkey_err(k))) | |
2366 | return k; | |
2367 | ||
2368 | next = k.k ? bkey_start_pos(k.k) : POS_MAX; | |
2369 | ||
2370 | if (bkey_cmp(iter->pos, next) < 0) { | |
2371 | bkey_init(&iter->k); | |
2372 | iter->k.p = iter->pos; | |
c075ff70 KO |
2373 | |
2374 | if (iter->flags & BTREE_ITER_IS_EXTENTS) { | |
2375 | bch2_key_resize(&iter->k, | |
2376 | min_t(u64, KEY_SIZE_MAX, | |
2377 | (next.inode == iter->pos.inode | |
2378 | ? next.offset | |
2379 | : KEY_OFFSET_MAX) - | |
2380 | iter->pos.offset)); | |
2381 | EBUG_ON(!iter->k.size); | |
2382 | } | |
1d214eb1 KO |
2383 | |
2384 | k = (struct bkey_s_c) { &iter->k, NULL }; | |
1d214eb1 | 2385 | } |
271a3d3a | 2386 | } |
63f1a598 | 2387 | |
67e0dd8f KO |
2388 | iter->path->should_be_locked = true; |
2389 | ||
7e1a3aa9 KO |
2390 | bch2_btree_iter_verify_entry_exit(iter); |
2391 | bch2_btree_iter_verify(iter); | |
c075ff70 KO |
2392 | ret = bch2_btree_iter_verify_ret(iter, k); |
2393 | if (unlikely(ret)) | |
2394 | return bkey_s_c_err(ret); | |
66a0a497 | 2395 | |
63f1a598 | 2396 | return k; |
1c6fdbd8 KO |
2397 | } |
2398 | ||
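Unlike peek(), peek_slot() also synthesizes a deleted key for a hole, so a point lookup can test k.k->type directly. A hedged sketch of such a lookup, assuming a struct btree_trans *trans in scope; BTREE_ID_inodes and the inum/snapshot values are illustrative only:

	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
			     SPOS(0, inum, snapshot), 0);
	k = bch2_btree_iter_peek_slot(&iter);
	ret = bkey_err(k);
	if (!ret && k.k->type == KEY_TYPE_inode) {
		/* live inode at this position; otherwise k is a synthesized hole */
	}
	bch2_trans_iter_exit(trans, &iter);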
1c6fdbd8 KO |
2399 | struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) |
2400 | { | |
e0ba3b64 | 2401 | if (!bch2_btree_iter_advance(iter)) |
2e70ce56 | 2402 | return bkey_s_c_null; |
1c6fdbd8 | 2403 | |
2dac0eae | 2404 | return bch2_btree_iter_peek_slot(iter); |
1c6fdbd8 KO |
2405 | } |
2406 | ||
18fc6ae5 KO |
2407 | struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) |
2408 | { | |
e0ba3b64 | 2409 | if (!bch2_btree_iter_rewind(iter)) |
18fc6ae5 KO |
2410 | return bkey_s_c_null; |
2411 | ||
2412 | return bch2_btree_iter_peek_slot(iter); | |
2413 | } | |
2414 | ||
1c6fdbd8 KO |
2415 | /* new transactional stuff: */ |
2416 | ||
0423fb71 KO |
2417 | #ifdef CONFIG_BCACHEFS_DEBUG |
2418 | static void btree_trans_verify_sorted_refs(struct btree_trans *trans) | |
2419 | { | |
67e0dd8f | 2420 | struct btree_path *path; |
0423fb71 KO |
2421 | unsigned i; |
2422 | ||
67e0dd8f | 2423 | BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated)); |
0423fb71 | 2424 | |
67e0dd8f KO |
2425 | trans_for_each_path(trans, path) { |
2426 | BUG_ON(path->sorted_idx >= trans->nr_sorted); | |
2427 | BUG_ON(trans->sorted[path->sorted_idx] != path->idx); | |
0423fb71 KO |
2428 | } |
2429 | ||
2430 | for (i = 0; i < trans->nr_sorted; i++) { | |
2431 | unsigned idx = trans->sorted[i]; | |
2432 | ||
67e0dd8f KO |
2433 | EBUG_ON(!(trans->paths_allocated & (1ULL << idx))); |
2434 | BUG_ON(trans->paths[idx].sorted_idx != i); | |
0423fb71 KO |
2435 | } |
2436 | } | |
0423fb71 KO |
2437 | |
2438 | static void btree_trans_verify_sorted(struct btree_trans *trans) | |
2439 | { | |
67e0dd8f | 2440 | struct btree_path *path, *prev = NULL; |
0423fb71 KO |
2441 | unsigned i; |
2442 | ||
67e0dd8f KO |
2443 | trans_for_each_path_inorder(trans, path, i) { |
2444 | BUG_ON(prev && btree_path_cmp(prev, path) > 0); | |
2445 | prev = path; | |
0423fb71 | 2446 | } |
0423fb71 | 2447 | } |
068bcaa5 KO |
2448 | #else |
2449 | static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {} | |
2450 | static inline void btree_trans_verify_sorted(struct btree_trans *trans) {} | |
2451 | #endif | |
0423fb71 | 2452 | |
068bcaa5 | 2453 | void __bch2_btree_trans_sort_paths(struct btree_trans *trans) |
0423fb71 KO |
2454 | { |
2455 | int i, l = 0, r = trans->nr_sorted, inc = 1; | |
2456 | bool swapped; | |
2457 | ||
068bcaa5 KO |
2458 | btree_trans_verify_sorted_refs(trans); |
2459 | ||
2460 | if (trans->paths_sorted) | |
2461 | goto out; | |
2462 | ||
0423fb71 KO |
2463 | /* |
2464 | * Cocktail shaker sort: this is efficient because the paths will be |
2465 | * mostly sorted. |
2466 | */ | |
2467 | do { | |
2468 | swapped = false; | |
2469 | ||
2470 | for (i = inc > 0 ? l : r - 2; | |
2471 | i + 1 < r && i >= l; | |
2472 | i += inc) { | |
67e0dd8f KO |
2473 | if (btree_path_cmp(trans->paths + trans->sorted[i], |
2474 | trans->paths + trans->sorted[i + 1]) > 0) { | |
0423fb71 | 2475 | swap(trans->sorted[i], trans->sorted[i + 1]); |
67e0dd8f KO |
2476 | trans->paths[trans->sorted[i]].sorted_idx = i; |
2477 | trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1; | |
0423fb71 KO |
2478 | swapped = true; |
2479 | } | |
2480 | } | |
2481 | ||
2482 | if (inc > 0) | |
2483 | --r; | |
2484 | else | |
2485 | l++; | |
2486 | inc = -inc; | |
2487 | } while (swapped); | |
2488 | ||
67e0dd8f | 2489 | trans->paths_sorted = true; |
068bcaa5 | 2490 | out: |
0423fb71 KO |
2491 | btree_trans_verify_sorted(trans); |
2492 | } | |
2493 | ||
67e0dd8f KO |
2494 | static inline void btree_path_list_remove(struct btree_trans *trans, |
2495 | struct btree_path *path) | |
0423fb71 KO |
2496 | { |
2497 | unsigned i; | |
2498 | ||
67e0dd8f | 2499 | EBUG_ON(path->sorted_idx >= trans->nr_sorted); |
0423fb71 KO |
2500 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS |
2501 | trans->nr_sorted--; | |
67e0dd8f KO |
2502 | memmove_u64s_down_small(trans->sorted + path->sorted_idx, |
2503 | trans->sorted + path->sorted_idx + 1, | |
2504 | DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); | |
0423fb71 | 2505 | #else |
67e0dd8f | 2506 | array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx); |
0423fb71 | 2507 | #endif |
67e0dd8f KO |
2508 | for (i = path->sorted_idx; i < trans->nr_sorted; i++) |
2509 | trans->paths[trans->sorted[i]].sorted_idx = i; | |
0423fb71 | 2510 | |
67e0dd8f | 2511 | path->sorted_idx = U8_MAX; |
0423fb71 KO |
2512 | } |
2513 | ||
67e0dd8f KO |
2514 | static inline void btree_path_list_add(struct btree_trans *trans, |
2515 | struct btree_path *pos, | |
2516 | struct btree_path *path) | |
0423fb71 KO |
2517 | { |
2518 | unsigned i; | |
2519 | ||
068bcaa5 | 2520 | path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted; |
0423fb71 KO |
2521 | |
2522 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | |
67e0dd8f KO |
2523 | memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1, |
2524 | trans->sorted + path->sorted_idx, | |
2525 | DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); | |
0423fb71 | 2526 | trans->nr_sorted++; |
67e0dd8f | 2527 | trans->sorted[path->sorted_idx] = path->idx; |
0423fb71 | 2528 | #else |
67e0dd8f | 2529 | array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx); |
0423fb71 KO |
2530 | #endif |
2531 | ||
67e0dd8f KO |
2532 | for (i = path->sorted_idx; i < trans->nr_sorted; i++) |
2533 | trans->paths[trans->sorted[i]].sorted_idx = i; | |
64bc0011 | 2534 | |
0423fb71 | 2535 | btree_trans_verify_sorted_refs(trans); |
7c26ecae KO |
2536 | } |
2537 | ||
67e0dd8f | 2538 | void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) |
64bc0011 | 2539 | { |
67e0dd8f KO |
2540 | if (iter->path) |
2541 | bch2_path_put(trans, iter->path, | |
2542 | iter->flags & BTREE_ITER_INTENT); | |
2543 | iter->path = NULL; | |
64bc0011 KO |
2544 | } |
2545 | ||
67e0dd8f KO |
2546 | static void __bch2_trans_iter_init(struct btree_trans *trans, |
2547 | struct btree_iter *iter, | |
2548 | enum btree_id btree_id, struct bpos pos, | |
2549 | unsigned locks_want, | |
2550 | unsigned depth, | |
2551 | unsigned flags) | |
1c6fdbd8 | 2552 | { |
e5af273f KO |
2553 | EBUG_ON(trans->restarted); |
2554 | ||
f21566f1 KO |
2555 | if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && |
2556 | btree_node_type_is_extents(btree_id)) | |
3dea728c | 2557 | flags |= BTREE_ITER_IS_EXTENTS; |
1c6fdbd8 | 2558 | |
a861c722 KO |
2559 | if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) && |
2560 | !btree_type_has_snapshots(btree_id)) | |
e751c01a KO |
2561 | flags &= ~BTREE_ITER_ALL_SNAPSHOTS; |
2562 | ||
a861c722 KO |
2563 | if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) && |
2564 | btree_type_has_snapshots(btree_id)) | |
2565 | flags |= BTREE_ITER_FILTER_SNAPSHOTS; | |
e751c01a | 2566 | |
67e0dd8f KO |
2567 | iter->trans = trans; |
2568 | iter->path = NULL; | |
2569 | iter->btree_id = btree_id; | |
2570 | iter->min_depth = depth; | |
f21566f1 KO |
2571 | iter->flags = flags; |
2572 | iter->snapshot = pos.snapshot; | |
67e0dd8f KO |
2573 | iter->pos = pos; |
2574 | iter->k.type = KEY_TYPE_deleted; | |
2575 | iter->k.p = pos; | |
2576 | iter->k.size = 0; | |
e751c01a | 2577 | |
67e0dd8f KO |
2578 | iter->path = bch2_path_get(trans, |
2579 | flags & BTREE_ITER_CACHED, | |
2580 | btree_id, | |
db92f2ea | 2581 | iter->pos, |
67e0dd8f KO |
2582 | locks_want, |
2583 | depth, | |
2584 | flags & BTREE_ITER_INTENT); | |
1c6fdbd8 KO |
2585 | } |
2586 | ||
67e0dd8f KO |
2587 | void bch2_trans_iter_init(struct btree_trans *trans, |
2588 | struct btree_iter *iter, | |
2589 | unsigned btree_id, struct bpos pos, | |
2590 | unsigned flags) | |
424eb881 | 2591 | { |
67e0dd8f KO |
2592 | __bch2_trans_iter_init(trans, iter, btree_id, pos, |
2593 | 0, 0, flags); | |
424eb881 KO |
2594 | } |
2595 | ||
67e0dd8f KO |
2596 | void bch2_trans_node_iter_init(struct btree_trans *trans, |
2597 | struct btree_iter *iter, | |
2598 | enum btree_id btree_id, | |
2599 | struct bpos pos, | |
2600 | unsigned locks_want, | |
2601 | unsigned depth, | |
2602 | unsigned flags) | |
1c6fdbd8 | 2603 | { |
67e0dd8f KO |
2604 | __bch2_trans_iter_init(trans, iter, btree_id, pos, locks_want, depth, |
2605 | BTREE_ITER_NOT_EXTENTS| | |
2606 | __BTREE_ITER_ALL_SNAPSHOTS| | |
2607 | BTREE_ITER_ALL_SNAPSHOTS| | |
2608 | flags); | |
2609 | BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH)); | |
2610 | BUG_ON(iter->path->level != depth); | |
2611 | BUG_ON(iter->min_depth != depth); | |
2612 | } | |
7c26ecae | 2613 | |
67e0dd8f KO |
2614 | void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) |
2615 | { | |
2616 | *dst = *src; | |
2617 | if (src->path) | |
2618 | __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT); | |
1c6fdbd8 KO |
2619 | } |
2620 | ||
73a117d2 | 2621 | void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) |
1c6fdbd8 | 2622 | { |
73a117d2 KO |
2623 | size_t new_top = trans->mem_top + size; |
2624 | void *p; | |
2625 | ||
2626 | if (new_top > trans->mem_bytes) { | |
1c6fdbd8 | 2627 | size_t old_bytes = trans->mem_bytes; |
73a117d2 | 2628 | size_t new_bytes = roundup_pow_of_two(new_top); |
e131b6aa KO |
2629 | void *new_mem; |
2630 | ||
2631 | WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); | |
2632 | ||
2633 | new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS); | |
2634 | if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) { | |
2635 | new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL); | |
2636 | new_bytes = BTREE_TRANS_MEM_MAX; | |
2637 | kfree(trans->mem); | |
2638 | } | |
1c6fdbd8 KO |
2639 | |
2640 | if (!new_mem) | |
73a117d2 | 2641 | return ERR_PTR(-ENOMEM); |
1c6fdbd8 KO |
2642 | |
2643 | trans->mem = new_mem; | |
2644 | trans->mem_bytes = new_bytes; | |
2645 | ||
1c7a0adf | 2646 | if (old_bytes) { |
73a117d2 | 2647 | trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes); |
e5af273f | 2648 | btree_trans_restart(trans); |
73a117d2 | 2649 | return ERR_PTR(-EINTR); |
1c7a0adf | 2650 | } |
1c6fdbd8 KO |
2651 | } |
2652 | ||
20bceecb | 2653 | p = trans->mem + trans->mem_top; |
1c6fdbd8 | 2654 | trans->mem_top += size; |
7ed158f2 | 2655 | memset(p, 0, size); |
20bceecb | 2656 | return p; |
1c6fdbd8 KO |
2657 | } |
2658 | ||
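bch2_trans_kmalloc() is a bump allocator: allocations are not freed individually but live until the transaction is reset or exited, and growing the buffer mid-transaction forces a restart. A minimal sketch of the expected calling convention:

	struct bkey_i *new_key = bch2_trans_kmalloc(trans, sizeof(*new_key));
	int ret = PTR_ERR_OR_ZERO(new_key);

	if (ret)	/* -EINTR if the buffer was just grown, -ENOMEM on failure */
		return ret;

	bkey_init(&new_key->k);
	/* ... fill in new_key; it stays valid for the rest of this transaction ... */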
d38494c4 | 2659 | /** |
955af634 | 2660 | * bch2_trans_begin() - reset a transaction after an interrupted attempt |
d38494c4 | 2661 | * @trans: transaction to reset |
d38494c4 DR |
2662 | * |
2663 | * While iterating over nodes or updating nodes, an attempt to lock a btree |
2664 | * node may return EINTR when the trylock fails. When this occurs |
955af634 | 2665 | * bch2_trans_begin() should be called and the transaction retried. |
d38494c4 | 2666 | */ |
955af634 | 2667 | void bch2_trans_begin(struct btree_trans *trans) |
1c6fdbd8 | 2668 | { |
67e0dd8f KO |
2669 | struct btree_insert_entry *i; |
2670 | struct btree_path *path; | |
a8e00bd4 | 2671 | |
67e0dd8f KO |
2672 | trans_for_each_update(trans, i) |
2673 | __btree_path_put(i->path, true); | |
1c6fdbd8 | 2674 | |
a49e9a05 | 2675 | trans->extra_journal_res = 0; |
5df4be3f | 2676 | trans->nr_updates = 0; |
163e885a | 2677 | trans->mem_top = 0; |
bf7b87a4 | 2678 | |
43d00243 | 2679 | trans->hooks = NULL; |
96e2aa1b KO |
2680 | trans->extra_journal_entries = NULL; |
2681 | trans->extra_journal_entry_u64s = 0; | |
2682 | ||
f21539a5 KO |
2683 | if (trans->fs_usage_deltas) { |
2684 | trans->fs_usage_deltas->used = 0; | |
2685 | memset((void *) trans->fs_usage_deltas + | |
2686 | offsetof(struct replicas_delta_list, memset_start), 0, | |
2687 | (void *) &trans->fs_usage_deltas->memset_end - | |
2688 | (void *) &trans->fs_usage_deltas->memset_start); | |
2689 | } | |
2690 | ||
67e0dd8f KO |
2691 | trans_for_each_path(trans, path) { |
2692 | /* | |
2693 | * XXX: we probably shouldn't be doing this if the transaction | |
2694 | * was restarted, but currently we still overflow transaction | |
2695 | * iterators if we do that | |
2696 | */ | |
2697 | if (!path->ref && !path->preserve) | |
2698 | __bch2_path_free(trans, path); | |
2699 | else | |
2700 | path->preserve = path->should_be_locked = false; | |
2701 | } | |
2702 | ||
955af634 | 2703 | bch2_trans_cond_resched(trans); |
50dc0f69 | 2704 | |
955af634 | 2705 | if (trans->restarted) |
67e0dd8f | 2706 | bch2_btree_path_traverse_all(trans); |
e5af273f KO |
2707 | |
2708 | trans->restarted = false; | |
1c6fdbd8 KO |
2709 | } |
2710 | ||
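A minimal sketch of the retry loop described above, where do_stuff() stands in for any hypothetical transactional operation that returns -EINTR when it must be retried:

	struct btree_trans trans;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	do {
		bch2_trans_begin(&trans);
		ret = do_stuff(&trans);
	} while (ret == -EINTR);

	bch2_trans_exit(&trans);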
67e0dd8f | 2711 | static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) |
1a21bf98 | 2712 | { |
67e0dd8f | 2713 | size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX; |
3eb26d01 | 2714 | size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX; |
dbd1e825 | 2715 | void *p = NULL; |
1a21bf98 KO |
2716 | |
2717 | BUG_ON(trans->used_mempool); | |
2718 | ||
dbd1e825 | 2719 | #ifdef __KERNEL__ |
67e0dd8f | 2720 | p = this_cpu_xchg(c->btree_paths_bufs->path, NULL); |
dbd1e825 KO |
2721 | #endif |
2722 | if (!p) | |
67e0dd8f | 2723 | p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS); |
1a21bf98 | 2724 | |
67e0dd8f | 2725 | trans->paths = p; p += paths_bytes; |
1a21bf98 | 2726 | trans->updates = p; p += updates_bytes; |
1a21bf98 KO |
2727 | } |
2728 | ||
20bceecb KO |
2729 | void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, |
2730 | unsigned expected_nr_iters, | |
2731 | size_t expected_mem_bytes) | |
c0ebe3e4 | 2732 | __acquires(&c->btree_trans_barrier) |
1c6fdbd8 | 2733 | { |
ae1ede58 | 2734 | memset(trans, 0, sizeof(*trans)); |
1c6fdbd8 | 2735 | trans->c = c; |
ba5c6557 | 2736 | trans->ip = _RET_IP_; |
20bceecb | 2737 | |
67e0dd8f | 2738 | bch2_trans_alloc_paths(trans, c); |
20bceecb | 2739 | |
0b5c9f59 KO |
2740 | if (expected_mem_bytes) { |
2741 | expected_mem_bytes = roundup_pow_of_two(expected_mem_bytes); | |
2742 | trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL); | |
e131b6aa KO |
2743 | |
2744 | if (unlikely(!trans->mem)) { |
2745 | trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL); | |
2746 | trans->mem_bytes = BTREE_TRANS_MEM_MAX; | |
2747 | } else { | |
0b5c9f59 | 2748 | trans->mem_bytes = expected_mem_bytes; |
e131b6aa | 2749 | } |
0b5c9f59 | 2750 | } |
495aabed | 2751 | |
876c7af3 KO |
2752 | trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); |
2753 | ||
495aabed | 2754 | #ifdef CONFIG_BCACHEFS_DEBUG |
f96c0df4 | 2755 | trans->pid = current->pid; |
495aabed KO |
2756 | mutex_lock(&c->btree_trans_lock); |
2757 | list_add(&trans->list, &c->btree_trans_list); | |
2758 | mutex_unlock(&c->btree_trans_lock); | |
2759 | #endif | |
1c6fdbd8 KO |
2760 | } |
2761 | ||
67e0dd8f KO |
2762 | static void check_btree_paths_leaked(struct btree_trans *trans) |
2763 | { | |
2764 | #ifdef CONFIG_BCACHEFS_DEBUG | |
2765 | struct bch_fs *c = trans->c; | |
2766 | struct btree_path *path; | |
2767 | ||
2768 | trans_for_each_path(trans, path) | |
2769 | if (path->ref) | |
2770 | goto leaked; | |
2771 | return; | |
2772 | leaked: | |
2773 | bch_err(c, "btree paths leaked from %pS!", (void *) trans->ip); | |
2774 | trans_for_each_path(trans, path) | |
2775 | if (path->ref) | |
2776 | printk(KERN_ERR " btree %s %pS\n", | |
2777 | bch2_btree_ids[path->btree_id], | |
2778 | (void *) path->ip_allocated); | |
2779 | /* Be noisy about this: */ | |
2780 | bch2_fatal_error(c); | |
2781 | #endif | |
2782 | } | |
2783 | ||
1c6fdbd8 | 2784 | int bch2_trans_exit(struct btree_trans *trans) |
c0ebe3e4 | 2785 | __releases(&c->btree_trans_barrier) |
1c6fdbd8 | 2786 | { |
67e0dd8f | 2787 | struct btree_insert_entry *i; |
1a21bf98 KO |
2788 | struct bch_fs *c = trans->c; |
2789 | ||
ece254b2 | 2790 | bch2_trans_unlock(trans); |
1c6fdbd8 | 2791 | |
67e0dd8f KO |
2792 | trans_for_each_update(trans, i) |
2793 | __btree_path_put(i->path, true); | |
2794 | trans->nr_updates = 0; | |
509d3e0a | 2795 | |
67e0dd8f | 2796 | check_btree_paths_leaked(trans); |
50dc0f69 | 2797 | |
67e0dd8f KO |
2798 | #ifdef CONFIG_BCACHEFS_DEBUG |
2799 | mutex_lock(&c->btree_trans_lock); | |
495aabed | 2800 | list_del(&trans->list); |
67e0dd8f | 2801 | mutex_unlock(&c->btree_trans_lock); |
495aabed KO |
2802 | #endif |
2803 | ||
876c7af3 KO |
2804 | srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); |
2805 | ||
67e0dd8f | 2806 | bch2_journal_preres_put(&c->journal, &trans->journal_preres); |
2ca88e5a | 2807 | |
9620c3ec KO |
2808 | if (trans->fs_usage_deltas) { |
2809 | if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) == | |
2810 | REPLICAS_DELTA_LIST_MAX) | |
2811 | mempool_free(trans->fs_usage_deltas, | |
67e0dd8f | 2812 | &c->replicas_delta_pool); |
9620c3ec KO |
2813 | else |
2814 | kfree(trans->fs_usage_deltas); | |
2815 | } | |
e131b6aa KO |
2816 | |
2817 | if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) | |
67e0dd8f | 2818 | mempool_free(trans->mem, &c->btree_trans_mem_pool); |
e131b6aa KO |
2819 | else |
2820 | kfree(trans->mem); | |
1a21bf98 | 2821 | |
dbd1e825 KO |
2822 | #ifdef __KERNEL__ |
2823 | /* | |
2824 | * Userspace doesn't have a real percpu implementation: | |
2825 | */ | |
67e0dd8f | 2826 | trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths); |
dbd1e825 | 2827 | #endif |
e131b6aa | 2828 | |
67e0dd8f KO |
2829 | if (trans->paths) |
2830 | mempool_free(trans->paths, &c->btree_paths_pool); | |
1a21bf98 | 2831 | |
1c6fdbd8 | 2832 | trans->mem = (void *) 0x1; |
67e0dd8f | 2833 | trans->paths = (void *) 0x1; |
ece254b2 KO |
2834 | |
2835 | return trans->error ? -EIO : 0; | |
1c6fdbd8 | 2836 | } |
36e9d698 | 2837 | |
b3d1e6ca | 2838 | static void __maybe_unused |
67e0dd8f | 2839 | bch2_btree_path_node_to_text(struct printbuf *out, |
b3d1e6ca | 2840 | struct btree_bkey_cached_common *_b, |
f21566f1 | 2841 | bool cached) |
d211b408 | 2842 | { |
5c1d808a KO |
2843 | pr_buf(out, " l=%u %s:", |
2844 | _b->level, bch2_btree_ids[_b->btree_id]); | |
f21566f1 | 2845 | bch2_bpos_to_text(out, btree_node_pos(_b, cached)); |
d211b408 KO |
2846 | } |
2847 | ||
2d587674 | 2848 | #ifdef CONFIG_BCACHEFS_DEBUG |
f21566f1 | 2849 | static bool trans_has_locks(struct btree_trans *trans) |
5c1d808a | 2850 | { |
67e0dd8f | 2851 | struct btree_path *path; |
5c1d808a | 2852 | |
67e0dd8f KO |
2853 | trans_for_each_path(trans, path) |
2854 | if (path->nodes_locked) | |
5c1d808a KO |
2855 | return true; |
2856 | return false; | |
2857 | } | |
2d587674 | 2858 | #endif |
5c1d808a | 2859 | |
495aabed KO |
2860 | void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) |
2861 | { | |
2862 | #ifdef CONFIG_BCACHEFS_DEBUG | |
2863 | struct btree_trans *trans; | |
67e0dd8f | 2864 | struct btree_path *path; |
495aabed KO |
2865 | struct btree *b; |
2866 | unsigned l; | |
2867 | ||
2868 | mutex_lock(&c->btree_trans_lock); | |
2869 | list_for_each_entry(trans, &c->btree_trans_list, list) { | |
f21566f1 | 2870 | if (!trans_has_locks(trans)) |
5c1d808a KO |
2871 | continue; |
2872 | ||
2873 | pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip); | |
495aabed | 2874 | |
67e0dd8f KO |
2875 | trans_for_each_path(trans, path) { |
2876 | if (!path->nodes_locked) | |
495aabed KO |
2877 | continue; |
2878 | ||
1d3ecd7e | 2879 | pr_buf(out, " path %u %c l=%u %s:", |
67e0dd8f KO |
2880 | path->idx, |
2881 | path->cached ? 'c' : 'b', | |
1d3ecd7e | 2882 | path->level, |
67e0dd8f KO |
2883 | bch2_btree_ids[path->btree_id]); |
2884 | bch2_bpos_to_text(out, path->pos); | |
495aabed KO |
2885 | pr_buf(out, "\n"); |
2886 | ||
2887 | for (l = 0; l < BTREE_MAX_DEPTH; l++) { | |
67e0dd8f | 2888 | if (btree_node_locked(path, l)) { |
d211b408 | 2889 | pr_buf(out, " %s l=%u ", |
67e0dd8f KO |
2890 | btree_node_intent_locked(path, l) ? "i" : "r", l); |
2891 | bch2_btree_path_node_to_text(out, | |
2892 | (void *) path->l[l].b, | |
2893 | path->cached); | |
495aabed KO |
2894 | pr_buf(out, "\n"); |
2895 | } | |
2896 | } | |
2897 | } | |
2898 | ||
2899 | b = READ_ONCE(trans->locking); | |
2900 | if (b) { | |
67e0dd8f KO |
2901 | path = &trans->paths[trans->locking_path_idx]; |
2902 | pr_buf(out, " locking path %u %c l=%u %s:", | |
2903 | trans->locking_path_idx, | |
2904 | path->cached ? 'c' : 'b', | |
515282ac KO |
2905 | trans->locking_level, |
2906 | bch2_btree_ids[trans->locking_btree_id]); | |
2907 | bch2_bpos_to_text(out, trans->locking_pos); | |
2908 | ||
d211b408 | 2909 | pr_buf(out, " node "); |
67e0dd8f KO |
2910 | bch2_btree_path_node_to_text(out, |
2911 | (void *) b, path->cached); | |
495aabed KO |
2912 | pr_buf(out, "\n"); |
2913 | } | |
2914 | } | |
2915 | mutex_unlock(&c->btree_trans_lock); | |
2916 | #endif | |
2917 | } | |
2918 | ||
36e9d698 KO |
2919 | void bch2_fs_btree_iter_exit(struct bch_fs *c) |
2920 | { | |
e131b6aa | 2921 | mempool_exit(&c->btree_trans_mem_pool); |
67e0dd8f | 2922 | mempool_exit(&c->btree_paths_pool); |
876c7af3 | 2923 | cleanup_srcu_struct(&c->btree_trans_barrier); |
36e9d698 KO |
2924 | } |
2925 | ||
2926 | int bch2_fs_btree_iter_init(struct bch_fs *c) | |
2927 | { | |
2928 | unsigned nr = BTREE_ITER_MAX; | |
2929 | ||
495aabed KO |
2930 | INIT_LIST_HEAD(&c->btree_trans_list); |
2931 | mutex_init(&c->btree_trans_lock); | |
2932 | ||
876c7af3 | 2933 | return init_srcu_struct(&c->btree_trans_barrier) ?: |
67e0dd8f KO |
2934 | mempool_init_kmalloc_pool(&c->btree_paths_pool, 1, |
2935 | sizeof(struct btree_path) * nr + | |
e131b6aa KO |
2936 | sizeof(struct btree_insert_entry) * nr) ?: |
2937 | mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, | |
2938 | BTREE_TRANS_MEM_MAX); | |
36e9d698 | 2939 | } |