Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> | |
4 | * Copyright (C) 2014 Datera Inc. | |
5 | */ | |
6 | ||
7 | #include "bcachefs.h" | |
7b3f84ea | 8 | #include "alloc_background.h" |
ef337c54 | 9 | #include "alloc_foreground.h" |
47d2080e | 10 | #include "backpointers.h" |
1c6fdbd8 | 11 | #include "bkey_methods.h" |
07a1006a | 12 | #include "bkey_buf.h" |
401585fe | 13 | #include "btree_journal_iter.h" |
ec061b21 | 14 | #include "btree_key_cache.h" |
1c6fdbd8 | 15 | #include "btree_locking.h" |
43f5ea46 | 16 | #include "btree_node_scan.h" |
1c6fdbd8 KO |
17 | #include "btree_update_interior.h" |
18 | #include "btree_io.h" | |
19 | #include "btree_gc.h" | |
20 | #include "buckets.h" | |
21 | #include "clock.h" | |
22 | #include "debug.h" | |
cd575ddf | 23 | #include "ec.h" |
1c6fdbd8 KO |
24 | #include "error.h" |
25 | #include "extents.h" | |
26 | #include "journal.h" | |
27 | #include "keylist.h" | |
28 | #include "move.h" | |
d2554263 | 29 | #include "recovery_passes.h" |
890b74f0 | 30 | #include "reflink.h" |
1c6fdbd8 KO |
31 | #include "replicas.h" |
32 | #include "super-io.h" | |
33 | #include "trace.h" | |
34 | ||
35 | #include <linux/slab.h> | |
36 | #include <linux/bitops.h> | |
37 | #include <linux/freezer.h> | |
38 | #include <linux/kthread.h> | |
39 | #include <linux/preempt.h> | |
40 | #include <linux/rcupdate.h> | |
41 | #include <linux/sched/task.h> | |
42 | ||
4351d3ec KO |
43 | #define DROP_THIS_NODE 10 |
44 | #define DROP_PREV_NODE 11 | |
43f5ea46 | 45 | #define DID_FILL_FROM_SCAN 12 |
4351d3ec | 46 | |
ad00bce0 KO |
47 | static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k) |
48 | { | |
49 | return (struct bkey_s) {{{ | |
50 | (struct bkey *) k.k, | |
51 | (struct bch_val *) k.v | |
52 | }}}; | |
53 | } | |
54 | ||
2252aa27 KO |
55 | static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) |
56 | { | |
57 | preempt_disable(); | |
58 | write_seqcount_begin(&c->gc_pos_lock); | |
59 | c->gc_pos = new_pos; | |
60 | write_seqcount_end(&c->gc_pos_lock); | |
61 | preempt_enable(); | |
62 | } | |
63 | ||
64 | static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) | |
65 | { | |
24b27975 | 66 | BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) < 0); |
2252aa27 KO |
67 | __gc_pos_set(c, new_pos); |
68 | } | |
69 | ||
aae15aaf KO |
70 | static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst) |
71 | { | |
72 | switch (b->key.k.type) { | |
73 | case KEY_TYPE_btree_ptr: { | |
74 | struct bkey_i_btree_ptr *src = bkey_i_to_btree_ptr(&b->key); | |
75 | ||
76 | dst->k.p = src->k.p; | |
77 | dst->v.mem_ptr = 0; | |
78 | dst->v.seq = b->data->keys.seq; | |
79 | dst->v.sectors_written = 0; | |
80 | dst->v.flags = 0; | |
81 | dst->v.min_key = b->data->min_key; | |
82 | set_bkey_val_bytes(&dst->k, sizeof(dst->v) + bkey_val_bytes(&src->k)); | |
83 | memcpy(dst->v.start, src->v.start, bkey_val_bytes(&src->k)); | |
84 | break; | |
85 | } | |
86 | case KEY_TYPE_btree_ptr_v2: | |
87 | bkey_copy(&dst->k_i, &b->key); | |
88 | break; | |
89 | default: | |
90 | BUG(); | |
91 | } | |
92 | } | |
93 | ||
94 | static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) | |
95 | { | |
96 | struct bkey_i_btree_ptr_v2 *new; | |
97 | int ret; | |
98 | ||
43f5ea46 KO |
99 | if (c->opts.verbose) { |
100 | struct printbuf buf = PRINTBUF; | |
101 | ||
102 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); | |
103 | prt_str(&buf, " -> "); | |
104 | bch2_bpos_to_text(&buf, new_min); | |
105 | ||
106 | bch_info(c, "%s(): %s", __func__, buf.buf); | |
107 | printbuf_exit(&buf); | |
108 | } | |
109 | ||
a1019576 | 110 | new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); |
aae15aaf | 111 | if (!new) |
65d48e35 | 112 | return -BCH_ERR_ENOMEM_gc_repair_key; |
aae15aaf KO |
113 | |
114 | btree_ptr_to_v2(b, new); | |
115 | b->data->min_key = new_min; | |
116 | new->v.min_key = new_min; | |
117 | SET_BTREE_PTR_RANGE_UPDATED(&new->v, true); | |
118 | ||
e75b2d4c | 119 | ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i); |
aae15aaf KO |
120 | if (ret) { |
121 | kfree(new); | |
122 | return ret; | |
d06c1a0c | 123 | } |
1c6fdbd8 | 124 | |
aae15aaf | 125 | bch2_btree_node_drop_keys_outside_node(b); |
48620e51 | 126 | bkey_copy(&b->key, &new->k_i); |
aae15aaf KO |
127 | return 0; |
128 | } | |
129 | ||
130 | static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) | |
131 | { | |
132 | struct bkey_i_btree_ptr_v2 *new; | |
133 | int ret; | |
134 | ||
43f5ea46 KO |
135 | if (c->opts.verbose) { |
136 | struct printbuf buf = PRINTBUF; | |
137 | ||
138 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); | |
139 | prt_str(&buf, " -> "); | |
140 | bch2_bpos_to_text(&buf, new_max); | |
141 | ||
142 | bch_info(c, "%s(): %s", __func__, buf.buf); | |
143 | printbuf_exit(&buf); | |
144 | } | |
145 | ||
aae15aaf KO |
146 | ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p); |
147 | if (ret) | |
148 | return ret; | |
149 | ||
a1019576 | 150 | new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); |
aae15aaf | 151 | if (!new) |
65d48e35 | 152 | return -BCH_ERR_ENOMEM_gc_repair_key; |
aae15aaf KO |
153 | |
154 | btree_ptr_to_v2(b, new); | |
155 | b->data->max_key = new_max; | |
156 | new->k.p = new_max; | |
157 | SET_BTREE_PTR_RANGE_UPDATED(&new->v, true); | |
158 | ||
e75b2d4c | 159 | ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i); |
aae15aaf KO |
160 | if (ret) { |
161 | kfree(new); | |
162 | return ret; | |
163 | } | |
164 | ||
165 | bch2_btree_node_drop_keys_outside_node(b); | |
166 | ||
167 | mutex_lock(&c->btree_cache.lock); | |
168 | bch2_btree_node_hash_remove(&c->btree_cache, b); | |
169 | ||
170 | bkey_copy(&b->key, &new->k_i); | |
171 | ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); | |
172 | BUG_ON(ret); | |
173 | mutex_unlock(&c->btree_cache.lock); | |
174 | return 0; | |
175 | } | |
176 | ||
43f5ea46 KO |
177 | static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b, |
178 | struct btree *prev, struct btree *cur, | |
179 | struct bpos *pulled_from_scan) | |
aae15aaf KO |
180 | { |
181 | struct bpos expected_start = !prev | |
182 | ? b->data->min_key | |
183 | : bpos_successor(prev->key.k.p); | |
43f5ea46 | 184 | struct printbuf buf = PRINTBUF; |
aae15aaf KO |
185 | int ret = 0; |
186 | ||
43f5ea46 KO |
187 | BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && |
188 | !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, | |
189 | b->data->min_key)); | |
190 | ||
191 | if (bpos_eq(expected_start, cur->data->min_key)) | |
192 | return 0; | |
193 | ||
194 | prt_printf(&buf, " at btree %s level %u:\n parent: ", | |
195 | bch2_btree_id_str(b->c.btree_id), b->c.level); | |
196 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); | |
197 | ||
198 | if (prev) { | |
199 | prt_printf(&buf, "\n prev: "); | |
200 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key)); | |
aae15aaf KO |
201 | } |
202 | ||
43f5ea46 KO |
203 | prt_str(&buf, "\n next: "); |
204 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key)); | |
4351d3ec | 205 | |
43f5ea46 KO |
206 | if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */ |
207 | if (b->c.level == 1 && | |
208 | bpos_lt(*pulled_from_scan, cur->data->min_key)) { | |
209 | ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, | |
210 | expected_start, | |
211 | bpos_predecessor(cur->data->min_key)); | |
212 | if (ret) | |
213 | goto err; | |
214 | ||
215 | *pulled_from_scan = cur->data->min_key; | |
216 | ret = DID_FILL_FROM_SCAN; | |
217 | } else { | |
218 | if (mustfix_fsck_err(c, btree_node_topology_bad_min_key, | |
219 | "btree node with incorrect min_key%s", buf.buf)) | |
220 | ret = set_node_min(c, cur, expected_start); | |
221 | } | |
222 | } else { /* overlap */ | |
223 | if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { /* cur overwrites prev */ | |
224 | if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */ | |
225 | if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node, | |
226 | "btree node overwritten by next node%s", buf.buf)) | |
227 | ret = DROP_PREV_NODE; | |
228 | } else { | |
229 | if (mustfix_fsck_err(c, btree_node_topology_bad_max_key, | |
230 | "btree node with incorrect max_key%s", buf.buf)) | |
231 | ret = set_node_max(c, prev, | |
232 | bpos_predecessor(cur->data->min_key)); | |
233 | } | |
234 | } else { | |
235 | if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? */ | |
236 | if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node, | |
237 | "btree node overwritten by prev node%s", buf.buf)) | |
238 | ret = DROP_THIS_NODE; | |
239 | } else { | |
240 | if (mustfix_fsck_err(c, btree_node_topology_bad_min_key, | |
241 | "btree node with incorrect min_key%s", buf.buf)) | |
242 | ret = set_node_min(c, cur, expected_start); | |
243 | } | |
fa8e94fa | 244 | } |
aae15aaf | 245 | } |
43f5ea46 | 246 | err: |
aae15aaf | 247 | fsck_err: |
43f5ea46 | 248 | printbuf_exit(&buf); |
aae15aaf KO |
249 | return ret; |
250 | } | |
251 | ||
252 | static int btree_repair_node_end(struct bch_fs *c, struct btree *b, | |
43f5ea46 | 253 | struct btree *child, struct bpos *pulled_from_scan) |
aae15aaf | 254 | { |
43f5ea46 | 255 | struct printbuf buf = PRINTBUF; |
aae15aaf KO |
256 | int ret = 0; |
257 | ||
43f5ea46 KO |
258 | if (bpos_eq(child->key.k.p, b->key.k.p)) |
259 | return 0; | |
260 | ||
261 | prt_printf(&buf, "at btree %s level %u:\n parent: ", | |
262 | bch2_btree_id_str(b->c.btree_id), b->c.level); | |
263 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); | |
264 | ||
265 | prt_str(&buf, "\n child: "); | |
266 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key)); | |
267 | ||
268 | if (mustfix_fsck_err(c, btree_node_topology_bad_max_key, | |
269 | "btree node with incorrect max_key%s", buf.buf)) { | |
270 | if (b->c.level == 1 && | |
271 | bpos_lt(*pulled_from_scan, b->key.k.p)) { | |
272 | ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, | |
273 | bpos_successor(child->key.k.p), b->key.k.p); | |
274 | if (ret) | |
275 | goto err; | |
276 | ||
277 | *pulled_from_scan = b->key.k.p; | |
278 | ret = DID_FILL_FROM_SCAN; | |
279 | } else { | |
280 | ret = set_node_max(c, child, b->key.k.p); | |
281 | } | |
aae15aaf | 282 | } |
fa8e94fa | 283 | err: |
aae15aaf | 284 | fsck_err: |
43f5ea46 | 285 | printbuf_exit(&buf); |
aae15aaf KO |
286 | return ret; |
287 | } | |
a66f7989 | 288 | |
43f5ea46 KO |
289 | static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b, |
290 | struct bpos *pulled_from_scan) | |
aae15aaf | 291 | { |
ca7d8fca | 292 | struct bch_fs *c = trans->c; |
aae15aaf KO |
293 | struct btree_and_journal_iter iter; |
294 | struct bkey_s_c k; | |
4351d3ec | 295 | struct bkey_buf prev_k, cur_k; |
aae15aaf | 296 | struct btree *prev = NULL, *cur = NULL; |
43f5ea46 | 297 | bool have_child, new_pass = false; |
48620e51 | 298 | struct printbuf buf = PRINTBUF; |
aae15aaf KO |
299 | int ret = 0; |
300 | ||
301 | if (!b->c.level) | |
302 | return 0; | |
43f5ea46 | 303 | |
4351d3ec KO |
304 | bch2_bkey_buf_init(&prev_k); |
305 | bch2_bkey_buf_init(&cur_k); | |
43f5ea46 KO |
306 | again: |
307 | cur = prev = NULL; | |
308 | have_child = new_pass = false; | |
fc634d8e | 309 | bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); |
5f43b013 | 310 | iter.prefetch = true; |
aae15aaf KO |
311 | |
312 | while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { | |
e88a75eb KO |
313 | BUG_ON(bpos_lt(k.k->p, b->data->min_key)); |
314 | BUG_ON(bpos_gt(k.k->p, b->data->max_key)); | |
a0b73c1c | 315 | |
aae15aaf | 316 | bch2_btree_and_journal_iter_advance(&iter); |
4351d3ec | 317 | bch2_bkey_buf_reassemble(&cur_k, c, k); |
aae15aaf | 318 | |
ca7d8fca | 319 | cur = bch2_btree_node_get_noiter(trans, cur_k.k, |
aae15aaf KO |
320 | b->c.btree_id, b->c.level - 1, |
321 | false); | |
322 | ret = PTR_ERR_OR_ZERO(cur); | |
323 | ||
fa8e94fa KO |
324 | printbuf_reset(&buf); |
325 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); | |
326 | ||
52946d82 | 327 | if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), c, |
b65db750 | 328 | btree_node_unreadable, |
48620e51 | 329 | "Topology repair: unreadable btree node at btree %s level %u:\n" |
aae15aaf | 330 | " %s", |
88dfe193 | 331 | bch2_btree_id_str(b->c.btree_id), |
aae15aaf | 332 | b->c.level - 1, |
fa8e94fa | 333 | buf.buf)) { |
ca7d8fca | 334 | bch2_btree_node_evict(trans, cur_k.k); |
2817d453 | 335 | cur = NULL; |
5ab4beb7 KO |
336 | ret = bch2_journal_key_delete(c, b->c.btree_id, |
337 | b->c.level, cur_k.k->k.p); | |
a0b73c1c | 338 | if (ret) |
4351d3ec | 339 | break; |
5ab4beb7 KO |
340 | |
341 | if (!btree_id_is_alloc(b->c.btree_id)) { | |
342 | ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); | |
343 | if (ret) | |
344 | break; | |
345 | } | |
aae15aaf | 346 | continue; |
a0b73c1c KO |
347 | } |
348 | ||
cf904c8d KO |
349 | bch_err_msg(c, ret, "getting btree node"); |
350 | if (ret) | |
aae15aaf | 351 | break; |
a0b73c1c | 352 | |
43f5ea46 KO |
353 | if (bch2_btree_node_is_stale(c, cur)) { |
354 | bch_info(c, "btree node %s older than nodes found by scanning", buf.buf); | |
355 | six_unlock_read(&cur->c.lock); | |
356 | bch2_btree_node_evict(trans, cur_k.k); | |
357 | ret = bch2_journal_key_delete(c, b->c.btree_id, | |
358 | b->c.level, cur_k.k->k.p); | |
359 | cur = NULL; | |
360 | if (ret) | |
361 | break; | |
362 | continue; | |
363 | } | |
364 | ||
365 | ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan); | |
366 | if (ret == DID_FILL_FROM_SCAN) { | |
367 | new_pass = true; | |
368 | ret = 0; | |
369 | } | |
4351d3ec KO |
370 | |
371 | if (ret == DROP_THIS_NODE) { | |
372 | six_unlock_read(&cur->c.lock); | |
ca7d8fca | 373 | bch2_btree_node_evict(trans, cur_k.k); |
4351d3ec KO |
374 | ret = bch2_journal_key_delete(c, b->c.btree_id, |
375 | b->c.level, cur_k.k->k.p); | |
2817d453 | 376 | cur = NULL; |
4351d3ec KO |
377 | if (ret) |
378 | break; | |
379 | continue; | |
380 | } | |
381 | ||
aae15aaf KO |
382 | if (prev) |
383 | six_unlock_read(&prev->c.lock); | |
4351d3ec | 384 | prev = NULL; |
aae15aaf | 385 | |
4351d3ec | 386 | if (ret == DROP_PREV_NODE) { |
79032b07 | 387 | bch_info(c, "dropped prev node"); |
ca7d8fca | 388 | bch2_btree_node_evict(trans, prev_k.k); |
4351d3ec KO |
389 | ret = bch2_journal_key_delete(c, b->c.btree_id, |
390 | b->c.level, prev_k.k->k.p); | |
391 | if (ret) | |
392 | break; | |
393 | ||
394 | bch2_btree_and_journal_iter_exit(&iter); | |
4351d3ec KO |
395 | goto again; |
396 | } else if (ret) | |
aae15aaf | 397 | break; |
4351d3ec KO |
398 | |
399 | prev = cur; | |
400 | cur = NULL; | |
401 | bch2_bkey_buf_copy(&prev_k, c, cur_k.k); | |
aae15aaf KO |
402 | } |
403 | ||
404 | if (!ret && !IS_ERR_OR_NULL(prev)) { | |
405 | BUG_ON(cur); | |
43f5ea46 KO |
406 | ret = btree_repair_node_end(c, b, prev, pulled_from_scan); |
407 | if (ret == DID_FILL_FROM_SCAN) { | |
408 | new_pass = true; | |
409 | ret = 0; | |
410 | } | |
aae15aaf KO |
411 | } |
412 | ||
413 | if (!IS_ERR_OR_NULL(prev)) | |
414 | six_unlock_read(&prev->c.lock); | |
415 | prev = NULL; | |
416 | if (!IS_ERR_OR_NULL(cur)) | |
417 | six_unlock_read(&cur->c.lock); | |
418 | cur = NULL; | |
a0b73c1c | 419 | |
aae15aaf KO |
420 | if (ret) |
421 | goto err; | |
422 | ||
423 | bch2_btree_and_journal_iter_exit(&iter); | |
43f5ea46 KO |
424 | |
425 | if (new_pass) | |
426 | goto again; | |
427 | ||
fc634d8e | 428 | bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b); |
5f43b013 | 429 | iter.prefetch = true; |
aae15aaf KO |
430 | |
431 | while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { | |
4351d3ec | 432 | bch2_bkey_buf_reassemble(&cur_k, c, k); |
aae15aaf | 433 | bch2_btree_and_journal_iter_advance(&iter); |
a0b73c1c | 434 | |
ca7d8fca | 435 | cur = bch2_btree_node_get_noiter(trans, cur_k.k, |
aae15aaf KO |
436 | b->c.btree_id, b->c.level - 1, |
437 | false); | |
438 | ret = PTR_ERR_OR_ZERO(cur); | |
a0b73c1c | 439 | |
cf904c8d KO |
440 | bch_err_msg(c, ret, "getting btree node"); |
441 | if (ret) | |
aae15aaf | 442 | goto err; |
a0b73c1c | 443 | |
43f5ea46 | 444 | ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan); |
aae15aaf KO |
445 | six_unlock_read(&cur->c.lock); |
446 | cur = NULL; | |
447 | ||
448 | if (ret == DROP_THIS_NODE) { | |
ca7d8fca | 449 | bch2_btree_node_evict(trans, cur_k.k); |
aae15aaf | 450 | ret = bch2_journal_key_delete(c, b->c.btree_id, |
4351d3ec | 451 | b->c.level, cur_k.k->k.p); |
43f5ea46 | 452 | new_pass = true; |
a0b73c1c | 453 | } |
aae15aaf KO |
454 | |
455 | if (ret) | |
456 | goto err; | |
457 | ||
458 | have_child = true; | |
a0b73c1c | 459 | } |
aae15aaf | 460 | |
fa8e94fa KO |
461 | printbuf_reset(&buf); |
462 | bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); | |
463 | ||
aae15aaf | 464 | if (mustfix_fsck_err_on(!have_child, c, |
b65db750 | 465 | btree_node_topology_interior_node_empty, |
aae15aaf KO |
466 | "empty interior btree node at btree %s level %u\n" |
467 | " %s", | |
88dfe193 | 468 | bch2_btree_id_str(b->c.btree_id), |
fa8e94fa | 469 | b->c.level, buf.buf)) |
aae15aaf KO |
470 | ret = DROP_THIS_NODE; |
471 | err: | |
d06c1a0c | 472 | fsck_err: |
aae15aaf KO |
473 | if (!IS_ERR_OR_NULL(prev)) |
474 | six_unlock_read(&prev->c.lock); | |
475 | if (!IS_ERR_OR_NULL(cur)) | |
476 | six_unlock_read(&cur->c.lock); | |
477 | ||
478 | bch2_btree_and_journal_iter_exit(&iter); | |
aae15aaf | 479 | |
43f5ea46 | 480 | if (!ret && new_pass) |
aae15aaf KO |
481 | goto again; |
482 | ||
43f5ea46 KO |
483 | BUG_ON(!ret && bch2_btree_node_check_topology(trans, b)); |
484 | ||
485 | bch2_bkey_buf_exit(&prev_k, c); | |
486 | bch2_bkey_buf_exit(&cur_k, c); | |
fa8e94fa | 487 | printbuf_exit(&buf); |
aae15aaf KO |
488 | return ret; |
489 | } | |
490 | ||
922bc5a0 | 491 | int bch2_check_topology(struct bch_fs *c) |
aae15aaf | 492 | { |
6bd68ec2 | 493 | struct btree_trans *trans = bch2_trans_get(c); |
43f5ea46 | 494 | struct bpos pulled_from_scan = POS_MIN; |
aae15aaf KO |
495 | int ret = 0; |
496 | ||
43f5ea46 | 497 | for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { |
faa6cb6c | 498 | struct btree_root *r = bch2_btree_id_root(c, i); |
43f5ea46 | 499 | bool reconstructed_root = false; |
faa6cb6c | 500 | |
43f5ea46 KO |
501 | if (r->error) { |
502 | ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); | |
503 | if (ret) | |
504 | break; | |
505 | reconstruct_root: | |
506 | bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i)); | |
507 | ||
508 | r->alive = false; | |
509 | r->error = 0; | |
510 | ||
511 | if (!bch2_btree_has_scanned_nodes(c, i)) { | |
512 | mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing, | |
513 | "no nodes found for btree %s, continue?", bch2_btree_id_str(i)); | |
e2e568bd | 514 | bch2_btree_root_alloc_fake_trans(trans, i, 0); |
43f5ea46 | 515 | } else { |
e2e568bd | 516 | bch2_btree_root_alloc_fake_trans(trans, i, 1); |
359571c3 | 517 | bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); |
43f5ea46 KO |
518 | ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); |
519 | if (ret) | |
520 | break; | |
521 | } | |
522 | ||
43f5ea46 KO |
523 | reconstructed_root = true; |
524 | } | |
faa6cb6c | 525 | |
43f5ea46 | 526 | struct btree *b = r->b; |
aae15aaf | 527 | |
6bd68ec2 | 528 | btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); |
43f5ea46 | 529 | ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); |
aae15aaf KO |
530 | six_unlock_read(&b->c.lock); |
531 | ||
532 | if (ret == DROP_THIS_NODE) { | |
43f5ea46 KO |
533 | bch2_btree_node_hash_remove(&c->btree_cache, b); |
534 | mutex_lock(&c->btree_cache.lock); | |
535 | list_move(&b->list, &c->btree_cache.freeable); | |
536 | mutex_unlock(&c->btree_cache.lock); | |
537 | ||
538 | r->b = NULL; | |
539 | ||
540 | if (!reconstructed_root) | |
541 | goto reconstruct_root; | |
542 | ||
543 | bch_err(c, "empty btree root %s", bch2_btree_id_str(i)); | |
e2e568bd | 544 | bch2_btree_root_alloc_fake_trans(trans, i, 0); |
43f5ea46 KO |
545 | r->alive = false; |
546 | ret = 0; | |
aae15aaf KO |
547 | } |
548 | } | |
43f5ea46 | 549 | fsck_err: |
6bd68ec2 | 550 | bch2_trans_put(trans); |
d06c1a0c | 551 | return ret; |
1c6fdbd8 KO |
552 | } |
553 | ||
2252aa27 KO |
554 | /* marking of btree keys/nodes: */ |
555 | ||
904823de | 556 | static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, |
24b27975 KO |
557 | unsigned level, struct btree **prev, |
558 | struct btree_iter *iter, struct bkey_s_c k, | |
c929f230 | 559 | bool initial) |
2252aa27 | 560 | { |
904823de | 561 | struct bch_fs *c = trans->c; |
24b27975 KO |
562 | |
563 | if (iter) { | |
564 | struct btree_path *path = btree_iter_path(trans, iter); | |
565 | struct btree *b = path_l(path)->b; | |
566 | ||
567 | if (*prev != b) { | |
568 | int ret = bch2_btree_node_check_topology(trans, b); | |
569 | if (ret) | |
570 | return ret; | |
571 | } | |
572 | *prev = b; | |
573 | } | |
574 | ||
58e1ea4b KO |
575 | struct bkey deleted = KEY(0, 0, 0); |
576 | struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; | |
27c15ed2 | 577 | struct printbuf buf = PRINTBUF; |
2252aa27 KO |
578 | int ret = 0; |
579 | ||
f40d13f9 | 580 | deleted.p = k.k->p; |
58e1ea4b | 581 | |
91f8b567 | 582 | if (initial) { |
29364f34 | 583 | BUG_ON(bch2_journal_seq_verify && |
f40d13f9 | 584 | k.k->version.lo > atomic64_read(&c->journal.seq)); |
91f8b567 | 585 | |
f40d13f9 | 586 | if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c, |
b65db750 | 587 | bkey_version_in_future, |
a9bc0a51 | 588 | "key version number higher than recorded: %llu > %llu", |
f40d13f9 | 589 | k.k->version.lo, |
a9bc0a51 | 590 | atomic64_read(&c->key_version))) |
f40d13f9 | 591 | atomic64_set(&c->key_version, k.k->version.lo); |
2252aa27 KO |
592 | } |
593 | ||
f40d13f9 | 594 | if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k), |
27c15ed2 KO |
595 | c, btree_bitmap_not_marked, |
596 | "btree ptr not marked in member info btree allocated bitmap\n %s", | |
f40d13f9 | 597 | (bch2_bkey_val_to_text(&buf, c, k), |
27c15ed2 KO |
598 | buf.buf))) { |
599 | mutex_lock(&c->sb_lock); | |
f40d13f9 | 600 | bch2_dev_btree_bitmap_mark(c, k); |
27c15ed2 KO |
601 | bch2_write_super(c); |
602 | mutex_unlock(&c->sb_lock); | |
603 | } | |
604 | ||
f40d13f9 KO |
605 | /* |
606 | * We require a commit before key_trigger() because | |
607 | * key_trigger(BTREE_TRIGGER_GC) is not idempotant; we'll calculate the | |
608 | * wrong result if we run it multiple times. | |
609 | */ | |
24b27975 | 610 | unsigned flags = !iter ? BTREE_TRIGGER_is_root : 0; |
f40d13f9 KO |
611 | |
612 | ret = bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k), | |
613 | BTREE_TRIGGER_check_repair|flags); | |
614 | if (ret) | |
615 | goto out; | |
616 | ||
617 | if (trans->nr_updates) { | |
618 | ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: | |
619 | -BCH_ERR_transaction_restart_nested; | |
620 | goto out; | |
621 | } | |
622 | ||
623 | ret = bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k), | |
624 | BTREE_TRIGGER_gc|flags); | |
625 | out: | |
91f8b567 | 626 | fsck_err: |
27c15ed2 | 627 | printbuf_exit(&buf); |
cf904c8d | 628 | bch_err_fn(c, ret); |
2252aa27 KO |
629 | return ret; |
630 | } | |
631 | ||
24b27975 | 632 | static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial) |
1c6fdbd8 | 633 | { |
904823de | 634 | struct bch_fs *c = trans->c; |
24b27975 | 635 | int level = 0, target_depth = btree_node_type_needs_gc(__btree_node_type(0, btree)) ? 0 : 1; |
1c6fdbd8 KO |
636 | int ret = 0; |
637 | ||
24b27975 KO |
638 | /* We need to make sure every leaf node is readable before going RW */ |
639 | if (initial) | |
640 | target_depth = 0; | |
1c6fdbd8 | 641 | |
24b27975 | 642 | /* root */ |
1c6fdbd8 | 643 | mutex_lock(&c->btree_root_lock); |
24b27975 KO |
644 | struct btree *b = bch2_btree_id_root(c, btree)->b; |
645 | if (!btree_node_fake(b)) { | |
646 | gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX)); | |
f40d13f9 KO |
647 | ret = lockrestart_do(trans, |
648 | bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, | |
24b27975 KO |
649 | NULL, NULL, bkey_i_to_s_c(&b->key), initial)); |
650 | level = b->c.level; | |
651 | } | |
1c6fdbd8 | 652 | mutex_unlock(&c->btree_root_lock); |
8b2b9d11 | 653 | |
79032b07 KO |
654 | if (ret) |
655 | return ret; | |
656 | ||
24b27975 KO |
657 | for (; level >= target_depth; --level) { |
658 | struct btree *prev = NULL; | |
659 | struct btree_iter iter; | |
660 | bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, level, | |
661 | BTREE_ITER_prefetch); | |
a0b73c1c | 662 | |
24b27975 KO |
663 | ret = for_each_btree_key_continue(trans, iter, 0, k, ({ |
664 | gc_pos_set(c, gc_pos_btree(btree, level, k.k->p)); | |
665 | bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial); | |
666 | })); | |
667 | if (ret) | |
668 | break; | |
e62d65f2 | 669 | } |
ba665494 | 670 | |
e62d65f2 KO |
671 | return ret; |
672 | } | |
673 | ||
cd575ddf KO |
674 | static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) |
675 | { | |
676 | return (int) btree_id_to_gc_phase(l) - | |
677 | (int) btree_id_to_gc_phase(r); | |
678 | } | |
679 | ||
24b27975 | 680 | static int bch2_gc_btrees(struct bch_fs *c) |
2252aa27 | 681 | { |
6bd68ec2 | 682 | struct btree_trans *trans = bch2_trans_get(c); |
cd575ddf | 683 | enum btree_id ids[BTREE_ID_NR]; |
2252aa27 | 684 | unsigned i; |
aae15aaf | 685 | int ret = 0; |
2252aa27 | 686 | |
cd575ddf KO |
687 | for (i = 0; i < BTREE_ID_NR; i++) |
688 | ids[i] = i; | |
689 | bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); | |
690 | ||
58dda9c1 KO |
691 | for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) { |
692 | unsigned btree = i < BTREE_ID_NR ? ids[i] : i; | |
2252aa27 | 693 | |
58dda9c1 | 694 | if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b)) |
faa6cb6c KO |
695 | continue; |
696 | ||
24b27975 | 697 | ret = bch2_gc_btree(trans, btree, true); |
faa6cb6c | 698 | |
b982d645 KO |
699 | if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), |
700 | c, btree_node_read_error, | |
701 | "btree node read error for %s", | |
702 | bch2_btree_id_str(btree))) | |
703 | ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); | |
704 | } | |
705 | fsck_err: | |
6bd68ec2 | 706 | bch2_trans_put(trans); |
cf904c8d | 707 | bch_err_fn(c, ret); |
aae15aaf | 708 | return ret; |
2252aa27 KO |
709 | } |
710 | ||
c281db0f | 711 | static int bch2_mark_superblocks(struct bch_fs *c) |
1c6fdbd8 | 712 | { |
1c6fdbd8 KO |
713 | mutex_lock(&c->sb_lock); |
714 | gc_pos_set(c, gc_phase(GC_PHASE_SB)); | |
715 | ||
5dd8c60e | 716 | int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); |
1c6fdbd8 | 717 | mutex_unlock(&c->sb_lock); |
c281db0f | 718 | return ret; |
1c6fdbd8 KO |
719 | } |
720 | ||
9ca53b55 KO |
721 | static void bch2_gc_free(struct bch_fs *c) |
722 | { | |
990d42d1 KO |
723 | genradix_free(&c->reflink_gc_table); |
724 | genradix_free(&c->gc_stripes); | |
dfe9bfb3 | 725 | |
9fea2274 | 726 | for_each_member_device(c, ca) { |
cb6fc943 | 727 | kvfree(rcu_dereference_protected(ca->buckets_gc, 1)); |
5735608c | 728 | ca->buckets_gc = NULL; |
9ca53b55 | 729 | |
180fb49d KO |
730 | free_percpu(ca->usage_gc); |
731 | ca->usage_gc = NULL; | |
9ca53b55 KO |
732 | } |
733 | ||
5e82a9a1 KO |
734 | free_percpu(c->usage_gc); |
735 | c->usage_gc = NULL; | |
06b7345c KO |
736 | } |
737 | ||
58dda9c1 | 738 | static int bch2_gc_done(struct bch_fs *c) |
9ca53b55 | 739 | { |
3a402c8d | 740 | struct bch_dev *ca = NULL; |
fa8e94fa | 741 | struct printbuf buf = PRINTBUF; |
9fea2274 | 742 | unsigned i; |
cccf4e6d | 743 | int ret = 0; |
9ca53b55 | 744 | |
ec061b21 KO |
745 | percpu_down_write(&c->mark_lock); |
746 | ||
58dda9c1 KO |
747 | #define copy_field(_err, _f, _msg, ...) \ |
748 | if (fsck_err_on(dst->_f != src->_f, c, _err, \ | |
749 | _msg ": got %llu, should be %llu" , ##__VA_ARGS__, \ | |
750 | dst->_f, src->_f)) \ | |
80b3bf33 | 751 | dst->_f = src->_f |
58dda9c1 | 752 | #define copy_dev_field(_err, _f, _msg, ...) \ |
9fea2274 | 753 | copy_field(_err, _f, "dev %u has wrong " _msg, ca->dev_idx, ##__VA_ARGS__) |
58dda9c1 | 754 | #define copy_fs_field(_err, _f, _msg, ...) \ |
b65db750 | 755 | copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__) |
9ca53b55 | 756 | |
180fb49d KO |
757 | for (i = 0; i < ARRAY_SIZE(c->usage); i++) |
758 | bch2_fs_usage_acc_to_base(c, i); | |
759 | ||
9fea2274 | 760 | __for_each_member_device(c, ca) { |
ec061b21 KO |
761 | struct bch_dev_usage *dst = ca->usage_base; |
762 | struct bch_dev_usage *src = (void *) | |
73bd774d | 763 | bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc, |
ec061b21 KO |
764 | dev_usage_u64s()); |
765 | ||
ec061b21 | 766 | for (i = 0; i < BCH_DATA_NR; i++) { |
b65db750 | 767 | copy_dev_field(dev_usage_buckets_wrong, |
e58f963c | 768 | d[i].buckets, "%s buckets", bch2_data_type_str(i)); |
b65db750 | 769 | copy_dev_field(dev_usage_sectors_wrong, |
e58f963c | 770 | d[i].sectors, "%s sectors", bch2_data_type_str(i)); |
b65db750 | 771 | copy_dev_field(dev_usage_fragmented_wrong, |
e58f963c | 772 | d[i].fragmented, "%s fragmented", bch2_data_type_str(i)); |
180fb49d | 773 | } |
b5e85d4d | 774 | } |
9ca53b55 KO |
775 | |
776 | { | |
ecf37a4a | 777 | unsigned nr = fs_usage_u64s(c); |
5e82a9a1 | 778 | struct bch_fs_usage *dst = c->usage_base; |
23f80d2b | 779 | struct bch_fs_usage *src = (void *) |
73bd774d | 780 | bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr); |
9ca53b55 | 781 | |
b65db750 | 782 | copy_fs_field(fs_usage_hidden_wrong, |
8e7834a8 | 783 | b.hidden, "hidden"); |
b65db750 | 784 | copy_fs_field(fs_usage_btree_wrong, |
8e7834a8 | 785 | b.btree, "btree"); |
06b7345c | 786 | |
58dda9c1 KO |
787 | copy_fs_field(fs_usage_data_wrong, |
788 | b.data, "data"); | |
789 | copy_fs_field(fs_usage_cached_wrong, | |
790 | b.cached, "cached"); | |
791 | copy_fs_field(fs_usage_reserved_wrong, | |
792 | b.reserved, "reserved"); | |
793 | copy_fs_field(fs_usage_nr_inodes_wrong, | |
794 | b.nr_inodes,"nr_inodes"); | |
795 | ||
796 | for (i = 0; i < BCH_REPLICAS_MAX; i++) | |
797 | copy_fs_field(fs_usage_persistent_reserved_wrong, | |
798 | persistent_reserved[i], | |
799 | "persistent_reserved[%i]", i); | |
9ca53b55 | 800 | |
7ef2a73a | 801 | for (i = 0; i < c->replicas.nr; i++) { |
086a52f7 | 802 | struct bch_replicas_entry_v1 *e = |
8777210b | 803 | cpu_replicas_entry(&c->replicas, i); |
8777210b | 804 | |
fa8e94fa KO |
805 | printbuf_reset(&buf); |
806 | bch2_replicas_entry_to_text(&buf, e); | |
8777210b | 807 | |
b65db750 KO |
808 | copy_fs_field(fs_usage_replicas_wrong, |
809 | replicas[i], "%s", buf.buf); | |
7ef2a73a | 810 | } |
9ca53b55 | 811 | } |
76f4c7b0 | 812 | |
9ca53b55 KO |
813 | #undef copy_fs_field |
814 | #undef copy_dev_field | |
dfe9bfb3 KO |
815 | #undef copy_stripe_field |
816 | #undef copy_field | |
cccf4e6d | 817 | fsck_err: |
f295298b | 818 | bch2_dev_put(ca); |
cf904c8d | 819 | bch_err_fn(c, ret); |
ec061b21 | 820 | percpu_up_write(&c->mark_lock); |
fa8e94fa | 821 | printbuf_exit(&buf); |
cccf4e6d | 822 | return ret; |
9ca53b55 KO |
823 | } |
824 | ||
/*
 * Allocate the gc-side usage counters (c->usage_gc, ca->usage_gc) that the
 * mark-and-sweep pass will recompute; bch2_gc_done() later compares them
 * against the live counters.
 *
 * Returns: 0 on success, -BCH_ERR_ENOMEM_gc_start on allocation failure.
 */
static int bch2_gc_start(struct bch_fs *c)
{
	/* Must have been freed by bch2_gc_free() at the end of the last run: */
	BUG_ON(c->usage_gc);

	c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
					 sizeof(u64), GFP_KERNEL);
	if (!c->usage_gc) {
		bch_err(c, "error allocating c->usage_gc");
		return -BCH_ERR_ENOMEM_gc_start;
	}

	for_each_member_device(c, ca) {
		BUG_ON(ca->usage_gc);

		ca->usage_gc = alloc_percpu(struct bch_dev_usage);
		if (!ca->usage_gc) {
			bch_err(c, "error allocating ca->usage_gc");
			/* drop the device ref for_each_member_device() holds: */
			bch2_dev_put(ca);
			return -BCH_ERR_ENOMEM_gc_start;
		}

		/*
		 * Start from "all usable buckets free"; marking moves buckets
		 * to their correct state as references are walked:
		 */
		this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
			       ca->mi.nbuckets - ca->mi.first_bucket);
	}

	return 0;
}
852 | ||
3d48a7f8 KO |
853 | /* returns true if not equal */ |
854 | static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l, | |
855 | struct bch_alloc_v4 r) | |
856 | { | |
857 | return l.gen != r.gen || | |
858 | l.oldest_gen != r.oldest_gen || | |
859 | l.data_type != r.data_type || | |
860 | l.dirty_sectors != r.dirty_sectors || | |
861 | l.cached_sectors != r.cached_sectors || | |
862 | l.stripe_redundancy != r.stripe_redundancy || | |
863 | l.stripe != r.stripe; | |
864 | } | |
865 | ||
/*
 * Reconcile one alloc btree key with the counters gc computed for that
 * bucket: report any mismatching field via fsck_err_on() and, if the user
 * (or fix-errors policy) accepts, rewrite the key with the gc values.
 *
 * Runs inside a btree transaction; the update is committed by the caller
 * (bch2_gc_alloc_done()) with triggers disabled (BTREE_TRIGGER_norun),
 * since gc already accounted for the change.
 */
static int bch2_alloc_write_key(struct btree_trans *trans,
				struct btree_iter *iter,
				struct bch_dev *ca,
				struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_alloc_v4 *a;
	struct bch_alloc_v4 old_gc, gc, old_convert, new;
	const struct bch_alloc_v4 *old;
	int ret;

	/* Normalize whatever on-disk alloc key version we read to v4: */
	old = bch2_alloc_to_v4(k, &old_convert);
	gc = new = *old;

	percpu_down_read(&c->mark_lock);
	/* Overlay the in-memory gc bucket state onto our working copy: */
	__bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));

	old_gc = gc;

	/*
	 * Superblock/journal buckets on an offline device can't have been
	 * walked by gc - trust the existing key for those:
	 */
	if ((old->data_type == BCH_DATA_sb ||
	     old->data_type == BCH_DATA_journal) &&
	    !bch2_dev_is_online(ca)) {
		gc.data_type = old->data_type;
		gc.dirty_sectors = old->dirty_sectors;
	}

	/*
	 * gc.data_type doesn't yet include need_discard & need_gc_gen states -
	 * fix that here:
	 */
	alloc_data_type_set(&gc, gc.data_type);

	/* If the overlay changed anything, fold it into device usage: */
	if (gc.data_type != old_gc.data_type ||
	    gc.dirty_sectors != old_gc.dirty_sectors)
		bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true);
	percpu_up_read(&c->mark_lock);

	if (fsck_err_on(new.data_type != gc.data_type, c,
			alloc_key_data_type_wrong,
			"bucket %llu:%llu gen %u has wrong data_type"
			": got %s, should be %s",
			iter->pos.inode, iter->pos.offset,
			gc.gen,
			bch2_data_type_str(new.data_type),
			bch2_data_type_str(gc.data_type)))
		new.data_type = gc.data_type;

/* Report + repair one field; on error fsck_err_on() jumps to fsck_err: */
#define copy_bucket_field(_errtype, _f)					\
	if (fsck_err_on(new._f != gc._f, c, _errtype,			\
			"bucket %llu:%llu gen %u data type %s has wrong " #_f	\
			": got %u, should be %u",			\
			iter->pos.inode, iter->pos.offset,		\
			gc.gen,						\
			bch2_data_type_str(gc.data_type),		\
			new._f, gc._f))					\
		new._f = gc._f;						\

	copy_bucket_field(alloc_key_gen_wrong,
			  gen);
	copy_bucket_field(alloc_key_dirty_sectors_wrong,
			  dirty_sectors);
	copy_bucket_field(alloc_key_cached_sectors_wrong,
			  cached_sectors);
	copy_bucket_field(alloc_key_stripe_wrong,
			  stripe);
	copy_bucket_field(alloc_key_stripe_redundancy_wrong,
			  stripe_redundancy);
#undef copy_bucket_field

	/* Nothing accepted for repair - leave the key alone: */
	if (!bch2_alloc_v4_cmp(*old, new))
		return 0;

	a = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	a->v = new;

	/*
	 * The trigger normally makes sure this is set, but we're not running
	 * triggers:
	 */
	if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
		a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));

	ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_norun);
fsck_err:
	return ret;
}
956 | ||
/*
 * Walk every device's alloc keys and write back any buckets whose gc state
 * disagrees with the btree, via bch2_alloc_write_key().
 *
 * Returns: 0 on success, or the first error from the per-device walk.
 */
static int bch2_gc_alloc_done(struct bch_fs *c)
{
	int ret = 0;

	for_each_member_device(c, ca) {
		ret = bch2_trans_run(c,
			for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc,
					POS(ca->dev_idx, ca->mi.first_bucket),
					POS(ca->dev_idx, ca->mi.nbuckets - 1),
					BTREE_ITER_slots|BTREE_ITER_prefetch, k,
					NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
				bch2_alloc_write_key(trans, &iter, ca, k)));
		if (ret) {
			/*
			 * Breaking out of for_each_member_device() early -
			 * drop the device ref it holds:
			 */
			bch2_dev_put(ca);
			break;
		}
	}

	bch_err_fn(c, ret);
	return ret;
}
41e37786 | 978 | |
/*
 * Allocate the gc-side bucket arrays (ca->buckets_gc) and seed each gc
 * bucket's gen from the alloc btree, before gc walks references.
 *
 * Returns: 0 on success, -BCH_ERR_ENOMEM_gc_alloc_start on allocation
 * failure, or a btree iteration error.
 */
static int bch2_gc_alloc_start(struct bch_fs *c)
{
	for_each_member_device(c, ca) {
		/* kvmalloc: nbuckets * sizeof(struct bucket) can be large */
		struct bucket_array *buckets = kvmalloc(sizeof(struct bucket_array) +
					    ca->mi.nbuckets * sizeof(struct bucket),
					    GFP_KERNEL|__GFP_ZERO);
		if (!buckets) {
			/* drop the ref for_each_member_device() holds: */
			bch2_dev_put(ca);
			bch_err(c, "error allocating ca->buckets[gc]");
			return -BCH_ERR_ENOMEM_gc_alloc_start;
		}

		buckets->first_bucket	= ca->mi.first_bucket;
		buckets->nbuckets	= ca->mi.nbuckets;
		/* published under RCU; readers use gc_bucket() */
		rcu_assign_pointer(ca->buckets_gc, buckets);
	}

	struct bch_dev *ca = NULL;
	int ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
				   BTREE_ITER_prefetch, k, ({
			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
			if (!ca) {
				/* key for a missing device: skip to the next one */
				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
			}

			struct bch_alloc_v4 a_convert;
			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);

			/* seed gc's view of this bucket with the on-disk gen: */
			struct bucket *g = gc_bucket(ca, k.k->p.offset);
			g->gen_valid	= 1;
			g->gen		= a->gen;
			0;
		})));
	bch2_dev_put(ca);
	bch_err_fn(c, ret);
	return ret;
}
1018 | ||
/*
 * Compare one reflink btree key's refcount against the count gc accumulated
 * in c->reflink_gc_table, and repair it if the user accepts.
 *
 * @idx advances monotonically through the gc table as the caller iterates
 * the reflink btree in key order; both are sorted by offset.
 */
static int bch2_gc_write_reflink_key(struct btree_trans *trans,
				     struct btree_iter *iter,
				     struct bkey_s_c k,
				     size_t *idx)
{
	struct bch_fs *c = trans->c;
	const __le64 *refcount = bkey_refcount_c(k);
	struct printbuf buf = PRINTBUF;
	struct reflink_gc *r;
	int ret = 0;

	/* not a refcounted key type - nothing to check: */
	if (!refcount)
		return 0;

	/* advance to the gc-table entry matching this key's offset: */
	while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
	       r->offset < k.k->p.offset)
		++*idx;

	/*
	 * The table was built from this same btree in bch2_gc_reflink_start();
	 * a mismatch here means the btree changed underneath us:
	 */
	if (!r ||
	    r->offset != k.k->p.offset ||
	    r->size != k.k->size) {
		bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
		return -EINVAL;
	}

	if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
			reflink_v_refcount_wrong,
			"reflink key has wrong refcount:\n"
			"  %s\n"
			"  should be %u",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
			r->refcount)) {
		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
		ret = PTR_ERR_OR_ZERO(new);
		if (ret)
			goto out;

		/* refcount 0 means no remaining references - delete the key: */
		if (!r->refcount)
			new->k.type = KEY_TYPE_deleted;
		else
			*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
		ret = bch2_trans_update(trans, iter, new, 0);
	}
out:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}
890b74f0 | 1067 | |
/*
 * Finish the reflink gc pass: walk the reflink btree once, repairing
 * refcounts against the table built in bch2_gc_reflink_start(), then reset
 * the table cursor for the next run.
 */
static int bch2_gc_reflink_done(struct bch_fs *c)
{
	/* walks the gc table in lockstep with the btree iteration: */
	size_t idx = 0;

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_reflink, POS_MIN,
				BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_gc_write_reflink_key(trans, &iter, k, &idx)));
	c->reflink_gc_nr = 0;
	return ret;
}
1081 | ||
/*
 * Build the table of reflink extents (offset/size, refcount zeroed) that
 * gc will increment as it walks references; bch2_gc_reflink_done() then
 * compares it against the btree.
 */
static int bch2_gc_reflink_start(struct bch_fs *c)
{
	c->reflink_gc_nr = 0;

	/*
	 * NOTE(review): `ret` is assigned inside the statement expression
	 * that initializes it (on the ENOMEM path) - relies on the statement
	 * expression being evaluated before initialization completes; confirm
	 * this is the intended idiom for these iteration macros.
	 */
	int ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
				   BTREE_ITER_prefetch, k, ({
			const __le64 *refcount = bkey_refcount_c(k);

			/* only refcounted key types participate: */
			if (!refcount)
				continue;

			struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table,
							c->reflink_gc_nr++, GFP_KERNEL);
			if (!r) {
				ret = -BCH_ERR_ENOMEM_gc_reflink_start;
				break;
			}

			r->offset	= k.k->p.offset;
			r->size		= k.k->size;
			r->refcount	= 0;
			0;
		})));

	bch_err_fn(c, ret);
	return ret;
}
1110 | ||
326568f1 KO |
1111 | static int bch2_gc_write_stripes_key(struct btree_trans *trans, |
1112 | struct btree_iter *iter, | |
1113 | struct bkey_s_c k) | |
990d42d1 | 1114 | { |
326568f1 | 1115 | struct bch_fs *c = trans->c; |
fa8e94fa | 1116 | struct printbuf buf = PRINTBUF; |
326568f1 KO |
1117 | const struct bch_stripe *s; |
1118 | struct gc_stripe *m; | |
d57c9add | 1119 | bool bad = false; |
990d42d1 KO |
1120 | unsigned i; |
1121 | int ret = 0; | |
1122 | ||
326568f1 | 1123 | if (k.k->type != KEY_TYPE_stripe) |
990d42d1 KO |
1124 | return 0; |
1125 | ||
326568f1 KO |
1126 | s = bkey_s_c_to_stripe(k).v; |
1127 | m = genradix_ptr(&c->gc_stripes, k.k->p.offset); | |
5222a460 | 1128 | |
d57c9add KO |
1129 | for (i = 0; i < s->nr_blocks; i++) { |
1130 | u32 old = stripe_blockcount_get(s, i); | |
1131 | u32 new = (m ? m->block_sectors[i] : 0); | |
1132 | ||
1133 | if (old != new) { | |
1134 | prt_printf(&buf, "stripe block %u has wrong sector count: got %u, should be %u\n", | |
1135 | i, old, new); | |
1136 | bad = true; | |
1137 | } | |
1138 | } | |
1139 | ||
1140 | if (bad) | |
1141 | bch2_bkey_val_to_text(&buf, c, k); | |
1142 | ||
b65db750 KO |
1143 | if (fsck_err_on(bad, c, stripe_sector_count_wrong, |
1144 | "%s", buf.buf)) { | |
326568f1 KO |
1145 | struct bkey_i_stripe *new; |
1146 | ||
1147 | new = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); | |
1148 | ret = PTR_ERR_OR_ZERO(new); | |
1149 | if (ret) | |
1150 | return ret; | |
990d42d1 | 1151 | |
326568f1 | 1152 | bkey_reassemble(&new->k_i, k); |
990d42d1 | 1153 | |
326568f1 KO |
1154 | for (i = 0; i < new->v.nr_blocks; i++) |
1155 | stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0); | |
990d42d1 | 1156 | |
326568f1 | 1157 | ret = bch2_trans_update(trans, iter, &new->k_i, 0); |
990d42d1 KO |
1158 | } |
1159 | fsck_err: | |
326568f1 KO |
1160 | printbuf_exit(&buf); |
1161 | return ret; | |
1162 | } | |
990d42d1 | 1163 | |
/*
 * Finish the stripes gc pass: walk the stripes btree and repair any keys
 * whose block sector counts disagree with gc, committing as we go.
 */
static int bch2_gc_stripes_done(struct bch_fs *c)
{
	return bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_stripes, POS_MIN,
				BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_gc_write_stripes_key(trans, &iter, k)));
}
1173 | ||
/**
 * bch2_check_allocations - walk all references to buckets, and recompute them:
 *
 * @c:			filesystem object
 *
 * Returns: 0 on success, or standard errcode on failure
 *
 * Order matters here:
 *  - Concurrent GC relies on the fact that we have a total ordering for
 *    everything that GC walks - see  gc_will_visit_node(),
 *    gc_will_visit_root()
 *
 *  - also, references move around in the course of index updates and
 *    various other crap: everything needs to agree on the ordering
 *    references are allowed to move around in - e.g., we're allowed to
 *    start with a reference owned by an open_bucket (the allocator) and
 *    move it to the btree, but not the reverse.
 *
 *    This is necessary to ensure that gc doesn't miss references that
 *    move around - if references move backwards in the ordering GC
 *    uses, GC could skip past them
 */
int bch2_check_allocations(struct bch_fs *c)
{
	int ret;

	lockdep_assert_held(&c->state_lock);

	down_write(&c->gc_lock);

	/* no interior node updates may be in flight while gc walks the tree: */
	bch2_btree_interior_updates_flush(c);

	ret   = bch2_gc_start(c) ?:
		bch2_gc_alloc_start(c) ?:
		bch2_gc_reflink_start(c);
	if (ret)
		goto out;

	gc_pos_set(c, gc_phase(GC_PHASE_START));

	ret = bch2_mark_superblocks(c);
	BUG_ON(ret);

	ret = bch2_gc_btrees(c);
	if (ret)
		goto out;

	c->gc_count++;

	/* block journal writes while the *_done passes repair keys: */
	bch2_journal_block(&c->journal);
out:
	/*
	 * NOTE(review): any error from the setup/walk phase is overwritten
	 * here, and the *_done passes run even after a failed start - confirm
	 * this is intentional (vs. `ret = ret ?: ...` / skipping on error).
	 */
	ret   = bch2_gc_alloc_done(c) ?:
		bch2_gc_done(c) ?:
		bch2_gc_stripes_done(c) ?:
		bch2_gc_reflink_done(c);

	bch2_journal_unblock(&c->journal);

	percpu_down_write(&c->mark_lock);
	/* Indicates that gc is no longer in progress: */
	__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));

	bch2_gc_free(c);
	percpu_up_write(&c->mark_lock);

	up_write(&c->gc_lock);

	/*
	 * At startup, allocations can happen directly instead of via the
	 * allocator thread - issue wakeup in case they blocked on gc_lock:
	 */
	closure_wake_up(&c->freelist_wait);
	bch_err_fn(c, ret);
	return ret;
}
1249 | ||
/*
 * Per-key worker for the gens pass of bch2_gc_gens():
 *  - if any pointer in the key is stale by more than 16 gens (threshold
 *    appears to be a heuristic - see dev_ptr_stale()), rewrite the key via
 *    bch2_extent_normalize(), which presumably drops stale cached pointers;
 *  - otherwise, record each pointer's gen as a lower bound in
 *    ca->oldest_gen so bucket gens can later be aged forward.
 */
static int gc_btree_gens_key(struct btree_trans *trans,
			     struct btree_iter *iter,
			     struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct bkey_i *u;
	int ret;

	/* shutting down - don't start new updates: */
	if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
		return -EROFS;

	percpu_down_read(&c->mark_lock);
	rcu_read_lock();
	bkey_for_each_ptr(ptrs, ptr) {
		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
		if (!ca)
			continue;

		if (dev_ptr_stale(ca, ptr) > 16) {
			/* drop locks before taking the update path: */
			rcu_read_unlock();
			percpu_up_read(&c->mark_lock);
			goto update;
		}
	}

	bkey_for_each_ptr(ptrs, ptr) {
		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
		if (!ca)
			continue;

		/* oldest_gen is the minimum gen still referenced per bucket: */
		u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
		if (gen_after(*gen, ptr->gen))
			*gen = ptr->gen;
	}
	rcu_read_unlock();
	percpu_up_read(&c->mark_lock);
	return 0;
update:
	u = bch2_bkey_make_mut(trans, iter, &k, 0);
	ret = PTR_ERR_OR_ZERO(u);
	if (ret)
		return ret;

	bch2_extent_normalize(c, bkey_i_to_s(u));
	return 0;
}
1297 | ||
ad897d24 KO |
1298 | static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev *ca, |
1299 | struct btree_iter *iter, struct bkey_s_c k) | |
c45c8667 | 1300 | { |
19a614d2 KO |
1301 | struct bch_alloc_v4 a_convert; |
1302 | const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); | |
3d48a7f8 | 1303 | struct bkey_i_alloc_v4 *a_mut; |
c45c8667 KO |
1304 | int ret; |
1305 | ||
19a614d2 | 1306 | if (a->oldest_gen == ca->oldest_gen[iter->pos.offset]) |
c45c8667 KO |
1307 | return 0; |
1308 | ||
3d48a7f8 KO |
1309 | a_mut = bch2_alloc_to_v4_mut(trans, k); |
1310 | ret = PTR_ERR_OR_ZERO(a_mut); | |
1311 | if (ret) | |
1312 | return ret; | |
1313 | ||
1314 | a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; | |
fa9bb741 | 1315 | alloc_data_type_set(&a_mut->v, a_mut->v.data_type); |
c45c8667 | 1316 | |
3d48a7f8 | 1317 | return bch2_trans_update(trans, iter, &a_mut->k_i, 0); |
c45c8667 KO |
1318 | } |
1319 | ||
/*
 * The "gens" gc pass: walk every pointer-bearing btree to find, per bucket,
 * the oldest generation still referenced (ca->oldest_gen), then write those
 * back into the alloc btree via bch2_alloc_write_oldest_gen(). Along the
 * way, keys with very stale pointers are normalized (see gc_btree_gens_key()).
 *
 * Returns: 0 on success (including when another run holds gc_gens_lock),
 * or a standard errcode.
 */
int bch2_gc_gens(struct bch_fs *c)
{
	u64 b, start_time = local_clock();
	int ret;

	/*
	 * Ideally we would be using state_lock and not gc_lock here, but that
	 * introduces a deadlock in the RO path - we currently take the state
	 * lock at the start of going RO, thus the gc thread may get stuck:
	 */
	if (!mutex_trylock(&c->gc_gens_lock))
		return 0;

	trace_and_count(c, gc_gens_start, c);
	down_read(&c->gc_lock);

	/* snapshot current bucket gens as the starting oldest_gen: */
	for_each_member_device(c, ca) {
		struct bucket_gens *gens = bucket_gens(ca);

		BUG_ON(ca->oldest_gen);

		ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL);
		if (!ca->oldest_gen) {
			/* drop the ref for_each_member_device() holds: */
			bch2_dev_put(ca);
			ret = -BCH_ERR_ENOMEM_gc_gens;
			goto err;
		}

		for (b = gens->first_bucket;
		     b < gens->nbuckets; b++)
			ca->oldest_gen[b] = gens->b[b];
	}

	/* walk every btree that can contain pointers: */
	for (unsigned i = 0; i < BTREE_ID_NR; i++)
		if (btree_type_has_ptrs(i)) {
			/* published so progress is visible (e.g. in sysfs): */
			c->gc_gens_btree = i;
			c->gc_gens_pos = POS_MIN;

			ret = bch2_trans_run(c,
				for_each_btree_key_commit(trans, iter, i,
						POS_MIN,
						BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
						k,
						NULL, NULL,
						BCH_TRANS_COMMIT_no_enospc,
					gc_btree_gens_key(trans, &iter, k)));
			if (ret)
				goto err;
		}

	/* write the computed oldest_gen back into the alloc btree: */
	struct bch_dev *ca = NULL;
	ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
				POS_MIN,
				BTREE_ITER_prefetch,
				k,
				NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc, ({
			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
			if (!ca) {
				/* alloc key for a missing device - skip it: */
				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
			}
			bch2_alloc_write_oldest_gen(trans, ca, &iter, k);
		})));
	bch2_dev_put(ca);

	if (ret)
		goto err;

	c->gc_gens_btree	= 0;
	c->gc_gens_pos		= POS_MIN;

	c->gc_count++;

	bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
	trace_and_count(c, gc_gens_end, c);
err:
	/* the macro's `ca` shadows the one declared above - intentional: */
	for_each_member_device(c, ca) {
		kvfree(ca->oldest_gen);
		ca->oldest_gen = NULL;
	}

	up_read(&c->gc_lock);
	mutex_unlock(&c->gc_gens_lock);
	/* EROFS during shutdown is expected - don't log it as an error: */
	if (!bch2_err_matches(ret, EROFS))
		bch_err_fn(c, ret);
	return ret;
}
1409 | ||
/* Workqueue entry point for async gens gc; pairs with bch2_gc_gens_async(). */
static void bch2_gc_gens_work(struct work_struct *work)
{
	struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work);
	bch2_gc_gens(c);
	/* drop the write ref taken by bch2_gc_gens_async(): */
	bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
}
1416 | ||
/*
 * Kick off a gens gc run on the write-ref workqueue; a no-op if the
 * filesystem is going read-only (ref unavailable) or a run is already
 * queued (queue_work() returns false, so the ref is dropped again).
 */
void bch2_gc_gens_async(struct bch_fs *c)
{
	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_gc_gens) &&
	    !queue_work(c->write_ref_wq, &c->gc_gens_work))
		bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
}
1423 | ||
/* One-time gc state initialization, called during filesystem setup. */
void bch2_fs_gc_init(struct bch_fs *c)
{
	seqcount_init(&c->gc_pos_lock);

	INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
}