Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
7b3f84ea | 4 | #include "alloc_background.h" |
d2554263 | 5 | #include "bkey_buf.h" |
401585fe | 6 | #include "btree_journal_iter.h" |
4409b808 | 7 | #include "btree_node_scan.h" |
1c6fdbd8 KO |
8 | #include "btree_update.h" |
9 | #include "btree_update_interior.h" | |
10 | #include "btree_io.h" | |
3e0745e2 | 11 | #include "buckets.h" |
1c6fdbd8 | 12 | #include "dirent.h" |
d4bf5eec | 13 | #include "errcode.h" |
1c6fdbd8 | 14 | #include "error.h" |
96385742 | 15 | #include "fs-common.h" |
1c6fdbd8 | 16 | #include "journal_io.h" |
644d180b | 17 | #include "journal_reclaim.h" |
1dd7f9d9 | 18 | #include "journal_seq_blacklist.h" |
aaad530a | 19 | #include "logged_ops.h" |
a4805d66 | 20 | #include "move.h" |
1c6fdbd8 | 21 | #include "quota.h" |
fb3f57bb | 22 | #include "rebalance.h" |
1c6fdbd8 | 23 | #include "recovery.h" |
d2554263 | 24 | #include "recovery_passes.h" |
42b72e0b | 25 | #include "replicas.h" |
a37ad1a3 | 26 | #include "sb-clean.h" |
84f16387 | 27 | #include "sb-downgrade.h" |
8e877caa | 28 | #include "snapshot.h" |
1c6fdbd8 KO |
29 | #include "super-io.h" |
30 | ||
644d180b | 31 | #include <linux/sort.h> |
1c6fdbd8 KO |
32 | #include <linux/stat.h> |
33 | ||
34 | #define QSTR(n) { { { .len = strlen(n) } }, .name = n } | |
35 | ||
55936afe KO |
36 | void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) |
37 | { | |
38 | u64 b = BIT_ULL(btree); | |
39 | ||
40 | if (!(c->sb.btrees_lost_data & b)) { | |
41 | bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); | |
42 | ||
43 | mutex_lock(&c->sb_lock); | |
44 | bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); | |
45 | bch2_write_super(c); | |
46 | mutex_unlock(&c->sb_lock); | |
47 | } | |
48 | } | |
49 | ||
33114c2d | 50 | /* for -o reconstruct_alloc: */ |
bdbf953b | 51 | static void bch2_reconstruct_alloc(struct bch_fs *c) |
33114c2d | 52 | { |
cdce1094 KO |
53 | bch2_journal_log_msg(c, "dropping alloc info"); |
54 | bch_info(c, "dropping and reconstructing all alloc info"); | |
55 | ||
56 | mutex_lock(&c->sb_lock); | |
57 | struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); | |
58 | ||
59 | __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required); | |
60 | __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_alloc_info, ext->recovery_passes_required); | |
61 | __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_lrus, ext->recovery_passes_required); | |
62 | __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_extents_to_backpointers, ext->recovery_passes_required); | |
63 | __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_alloc_to_lru_refs, ext->recovery_passes_required); | |
64 | ||
65 | __set_bit_le64(BCH_FSCK_ERR_ptr_to_missing_alloc_key, ext->errors_silent); | |
66 | __set_bit_le64(BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen, ext->errors_silent); | |
67 | __set_bit_le64(BCH_FSCK_ERR_stale_dirty_ptr, ext->errors_silent); | |
d1b213a0 KO |
68 | |
69 | __set_bit_le64(BCH_FSCK_ERR_dev_usage_buckets_wrong, ext->errors_silent); | |
70 | __set_bit_le64(BCH_FSCK_ERR_dev_usage_sectors_wrong, ext->errors_silent); | |
71 | __set_bit_le64(BCH_FSCK_ERR_dev_usage_fragmented_wrong, ext->errors_silent); | |
72 | ||
73 | __set_bit_le64(BCH_FSCK_ERR_fs_usage_btree_wrong, ext->errors_silent); | |
74 | __set_bit_le64(BCH_FSCK_ERR_fs_usage_cached_wrong, ext->errors_silent); | |
75 | __set_bit_le64(BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, ext->errors_silent); | |
76 | __set_bit_le64(BCH_FSCK_ERR_fs_usage_replicas_wrong, ext->errors_silent); | |
77 | ||
cdce1094 KO |
78 | __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); |
79 | __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); | |
80 | __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); | |
d1b213a0 | 81 | __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); |
cdce1094 KO |
82 | __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); |
83 | __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); | |
84 | __set_bit_le64(BCH_FSCK_ERR_need_discard_key_wrong, ext->errors_silent); | |
85 | __set_bit_le64(BCH_FSCK_ERR_freespace_key_wrong, ext->errors_silent); | |
86 | __set_bit_le64(BCH_FSCK_ERR_bucket_gens_key_wrong, ext->errors_silent); | |
87 | __set_bit_le64(BCH_FSCK_ERR_freespace_hole_missing, ext->errors_silent); | |
88 | __set_bit_le64(BCH_FSCK_ERR_ptr_to_missing_backpointer, ext->errors_silent); | |
89 | __set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); | |
90 | c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); | |
91 | ||
92 | bch2_write_super(c); | |
93 | mutex_unlock(&c->sb_lock); | |
94 | ||
95 | c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); | |
96 | ||
a5860368 | 97 | |
bdbf953b KO |
98 | bch2_shoot_down_journal_keys(c, BTREE_ID_alloc, |
99 | 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); | |
100 | bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers, | |
101 | 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); | |
102 | bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard, | |
103 | 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); | |
104 | bch2_shoot_down_journal_keys(c, BTREE_ID_freespace, | |
105 | 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); | |
106 | bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens, | |
107 | 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); | |
33114c2d KO |
108 | } |
109 | ||
877da05f KO |
110 | /* |
111 | * Btree node pointers have a field to stack a pointer to the in memory btree | |
112 | * node; we need to zero out this field when reading in btree nodes, or when | |
113 | * reading in keys from the journal: | |
114 | */ | |
115 | static void zero_out_btree_mem_ptr(struct journal_keys *keys) | |
116 | { | |
95ffc7fb | 117 | darray_for_each(*keys, i) |
877da05f KO |
118 | if (i->k->k.type == KEY_TYPE_btree_ptr_v2) |
119 | bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0; | |
120 | } | |
121 | ||
d0734356 KO |
122 | /* journal replay: */ |
123 | ||
124 | static void replay_now_at(struct journal *j, u64 seq) | |
125 | { | |
126 | BUG_ON(seq < j->replay_journal_seq); | |
c609947b KO |
127 | |
128 | seq = min(seq, j->replay_journal_seq_end); | |
d0734356 KO |
129 | |
130 | while (j->replay_journal_seq < seq) | |
131 | bch2_journal_pin_put(j, j->replay_journal_seq++); | |
132 | } | |
133 | ||
d8601afc KO |
134 | static int bch2_journal_replay_key(struct btree_trans *trans, |
135 | struct journal_key *k) | |
2d594dfb | 136 | { |
67e0dd8f | 137 | struct btree_iter iter; |
697e546f | 138 | unsigned iter_flags = |
5dd8c60e KO |
139 | BTREE_ITER_intent| |
140 | BTREE_ITER_not_extents; | |
141 | unsigned update_flags = BTREE_TRIGGER_norun; | |
f6d0368e | 142 | int ret; |
2d594dfb | 143 | |
57322430 KO |
144 | if (k->overwritten) |
145 | return 0; | |
146 | ||
9a71de67 KO |
147 | trans->journal_res.seq = k->journal_seq; |
148 | ||
f26c67f4 | 149 | /* |
5dd8c60e | 150 | * BTREE_UPDATE_key_cache_reclaim disables key cache lookup/update to |
f26c67f4 KO |
151 | * keep the key cache coherent with the underlying btree. Nothing |
152 | * besides the allocator is doing updates yet so we don't need key cache | |
153 | * coherency for non-alloc btrees, and key cache fills for snapshots | |
5dd8c60e | 154 | * btrees use BTREE_ITER_filter_snapshots, which isn't available until |
f26c67f4 KO |
155 | * the snapshots recovery pass runs. |
156 | */ | |
697e546f | 157 | if (!k->level && k->btree_id == BTREE_ID_alloc) |
5dd8c60e | 158 | iter_flags |= BTREE_ITER_cached; |
f26c67f4 | 159 | else |
5dd8c60e | 160 | update_flags |= BTREE_UPDATE_key_cache_reclaim; |
697e546f KO |
161 | |
162 | bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, | |
163 | BTREE_MAX_DEPTH, k->level, | |
164 | iter_flags); | |
dfd41fb9 KO |
165 | ret = bch2_btree_iter_traverse(&iter); |
166 | if (ret) | |
167 | goto out; | |
168 | ||
ba89083e KO |
169 | struct btree_path *path = btree_iter_path(trans, &iter); |
170 | if (unlikely(!btree_path_node(path, k->level))) { | |
171 | bch2_trans_iter_exit(trans, &iter); | |
172 | bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, | |
173 | BTREE_MAX_DEPTH, 0, iter_flags); | |
174 | ret = bch2_btree_iter_traverse(&iter) ?: | |
175 | bch2_btree_increase_depth(trans, iter.path, 0) ?: | |
176 | -BCH_ERR_transaction_restart_nested; | |
177 | goto out; | |
178 | } | |
179 | ||
dfd41fb9 KO |
180 | /* Must be checked with btree locked: */ |
181 | if (k->overwritten) | |
182 | goto out; | |
183 | ||
f26c67f4 | 184 | ret = bch2_trans_update(trans, &iter, k->k, update_flags); |
dfd41fb9 | 185 | out: |
67e0dd8f | 186 | bch2_trans_iter_exit(trans, &iter); |
f6d0368e | 187 | return ret; |
2d594dfb KO |
188 | } |
189 | ||
5d20ba48 KO |
190 | static int journal_sort_seq_cmp(const void *_l, const void *_r) |
191 | { | |
5ba2fd11 KO |
192 | const struct journal_key *l = *((const struct journal_key **)_l); |
193 | const struct journal_key *r = *((const struct journal_key **)_r); | |
5d20ba48 | 194 | |
d8601afc | 195 | return cmp_int(l->journal_seq, r->journal_seq); |
5d20ba48 KO |
196 | } |
197 | ||
d2554263 | 198 | int bch2_journal_replay(struct bch_fs *c) |
644d180b | 199 | { |
5ba2fd11 | 200 | struct journal_keys *keys = &c->journal_keys; |
57322430 | 201 | DARRAY(struct journal_key *) keys_sorted = { 0 }; |
644d180b | 202 | struct journal *j = &c->journal; |
78328fec KO |
203 | u64 start_seq = c->journal_replay_seq_start; |
204 | u64 end_seq = c->journal_replay_seq_start; | |
ca563dcc | 205 | struct btree_trans *trans = NULL; |
4fe0eeea | 206 | bool immediate_flush = false; |
4a147af2 | 207 | int ret = 0; |
5ba2fd11 | 208 | |
5bbe3f2d | 209 | if (keys->nr) { |
349b1d83 BF |
210 | ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", |
211 | keys->nr, start_seq, end_seq); | |
5bbe3f2d KO |
212 | if (ret) |
213 | goto err; | |
214 | } | |
215 | ||
8a443d3e KO |
216 | BUG_ON(!atomic_read(&keys->ref)); |
217 | ||
a5860368 | 218 | move_gap(keys, keys->nr); |
ca563dcc | 219 | trans = bch2_trans_get(c); |
a5860368 | 220 | |
57322430 KO |
221 | /* |
222 | * First, attempt to replay keys in sorted order. This is more | |
223 | * efficient - better locality of btree access - but some might fail if | |
224 | * that would cause a journal deadlock. | |
225 | */ | |
95ffc7fb | 226 | darray_for_each(*keys, k) { |
57322430 KO |
227 | cond_resched(); |
228 | ||
4fe0eeea KO |
229 | /* |
230 | * k->allocated means the key wasn't read in from the journal, | |
231 | * rather it was from early repair code | |
232 | */ | |
233 | if (k->allocated) | |
234 | immediate_flush = true; | |
235 | ||
57322430 KO |
236 | /* Skip fastpath if we're low on space in the journal */ |
237 | ret = c->journal.watermark ? -1 : | |
238 | commit_do(trans, NULL, NULL, | |
cb52d23e KO |
239 | BCH_TRANS_COMMIT_no_enospc| |
240 | BCH_TRANS_COMMIT_journal_reclaim| | |
241 | (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), | |
57322430 KO |
242 | bch2_journal_replay_key(trans, k)); |
243 | BUG_ON(!ret && !k->overwritten); | |
244 | if (ret) { | |
245 | ret = darray_push(&keys_sorted, k); | |
246 | if (ret) | |
247 | goto err; | |
248 | } | |
249 | } | |
5ba2fd11 | 250 | |
57322430 KO |
251 | /* |
252 | * Now, replay any remaining keys in the order in which they appear in | |
253 | * the journal, unpinning those journal entries as we go: | |
254 | */ | |
255 | sort(keys_sorted.data, keys_sorted.nr, | |
256 | sizeof(keys_sorted.data[0]), | |
257 | journal_sort_seq_cmp, NULL); | |
258 | ||
259 | darray_for_each(keys_sorted, kp) { | |
d0734356 | 260 | cond_resched(); |
5d20ba48 | 261 | |
57322430 KO |
262 | struct journal_key *k = *kp; |
263 | ||
79055f50 KO |
264 | if (k->journal_seq) |
265 | replay_now_at(j, k->journal_seq); | |
266 | else | |
267 | replay_now_at(j, j->replay_journal_seq_end); | |
5d20ba48 | 268 | |
57322430 | 269 | ret = commit_do(trans, NULL, NULL, |
cb52d23e | 270 | BCH_TRANS_COMMIT_no_enospc| |
57322430 | 271 | (!k->allocated |
cb52d23e | 272 | ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim |
57322430 | 273 | : 0), |
6bd68ec2 | 274 | bch2_journal_replay_key(trans, k)); |
57322430 KO |
275 | bch_err_msg(c, ret, "while replaying key at btree %s level %u:", |
276 | bch2_btree_id_str(k->btree_id), k->level); | |
277 | if (ret) | |
5d20ba48 | 278 | goto err; |
57322430 KO |
279 | |
280 | BUG_ON(!k->overwritten); | |
7b512638 | 281 | } |
644d180b | 282 | |
57322430 KO |
283 | /* |
284 | * We need to put our btree_trans before calling flush_all_pins(), since | |
285 | * that will use a btree_trans internally | |
286 | */ | |
287 | bch2_trans_put(trans); | |
288 | trans = NULL; | |
289 | ||
4409b808 | 290 | if (!c->opts.retain_recovery_info && |
13c1e583 | 291 | c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) |
8a443d3e KO |
292 | bch2_journal_keys_put_initial(c); |
293 | ||
644d180b KO |
294 | replay_now_at(j, j->replay_journal_seq_end); |
295 | j->replay_journal_seq = 0; | |
296 | ||
297 | bch2_journal_set_replay_done(j); | |
d5d3be7d | 298 | |
4fe0eeea KO |
299 | /* if we did any repair, flush it immediately */ |
300 | if (immediate_flush) { | |
301 | bch2_journal_flush_all_pins(&c->journal); | |
302 | ret = bch2_journal_meta(&c->journal); | |
303 | } | |
304 | ||
b27d7afb | 305 | if (keys->nr) |
349b1d83 | 306 | bch2_journal_log_msg(c, "journal replay finished"); |
5d20ba48 | 307 | err: |
57322430 KO |
308 | if (trans) |
309 | bch2_trans_put(trans); | |
310 | darray_exit(&keys_sorted); | |
311 | bch_err_fn(c, ret); | |
5d20ba48 | 312 | return ret; |
7b512638 KO |
313 | } |
314 | ||
644d180b | 315 | /* journal replay early: */ |
7b512638 | 316 | |
42b72e0b KO |
317 | static int journal_replay_entry_early(struct bch_fs *c, |
318 | struct jset_entry *entry) | |
319 | { | |
320 | int ret = 0; | |
321 | ||
322 | switch (entry->type) { | |
323 | case BCH_JSET_ENTRY_btree_root: { | |
2ded276b KO |
324 | struct btree_root *r; |
325 | ||
faa6cb6c KO |
326 | while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) { |
327 | ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL }); | |
328 | if (ret) | |
329 | return ret; | |
2ded276b KO |
330 | } |
331 | ||
faa6cb6c | 332 | r = bch2_btree_id_root(c, entry->btree_id); |
42b72e0b KO |
333 | |
334 | if (entry->u64s) { | |
335 | r->level = entry->level; | |
6dfa10ab | 336 | bkey_copy(&r->key, (struct bkey_i *) entry->start); |
42b72e0b KO |
337 | r->error = 0; |
338 | } else { | |
52946d82 | 339 | r->error = -BCH_ERR_btree_node_read_error; |
42b72e0b KO |
340 | } |
341 | r->alive = true; | |
342 | break; | |
343 | } | |
344 | case BCH_JSET_ENTRY_usage: { | |
345 | struct jset_entry_usage *u = | |
346 | container_of(entry, struct jset_entry_usage, entry); | |
347 | ||
3577df5f | 348 | switch (entry->btree_id) { |
528b18e6 | 349 | case BCH_FS_USAGE_reserved: |
3577df5f | 350 | if (entry->level < BCH_REPLICAS_MAX) |
5e82a9a1 KO |
351 | c->usage_base->persistent_reserved[entry->level] = |
352 | le64_to_cpu(u->v); | |
42b72e0b | 353 | break; |
528b18e6 | 354 | case BCH_FS_USAGE_inodes: |
8e7834a8 | 355 | c->usage_base->b.nr_inodes = le64_to_cpu(u->v); |
42b72e0b | 356 | break; |
528b18e6 | 357 | case BCH_FS_USAGE_key_version: |
42b72e0b | 358 | atomic64_set(&c->key_version, |
3577df5f | 359 | le64_to_cpu(u->v)); |
42b72e0b KO |
360 | break; |
361 | } | |
362 | ||
363 | break; | |
364 | } | |
3577df5f KO |
365 | case BCH_JSET_ENTRY_data_usage: { |
366 | struct jset_entry_data_usage *u = | |
367 | container_of(entry, struct jset_entry_data_usage, entry); | |
180fb49d | 368 | |
3577df5f KO |
369 | ret = bch2_replicas_set_usage(c, &u->r, |
370 | le64_to_cpu(u->v)); | |
371 | break; | |
372 | } | |
180fb49d KO |
373 | case BCH_JSET_ENTRY_dev_usage: { |
374 | struct jset_entry_dev_usage *u = | |
375 | container_of(entry, struct jset_entry_dev_usage, entry); | |
adf81796 KO |
376 | unsigned nr_types = jset_entry_dev_usage_nr_types(u); |
377 | ||
378 | rcu_read_lock(); | |
379 | struct bch_dev *ca = bch2_dev_rcu(c, le32_to_cpu(u->dev)); | |
380 | if (ca) | |
381 | for (unsigned i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) { | |
382 | ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets); | |
383 | ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors); | |
384 | ca->usage_base->d[i].fragmented = le64_to_cpu(u->d[i].fragmented); | |
385 | } | |
386 | rcu_read_unlock(); | |
180fb49d KO |
387 | |
388 | break; | |
389 | } | |
1dd7f9d9 KO |
390 | case BCH_JSET_ENTRY_blacklist: { |
391 | struct jset_entry_blacklist *bl_entry = | |
392 | container_of(entry, struct jset_entry_blacklist, entry); | |
393 | ||
394 | ret = bch2_journal_seq_blacklist_add(c, | |
395 | le64_to_cpu(bl_entry->seq), | |
396 | le64_to_cpu(bl_entry->seq) + 1); | |
397 | break; | |
398 | } | |
399 | case BCH_JSET_ENTRY_blacklist_v2: { | |
400 | struct jset_entry_blacklist_v2 *bl_entry = | |
401 | container_of(entry, struct jset_entry_blacklist_v2, entry); | |
402 | ||
403 | ret = bch2_journal_seq_blacklist_add(c, | |
404 | le64_to_cpu(bl_entry->start), | |
405 | le64_to_cpu(bl_entry->end) + 1); | |
406 | break; | |
407 | } | |
2abe5420 KO |
408 | case BCH_JSET_ENTRY_clock: { |
409 | struct jset_entry_clock *clock = | |
410 | container_of(entry, struct jset_entry_clock, entry); | |
411 | ||
c0ebe3e4 | 412 | atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time)); |
2abe5420 | 413 | } |
42b72e0b KO |
414 | } |
415 | ||
416 | return ret; | |
417 | } | |
418 | ||
1dd7f9d9 | 419 | static int journal_replay_early(struct bch_fs *c, |
ce6201c4 | 420 | struct bch_sb_field_clean *clean) |
1c6fdbd8 | 421 | { |
7b512638 | 422 | if (clean) { |
cea07a7b | 423 | for (struct jset_entry *entry = clean->start; |
7b512638 KO |
424 | entry != vstruct_end(&clean->field); |
425 | entry = vstruct_next(entry)) { | |
cea07a7b | 426 | int ret = journal_replay_entry_early(c, entry); |
7b512638 KO |
427 | if (ret) |
428 | return ret; | |
429 | } | |
430 | } else { | |
ce6201c4 KO |
431 | struct genradix_iter iter; |
432 | struct journal_replay *i, **_i; | |
433 | ||
434 | genradix_for_each(&c->journal_entries, iter, _i) { | |
435 | i = *_i; | |
436 | ||
2cce3752 | 437 | if (journal_replay_ignore(i)) |
adbcada4 | 438 | continue; |
7b512638 | 439 | |
7b512638 | 440 | vstruct_for_each(&i->j, entry) { |
cea07a7b | 441 | int ret = journal_replay_entry_early(c, entry); |
7b512638 KO |
442 | if (ret) |
443 | return ret; | |
444 | } | |
adbcada4 | 445 | } |
134915f3 | 446 | } |
1c6fdbd8 | 447 | |
7b512638 KO |
448 | bch2_fs_usage_initialize(c); |
449 | ||
450 | return 0; | |
451 | } | |
452 | ||
644d180b KO |
453 | /* sb clean section: */ |
454 | ||
7b512638 KO |
455 | static int read_btree_roots(struct bch_fs *c) |
456 | { | |
7b512638 | 457 | int ret = 0; |
1c6fdbd8 | 458 | |
4409b808 | 459 | for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { |
faa6cb6c | 460 | struct btree_root *r = bch2_btree_id_root(c, i); |
1c6fdbd8 | 461 | |
7b512638 KO |
462 | if (!r->alive) |
463 | continue; | |
1c6fdbd8 | 464 | |
cdce1094 | 465 | if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) |
1c6fdbd8 KO |
466 | continue; |
467 | ||
4409b808 KO |
468 | if (mustfix_fsck_err_on((ret = r->error), |
469 | c, btree_root_bkey_invalid, | |
470 | "invalid btree root %s", | |
471 | bch2_btree_id_str(i)) || | |
472 | mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)), | |
473 | c, btree_root_read_error, | |
474 | "error reading btree root %s l=%u: %s", | |
475 | bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { | |
476 | if (btree_id_is_alloc(i)) { | |
477 | c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); | |
478 | c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); | |
479 | c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); | |
480 | c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); | |
481 | c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); | |
19dd3172 | 482 | c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); |
4409b808 KO |
483 | r->error = 0; |
484 | } else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { | |
485 | bch_info(c, "will run btree node scan"); | |
486 | c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); | |
487 | c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); | |
488 | } | |
7b512638 | 489 | |
7dcf62c0 | 490 | ret = 0; |
55936afe | 491 | bch2_btree_lost_data(c, i); |
7b512638 | 492 | } |
1c6fdbd8 | 493 | } |
7b512638 | 494 | |
4409b808 | 495 | for (unsigned i = 0; i < BTREE_ID_NR; i++) { |
faa6cb6c | 496 | struct btree_root *r = bch2_btree_id_root(c, i); |
806c8a6a | 497 | |
4409b808 | 498 | if (!r->b && !r->error) { |
806c8a6a KO |
499 | r->alive = false; |
500 | r->level = 0; | |
f2f61f41 | 501 | bch2_btree_root_alloc_fake(c, i, 0); |
806c8a6a KO |
502 | } |
503 | } | |
1c6fdbd8 KO |
504 | fsck_err: |
505 | return ret; | |
506 | } | |
507 | ||
15eaaa4c | 508 | static bool check_version_upgrade(struct bch_fs *c) |
24964e1c | 509 | { |
ba8eeae8 | 510 | unsigned latest_version = bcachefs_metadata_version_current; |
2eeccee8 KO |
511 | unsigned latest_compatible = min(latest_version, |
512 | bch2_latest_compatible_version(c->sb.version)); | |
ba8eeae8 KO |
513 | unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; |
514 | unsigned new_version = 0; | |
515 | ||
516 | if (old_version < bcachefs_metadata_required_upgrade_below) { | |
517 | if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || | |
518 | latest_compatible < bcachefs_metadata_required_upgrade_below) | |
519 | new_version = latest_version; | |
520 | else | |
521 | new_version = latest_compatible; | |
522 | } else { | |
523 | switch (c->opts.version_upgrade) { | |
524 | case BCH_VERSION_UPGRADE_compatible: | |
525 | new_version = latest_compatible; | |
526 | break; | |
527 | case BCH_VERSION_UPGRADE_incompatible: | |
528 | new_version = latest_version; | |
529 | break; | |
530 | case BCH_VERSION_UPGRADE_none: | |
2eeccee8 | 531 | new_version = min(old_version, latest_version); |
ba8eeae8 KO |
532 | break; |
533 | } | |
534 | } | |
24964e1c | 535 | |
ba8eeae8 | 536 | if (new_version > old_version) { |
24964e1c KO |
537 | struct printbuf buf = PRINTBUF; |
538 | ||
ba8eeae8 KO |
539 | if (old_version < bcachefs_metadata_required_upgrade_below) |
540 | prt_str(&buf, "Version upgrade required:\n"); | |
541 | ||
542 | if (old_version != c->sb.version) { | |
543 | prt_str(&buf, "Version upgrade from "); | |
544 | bch2_version_to_text(&buf, c->sb.version_upgrade_complete); | |
545 | prt_str(&buf, " to "); | |
3045bb95 | 546 | bch2_version_to_text(&buf, c->sb.version); |
ba8eeae8 | 547 | prt_str(&buf, " incomplete\n"); |
3045bb95 | 548 | } |
24964e1c | 549 | |
ba8eeae8 KO |
550 | prt_printf(&buf, "Doing %s version upgrade from ", |
551 | BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version) | |
552 | ? "incompatible" : "compatible"); | |
553 | bch2_version_to_text(&buf, old_version); | |
554 | prt_str(&buf, " to "); | |
555 | bch2_version_to_text(&buf, new_version); | |
556 | prt_newline(&buf); | |
557 | ||
15eaaa4c KO |
558 | struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); |
559 | __le64 passes = ext->recovery_passes_required[0]; | |
560 | bch2_sb_set_upgrade(c, old_version, new_version); | |
561 | passes = ext->recovery_passes_required[0] & ~passes; | |
24964e1c | 562 | |
15eaaa4c KO |
563 | if (passes) { |
564 | prt_str(&buf, " running recovery passes: "); | |
565 | prt_bitflags(&buf, bch2_recovery_passes, | |
566 | bch2_recovery_passes_from_stable(le64_to_cpu(passes))); | |
065bd335 | 567 | } |
24964e1c | 568 | |
065bd335 | 569 | bch_info(c, "%s", buf.buf); |
ba8eeae8 | 570 | |
6619d846 | 571 | bch2_sb_upgrade(c, new_version); |
ba8eeae8 KO |
572 | |
573 | printbuf_exit(&buf); | |
8b16413c | 574 | return true; |
24964e1c | 575 | } |
8b16413c KO |
576 | |
577 | return false; | |
24964e1c KO |
578 | } |
579 | ||
1c6fdbd8 KO |
580 | int bch2_fs_recovery(struct bch_fs *c) |
581 | { | |
1dd7f9d9 | 582 | struct bch_sb_field_clean *clean = NULL; |
adbcada4 | 583 | struct jset *last_journal_entry = NULL; |
4fc1f402 | 584 | u64 last_seq = 0, blacklist_seq, journal_seq; |
4932e07e | 585 | int ret = 0; |
1c6fdbd8 | 586 | |
a37ad1a3 KO |
587 | if (c->sb.clean) { |
588 | clean = bch2_read_superblock_clean(c); | |
589 | ret = PTR_ERR_OR_ZERO(clean); | |
590 | if (ret) | |
591 | goto err; | |
1dd7f9d9 | 592 | |
1c6fdbd8 KO |
593 | bch_info(c, "recovering from clean shutdown, journal seq %llu", |
594 | le64_to_cpu(clean->journal_seq)); | |
a37ad1a3 | 595 | } else { |
0a84a066 | 596 | bch_info(c, "recovering from unclean shutdown"); |
a37ad1a3 | 597 | } |
1c6fdbd8 | 598 | |
f2785955 KO |
599 | if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) { |
600 | bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported"); | |
601 | ret = -EINVAL; | |
602 | goto err; | |
603 | } | |
604 | ||
73590619 KO |
605 | if (!c->sb.clean && |
606 | !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { | |
607 | bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); | |
608 | ret = -EINVAL; | |
609 | goto err; | |
610 | } | |
611 | ||
13c1e583 KO |
612 | if (c->opts.norecovery) |
613 | c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1; | |
1cab5a82 | 614 | |
62606398 KO |
615 | mutex_lock(&c->sb_lock); |
616 | struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); | |
617 | bool write_sb = false; | |
8b16413c | 618 | |
62606398 KO |
619 | if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { |
620 | ext->recovery_passes_required[0] |= | |
621 | cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); | |
622 | write_sb = true; | |
623 | } | |
8b16413c | 624 | |
62606398 KO |
625 | u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); |
626 | if (sb_passes) { | |
627 | struct printbuf buf = PRINTBUF; | |
628 | prt_str(&buf, "superblock requires following recovery passes to be run:\n "); | |
629 | prt_bitflags(&buf, bch2_recovery_passes, sb_passes); | |
630 | bch_info(c, "%s", buf.buf); | |
631 | printbuf_exit(&buf); | |
632 | } | |
84f16387 | 633 | |
62606398 KO |
634 | if (bch2_check_version_downgrade(c)) { |
635 | struct printbuf buf = PRINTBUF; | |
84f16387 | 636 | |
62606398 | 637 | prt_str(&buf, "Version downgrade required:"); |
84f16387 | 638 | |
62606398 KO |
639 | __le64 passes = ext->recovery_passes_required[0]; |
640 | bch2_sb_set_downgrade(c, | |
641 | BCH_VERSION_MINOR(bcachefs_metadata_version_current), | |
642 | BCH_VERSION_MINOR(c->sb.version)); | |
643 | passes = ext->recovery_passes_required[0] & ~passes; | |
644 | if (passes) { | |
645 | prt_str(&buf, "\n running recovery passes: "); | |
646 | prt_bitflags(&buf, bch2_recovery_passes, | |
647 | bch2_recovery_passes_from_stable(le64_to_cpu(passes))); | |
84f16387 KO |
648 | } |
649 | ||
62606398 KO |
650 | bch_info(c, "%s", buf.buf); |
651 | printbuf_exit(&buf); | |
652 | write_sb = true; | |
653 | } | |
8b16413c | 654 | |
62606398 KO |
655 | if (check_version_upgrade(c)) |
656 | write_sb = true; | |
8b16413c | 657 | |
62606398 KO |
658 | if (write_sb) |
659 | bch2_write_super(c); | |
660 | ||
661 | c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); | |
662 | mutex_unlock(&c->sb_lock); | |
8b16413c KO |
663 | |
664 | if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) | |
665 | c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); | |
666 | ||
d55ddf6e KO |
667 | if (c->opts.fsck) |
668 | set_bit(BCH_FS_fsck_running, &c->flags); | |
669 | ||
adbcada4 KO |
670 | ret = bch2_blacklist_table_initialize(c); |
671 | if (ret) { | |
672 | bch_err(c, "error initializing blacklist table"); | |
673 | goto err; | |
674 | } | |
675 | ||
45150765 KO |
676 | bch2_journal_pos_from_member_info_resume(c); |
677 | ||
4dcd90b6 | 678 | if (!c->sb.clean || c->opts.retain_recovery_info) { |
ce6201c4 KO |
679 | struct genradix_iter iter; |
680 | struct journal_replay **i; | |
1dd7f9d9 | 681 | |
365f64f3 | 682 | bch_verbose(c, "starting journal read"); |
5bbe3f2d | 683 | ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq); |
1c6fdbd8 KO |
684 | if (ret) |
685 | goto err; | |
686 | ||
e0de429a KO |
687 | /* |
688 | * note: cmd_list_journal needs the blacklist table fully up to date so | |
689 | * it can asterisk ignored journal entries: | |
690 | */ | |
691 | if (c->opts.read_journal_only) | |
692 | goto out; | |
693 | ||
ce6201c4 | 694 | genradix_for_each_reverse(&c->journal_entries, iter, i) |
2cce3752 | 695 | if (!journal_replay_ignore(*i)) { |
ce6201c4 | 696 | last_journal_entry = &(*i)->j; |
adbcada4 KO |
697 | break; |
698 | } | |
699 | ||
700 | if (mustfix_fsck_err_on(c->sb.clean && | |
701 | last_journal_entry && | |
702 | !journal_entry_empty(last_journal_entry), c, | |
b65db750 | 703 | clean_but_journal_not_empty, |
932aa837 | 704 | "filesystem marked clean but journal not empty")) { |
19dd3172 | 705 | c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); |
932aa837 KO |
706 | SET_BCH_SB_CLEAN(c->disk_sb.sb, false); |
707 | c->sb.clean = false; | |
708 | } | |
1dd7f9d9 | 709 | |
adbcada4 | 710 | if (!last_journal_entry) { |
b65db750 KO |
711 | fsck_err_on(!c->sb.clean, c, |
712 | dirty_but_no_journal_entries, | |
713 | "no journal entries found"); | |
1ba8a796 KO |
714 | if (clean) |
715 | goto use_clean; | |
716 | ||
717 | genradix_for_each_reverse(&c->journal_entries, iter, i) | |
718 | if (*i) { | |
719 | last_journal_entry = &(*i)->j; | |
2cce3752 KO |
720 | (*i)->ignore_blacklisted = false; |
721 | (*i)->ignore_not_dirty= false; | |
5a53f851 KO |
722 | /* |
723 | * This was probably a NO_FLUSH entry, | |
724 | * so last_seq was garbage - but we know | |
725 | * we're only using a single journal | |
726 | * entry, set it here: | |
727 | */ | |
728 | (*i)->j.last_seq = (*i)->j.seq; | |
1ba8a796 KO |
729 | break; |
730 | } | |
1dd7f9d9 KO |
731 | } |
732 | ||
401585fe | 733 | ret = bch2_journal_keys_sort(c); |
95752a02 | 734 | if (ret) |
d0734356 | 735 | goto err; |
d0734356 | 736 | |
adbcada4 | 737 | if (c->sb.clean && last_journal_entry) { |
a37ad1a3 | 738 | ret = bch2_verify_superblock_clean(c, &clean, |
adbcada4 KO |
739 | last_journal_entry); |
740 | if (ret) | |
741 | goto err; | |
742 | } | |
743 | } else { | |
744 | use_clean: | |
745 | if (!clean) { | |
746 | bch_err(c, "no superblock clean section found"); | |
1ed0a5d2 | 747 | ret = -BCH_ERR_fsck_repair_impossible; |
7b512638 | 748 | goto err; |
1dd7f9d9 | 749 | |
adbcada4 KO |
750 | } |
751 | blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1; | |
1dd7f9d9 KO |
752 | } |
753 | ||
78328fec | 754 | c->journal_replay_seq_start = last_seq; |
1e81f89b | 755 | c->journal_replay_seq_end = blacklist_seq - 1; |
78328fec | 756 | |
cdce1094 | 757 | if (c->opts.reconstruct_alloc) |
bdbf953b | 758 | bch2_reconstruct_alloc(c); |
33114c2d | 759 | |
877da05f KO |
760 | zero_out_btree_mem_ptr(&c->journal_keys); |
761 | ||
ce6201c4 | 762 | ret = journal_replay_early(c, clean); |
1dd7f9d9 KO |
763 | if (ret) |
764 | goto err; | |
765 | ||
9b6e2f1e KO |
766 | /* |
767 | * After an unclean shutdown, skip then next few journal sequence | |
768 | * numbers as they may have been referenced by btree writes that | |
769 | * happened before their corresponding journal writes - those btree | |
770 | * writes need to be ignored, by skipping and blacklisting the next few | |
771 | * journal sequence numbers: | |
772 | */ | |
773 | if (!c->sb.clean) | |
774 | journal_seq += 8; | |
775 | ||
adbcada4 | 776 | if (blacklist_seq != journal_seq) { |
349b1d83 BF |
777 | ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu", |
778 | blacklist_seq, journal_seq) ?: | |
5bbe3f2d | 779 | bch2_journal_seq_blacklist_add(c, |
adbcada4 | 780 | blacklist_seq, journal_seq); |
1dd7f9d9 | 781 | if (ret) { |
6fa30fe7 | 782 | bch_err_msg(c, ret, "error creating new journal seq blacklist entry"); |
7b512638 | 783 | goto err; |
1dd7f9d9 | 784 | } |
f707e3d8 | 785 | } |
1c6fdbd8 | 786 | |
349b1d83 BF |
787 | ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu", |
788 | journal_seq, last_seq, blacklist_seq - 1) ?: | |
5bbe3f2d | 789 | bch2_fs_journal_start(&c->journal, journal_seq); |
7b512638 KO |
790 | if (ret) |
791 | goto err; | |
1c6fdbd8 | 792 | |
1cab5a82 KO |
793 | /* |
794 | * Skip past versions that might have possibly been used (as nonces), | |
795 | * but hadn't had their pointers written: | |
796 | */ | |
797 | if (c->sb.encryption_type && !c->sb.clean) | |
798 | atomic64_add(1 << 16, &c->key_version); | |
799 | ||
7b512638 KO |
800 | ret = read_btree_roots(c); |
801 | if (ret) | |
802 | goto err; | |
1c6fdbd8 | 803 | |
067d228b | 804 | ret = bch2_run_recovery_passes(c); |
4e65431c KO |
805 | if (ret) |
806 | goto err; | |
1c6fdbd8 | 807 | |
d55ddf6e KO |
808 | clear_bit(BCH_FS_fsck_running, &c->flags); |
809 | ||
4fe0eeea KO |
810 | /* fsync if we fixed errors */ |
811 | if (test_bit(BCH_FS_errors_fixed, &c->flags)) { | |
812 | bch2_journal_flush_all_pins(&c->journal); | |
813 | bch2_journal_meta(&c->journal); | |
814 | } | |
815 | ||
b56b787c KO |
816 | /* If we fixed errors, verify that fs is actually clean now: */ |
817 | if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && | |
3c471b65 KO |
818 | test_bit(BCH_FS_errors_fixed, &c->flags) && |
819 | !test_bit(BCH_FS_errors_not_fixed, &c->flags) && | |
820 | !test_bit(BCH_FS_error, &c->flags)) { | |
30418de0 KO |
821 | bch2_flush_fsck_errs(c); |
822 | ||
b56b787c | 823 | bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); |
3c471b65 | 824 | clear_bit(BCH_FS_errors_fixed, &c->flags); |
b56b787c KO |
825 | |
826 | c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; | |
827 | ||
828 | ret = bch2_run_recovery_passes(c); | |
829 | if (ret) | |
830 | goto err; | |
831 | ||
3c471b65 KO |
832 | if (test_bit(BCH_FS_errors_fixed, &c->flags) || |
833 | test_bit(BCH_FS_errors_not_fixed, &c->flags)) { | |
b56b787c | 834 | bch_err(c, "Second fsck run was not clean"); |
3c471b65 | 835 | set_bit(BCH_FS_errors_not_fixed, &c->flags); |
b56b787c KO |
836 | } |
837 | ||
3c471b65 | 838 | set_bit(BCH_FS_errors_fixed, &c->flags); |
b56b787c KO |
839 | } |
840 | ||
7b512638 | 841 | if (enabled_qtypes(c)) { |
619f5bee | 842 | bch_verbose(c, "reading quotas"); |
7b512638 KO |
843 | ret = bch2_fs_quota_read(c); |
844 | if (ret) | |
845 | goto err; | |
846 | bch_verbose(c, "quotas done"); | |
847 | } | |
848 | ||
26609b61 | 849 | mutex_lock(&c->sb_lock); |
62606398 KO |
850 | ext = bch2_sb_field_get(c->disk_sb.sb, ext); |
851 | write_sb = false; | |
8b16413c | 852 | |
f87bf892 KO |
853 | if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { |
854 | SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version)); | |
932aa837 KO |
855 | write_sb = true; |
856 | } | |
857 | ||
3c471b65 | 858 | if (!test_bit(BCH_FS_error, &c->flags) && |
8b16413c | 859 | !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) { |
c0ebe3e4 | 860 | c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); |
932aa837 | 861 | write_sb = true; |
88c07f73 KO |
862 | } |
863 | ||
0a34c058 | 864 | if (!test_bit(BCH_FS_error, &c->flags) && |
27fcec6c | 865 | !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) { |
0a34c058 KO |
866 | memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); |
867 | write_sb = true; | |
8b16413c KO |
868 | } |
869 | ||
55936afe KO |
870 | if (c->opts.fsck && |
871 | !test_bit(BCH_FS_error, &c->flags) && | |
872 | c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 && | |
873 | ext->btrees_lost_data) { | |
874 | ext->btrees_lost_data = 0; | |
875 | write_sb = true; | |
876 | } | |
877 | ||
0bc166ff | 878 | if (c->opts.fsck && |
3c471b65 KO |
879 | !test_bit(BCH_FS_error, &c->flags) && |
880 | !test_bit(BCH_FS_errors_not_fixed, &c->flags)) { | |
0bc166ff | 881 | SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); |
aae15aaf | 882 | SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0); |
932aa837 | 883 | write_sb = true; |
0bc166ff | 884 | } |
932aa837 | 885 | |
f0415829 KO |
886 | if (bch2_blacklist_entries_gc(c)) |
887 | write_sb = true; | |
888 | ||
932aa837 KO |
889 | if (write_sb) |
890 | bch2_write_super(c); | |
26609b61 | 891 | mutex_unlock(&c->sb_lock); |
1dd7f9d9 | 892 | |
d93cf685 | 893 | if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) || |
73bd774d | 894 | c->sb.version_min < bcachefs_metadata_version_btree_ptr_sectors_written) { |
d93cf685 KO |
895 | struct bch_move_stats stats; |
896 | ||
b2d1d56b | 897 | bch2_move_stats_init(&stats, "recovery"); |
d93cf685 | 898 | |
fbf92708 KO |
899 | struct printbuf buf = PRINTBUF; |
900 | bch2_version_to_text(&buf, c->sb.version_min); | |
901 | bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf); | |
902 | printbuf_exit(&buf); | |
903 | ||
904 | ret = bch2_fs_read_write_early(c) ?: | |
1bb3c2a9 | 905 | bch2_scan_old_btree_nodes(c, &stats); |
d93cf685 KO |
906 | if (ret) |
907 | goto err; | |
908 | bch_info(c, "scanning for old btree nodes done"); | |
909 | } | |
910 | ||
619f5bee | 911 | ret = 0; |
4932e07e | 912 | out: |
619f5bee | 913 | bch2_flush_fsck_errs(c); |
89b05118 | 914 | |
4409b808 | 915 | if (!c->opts.retain_recovery_info) { |
8a443d3e | 916 | bch2_journal_keys_put_initial(c); |
4409b808 KO |
917 | bch2_find_btree_nodes_exit(&c->found_btree_nodes); |
918 | } | |
7ffec9cc KO |
919 | if (!IS_ERR(clean)) |
920 | kfree(clean); | |
4ab35c34 | 921 | |
62719cf3 KO |
922 | if (!ret && |
923 | test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && | |
924 | !c->opts.nochanges) { | |
4ab35c34 KO |
925 | bch2_fs_read_write_early(c); |
926 | bch2_delete_dead_snapshots_async(c); | |
927 | } | |
928 | ||
cf904c8d | 929 | bch_err_fn(c, ret); |
1c6fdbd8 | 930 | return ret; |
4932e07e KO |
931 | err: |
932 | fsck_err: | |
933 | bch2_fs_emergency_read_only(c); | |
934 | goto out; | |
1c6fdbd8 KO |
935 | } |
936 | ||
937 | int bch2_fs_initialize(struct bch_fs *c) | |
938 | { | |
939 | struct bch_inode_unpacked root_inode, lostfound_inode; | |
940 | struct bkey_inode_buf packed_inode; | |
1c6fdbd8 | 941 | struct qstr lostfound = QSTR("lost+found"); |
1c6fdbd8 KO |
942 | int ret; |
943 | ||
944 | bch_notice(c, "initializing new filesystem"); | |
a292be3b | 945 | set_bit(BCH_FS_new_fs, &c->flags); |
1c6fdbd8 | 946 | |
3e0745e2 | 947 | mutex_lock(&c->sb_lock); |
c0ebe3e4 KO |
948 | c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); |
949 | c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); | |
61fc3c96 | 950 | |
84f16387 | 951 | bch2_check_version_downgrade(c); |
6619d846 | 952 | |
3045bb95 | 953 | if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { |
6619d846 | 954 | bch2_sb_upgrade(c, bcachefs_metadata_version_current); |
3045bb95 | 955 | SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); |
73590619 KO |
956 | bch2_write_super(c); |
957 | } | |
61fc3c96 KO |
958 | mutex_unlock(&c->sb_lock); |
959 | ||
d2554263 | 960 | c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; |
3c471b65 | 961 | set_bit(BCH_FS_may_go_rw, &c->flags); |
1c6fdbd8 | 962 | |
9fea2274 | 963 | for (unsigned i = 0; i < BTREE_ID_NR; i++) |
f2f61f41 | 964 | bch2_btree_root_alloc_fake(c, i, 0); |
dfe9bfb3 | 965 | |
9fea2274 | 966 | for_each_member_device(c, ca) |
822835ff KO |
967 | bch2_dev_usage_init(ca); |
968 | ||
bbe682c7 KO |
969 | ret = bch2_fs_journal_alloc(c); |
970 | if (ret) | |
971 | goto err; | |
1c6fdbd8 | 972 | |
1c6fdbd8 KO |
973 | /* |
974 | * journal_res_get() will crash if called before this has | |
975 | * set up the journal.pin FIFO and journal.cur pointer: | |
976 | */ | |
ce6201c4 | 977 | bch2_fs_journal_start(&c->journal, 1); |
1c6fdbd8 KO |
978 | bch2_journal_set_replay_done(&c->journal); |
979 | ||
8d6b6222 KO |
980 | ret = bch2_fs_read_write_early(c); |
981 | if (ret) | |
982 | goto err; | |
983 | ||
984 | /* | |
985 | * Write out the superblock and journal buckets, now that we can do | |
986 | * btree updates | |
987 | */ | |
f25d8215 | 988 | bch_verbose(c, "marking superblocks"); |
bbe682c7 KO |
989 | ret = bch2_trans_mark_dev_sbs(c); |
990 | bch_err_msg(c, ret, "marking superblocks"); | |
991 | if (ret) | |
992 | goto err; | |
09943313 | 993 | |
9fea2274 | 994 | for_each_online_member(c, ca) |
09943313 | 995 | ca->new_fs_bucket_idx = 0; |
8d6b6222 | 996 | |
c6b2826c KO |
997 | ret = bch2_fs_freespace_init(c); |
998 | if (ret) | |
999 | goto err; | |
1000 | ||
067d228b | 1001 | ret = bch2_initialize_subvolumes(c); |
14b393ee KO |
1002 | if (ret) |
1003 | goto err; | |
1004 | ||
1005 | bch_verbose(c, "reading snapshots table"); | |
067d228b | 1006 | ret = bch2_snapshots_read(c); |
14b393ee KO |
1007 | if (ret) |
1008 | goto err; | |
1009 | bch_verbose(c, "reading snapshots done"); | |
1010 | ||
a1019576 | 1011 | bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); |
14b393ee KO |
1012 | root_inode.bi_inum = BCACHEFS_ROOT_INO; |
1013 | root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; | |
8dd69d9f | 1014 | bch2_inode_pack(&packed_inode, &root_inode); |
e751c01a | 1015 | packed_inode.inode.k.p.snapshot = U32_MAX; |
1c6fdbd8 | 1016 | |
96dea3d5 | 1017 | ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0); |
cf904c8d KO |
1018 | bch_err_msg(c, ret, "creating root directory"); |
1019 | if (ret) | |
1c6fdbd8 KO |
1020 | goto err; |
1021 | ||
96385742 | 1022 | bch2_inode_init_early(c, &lostfound_inode); |
1c6fdbd8 | 1023 | |
58e2388f | 1024 | ret = bch2_trans_do(c, NULL, NULL, 0, |
6bd68ec2 | 1025 | bch2_create_trans(trans, |
6fed42bb | 1026 | BCACHEFS_ROOT_SUBVOL_INUM, |
96385742 KO |
1027 | &root_inode, &lostfound_inode, |
1028 | &lostfound, | |
b627c7d8 | 1029 | 0, 0, S_IFDIR|0700, 0, |
42d23732 | 1030 | NULL, NULL, (subvol_inum) { 0 }, 0)); |
cf904c8d KO |
1031 | bch_err_msg(c, ret, "creating lost+found"); |
1032 | if (ret) | |
1c6fdbd8 KO |
1033 | goto err; |
1034 | ||
d2554263 | 1035 | c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1; |
249bf593 | 1036 | |
1c6fdbd8 KO |
1037 | if (enabled_qtypes(c)) { |
1038 | ret = bch2_fs_quota_read(c); | |
1039 | if (ret) | |
1040 | goto err; | |
1041 | } | |
1042 | ||
9be1efe9 | 1043 | ret = bch2_journal_flush(&c->journal); |
cf904c8d KO |
1044 | bch_err_msg(c, ret, "writing first journal entry"); |
1045 | if (ret) | |
1c6fdbd8 KO |
1046 | goto err; |
1047 | ||
1048 | mutex_lock(&c->sb_lock); | |
1049 | SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); | |
1050 | SET_BCH_SB_CLEAN(c->disk_sb.sb, false); | |
1051 | ||
1052 | bch2_write_super(c); | |
1053 | mutex_unlock(&c->sb_lock); | |
1054 | ||
1055 | return 0; | |
1056 | err: | |
9fea2274 | 1057 | bch_err_fn(c, ret); |
1c6fdbd8 KO |
1058 | return ret; |
1059 | } |