Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
07a1006a | 4 | #include "bkey_buf.h" |
1c6fdbd8 KO |
5 | #include "btree_update.h" |
6 | #include "dirent.h" | |
7 | #include "error.h" | |
96385742 | 8 | #include "fs-common.h" |
1c6fdbd8 KO |
9 | #include "fsck.h" |
10 | #include "inode.h" | |
11 | #include "keylist.h" | |
12 | #include "super.h" | |
13 | #include "xattr.h" | |
14 | ||
15 | #include <linux/dcache.h> /* struct qstr */ | |
16 | #include <linux/generic-radix-tree.h> | |
17 | ||
18 | #define QSTR(n) { { { .len = strlen(n) } }, .name = n } | |
19 | ||
424eb881 KO |
20 | static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) |
21 | { | |
22 | struct btree_iter *iter; | |
23 | struct bkey_s_c k; | |
24 | u64 sectors = 0; | |
94f651e2 | 25 | int ret; |
424eb881 | 26 | |
41f8b09e | 27 | for_each_btree_key(trans, iter, BTREE_ID_extents, |
94f651e2 | 28 | POS(inum, 0), 0, k, ret) { |
424eb881 KO |
29 | if (k.k->p.inode != inum) |
30 | break; | |
31 | ||
32 | if (bkey_extent_is_allocation(k.k)) | |
33 | sectors += k.k->size; | |
34 | } | |
35 | ||
94f651e2 KO |
36 | bch2_trans_iter_free(trans, iter); |
37 | ||
38 | return ret ?: sectors; | |
424eb881 KO |
39 | } |
40 | ||
b1fd23df KO |
41 | static int __remove_dirent(struct btree_trans *trans, |
42 | struct bkey_s_c_dirent dirent) | |
1c6fdbd8 | 43 | { |
0f238367 | 44 | struct bch_fs *c = trans->c; |
1c6fdbd8 KO |
45 | struct qstr name; |
46 | struct bch_inode_unpacked dir_inode; | |
47 | struct bch_hash_info dir_hash_info; | |
48 | u64 dir_inum = dirent.k->p.inode; | |
49 | int ret; | |
50 | char *buf; | |
51 | ||
52 | name.len = bch2_dirent_name_bytes(dirent); | |
b1fd23df KO |
53 | buf = bch2_trans_kmalloc(trans, name.len + 1); |
54 | if (IS_ERR(buf)) | |
55 | return PTR_ERR(buf); | |
1c6fdbd8 KO |
56 | |
57 | memcpy(buf, dirent.v->d_name, name.len); | |
58 | buf[name.len] = '\0'; | |
59 | name.name = buf; | |
60 | ||
fe38b720 | 61 | ret = __bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode, 0); |
b1fd23df | 62 | if (ret && ret != -EINTR) |
1c6fdbd8 | 63 | bch_err(c, "remove_dirent: err %i looking up directory inode", ret); |
b1fd23df KO |
64 | if (ret) |
65 | return ret; | |
1c6fdbd8 KO |
66 | |
67 | dir_hash_info = bch2_hash_info_init(c, &dir_inode); | |
68 | ||
b1fd23df KO |
69 | ret = bch2_hash_delete(trans, bch2_dirent_hash_desc, |
70 | &dir_hash_info, dir_inum, &name); | |
71 | if (ret && ret != -EINTR) | |
1c6fdbd8 | 72 | bch_err(c, "remove_dirent: err %i deleting dirent", ret); |
b1fd23df KO |
73 | if (ret) |
74 | return ret; | |
75 | ||
76 | return 0; | |
77 | } | |
78 | ||
79 | static int remove_dirent(struct btree_trans *trans, | |
80 | struct bkey_s_c_dirent dirent) | |
81 | { | |
82 | return __bch2_trans_do(trans, NULL, NULL, | |
b1fd23df KO |
83 | BTREE_INSERT_NOFAIL| |
84 | BTREE_INSERT_LAZY_RW, | |
b1fd23df | 85 | __remove_dirent(trans, dirent)); |
1c6fdbd8 KO |
86 | } |
87 | ||
88 | static int reattach_inode(struct bch_fs *c, | |
89 | struct bch_inode_unpacked *lostfound_inode, | |
90 | u64 inum) | |
91 | { | |
184b1dc1 | 92 | struct bch_inode_unpacked dir_u, inode_u; |
1c6fdbd8 KO |
93 | char name_buf[20]; |
94 | struct qstr name; | |
95 | int ret; | |
96 | ||
97 | snprintf(name_buf, sizeof(name_buf), "%llu", inum); | |
98 | name = (struct qstr) QSTR(name_buf); | |
99 | ||
b1fd23df | 100 | ret = bch2_trans_do(c, NULL, NULL, |
96385742 KO |
101 | BTREE_INSERT_LAZY_RW, |
102 | bch2_link_trans(&trans, lostfound_inode->bi_inum, | |
184b1dc1 | 103 | inum, &dir_u, &inode_u, &name)); |
96385742 KO |
104 | if (ret) |
105 | bch_err(c, "error %i reattaching inode %llu", ret, inum); | |
1c6fdbd8 | 106 | |
1c6fdbd8 KO |
107 | return ret; |
108 | } | |
109 | ||
110 | struct inode_walker { | |
111 | bool first_this_inode; | |
112 | bool have_inode; | |
113 | u64 cur_inum; | |
114 | struct bch_inode_unpacked inode; | |
115 | }; | |
116 | ||
117 | static struct inode_walker inode_walker_init(void) | |
118 | { | |
119 | return (struct inode_walker) { | |
120 | .cur_inum = -1, | |
121 | .have_inode = false, | |
122 | }; | |
123 | } | |
124 | ||
6bd13057 KO |
125 | static int walk_inode(struct btree_trans *trans, |
126 | struct inode_walker *w, u64 inum) | |
1c6fdbd8 | 127 | { |
6bd13057 | 128 | if (inum != w->cur_inum) { |
fe38b720 KO |
129 | int ret = __bch2_inode_find_by_inum_trans(trans, inum, |
130 | &w->inode, 0); | |
1c6fdbd8 KO |
131 | |
132 | if (ret && ret != -ENOENT) | |
133 | return ret; | |
134 | ||
6bd13057 KO |
135 | w->have_inode = !ret; |
136 | w->cur_inum = inum; | |
137 | w->first_this_inode = true; | |
138 | } else { | |
139 | w->first_this_inode = false; | |
1c6fdbd8 KO |
140 | } |
141 | ||
142 | return 0; | |
143 | } | |
144 | ||
7ac2c55e KO |
145 | static int hash_redo_key(struct btree_trans *trans, |
146 | const struct bch_hash_desc desc, | |
147 | struct bch_hash_info *hash_info, | |
148 | struct btree_iter *k_iter, struct bkey_s_c k) | |
1c6fdbd8 | 149 | { |
b1fd23df | 150 | struct bkey_i delete; |
1c6fdbd8 | 151 | struct bkey_i *tmp; |
1c6fdbd8 | 152 | |
b1fd23df KO |
153 | tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); |
154 | if (IS_ERR(tmp)) | |
155 | return PTR_ERR(tmp); | |
1c6fdbd8 KO |
156 | |
157 | bkey_reassemble(tmp, k); | |
158 | ||
b1fd23df KO |
159 | bkey_init(&delete.k); |
160 | delete.k.p = k_iter->pos; | |
2d594dfb | 161 | bch2_trans_update(trans, k_iter, &delete, 0); |
1c6fdbd8 | 162 | |
7ac2c55e | 163 | return bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, |
eaf79831 | 164 | tmp, 0); |
1c6fdbd8 KO |
165 | } |
166 | ||
424eb881 KO |
167 | static int fsck_hash_delete_at(struct btree_trans *trans, |
168 | const struct bch_hash_desc desc, | |
1c6fdbd8 | 169 | struct bch_hash_info *info, |
424eb881 | 170 | struct btree_iter *iter) |
1c6fdbd8 | 171 | { |
1c6fdbd8 | 172 | int ret; |
1c6fdbd8 | 173 | retry: |
424eb881 KO |
174 | ret = bch2_hash_delete_at(trans, desc, info, iter) ?: |
175 | bch2_trans_commit(trans, NULL, NULL, | |
134915f3 KO |
176 | BTREE_INSERT_NOFAIL| |
177 | BTREE_INSERT_LAZY_RW); | |
424eb881 KO |
178 | if (ret == -EINTR) { |
179 | ret = bch2_btree_iter_traverse(iter); | |
180 | if (!ret) | |
181 | goto retry; | |
182 | } | |
1c6fdbd8 | 183 | |
1c6fdbd8 KO |
184 | return ret; |
185 | } | |
186 | ||
7ac2c55e KO |
187 | static int hash_check_key(struct btree_trans *trans, |
188 | const struct bch_hash_desc desc, | |
189 | struct bch_hash_info *hash_info, | |
190 | struct btree_iter *k_iter, struct bkey_s_c hash_k) | |
d69f41d6 | 191 | { |
424eb881 | 192 | struct bch_fs *c = trans->c; |
7ac2c55e | 193 | struct btree_iter *iter = NULL; |
d69f41d6 | 194 | char buf[200]; |
7ac2c55e KO |
195 | struct bkey_s_c k; |
196 | u64 hash; | |
d69f41d6 KO |
197 | int ret = 0; |
198 | ||
7ac2c55e KO |
199 | if (hash_k.k->type != desc.key_type) |
200 | return 0; | |
201 | ||
202 | hash = desc.hash_bkey(hash_info, hash_k); | |
203 | ||
204 | if (likely(hash == hash_k.k->p.offset)) | |
d69f41d6 KO |
205 | return 0; |
206 | ||
7ac2c55e KO |
207 | if (hash_k.k->p.offset < hash) |
208 | goto bad_hash; | |
d69f41d6 | 209 | |
7ac2c55e KO |
210 | for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash), |
211 | BTREE_ITER_SLOTS, k, ret) { | |
212 | if (!bkey_cmp(k.k->p, hash_k.k->p)) | |
d69f41d6 KO |
213 | break; |
214 | ||
7ac2c55e KO |
215 | if (fsck_err_on(k.k->type == desc.key_type && |
216 | !desc.cmp_bkey(k, hash_k), c, | |
d69f41d6 | 217 | "duplicate hash table keys:\n%s", |
319f9ac3 | 218 | (bch2_bkey_val_to_text(&PBUF(buf), c, |
7ac2c55e KO |
219 | hash_k), buf))) { |
220 | ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter); | |
d69f41d6 KO |
221 | if (ret) |
222 | return ret; | |
223 | ret = 1; | |
224 | break; | |
225 | } | |
d69f41d6 | 226 | |
7ac2c55e KO |
227 | if (bkey_deleted(k.k)) { |
228 | bch2_trans_iter_free(trans, iter); | |
229 | goto bad_hash; | |
1c6fdbd8 | 230 | } |
1c6fdbd8 | 231 | |
7ac2c55e KO |
232 | } |
233 | bch2_trans_iter_free(trans, iter); | |
1c6fdbd8 | 234 | return ret; |
7ac2c55e KO |
235 | bad_hash: |
236 | if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, " | |
237 | "hashed to %llu should be at %llu\n%s", | |
238 | desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, | |
239 | hash, iter->pos.offset, | |
240 | (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE) | |
741daa5b KO |
241 | return 0; |
242 | ||
7ac2c55e KO |
243 | ret = __bch2_trans_do(trans, NULL, NULL, |
244 | BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, | |
245 | hash_redo_key(trans, desc, hash_info, k_iter, hash_k)); | |
246 | if (ret) { | |
247 | bch_err(c, "hash_redo_key err %i", ret); | |
248 | return ret; | |
741daa5b | 249 | } |
7ac2c55e | 250 | return -EINTR; |
741daa5b | 251 | fsck_err: |
741daa5b | 252 | return ret; |
741daa5b KO |
253 | } |
254 | ||
5c16add5 KO |
255 | static int check_inode(struct btree_trans *trans, |
256 | struct btree_iter *iter, | |
257 | struct bkey_s_c_inode inode) | |
258 | { | |
259 | struct bch_fs *c = trans->c; | |
260 | struct bch_inode_unpacked u; | |
261 | bool do_update = false; | |
262 | int ret = 0; | |
263 | ||
264 | ret = bch2_inode_unpack(inode, &u); | |
265 | ||
266 | if (bch2_fs_inconsistent_on(ret, c, | |
267 | "error unpacking inode %llu in fsck", | |
268 | inode.k->p.inode)) | |
269 | return ret; | |
270 | ||
271 | if (u.bi_flags & BCH_INODE_UNLINKED && | |
272 | (!c->sb.clean || | |
273 | fsck_err(c, "filesystem marked clean, but inode %llu unlinked", | |
274 | u.bi_inum))) { | |
275 | bch_verbose(c, "deleting inode %llu", u.bi_inum); | |
276 | ||
277 | bch2_trans_unlock(trans); | |
278 | bch2_fs_lazy_rw(c); | |
279 | ||
280 | ret = bch2_inode_rm(c, u.bi_inum, false); | |
281 | if (ret) | |
282 | bch_err(c, "error in fsck: error %i while deleting inode", ret); | |
283 | return ret; | |
284 | } | |
285 | ||
286 | if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && | |
287 | (!c->sb.clean || | |
288 | fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", | |
289 | u.bi_inum))) { | |
290 | bch_verbose(c, "truncating inode %llu", u.bi_inum); | |
291 | ||
292 | bch2_trans_unlock(trans); | |
293 | bch2_fs_lazy_rw(c); | |
294 | ||
295 | /* | |
296 | * XXX: need to truncate partial blocks too here - or ideally | |
297 | * just switch units to bytes and that issue goes away | |
298 | */ | |
299 | ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, | |
300 | POS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9), | |
301 | POS(u.bi_inum, U64_MAX), | |
302 | NULL); | |
303 | if (ret) { | |
304 | bch_err(c, "error in fsck: error %i truncating inode", ret); | |
305 | return ret; | |
306 | } | |
307 | ||
308 | /* | |
309 | * We truncated without our normal sector accounting hook, just | |
310 | * make sure we recalculate it: | |
311 | */ | |
312 | u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; | |
313 | ||
314 | u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; | |
315 | do_update = true; | |
316 | } | |
317 | ||
318 | if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY && | |
319 | (!c->sb.clean || | |
320 | fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", | |
321 | u.bi_inum))) { | |
322 | s64 sectors; | |
323 | ||
324 | bch_verbose(c, "recounting sectors for inode %llu", | |
325 | u.bi_inum); | |
326 | ||
327 | sectors = bch2_count_inode_sectors(trans, u.bi_inum); | |
328 | if (sectors < 0) { | |
329 | bch_err(c, "error in fsck: error %i recounting inode sectors", | |
330 | (int) sectors); | |
331 | return sectors; | |
332 | } | |
333 | ||
334 | u.bi_sectors = sectors; | |
335 | u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; | |
336 | do_update = true; | |
337 | } | |
338 | ||
339 | if (!S_ISDIR(u.bi_mode) && | |
340 | u.bi_nlink && | |
341 | !(u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) && | |
342 | (fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c, | |
343 | "inode missing BCH_INODE_BACKPTR_UNTRUSTED flags") || | |
344 | c->opts.version_upgrade)) { | |
345 | u.bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED; | |
346 | do_update = true; | |
347 | } | |
348 | ||
349 | if (do_update) { | |
350 | struct bkey_inode_buf p; | |
351 | ||
352 | bch2_inode_pack(c, &p, &u); | |
353 | p.inode.k.p = iter->pos; | |
354 | ||
355 | ret = __bch2_trans_do(trans, NULL, NULL, | |
356 | BTREE_INSERT_NOFAIL| | |
357 | BTREE_INSERT_LAZY_RW, | |
358 | (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0)); | |
359 | if (ret) | |
360 | bch_err(c, "error in fsck: error %i " | |
361 | "updating inode", ret); | |
362 | } | |
363 | fsck_err: | |
364 | return ret; | |
365 | } | |
366 | ||
367 | noinline_for_stack | |
368 | static int check_inodes(struct bch_fs *c, bool full) | |
369 | { | |
370 | struct btree_trans trans; | |
371 | struct btree_iter *iter; | |
372 | struct bkey_s_c k; | |
373 | struct bkey_s_c_inode inode; | |
374 | int ret; | |
375 | ||
376 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); | |
377 | ||
378 | for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, 0, k, ret) { | |
379 | if (k.k->type != KEY_TYPE_inode) | |
380 | continue; | |
381 | ||
382 | inode = bkey_s_c_to_inode(k); | |
383 | ||
384 | if (full || | |
385 | (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY| | |
386 | BCH_INODE_I_SECTORS_DIRTY| | |
387 | BCH_INODE_UNLINKED))) { | |
388 | ret = check_inode(&trans, iter, inode); | |
389 | if (ret) | |
390 | break; | |
391 | } | |
392 | } | |
393 | bch2_trans_iter_put(&trans, iter); | |
394 | ||
395 | BUG_ON(ret == -EINTR); | |
396 | ||
397 | return bch2_trans_exit(&trans) ?: ret; | |
398 | } | |
399 | ||
abcecb49 | 400 | static int fix_overlapping_extent(struct btree_trans *trans, |
e3e464ac KO |
401 | struct bkey_s_c k, struct bpos cut_at) |
402 | { | |
abcecb49 | 403 | struct btree_iter *iter; |
e3e464ac KO |
404 | struct bkey_i *u; |
405 | int ret; | |
406 | ||
407 | u = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); | |
408 | ret = PTR_ERR_OR_ZERO(u); | |
409 | if (ret) | |
410 | return ret; | |
411 | ||
412 | bkey_reassemble(u, k); | |
413 | bch2_cut_front(cut_at, u); | |
414 | ||
e3e464ac KO |
415 | |
416 | /* | |
abcecb49 KO |
417 | * We don't want to go through the extent_handle_overwrites path: |
418 | * | |
419 | * XXX: this is going to screw up disk accounting, extent triggers | |
420 | * assume things about extent overwrites - we should be running the | |
421 | * triggers manually here | |
e3e464ac | 422 | */ |
abcecb49 KO |
423 | iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p, |
424 | BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); | |
e3e464ac | 425 | |
abcecb49 KO |
426 | BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); |
427 | bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN); | |
428 | bch2_trans_iter_put(trans, iter); | |
429 | ||
430 | return bch2_trans_commit(trans, NULL, NULL, | |
431 | BTREE_INSERT_NOFAIL| | |
432 | BTREE_INSERT_LAZY_RW); | |
e3e464ac KO |
433 | } |
434 | ||
1c6fdbd8 KO |
435 | /* |
436 | * Walk extents: verify that extents have a corresponding S_ISREG inode, and | |
437 | * that i_size an i_sectors are consistent | |
438 | */ | |
439 | noinline_for_stack | |
440 | static int check_extents(struct bch_fs *c) | |
441 | { | |
442 | struct inode_walker w = inode_walker_init(); | |
424eb881 KO |
443 | struct btree_trans trans; |
444 | struct btree_iter *iter; | |
1c6fdbd8 | 445 | struct bkey_s_c k; |
07a1006a | 446 | struct bkey_buf prev; |
abcecb49 | 447 | u64 i_sectors = 0; |
1c6fdbd8 KO |
448 | int ret = 0; |
449 | ||
07a1006a | 450 | bch2_bkey_buf_init(&prev); |
f7005e01 | 451 | prev.k->k = KEY(0, 0, 0); |
20bceecb | 452 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
424eb881 | 453 | |
1c6fdbd8 KO |
454 | bch_verbose(c, "checking extents"); |
455 | ||
41f8b09e | 456 | iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, |
0728eed7 KO |
457 | POS(BCACHEFS_ROOT_INO, 0), |
458 | BTREE_ITER_INTENT); | |
6bd13057 | 459 | retry: |
abcecb49 KO |
460 | while ((k = bch2_btree_iter_peek(iter)).k && |
461 | !(ret = bkey_err(k))) { | |
462 | if (w.have_inode && | |
463 | w.cur_inum != k.k->p.inode && | |
464 | !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && | |
465 | fsck_err_on(w.inode.bi_sectors != i_sectors, c, | |
466 | "inode %llu has incorrect i_sectors: got %llu, should be %llu", | |
467 | w.inode.bi_inum, | |
468 | w.inode.bi_sectors, i_sectors)) { | |
469 | struct btree_iter *inode_iter = | |
470 | bch2_trans_get_iter(&trans, BTREE_ID_inodes, | |
471 | POS(0, w.cur_inum), | |
472 | BTREE_ITER_INTENT); | |
473 | ||
474 | w.inode.bi_sectors = i_sectors; | |
475 | ||
476 | ret = __bch2_trans_do(&trans, NULL, NULL, | |
477 | BTREE_INSERT_NOFAIL| | |
478 | BTREE_INSERT_LAZY_RW, | |
479 | bch2_inode_write(&trans, inode_iter, &w.inode)); | |
480 | bch2_trans_iter_put(&trans, inode_iter); | |
481 | if (ret) | |
482 | break; | |
483 | } | |
484 | ||
485 | if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { | |
f7005e01 KO |
486 | char buf1[200]; |
487 | char buf2[200]; | |
e3e464ac | 488 | |
f7005e01 KO |
489 | bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); |
490 | bch2_bkey_val_to_text(&PBUF(buf2), c, k); | |
e3e464ac | 491 | |
abcecb49 KO |
492 | if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) |
493 | return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR; | |
e3e464ac | 494 | } |
e3e464ac | 495 | |
6bd13057 | 496 | ret = walk_inode(&trans, &w, k.k->p.inode); |
1c6fdbd8 KO |
497 | if (ret) |
498 | break; | |
499 | ||
abcecb49 KO |
500 | if (w.first_this_inode) |
501 | i_sectors = 0; | |
502 | ||
1c6fdbd8 | 503 | if (fsck_err_on(!w.have_inode, c, |
abcecb49 KO |
504 | "extent type %u for missing inode %llu", |
505 | k.k->type, k.k->p.inode) || | |
1c6fdbd8 | 506 | fsck_err_on(w.have_inode && |
abcecb49 KO |
507 | !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, |
508 | "extent type %u for non regular file, inode %llu mode %o", | |
509 | k.k->type, k.k->p.inode, w.inode.bi_mode)) { | |
510 | bch2_fs_lazy_rw(c); | |
511 | return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, | |
512 | POS(k.k->p.inode, 0), | |
513 | POS(k.k->p.inode, U64_MAX), | |
514 | NULL) ?: -EINTR; | |
1c6fdbd8 KO |
515 | } |
516 | ||
abcecb49 KO |
517 | if (fsck_err_on(w.have_inode && |
518 | !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && | |
519 | k.k->type != KEY_TYPE_reservation && | |
520 | k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, | |
521 | "extent type %u offset %llu past end of inode %llu, i_size %llu", | |
522 | k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { | |
523 | bch2_fs_lazy_rw(c); | |
524 | return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, | |
525 | POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c)) >> 9), | |
526 | POS(k.k->p.inode, U64_MAX), | |
527 | NULL) ?: -EINTR; | |
1c6fdbd8 KO |
528 | } |
529 | ||
abcecb49 KO |
530 | if (bkey_extent_is_allocation(k.k)) |
531 | i_sectors += k.k->size; | |
532 | bch2_bkey_buf_reassemble(&prev, c, k); | |
1c6fdbd8 | 533 | |
e0ba3b64 | 534 | bch2_btree_iter_advance(iter); |
1c6fdbd8 | 535 | } |
1c6fdbd8 | 536 | fsck_err: |
6bd13057 KO |
537 | if (ret == -EINTR) |
538 | goto retry; | |
abcecb49 | 539 | bch2_trans_iter_put(&trans, iter); |
07a1006a | 540 | bch2_bkey_buf_exit(&prev, c); |
424eb881 | 541 | return bch2_trans_exit(&trans) ?: ret; |
1c6fdbd8 KO |
542 | } |
543 | ||
544 | /* | |
545 | * Walk dirents: verify that they all have a corresponding S_ISDIR inode, | |
546 | * validate d_type | |
547 | */ | |
548 | noinline_for_stack | |
549 | static int check_dirents(struct bch_fs *c) | |
550 | { | |
551 | struct inode_walker w = inode_walker_init(); | |
7ac2c55e | 552 | struct bch_hash_info hash_info; |
d69f41d6 KO |
553 | struct btree_trans trans; |
554 | struct btree_iter *iter; | |
1c6fdbd8 | 555 | struct bkey_s_c k; |
1c6fdbd8 KO |
556 | char buf[200]; |
557 | int ret = 0; | |
558 | ||
559 | bch_verbose(c, "checking dirents"); | |
560 | ||
20bceecb | 561 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
d69f41d6 | 562 | |
41f8b09e | 563 | iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents, |
6bd13057 KO |
564 | POS(BCACHEFS_ROOT_INO, 0), 0); |
565 | retry: | |
abcecb49 KO |
566 | while ((k = bch2_btree_iter_peek(iter)).k && |
567 | !(ret = bkey_err(k))) { | |
1c6fdbd8 KO |
568 | struct bkey_s_c_dirent d; |
569 | struct bch_inode_unpacked target; | |
570 | bool have_target; | |
571 | u64 d_inum; | |
572 | ||
6bd13057 | 573 | ret = walk_inode(&trans, &w, k.k->p.inode); |
1c6fdbd8 KO |
574 | if (ret) |
575 | break; | |
576 | ||
577 | if (fsck_err_on(!w.have_inode, c, | |
578 | "dirent in nonexisting directory:\n%s", | |
319f9ac3 | 579 | (bch2_bkey_val_to_text(&PBUF(buf), c, |
319f9ac3 | 580 | k), buf)) || |
1c6fdbd8 KO |
581 | fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, |
582 | "dirent in non directory inode type %u:\n%s", | |
583 | mode_to_type(w.inode.bi_mode), | |
319f9ac3 | 584 | (bch2_bkey_val_to_text(&PBUF(buf), c, |
319f9ac3 | 585 | k), buf))) { |
0564b167 | 586 | ret = bch2_btree_delete_at(&trans, iter, 0); |
1c6fdbd8 KO |
587 | if (ret) |
588 | goto err; | |
7ac2c55e | 589 | goto next; |
1c6fdbd8 KO |
590 | } |
591 | ||
7ac2c55e KO |
592 | if (!w.have_inode) |
593 | goto next; | |
1c6fdbd8 | 594 | |
7ac2c55e KO |
595 | if (w.first_this_inode) |
596 | hash_info = bch2_hash_info_init(c, &w.inode); | |
597 | ||
598 | ret = hash_check_key(&trans, bch2_dirent_hash_desc, | |
599 | &hash_info, iter, k); | |
1c6fdbd8 KO |
600 | if (ret > 0) { |
601 | ret = 0; | |
7ac2c55e | 602 | goto next; |
1c6fdbd8 | 603 | } |
741daa5b KO |
604 | if (ret) |
605 | goto fsck_err; | |
1c6fdbd8 | 606 | |
26609b61 | 607 | if (k.k->type != KEY_TYPE_dirent) |
7ac2c55e | 608 | goto next; |
1c6fdbd8 KO |
609 | |
610 | d = bkey_s_c_to_dirent(k); | |
611 | d_inum = le64_to_cpu(d.v->d_inum); | |
612 | ||
fe38b720 | 613 | ret = __bch2_inode_find_by_inum_trans(&trans, d_inum, &target, 0); |
1c6fdbd8 KO |
614 | if (ret && ret != -ENOENT) |
615 | break; | |
616 | ||
617 | have_target = !ret; | |
618 | ret = 0; | |
619 | ||
620 | if (fsck_err_on(!have_target, c, | |
621 | "dirent points to missing inode:\n%s", | |
319f9ac3 | 622 | (bch2_bkey_val_to_text(&PBUF(buf), c, |
319f9ac3 | 623 | k), buf))) { |
0f238367 | 624 | ret = remove_dirent(&trans, d); |
1c6fdbd8 KO |
625 | if (ret) |
626 | goto err; | |
7ac2c55e | 627 | goto next; |
1c6fdbd8 KO |
628 | } |
629 | ||
7ac2c55e KO |
630 | if (!have_target) |
631 | goto next; | |
632 | ||
ab2a29cc KO |
633 | if (!target.bi_nlink && |
634 | !(target.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) && | |
635 | (target.bi_dir != k.k->p.inode || | |
636 | target.bi_dir_offset != k.k->p.offset) && | |
637 | (fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c, | |
638 | "inode %llu has wrong backpointer:\n" | |
639 | "got %llu:%llu\n" | |
640 | "should be %llu:%llu", | |
641 | d_inum, | |
642 | target.bi_dir, | |
643 | target.bi_dir_offset, | |
644 | k.k->p.inode, | |
645 | k.k->p.offset) || | |
646 | c->opts.version_upgrade)) { | |
647 | struct bkey_inode_buf p; | |
648 | ||
649 | target.bi_dir = k.k->p.inode; | |
650 | target.bi_dir_offset = k.k->p.offset; | |
651 | bch2_trans_unlock(&trans); | |
652 | ||
653 | bch2_inode_pack(c, &p, &target); | |
654 | ||
655 | ret = bch2_btree_insert(c, BTREE_ID_inodes, | |
656 | &p.inode.k_i, NULL, NULL, | |
657 | BTREE_INSERT_NOFAIL| | |
658 | BTREE_INSERT_LAZY_RW); | |
659 | if (ret) { | |
660 | bch_err(c, "error in fsck: error %i updating inode", ret); | |
661 | goto err; | |
662 | } | |
663 | continue; | |
664 | } | |
665 | ||
7ac2c55e | 666 | if (fsck_err_on(d.v->d_type != |
1c6fdbd8 KO |
667 | mode_to_type(target.bi_mode), c, |
668 | "incorrect d_type: should be %u:\n%s", | |
669 | mode_to_type(target.bi_mode), | |
319f9ac3 | 670 | (bch2_bkey_val_to_text(&PBUF(buf), c, |
319f9ac3 | 671 | k), buf))) { |
1c6fdbd8 KO |
672 | struct bkey_i_dirent *n; |
673 | ||
674 | n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); | |
675 | if (!n) { | |
676 | ret = -ENOMEM; | |
677 | goto err; | |
678 | } | |
679 | ||
680 | bkey_reassemble(&n->k_i, d.s_c); | |
681 | n->v.d_type = mode_to_type(target.bi_mode); | |
682 | ||
b1fd23df | 683 | ret = __bch2_trans_do(&trans, NULL, NULL, |
b1fd23df KO |
684 | BTREE_INSERT_NOFAIL| |
685 | BTREE_INSERT_LAZY_RW, | |
2d594dfb | 686 | (bch2_trans_update(&trans, iter, &n->k_i, 0), 0)); |
1c6fdbd8 KO |
687 | kfree(n); |
688 | if (ret) | |
689 | goto err; | |
690 | ||
691 | } | |
7ac2c55e | 692 | next: |
e0ba3b64 | 693 | bch2_btree_iter_advance(iter); |
1c6fdbd8 KO |
694 | } |
695 | err: | |
696 | fsck_err: | |
6bd13057 KO |
697 | if (ret == -EINTR) |
698 | goto retry; | |
699 | ||
abcecb49 | 700 | bch2_trans_iter_put(&trans, iter); |
d69f41d6 | 701 | return bch2_trans_exit(&trans) ?: ret; |
1c6fdbd8 KO |
702 | } |
703 | ||
704 | /* | |
705 | * Walk xattrs: verify that they all have a corresponding inode | |
706 | */ | |
707 | noinline_for_stack | |
708 | static int check_xattrs(struct bch_fs *c) | |
709 | { | |
710 | struct inode_walker w = inode_walker_init(); | |
7ac2c55e | 711 | struct bch_hash_info hash_info; |
d69f41d6 KO |
712 | struct btree_trans trans; |
713 | struct btree_iter *iter; | |
1c6fdbd8 KO |
714 | struct bkey_s_c k; |
715 | int ret = 0; | |
716 | ||
717 | bch_verbose(c, "checking xattrs"); | |
718 | ||
20bceecb | 719 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
d69f41d6 | 720 | |
41f8b09e | 721 | iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, |
d69f41d6 | 722 | POS(BCACHEFS_ROOT_INO, 0), 0); |
6bd13057 | 723 | retry: |
abcecb49 KO |
724 | while ((k = bch2_btree_iter_peek(iter)).k && |
725 | !(ret = bkey_err(k))) { | |
6bd13057 | 726 | ret = walk_inode(&trans, &w, k.k->p.inode); |
1c6fdbd8 KO |
727 | if (ret) |
728 | break; | |
729 | ||
730 | if (fsck_err_on(!w.have_inode, c, | |
731 | "xattr for missing inode %llu", | |
732 | k.k->p.inode)) { | |
0564b167 | 733 | ret = bch2_btree_delete_at(&trans, iter, 0); |
1c6fdbd8 | 734 | if (ret) |
abcecb49 | 735 | break; |
1c6fdbd8 KO |
736 | continue; |
737 | } | |
738 | ||
739 | if (w.first_this_inode && w.have_inode) | |
7ac2c55e | 740 | hash_info = bch2_hash_info_init(c, &w.inode); |
1c6fdbd8 | 741 | |
424eb881 | 742 | ret = hash_check_key(&trans, bch2_xattr_hash_desc, |
7ac2c55e | 743 | &hash_info, iter, k); |
1c6fdbd8 | 744 | if (ret) |
abcecb49 KO |
745 | break; |
746 | ||
e0ba3b64 | 747 | bch2_btree_iter_advance(iter); |
1c6fdbd8 | 748 | } |
1c6fdbd8 | 749 | fsck_err: |
6bd13057 KO |
750 | if (ret == -EINTR) |
751 | goto retry; | |
abcecb49 | 752 | |
abcecb49 | 753 | bch2_trans_iter_put(&trans, iter); |
d69f41d6 | 754 | return bch2_trans_exit(&trans) ?: ret; |
1c6fdbd8 KO |
755 | } |
756 | ||
757 | /* Get root directory, create if it doesn't exist: */ | |
758 | static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) | |
759 | { | |
760 | struct bkey_inode_buf packed; | |
761 | int ret; | |
762 | ||
763 | bch_verbose(c, "checking root directory"); | |
764 | ||
fe38b720 KO |
765 | ret = bch2_trans_do(c, NULL, NULL, 0, |
766 | __bch2_inode_find_by_inum_trans(&trans, BCACHEFS_ROOT_INO, | |
767 | root_inode, 0)); | |
1c6fdbd8 KO |
768 | if (ret && ret != -ENOENT) |
769 | return ret; | |
770 | ||
771 | if (fsck_err_on(ret, c, "root directory missing")) | |
772 | goto create_root; | |
773 | ||
774 | if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c, | |
775 | "root inode not a directory")) | |
776 | goto create_root; | |
777 | ||
778 | return 0; | |
779 | fsck_err: | |
780 | return ret; | |
781 | create_root: | |
96385742 | 782 | bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755, |
1c6fdbd8 KO |
783 | 0, NULL); |
784 | root_inode->bi_inum = BCACHEFS_ROOT_INO; | |
785 | ||
a3e72262 | 786 | bch2_inode_pack(c, &packed, root_inode); |
1c6fdbd8 | 787 | |
41f8b09e | 788 | return bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i, |
9d455b24 KO |
789 | NULL, NULL, |
790 | BTREE_INSERT_NOFAIL| | |
791 | BTREE_INSERT_LAZY_RW); | |
1c6fdbd8 KO |
792 | } |
793 | ||
794 | /* Get lost+found, create if it doesn't exist: */ | |
795 | static int check_lostfound(struct bch_fs *c, | |
796 | struct bch_inode_unpacked *root_inode, | |
797 | struct bch_inode_unpacked *lostfound_inode) | |
798 | { | |
799 | struct qstr lostfound = QSTR("lost+found"); | |
800 | struct bch_hash_info root_hash_info = | |
801 | bch2_hash_info_init(c, root_inode); | |
1c6fdbd8 KO |
802 | u64 inum; |
803 | int ret; | |
804 | ||
805 | bch_verbose(c, "checking lost+found"); | |
806 | ||
807 | inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info, | |
808 | &lostfound); | |
809 | if (!inum) { | |
810 | bch_notice(c, "creating lost+found"); | |
811 | goto create_lostfound; | |
812 | } | |
813 | ||
fe38b720 KO |
814 | ret = bch2_trans_do(c, NULL, NULL, 0, |
815 | __bch2_inode_find_by_inum_trans(&trans, inum, lostfound_inode, 0)); | |
1c6fdbd8 KO |
816 | if (ret && ret != -ENOENT) |
817 | return ret; | |
818 | ||
819 | if (fsck_err_on(ret, c, "lost+found missing")) | |
820 | goto create_lostfound; | |
821 | ||
822 | if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c, | |
823 | "lost+found inode not a directory")) | |
824 | goto create_lostfound; | |
825 | ||
826 | return 0; | |
827 | fsck_err: | |
828 | return ret; | |
829 | create_lostfound: | |
96385742 KO |
830 | bch2_inode_init_early(c, lostfound_inode); |
831 | ||
b1fd23df | 832 | ret = bch2_trans_do(c, NULL, NULL, |
96385742 KO |
833 | BTREE_INSERT_NOFAIL| |
834 | BTREE_INSERT_LAZY_RW, | |
835 | bch2_create_trans(&trans, | |
836 | BCACHEFS_ROOT_INO, root_inode, | |
837 | lostfound_inode, &lostfound, | |
b627c7d8 | 838 | 0, 0, S_IFDIR|0700, 0, NULL, NULL)); |
1c6fdbd8 | 839 | if (ret) |
96385742 | 840 | bch_err(c, "error creating lost+found: %i", ret); |
1c6fdbd8 | 841 | |
96385742 | 842 | return ret; |
1c6fdbd8 KO |
843 | } |
844 | ||
fe458476 | 845 | typedef GENRADIX(unsigned long) inode_bitmap; |
1c6fdbd8 | 846 | |
fe458476 | 847 | static inline bool inode_bitmap_test(inode_bitmap *b, size_t nr) |
1c6fdbd8 | 848 | { |
fe458476 KO |
849 | unsigned long *w = genradix_ptr(b, nr / BITS_PER_LONG); |
850 | return w ? test_bit(nr & (BITS_PER_LONG - 1), w) : false; | |
1c6fdbd8 KO |
851 | } |
852 | ||
fe458476 | 853 | static inline int inode_bitmap_set(inode_bitmap *b, size_t nr) |
1c6fdbd8 | 854 | { |
fe458476 | 855 | unsigned long *w = genradix_ptr_alloc(b, nr / BITS_PER_LONG, GFP_KERNEL); |
1c6fdbd8 | 856 | |
fe458476 KO |
857 | if (!w) |
858 | return -ENOMEM; | |
1c6fdbd8 | 859 | |
fe458476 | 860 | *w |= 1UL << (nr & (BITS_PER_LONG - 1)); |
1c6fdbd8 KO |
861 | return 0; |
862 | } | |
863 | ||
864 | struct pathbuf { | |
865 | size_t nr; | |
866 | size_t size; | |
867 | ||
868 | struct pathbuf_entry { | |
869 | u64 inum; | |
870 | u64 offset; | |
871 | } *entries; | |
872 | }; | |
873 | ||
874 | static int path_down(struct pathbuf *p, u64 inum) | |
875 | { | |
876 | if (p->nr == p->size) { | |
877 | size_t new_size = max_t(size_t, 256UL, p->size * 2); | |
878 | void *n = krealloc(p->entries, | |
879 | new_size * sizeof(p->entries[0]), | |
880 | GFP_KERNEL); | |
881 | if (!n) | |
882 | return -ENOMEM; | |
883 | ||
884 | p->entries = n; | |
885 | p->size = new_size; | |
886 | }; | |
887 | ||
888 | p->entries[p->nr++] = (struct pathbuf_entry) { | |
889 | .inum = inum, | |
890 | .offset = 0, | |
891 | }; | |
892 | return 0; | |
893 | } | |
894 | ||
895 | noinline_for_stack | |
896 | static int check_directory_structure(struct bch_fs *c, | |
897 | struct bch_inode_unpacked *lostfound_inode) | |
898 | { | |
fe458476 | 899 | inode_bitmap dirs_done; |
1c6fdbd8 KO |
900 | struct pathbuf path = { 0, 0, NULL }; |
901 | struct pathbuf_entry *e; | |
424eb881 KO |
902 | struct btree_trans trans; |
903 | struct btree_iter *iter; | |
1c6fdbd8 KO |
904 | struct bkey_s_c k; |
905 | struct bkey_s_c_dirent dirent; | |
906 | bool had_unreachable; | |
907 | u64 d_inum; | |
908 | int ret = 0; | |
909 | ||
20bceecb | 910 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
424eb881 | 911 | |
1c6fdbd8 KO |
912 | bch_verbose(c, "checking directory structure"); |
913 | ||
914 | /* DFS: */ | |
915 | restart_dfs: | |
fe458476 | 916 | genradix_init(&dirs_done); |
1c6fdbd8 KO |
917 | had_unreachable = false; |
918 | ||
919 | ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO); | |
920 | if (ret) { | |
921 | bch_err(c, "memory allocation failure in inode_bitmap_set()"); | |
922 | goto err; | |
923 | } | |
924 | ||
925 | ret = path_down(&path, BCACHEFS_ROOT_INO); | |
6bd13057 KO |
926 | if (ret) |
927 | goto err; | |
1c6fdbd8 KO |
928 | |
929 | while (path.nr) { | |
930 | next: | |
931 | e = &path.entries[path.nr - 1]; | |
932 | ||
933 | if (e->offset == U64_MAX) | |
934 | goto up; | |
935 | ||
41f8b09e | 936 | for_each_btree_key(&trans, iter, BTREE_ID_dirents, |
94f651e2 | 937 | POS(e->inum, e->offset + 1), 0, k, ret) { |
1c6fdbd8 KO |
938 | if (k.k->p.inode != e->inum) |
939 | break; | |
940 | ||
941 | e->offset = k.k->p.offset; | |
942 | ||
26609b61 | 943 | if (k.k->type != KEY_TYPE_dirent) |
1c6fdbd8 KO |
944 | continue; |
945 | ||
946 | dirent = bkey_s_c_to_dirent(k); | |
947 | ||
948 | if (dirent.v->d_type != DT_DIR) | |
949 | continue; | |
950 | ||
951 | d_inum = le64_to_cpu(dirent.v->d_inum); | |
952 | ||
953 | if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, | |
954 | "directory %llu has multiple hardlinks", | |
955 | d_inum)) { | |
0f238367 | 956 | ret = remove_dirent(&trans, dirent); |
1c6fdbd8 KO |
957 | if (ret) |
958 | goto err; | |
959 | continue; | |
960 | } | |
961 | ||
962 | ret = inode_bitmap_set(&dirs_done, d_inum); | |
963 | if (ret) { | |
964 | bch_err(c, "memory allocation failure in inode_bitmap_set()"); | |
965 | goto err; | |
966 | } | |
967 | ||
968 | ret = path_down(&path, d_inum); | |
969 | if (ret) { | |
970 | goto err; | |
971 | } | |
972 | ||
424eb881 KO |
973 | ret = bch2_trans_iter_free(&trans, iter); |
974 | if (ret) { | |
975 | bch_err(c, "btree error %i in fsck", ret); | |
976 | goto err; | |
977 | } | |
1c6fdbd8 KO |
978 | goto next; |
979 | } | |
94f651e2 | 980 | ret = bch2_trans_iter_free(&trans, iter) ?: ret; |
1c6fdbd8 KO |
981 | if (ret) { |
982 | bch_err(c, "btree error %i in fsck", ret); | |
983 | goto err; | |
984 | } | |
985 | up: | |
986 | path.nr--; | |
987 | } | |
988 | ||
41f8b09e | 989 | iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS_MIN, 0); |
6bd13057 | 990 | retry: |
ef9f95ba | 991 | for_each_btree_key_continue(iter, 0, k, ret) { |
26609b61 | 992 | if (k.k->type != KEY_TYPE_inode) |
1c6fdbd8 KO |
993 | continue; |
994 | ||
995 | if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) | |
996 | continue; | |
997 | ||
6bd13057 KO |
998 | ret = bch2_empty_dir_trans(&trans, k.k->p.inode); |
999 | if (ret == -EINTR) | |
1000 | goto retry; | |
1001 | if (!ret) | |
1c6fdbd8 KO |
1002 | continue; |
1003 | ||
39fb2983 | 1004 | if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c, |
1c6fdbd8 | 1005 | "unreachable directory found (inum %llu)", |
39fb2983 | 1006 | k.k->p.offset)) { |
58fbf808 | 1007 | bch2_trans_unlock(&trans); |
1c6fdbd8 | 1008 | |
39fb2983 | 1009 | ret = reattach_inode(c, lostfound_inode, k.k->p.offset); |
1c6fdbd8 KO |
1010 | if (ret) { |
1011 | goto err; | |
1012 | } | |
1013 | ||
1014 | had_unreachable = true; | |
1015 | } | |
1016 | } | |
ef9f95ba | 1017 | bch2_trans_iter_free(&trans, iter); |
1c6fdbd8 KO |
1018 | if (ret) |
1019 | goto err; | |
1020 | ||
1021 | if (had_unreachable) { | |
1022 | bch_info(c, "reattached unreachable directories, restarting pass to check for loops"); | |
fe458476 | 1023 | genradix_free(&dirs_done); |
1c6fdbd8 KO |
1024 | kfree(path.entries); |
1025 | memset(&dirs_done, 0, sizeof(dirs_done)); | |
1026 | memset(&path, 0, sizeof(path)); | |
1027 | goto restart_dfs; | |
1028 | } | |
1c6fdbd8 KO |
1029 | err: |
1030 | fsck_err: | |
424eb881 | 1031 | ret = bch2_trans_exit(&trans) ?: ret; |
fe458476 | 1032 | genradix_free(&dirs_done); |
6bd13057 KO |
1033 | kfree(path.entries); |
1034 | return ret; | |
1c6fdbd8 KO |
1035 | } |
1036 | ||
1037 | struct nlink { | |
1038 | u32 count; | |
1039 | u32 dir_count; | |
1040 | }; | |
1041 | ||
1042 | typedef GENRADIX(struct nlink) nlink_table; | |
1043 | ||
1044 | static void inc_link(struct bch_fs *c, nlink_table *links, | |
1045 | u64 range_start, u64 *range_end, | |
1046 | u64 inum, bool dir) | |
1047 | { | |
1048 | struct nlink *link; | |
1049 | ||
1050 | if (inum < range_start || inum >= *range_end) | |
1051 | return; | |
1052 | ||
2bb748a6 KO |
1053 | if (inum - range_start >= SIZE_MAX / sizeof(struct nlink)) { |
1054 | *range_end = inum; | |
1055 | return; | |
1056 | } | |
1057 | ||
1c6fdbd8 KO |
1058 | link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); |
1059 | if (!link) { | |
619f5bee | 1060 | bch_verbose(c, "allocation failed during fsck - will need another pass"); |
1c6fdbd8 KO |
1061 | *range_end = inum; |
1062 | return; | |
1063 | } | |
1064 | ||
1065 | if (dir) | |
1066 | link->dir_count++; | |
1067 | else | |
1068 | link->count++; | |
1069 | } | |
1070 | ||
1071 | noinline_for_stack | |
1072 | static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, | |
1073 | u64 range_start, u64 *range_end) | |
1074 | { | |
424eb881 KO |
1075 | struct btree_trans trans; |
1076 | struct btree_iter *iter; | |
1c6fdbd8 KO |
1077 | struct bkey_s_c k; |
1078 | struct bkey_s_c_dirent d; | |
1079 | u64 d_inum; | |
1080 | int ret; | |
1081 | ||
20bceecb | 1082 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
424eb881 | 1083 | |
1c6fdbd8 KO |
1084 | inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); |
1085 | ||
41f8b09e | 1086 | for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN, 0, k, ret) { |
1c6fdbd8 | 1087 | switch (k.k->type) { |
26609b61 | 1088 | case KEY_TYPE_dirent: |
1c6fdbd8 KO |
1089 | d = bkey_s_c_to_dirent(k); |
1090 | d_inum = le64_to_cpu(d.v->d_inum); | |
1091 | ||
1092 | if (d.v->d_type == DT_DIR) | |
1093 | inc_link(c, links, range_start, range_end, | |
1094 | d.k->p.inode, true); | |
1095 | ||
1096 | inc_link(c, links, range_start, range_end, | |
1097 | d_inum, false); | |
1098 | ||
1099 | break; | |
1100 | } | |
1101 | ||
424eb881 | 1102 | bch2_trans_cond_resched(&trans); |
1c6fdbd8 | 1103 | } |
abcecb49 KO |
1104 | bch2_trans_iter_put(&trans, iter); |
1105 | ||
94f651e2 | 1106 | ret = bch2_trans_exit(&trans) ?: ret; |
1c6fdbd8 | 1107 | if (ret) |
619f5bee | 1108 | bch_err(c, "error in fsck: btree error %i while walking dirents", ret); |
1c6fdbd8 KO |
1109 | |
1110 | return ret; | |
1111 | } | |
1112 | ||
5c16add5 | 1113 | static int check_inode_nlink(struct btree_trans *trans, |
1c6fdbd8 | 1114 | struct bch_inode_unpacked *lostfound_inode, |
5c16add5 KO |
1115 | struct btree_iter *iter, |
1116 | struct bkey_s_c_inode inode, | |
1117 | struct nlink *link) | |
1c6fdbd8 | 1118 | { |
5c16add5 KO |
1119 | struct bch_fs *c = trans->c; |
1120 | struct bch_inode_unpacked u; | |
1121 | u32 i_nlink, real_i_nlink; | |
1c6fdbd8 KO |
1122 | int ret = 0; |
1123 | ||
5c16add5 KO |
1124 | ret = bch2_inode_unpack(inode, &u); |
1125 | /* Should never happen, checked by bch2_inode_invalid: */ | |
1126 | if (bch2_fs_inconsistent_on(ret, c, | |
1127 | "error unpacking inode %llu in fsck", | |
1128 | inode.k->p.inode)) | |
1129 | return ret; | |
1130 | ||
1131 | i_nlink = bch2_inode_nlink_get(&u); | |
1132 | real_i_nlink = link->count * nlink_bias(u.bi_mode) + link->dir_count; | |
1133 | ||
1c6fdbd8 KO |
1134 | /* |
1135 | * These should have been caught/fixed by earlier passes, we don't | |
1136 | * repair them here: | |
1137 | */ | |
5c16add5 | 1138 | if (S_ISDIR(u.bi_mode) && link->count > 1) { |
1c6fdbd8 | 1139 | need_fsck_err(c, "directory %llu with multiple hardlinks: %u", |
5c16add5 | 1140 | u.bi_inum, link->count); |
1c6fdbd8 KO |
1141 | return 0; |
1142 | } | |
1143 | ||
5c16add5 | 1144 | if (S_ISDIR(u.bi_mode) && !link->count) { |
1c6fdbd8 | 1145 | need_fsck_err(c, "unreachable directory found (inum %llu)", |
5c16add5 | 1146 | u.bi_inum); |
1c6fdbd8 KO |
1147 | return 0; |
1148 | } | |
1149 | ||
5c16add5 | 1150 | if (!S_ISDIR(u.bi_mode) && link->dir_count) { |
9ef846a7 | 1151 | need_fsck_err(c, "non directory with subdirectories (inum %llu)", |
5c16add5 | 1152 | u.bi_inum); |
1c6fdbd8 KO |
1153 | return 0; |
1154 | } | |
1155 | ||
88c07f73 | 1156 | if (!link->count && |
5c16add5 | 1157 | !(u.bi_flags & BCH_INODE_UNLINKED) && |
1c3ff72c | 1158 | (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { |
88c07f73 | 1159 | if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)", |
5c16add5 | 1160 | u.bi_inum, mode_to_type(u.bi_mode)) == |
88c07f73 KO |
1161 | FSCK_ERR_IGNORE) |
1162 | return 0; | |
1163 | ||
5c16add5 | 1164 | ret = reattach_inode(c, lostfound_inode, u.bi_inum); |
88c07f73 KO |
1165 | if (ret) |
1166 | return ret; | |
1167 | ||
1168 | link->count = 1; | |
5c16add5 | 1169 | real_i_nlink = nlink_bias(u.bi_mode) + link->dir_count; |
88c07f73 KO |
1170 | goto set_i_nlink; |
1171 | } | |
1172 | ||
1c6fdbd8 KO |
1173 | if (i_nlink < link->count) { |
1174 | if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)", | |
5c16add5 KO |
1175 | u.bi_inum, i_nlink, link->count, |
1176 | mode_to_type(u.bi_mode)) == FSCK_ERR_IGNORE) | |
1c6fdbd8 KO |
1177 | return 0; |
1178 | goto set_i_nlink; | |
1179 | } | |
1180 | ||
1181 | if (i_nlink != real_i_nlink && | |
1182 | c->sb.clean) { | |
1183 | if (fsck_err(c, "filesystem marked clean, " | |
1184 | "but inode %llu has wrong i_nlink " | |
1185 | "(type %u i_nlink %u, should be %u)", | |
5c16add5 | 1186 | u.bi_inum, mode_to_type(u.bi_mode), |
1c6fdbd8 KO |
1187 | i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) |
1188 | return 0; | |
1189 | goto set_i_nlink; | |
1190 | } | |
1191 | ||
88c07f73 | 1192 | if (i_nlink != real_i_nlink && |
1c3ff72c | 1193 | (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) { |
88c07f73 KO |
1194 | if (fsck_err(c, "inode %llu has wrong i_nlink " |
1195 | "(type %u i_nlink %u, should be %u)", | |
5c16add5 | 1196 | u.bi_inum, mode_to_type(u.bi_mode), |
88c07f73 KO |
1197 | i_nlink, real_i_nlink) == FSCK_ERR_IGNORE) |
1198 | return 0; | |
1199 | goto set_i_nlink; | |
1200 | } | |
1201 | ||
1c6fdbd8 KO |
1202 | if (real_i_nlink && i_nlink != real_i_nlink) |
1203 | bch_verbose(c, "setting inode %llu nlink from %u to %u", | |
5c16add5 | 1204 | u.bi_inum, i_nlink, real_i_nlink); |
1c6fdbd8 KO |
1205 | set_i_nlink: |
1206 | if (i_nlink != real_i_nlink) { | |
1c6fdbd8 KO |
1207 | struct bkey_inode_buf p; |
1208 | ||
5c16add5 | 1209 | bch2_inode_nlink_set(&u, real_i_nlink); |
a3e72262 | 1210 | bch2_inode_pack(c, &p, &u); |
e751c01a | 1211 | p.inode.k.p = iter->pos; |
1c6fdbd8 | 1212 | |
b1fd23df | 1213 | ret = __bch2_trans_do(trans, NULL, NULL, |
b1fd23df KO |
1214 | BTREE_INSERT_NOFAIL| |
1215 | BTREE_INSERT_LAZY_RW, | |
2d594dfb | 1216 | (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0)); |
b1fd23df | 1217 | if (ret) |
5c16add5 | 1218 | bch_err(c, "error in fsck: error %i updating inode", ret); |
1c6fdbd8 KO |
1219 | } |
1220 | fsck_err: | |
1221 | return ret; | |
1222 | } | |
1223 | ||
1224 | noinline_for_stack | |
1225 | static int bch2_gc_walk_inodes(struct bch_fs *c, | |
1226 | struct bch_inode_unpacked *lostfound_inode, | |
1227 | nlink_table *links, | |
1228 | u64 range_start, u64 range_end) | |
1229 | { | |
0564b167 KO |
1230 | struct btree_trans trans; |
1231 | struct btree_iter *iter; | |
1c6fdbd8 KO |
1232 | struct bkey_s_c k; |
1233 | struct nlink *link, zero_links = { 0, 0 }; | |
1234 | struct genradix_iter nlinks_iter; | |
1235 | int ret = 0, ret2 = 0; | |
1236 | u64 nlinks_pos; | |
1237 | ||
20bceecb | 1238 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
0564b167 | 1239 | |
41f8b09e | 1240 | iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, |
1d60b999 | 1241 | POS(0, range_start), 0); |
1c6fdbd8 KO |
1242 | nlinks_iter = genradix_iter_init(links, 0); |
1243 | ||
0564b167 | 1244 | while ((k = bch2_btree_iter_peek(iter)).k && |
2bb748a6 KO |
1245 | !(ret2 = bkey_err(k)) && |
1246 | iter->pos.offset < range_end) { | |
1c6fdbd8 KO |
1247 | peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); |
1248 | ||
1d60b999 | 1249 | if (!link && (!k.k || iter->pos.offset >= range_end)) |
1c6fdbd8 KO |
1250 | break; |
1251 | ||
1252 | nlinks_pos = range_start + nlinks_iter.pos; | |
2bb748a6 KO |
1253 | |
1254 | if (link && nlinks_pos < iter->pos.offset) { | |
1c6fdbd8 | 1255 | /* Should have been caught by dirents pass: */ |
2bb748a6 | 1256 | need_fsck_err_on(link->count, c, |
1c6fdbd8 KO |
1257 | "missing inode %llu (nlink %u)", |
1258 | nlinks_pos, link->count); | |
1259 | genradix_iter_advance(&nlinks_iter, links); | |
1260 | goto peek_nlinks; | |
1261 | } | |
1262 | ||
2bb748a6 | 1263 | if (!link || nlinks_pos > iter->pos.offset) |
1c6fdbd8 KO |
1264 | link = &zero_links; |
1265 | ||
26609b61 | 1266 | if (k.k && k.k->type == KEY_TYPE_inode) { |
5c16add5 KO |
1267 | ret = check_inode_nlink(&trans, lostfound_inode, iter, |
1268 | bkey_s_c_to_inode(k), link); | |
1c6fdbd8 KO |
1269 | BUG_ON(ret == -EINTR); |
1270 | if (ret) | |
1271 | break; | |
1c6fdbd8 KO |
1272 | } else { |
1273 | /* Should have been caught by dirents pass: */ | |
1274 | need_fsck_err_on(link->count, c, | |
1275 | "missing inode %llu (nlink %u)", | |
1276 | nlinks_pos, link->count); | |
1277 | } | |
1278 | ||
1d60b999 | 1279 | if (nlinks_pos == iter->pos.offset) |
1c6fdbd8 KO |
1280 | genradix_iter_advance(&nlinks_iter, links); |
1281 | ||
e0ba3b64 | 1282 | bch2_btree_iter_advance(iter); |
424eb881 | 1283 | bch2_trans_cond_resched(&trans); |
1c6fdbd8 KO |
1284 | } |
1285 | fsck_err: | |
abcecb49 | 1286 | bch2_trans_iter_put(&trans, iter); |
0564b167 KO |
1287 | bch2_trans_exit(&trans); |
1288 | ||
1c6fdbd8 | 1289 | if (ret2) |
619f5bee | 1290 | bch_err(c, "error in fsck: btree error %i while walking inodes", ret2); |
1c6fdbd8 KO |
1291 | |
1292 | return ret ?: ret2; | |
1293 | } | |
1294 | ||
1295 | noinline_for_stack | |
5c16add5 | 1296 | static int check_nlinks(struct bch_fs *c, |
1c6fdbd8 KO |
1297 | struct bch_inode_unpacked *lostfound_inode) |
1298 | { | |
1299 | nlink_table links; | |
1300 | u64 this_iter_range_start, next_iter_range_start = 0; | |
1301 | int ret = 0; | |
1302 | ||
1303 | bch_verbose(c, "checking inode nlinks"); | |
1304 | ||
1305 | genradix_init(&links); | |
1306 | ||
1307 | do { | |
1308 | this_iter_range_start = next_iter_range_start; | |
1309 | next_iter_range_start = U64_MAX; | |
1310 | ||
1311 | ret = bch2_gc_walk_dirents(c, &links, | |
1312 | this_iter_range_start, | |
1313 | &next_iter_range_start); | |
1314 | if (ret) | |
1315 | break; | |
1316 | ||
1317 | ret = bch2_gc_walk_inodes(c, lostfound_inode, &links, | |
1318 | this_iter_range_start, | |
1319 | next_iter_range_start); | |
1320 | if (ret) | |
1321 | break; | |
1322 | ||
1323 | genradix_free(&links); | |
1324 | } while (next_iter_range_start != U64_MAX); | |
1325 | ||
1326 | genradix_free(&links); | |
1327 | ||
1328 | return ret; | |
1329 | } | |
1330 | ||
1c6fdbd8 KO |
1331 | /* |
1332 | * Checks for inconsistencies that shouldn't happen, unless we have a bug. | |
1333 | * Doesn't fix them yet, mainly because they haven't yet been observed: | |
1334 | */ | |
619f5bee | 1335 | int bch2_fsck_full(struct bch_fs *c) |
1c6fdbd8 KO |
1336 | { |
1337 | struct bch_inode_unpacked root_inode, lostfound_inode; | |
1c6fdbd8 | 1338 | |
5c16add5 KO |
1339 | return check_inodes(c, true) ?: |
1340 | check_extents(c) ?: | |
1c6fdbd8 KO |
1341 | check_dirents(c) ?: |
1342 | check_xattrs(c) ?: | |
1343 | check_root(c, &root_inode) ?: | |
1344 | check_lostfound(c, &root_inode, &lostfound_inode) ?: | |
1345 | check_directory_structure(c, &lostfound_inode) ?: | |
5c16add5 | 1346 | check_nlinks(c, &lostfound_inode); |
1c6fdbd8 KO |
1347 | } |
1348 | ||
619f5bee | 1349 | int bch2_fsck_walk_inodes_only(struct bch_fs *c) |
1c6fdbd8 | 1350 | { |
5c16add5 | 1351 | return check_inodes(c, false); |
1c6fdbd8 | 1352 | } |