bcachefs: Simplify hash table checks
[linux-block.git] / fs / bcachefs / fsck.c
CommitLineData
1c6fdbd8
KO
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
07a1006a 4#include "bkey_buf.h"
1c6fdbd8
KO
5#include "btree_update.h"
6#include "dirent.h"
7#include "error.h"
96385742 8#include "fs-common.h"
1c6fdbd8
KO
9#include "fsck.h"
10#include "inode.h"
11#include "keylist.h"
12#include "super.h"
13#include "xattr.h"
14
15#include <linux/dcache.h> /* struct qstr */
16#include <linux/generic-radix-tree.h>
17
18#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
19
424eb881
KO
20static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
21{
22 struct btree_iter *iter;
23 struct bkey_s_c k;
24 u64 sectors = 0;
94f651e2 25 int ret;
424eb881 26
41f8b09e 27 for_each_btree_key(trans, iter, BTREE_ID_extents,
94f651e2 28 POS(inum, 0), 0, k, ret) {
424eb881
KO
29 if (k.k->p.inode != inum)
30 break;
31
32 if (bkey_extent_is_allocation(k.k))
33 sectors += k.k->size;
34 }
35
94f651e2
KO
36 bch2_trans_iter_free(trans, iter);
37
38 return ret ?: sectors;
424eb881
KO
39}
40
b1fd23df
KO
41static int __remove_dirent(struct btree_trans *trans,
42 struct bkey_s_c_dirent dirent)
1c6fdbd8 43{
0f238367 44 struct bch_fs *c = trans->c;
1c6fdbd8
KO
45 struct qstr name;
46 struct bch_inode_unpacked dir_inode;
47 struct bch_hash_info dir_hash_info;
48 u64 dir_inum = dirent.k->p.inode;
49 int ret;
50 char *buf;
51
52 name.len = bch2_dirent_name_bytes(dirent);
b1fd23df
KO
53 buf = bch2_trans_kmalloc(trans, name.len + 1);
54 if (IS_ERR(buf))
55 return PTR_ERR(buf);
1c6fdbd8
KO
56
57 memcpy(buf, dirent.v->d_name, name.len);
58 buf[name.len] = '\0';
59 name.name = buf;
60
fe38b720 61 ret = __bch2_inode_find_by_inum_trans(trans, dir_inum, &dir_inode, 0);
b1fd23df 62 if (ret && ret != -EINTR)
1c6fdbd8 63 bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
b1fd23df
KO
64 if (ret)
65 return ret;
1c6fdbd8
KO
66
67 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
68
b1fd23df
KO
69 ret = bch2_hash_delete(trans, bch2_dirent_hash_desc,
70 &dir_hash_info, dir_inum, &name);
71 if (ret && ret != -EINTR)
1c6fdbd8 72 bch_err(c, "remove_dirent: err %i deleting dirent", ret);
b1fd23df
KO
73 if (ret)
74 return ret;
75
76 return 0;
77}
78
79static int remove_dirent(struct btree_trans *trans,
80 struct bkey_s_c_dirent dirent)
81{
82 return __bch2_trans_do(trans, NULL, NULL,
b1fd23df
KO
83 BTREE_INSERT_NOFAIL|
84 BTREE_INSERT_LAZY_RW,
b1fd23df 85 __remove_dirent(trans, dirent));
1c6fdbd8
KO
86}
87
88static int reattach_inode(struct bch_fs *c,
89 struct bch_inode_unpacked *lostfound_inode,
90 u64 inum)
91{
184b1dc1 92 struct bch_inode_unpacked dir_u, inode_u;
1c6fdbd8
KO
93 char name_buf[20];
94 struct qstr name;
95 int ret;
96
97 snprintf(name_buf, sizeof(name_buf), "%llu", inum);
98 name = (struct qstr) QSTR(name_buf);
99
b1fd23df 100 ret = bch2_trans_do(c, NULL, NULL,
96385742
KO
101 BTREE_INSERT_LAZY_RW,
102 bch2_link_trans(&trans, lostfound_inode->bi_inum,
184b1dc1 103 inum, &dir_u, &inode_u, &name));
96385742
KO
104 if (ret)
105 bch_err(c, "error %i reattaching inode %llu", ret, inum);
1c6fdbd8 106
1c6fdbd8
KO
107 return ret;
108}
109
110struct inode_walker {
111 bool first_this_inode;
112 bool have_inode;
113 u64 cur_inum;
114 struct bch_inode_unpacked inode;
115};
116
117static struct inode_walker inode_walker_init(void)
118{
119 return (struct inode_walker) {
120 .cur_inum = -1,
121 .have_inode = false,
122 };
123}
124
6bd13057
KO
125static int walk_inode(struct btree_trans *trans,
126 struct inode_walker *w, u64 inum)
1c6fdbd8 127{
6bd13057 128 if (inum != w->cur_inum) {
fe38b720
KO
129 int ret = __bch2_inode_find_by_inum_trans(trans, inum,
130 &w->inode, 0);
1c6fdbd8
KO
131
132 if (ret && ret != -ENOENT)
133 return ret;
134
6bd13057
KO
135 w->have_inode = !ret;
136 w->cur_inum = inum;
137 w->first_this_inode = true;
138 } else {
139 w->first_this_inode = false;
1c6fdbd8
KO
140 }
141
142 return 0;
143}
144
7ac2c55e
KO
145static int hash_redo_key(struct btree_trans *trans,
146 const struct bch_hash_desc desc,
147 struct bch_hash_info *hash_info,
148 struct btree_iter *k_iter, struct bkey_s_c k)
1c6fdbd8 149{
b1fd23df 150 struct bkey_i delete;
1c6fdbd8 151 struct bkey_i *tmp;
1c6fdbd8 152
b1fd23df
KO
153 tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
154 if (IS_ERR(tmp))
155 return PTR_ERR(tmp);
1c6fdbd8
KO
156
157 bkey_reassemble(tmp, k);
158
b1fd23df
KO
159 bkey_init(&delete.k);
160 delete.k.p = k_iter->pos;
2d594dfb 161 bch2_trans_update(trans, k_iter, &delete, 0);
1c6fdbd8 162
7ac2c55e 163 return bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode,
eaf79831 164 tmp, 0);
1c6fdbd8
KO
165}
166
424eb881
KO
167static int fsck_hash_delete_at(struct btree_trans *trans,
168 const struct bch_hash_desc desc,
1c6fdbd8 169 struct bch_hash_info *info,
424eb881 170 struct btree_iter *iter)
1c6fdbd8 171{
1c6fdbd8 172 int ret;
1c6fdbd8 173retry:
424eb881
KO
174 ret = bch2_hash_delete_at(trans, desc, info, iter) ?:
175 bch2_trans_commit(trans, NULL, NULL,
134915f3
KO
176 BTREE_INSERT_NOFAIL|
177 BTREE_INSERT_LAZY_RW);
424eb881
KO
178 if (ret == -EINTR) {
179 ret = bch2_btree_iter_traverse(iter);
180 if (!ret)
181 goto retry;
182 }
1c6fdbd8 183
1c6fdbd8
KO
184 return ret;
185}
186
7ac2c55e
KO
187static int hash_check_key(struct btree_trans *trans,
188 const struct bch_hash_desc desc,
189 struct bch_hash_info *hash_info,
190 struct btree_iter *k_iter, struct bkey_s_c hash_k)
d69f41d6 191{
424eb881 192 struct bch_fs *c = trans->c;
7ac2c55e 193 struct btree_iter *iter = NULL;
d69f41d6 194 char buf[200];
7ac2c55e
KO
195 struct bkey_s_c k;
196 u64 hash;
d69f41d6
KO
197 int ret = 0;
198
7ac2c55e
KO
199 if (hash_k.k->type != desc.key_type)
200 return 0;
201
202 hash = desc.hash_bkey(hash_info, hash_k);
203
204 if (likely(hash == hash_k.k->p.offset))
d69f41d6
KO
205 return 0;
206
7ac2c55e
KO
207 if (hash_k.k->p.offset < hash)
208 goto bad_hash;
d69f41d6 209
7ac2c55e
KO
210 for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash),
211 BTREE_ITER_SLOTS, k, ret) {
212 if (!bkey_cmp(k.k->p, hash_k.k->p))
d69f41d6
KO
213 break;
214
7ac2c55e
KO
215 if (fsck_err_on(k.k->type == desc.key_type &&
216 !desc.cmp_bkey(k, hash_k), c,
d69f41d6 217 "duplicate hash table keys:\n%s",
319f9ac3 218 (bch2_bkey_val_to_text(&PBUF(buf), c,
7ac2c55e
KO
219 hash_k), buf))) {
220 ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter);
d69f41d6
KO
221 if (ret)
222 return ret;
223 ret = 1;
224 break;
225 }
d69f41d6 226
7ac2c55e
KO
227 if (bkey_deleted(k.k)) {
228 bch2_trans_iter_free(trans, iter);
229 goto bad_hash;
1c6fdbd8 230 }
1c6fdbd8 231
7ac2c55e
KO
232 }
233 bch2_trans_iter_free(trans, iter);
1c6fdbd8 234 return ret;
7ac2c55e
KO
235bad_hash:
236 if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
237 "hashed to %llu should be at %llu\n%s",
238 desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset,
239 hash, iter->pos.offset,
240 (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE)
741daa5b
KO
241 return 0;
242
7ac2c55e
KO
243 ret = __bch2_trans_do(trans, NULL, NULL,
244 BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
245 hash_redo_key(trans, desc, hash_info, k_iter, hash_k));
246 if (ret) {
247 bch_err(c, "hash_redo_key err %i", ret);
248 return ret;
741daa5b 249 }
7ac2c55e 250 return -EINTR;
741daa5b 251fsck_err:
741daa5b 252 return ret;
741daa5b
KO
253}
254
5c16add5
KO
255static int check_inode(struct btree_trans *trans,
256 struct btree_iter *iter,
257 struct bkey_s_c_inode inode)
258{
259 struct bch_fs *c = trans->c;
260 struct bch_inode_unpacked u;
261 bool do_update = false;
262 int ret = 0;
263
264 ret = bch2_inode_unpack(inode, &u);
265
266 if (bch2_fs_inconsistent_on(ret, c,
267 "error unpacking inode %llu in fsck",
268 inode.k->p.inode))
269 return ret;
270
271 if (u.bi_flags & BCH_INODE_UNLINKED &&
272 (!c->sb.clean ||
273 fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
274 u.bi_inum))) {
275 bch_verbose(c, "deleting inode %llu", u.bi_inum);
276
277 bch2_trans_unlock(trans);
278 bch2_fs_lazy_rw(c);
279
280 ret = bch2_inode_rm(c, u.bi_inum, false);
281 if (ret)
282 bch_err(c, "error in fsck: error %i while deleting inode", ret);
283 return ret;
284 }
285
286 if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
287 (!c->sb.clean ||
288 fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
289 u.bi_inum))) {
290 bch_verbose(c, "truncating inode %llu", u.bi_inum);
291
292 bch2_trans_unlock(trans);
293 bch2_fs_lazy_rw(c);
294
295 /*
296 * XXX: need to truncate partial blocks too here - or ideally
297 * just switch units to bytes and that issue goes away
298 */
299 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
300 POS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9),
301 POS(u.bi_inum, U64_MAX),
302 NULL);
303 if (ret) {
304 bch_err(c, "error in fsck: error %i truncating inode", ret);
305 return ret;
306 }
307
308 /*
309 * We truncated without our normal sector accounting hook, just
310 * make sure we recalculate it:
311 */
312 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
313
314 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
315 do_update = true;
316 }
317
318 if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
319 (!c->sb.clean ||
320 fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
321 u.bi_inum))) {
322 s64 sectors;
323
324 bch_verbose(c, "recounting sectors for inode %llu",
325 u.bi_inum);
326
327 sectors = bch2_count_inode_sectors(trans, u.bi_inum);
328 if (sectors < 0) {
329 bch_err(c, "error in fsck: error %i recounting inode sectors",
330 (int) sectors);
331 return sectors;
332 }
333
334 u.bi_sectors = sectors;
335 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
336 do_update = true;
337 }
338
339 if (!S_ISDIR(u.bi_mode) &&
340 u.bi_nlink &&
341 !(u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) &&
342 (fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c,
343 "inode missing BCH_INODE_BACKPTR_UNTRUSTED flags") ||
344 c->opts.version_upgrade)) {
345 u.bi_flags |= BCH_INODE_BACKPTR_UNTRUSTED;
346 do_update = true;
347 }
348
349 if (do_update) {
350 struct bkey_inode_buf p;
351
352 bch2_inode_pack(c, &p, &u);
353 p.inode.k.p = iter->pos;
354
355 ret = __bch2_trans_do(trans, NULL, NULL,
356 BTREE_INSERT_NOFAIL|
357 BTREE_INSERT_LAZY_RW,
358 (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0));
359 if (ret)
360 bch_err(c, "error in fsck: error %i "
361 "updating inode", ret);
362 }
363fsck_err:
364 return ret;
365}
366
367noinline_for_stack
368static int check_inodes(struct bch_fs *c, bool full)
369{
370 struct btree_trans trans;
371 struct btree_iter *iter;
372 struct bkey_s_c k;
373 struct bkey_s_c_inode inode;
374 int ret;
375
376 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
377
378 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, 0, k, ret) {
379 if (k.k->type != KEY_TYPE_inode)
380 continue;
381
382 inode = bkey_s_c_to_inode(k);
383
384 if (full ||
385 (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
386 BCH_INODE_I_SECTORS_DIRTY|
387 BCH_INODE_UNLINKED))) {
388 ret = check_inode(&trans, iter, inode);
389 if (ret)
390 break;
391 }
392 }
393 bch2_trans_iter_put(&trans, iter);
394
395 BUG_ON(ret == -EINTR);
396
397 return bch2_trans_exit(&trans) ?: ret;
398}
399
abcecb49 400static int fix_overlapping_extent(struct btree_trans *trans,
e3e464ac
KO
401 struct bkey_s_c k, struct bpos cut_at)
402{
abcecb49 403 struct btree_iter *iter;
e3e464ac
KO
404 struct bkey_i *u;
405 int ret;
406
407 u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
408 ret = PTR_ERR_OR_ZERO(u);
409 if (ret)
410 return ret;
411
412 bkey_reassemble(u, k);
413 bch2_cut_front(cut_at, u);
414
e3e464ac
KO
415
416 /*
abcecb49
KO
417 * We don't want to go through the extent_handle_overwrites path:
418 *
419 * XXX: this is going to screw up disk accounting, extent triggers
420 * assume things about extent overwrites - we should be running the
421 * triggers manually here
e3e464ac 422 */
abcecb49
KO
423 iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p,
424 BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
e3e464ac 425
abcecb49
KO
426 BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
427 bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN);
428 bch2_trans_iter_put(trans, iter);
429
430 return bch2_trans_commit(trans, NULL, NULL,
431 BTREE_INSERT_NOFAIL|
432 BTREE_INSERT_LAZY_RW);
e3e464ac
KO
433}
434
1c6fdbd8
KO
435/*
436 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
437 * that i_size an i_sectors are consistent
438 */
439noinline_for_stack
440static int check_extents(struct bch_fs *c)
441{
442 struct inode_walker w = inode_walker_init();
424eb881
KO
443 struct btree_trans trans;
444 struct btree_iter *iter;
1c6fdbd8 445 struct bkey_s_c k;
07a1006a 446 struct bkey_buf prev;
abcecb49 447 u64 i_sectors = 0;
1c6fdbd8
KO
448 int ret = 0;
449
07a1006a 450 bch2_bkey_buf_init(&prev);
f7005e01 451 prev.k->k = KEY(0, 0, 0);
20bceecb 452 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 453
1c6fdbd8
KO
454 bch_verbose(c, "checking extents");
455
41f8b09e 456 iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
0728eed7
KO
457 POS(BCACHEFS_ROOT_INO, 0),
458 BTREE_ITER_INTENT);
6bd13057 459retry:
abcecb49
KO
460 while ((k = bch2_btree_iter_peek(iter)).k &&
461 !(ret = bkey_err(k))) {
462 if (w.have_inode &&
463 w.cur_inum != k.k->p.inode &&
464 !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
465 fsck_err_on(w.inode.bi_sectors != i_sectors, c,
466 "inode %llu has incorrect i_sectors: got %llu, should be %llu",
467 w.inode.bi_inum,
468 w.inode.bi_sectors, i_sectors)) {
469 struct btree_iter *inode_iter =
470 bch2_trans_get_iter(&trans, BTREE_ID_inodes,
471 POS(0, w.cur_inum),
472 BTREE_ITER_INTENT);
473
474 w.inode.bi_sectors = i_sectors;
475
476 ret = __bch2_trans_do(&trans, NULL, NULL,
477 BTREE_INSERT_NOFAIL|
478 BTREE_INSERT_LAZY_RW,
479 bch2_inode_write(&trans, inode_iter, &w.inode));
480 bch2_trans_iter_put(&trans, inode_iter);
481 if (ret)
482 break;
483 }
484
485 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
f7005e01
KO
486 char buf1[200];
487 char buf2[200];
e3e464ac 488
f7005e01
KO
489 bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
490 bch2_bkey_val_to_text(&PBUF(buf2), c, k);
e3e464ac 491
abcecb49
KO
492 if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2))
493 return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR;
e3e464ac 494 }
e3e464ac 495
6bd13057 496 ret = walk_inode(&trans, &w, k.k->p.inode);
1c6fdbd8
KO
497 if (ret)
498 break;
499
abcecb49
KO
500 if (w.first_this_inode)
501 i_sectors = 0;
502
1c6fdbd8 503 if (fsck_err_on(!w.have_inode, c,
abcecb49
KO
504 "extent type %u for missing inode %llu",
505 k.k->type, k.k->p.inode) ||
1c6fdbd8 506 fsck_err_on(w.have_inode &&
abcecb49
KO
507 !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
508 "extent type %u for non regular file, inode %llu mode %o",
509 k.k->type, k.k->p.inode, w.inode.bi_mode)) {
510 bch2_fs_lazy_rw(c);
511 return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
512 POS(k.k->p.inode, 0),
513 POS(k.k->p.inode, U64_MAX),
514 NULL) ?: -EINTR;
1c6fdbd8
KO
515 }
516
abcecb49
KO
517 if (fsck_err_on(w.have_inode &&
518 !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
519 k.k->type != KEY_TYPE_reservation &&
520 k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
521 "extent type %u offset %llu past end of inode %llu, i_size %llu",
522 k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
523 bch2_fs_lazy_rw(c);
524 return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents,
525 POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c)) >> 9),
526 POS(k.k->p.inode, U64_MAX),
527 NULL) ?: -EINTR;
1c6fdbd8
KO
528 }
529
abcecb49
KO
530 if (bkey_extent_is_allocation(k.k))
531 i_sectors += k.k->size;
532 bch2_bkey_buf_reassemble(&prev, c, k);
1c6fdbd8 533
e0ba3b64 534 bch2_btree_iter_advance(iter);
1c6fdbd8 535 }
1c6fdbd8 536fsck_err:
6bd13057
KO
537 if (ret == -EINTR)
538 goto retry;
abcecb49 539 bch2_trans_iter_put(&trans, iter);
07a1006a 540 bch2_bkey_buf_exit(&prev, c);
424eb881 541 return bch2_trans_exit(&trans) ?: ret;
1c6fdbd8
KO
542}
543
544/*
545 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
546 * validate d_type
547 */
548noinline_for_stack
549static int check_dirents(struct bch_fs *c)
550{
551 struct inode_walker w = inode_walker_init();
7ac2c55e 552 struct bch_hash_info hash_info;
d69f41d6
KO
553 struct btree_trans trans;
554 struct btree_iter *iter;
1c6fdbd8 555 struct bkey_s_c k;
1c6fdbd8
KO
556 char buf[200];
557 int ret = 0;
558
559 bch_verbose(c, "checking dirents");
560
20bceecb 561 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
d69f41d6 562
41f8b09e 563 iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
6bd13057
KO
564 POS(BCACHEFS_ROOT_INO, 0), 0);
565retry:
abcecb49
KO
566 while ((k = bch2_btree_iter_peek(iter)).k &&
567 !(ret = bkey_err(k))) {
1c6fdbd8
KO
568 struct bkey_s_c_dirent d;
569 struct bch_inode_unpacked target;
570 bool have_target;
571 u64 d_inum;
572
6bd13057 573 ret = walk_inode(&trans, &w, k.k->p.inode);
1c6fdbd8
KO
574 if (ret)
575 break;
576
577 if (fsck_err_on(!w.have_inode, c,
578 "dirent in nonexisting directory:\n%s",
319f9ac3 579 (bch2_bkey_val_to_text(&PBUF(buf), c,
319f9ac3 580 k), buf)) ||
1c6fdbd8
KO
581 fsck_err_on(!S_ISDIR(w.inode.bi_mode), c,
582 "dirent in non directory inode type %u:\n%s",
583 mode_to_type(w.inode.bi_mode),
319f9ac3 584 (bch2_bkey_val_to_text(&PBUF(buf), c,
319f9ac3 585 k), buf))) {
0564b167 586 ret = bch2_btree_delete_at(&trans, iter, 0);
1c6fdbd8
KO
587 if (ret)
588 goto err;
7ac2c55e 589 goto next;
1c6fdbd8
KO
590 }
591
7ac2c55e
KO
592 if (!w.have_inode)
593 goto next;
1c6fdbd8 594
7ac2c55e
KO
595 if (w.first_this_inode)
596 hash_info = bch2_hash_info_init(c, &w.inode);
597
598 ret = hash_check_key(&trans, bch2_dirent_hash_desc,
599 &hash_info, iter, k);
1c6fdbd8
KO
600 if (ret > 0) {
601 ret = 0;
7ac2c55e 602 goto next;
1c6fdbd8 603 }
741daa5b
KO
604 if (ret)
605 goto fsck_err;
1c6fdbd8 606
26609b61 607 if (k.k->type != KEY_TYPE_dirent)
7ac2c55e 608 goto next;
1c6fdbd8
KO
609
610 d = bkey_s_c_to_dirent(k);
611 d_inum = le64_to_cpu(d.v->d_inum);
612
fe38b720 613 ret = __bch2_inode_find_by_inum_trans(&trans, d_inum, &target, 0);
1c6fdbd8
KO
614 if (ret && ret != -ENOENT)
615 break;
616
617 have_target = !ret;
618 ret = 0;
619
620 if (fsck_err_on(!have_target, c,
621 "dirent points to missing inode:\n%s",
319f9ac3 622 (bch2_bkey_val_to_text(&PBUF(buf), c,
319f9ac3 623 k), buf))) {
0f238367 624 ret = remove_dirent(&trans, d);
1c6fdbd8
KO
625 if (ret)
626 goto err;
7ac2c55e 627 goto next;
1c6fdbd8
KO
628 }
629
7ac2c55e
KO
630 if (!have_target)
631 goto next;
632
ab2a29cc
KO
633 if (!target.bi_nlink &&
634 !(target.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) &&
635 (target.bi_dir != k.k->p.inode ||
636 target.bi_dir_offset != k.k->p.offset) &&
637 (fsck_err_on(c->sb.version >= bcachefs_metadata_version_inode_backpointers, c,
638 "inode %llu has wrong backpointer:\n"
639 "got %llu:%llu\n"
640 "should be %llu:%llu",
641 d_inum,
642 target.bi_dir,
643 target.bi_dir_offset,
644 k.k->p.inode,
645 k.k->p.offset) ||
646 c->opts.version_upgrade)) {
647 struct bkey_inode_buf p;
648
649 target.bi_dir = k.k->p.inode;
650 target.bi_dir_offset = k.k->p.offset;
651 bch2_trans_unlock(&trans);
652
653 bch2_inode_pack(c, &p, &target);
654
655 ret = bch2_btree_insert(c, BTREE_ID_inodes,
656 &p.inode.k_i, NULL, NULL,
657 BTREE_INSERT_NOFAIL|
658 BTREE_INSERT_LAZY_RW);
659 if (ret) {
660 bch_err(c, "error in fsck: error %i updating inode", ret);
661 goto err;
662 }
663 continue;
664 }
665
7ac2c55e 666 if (fsck_err_on(d.v->d_type !=
1c6fdbd8
KO
667 mode_to_type(target.bi_mode), c,
668 "incorrect d_type: should be %u:\n%s",
669 mode_to_type(target.bi_mode),
319f9ac3 670 (bch2_bkey_val_to_text(&PBUF(buf), c,
319f9ac3 671 k), buf))) {
1c6fdbd8
KO
672 struct bkey_i_dirent *n;
673
674 n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
675 if (!n) {
676 ret = -ENOMEM;
677 goto err;
678 }
679
680 bkey_reassemble(&n->k_i, d.s_c);
681 n->v.d_type = mode_to_type(target.bi_mode);
682
b1fd23df 683 ret = __bch2_trans_do(&trans, NULL, NULL,
b1fd23df
KO
684 BTREE_INSERT_NOFAIL|
685 BTREE_INSERT_LAZY_RW,
2d594dfb 686 (bch2_trans_update(&trans, iter, &n->k_i, 0), 0));
1c6fdbd8
KO
687 kfree(n);
688 if (ret)
689 goto err;
690
691 }
7ac2c55e 692next:
e0ba3b64 693 bch2_btree_iter_advance(iter);
1c6fdbd8
KO
694 }
695err:
696fsck_err:
6bd13057
KO
697 if (ret == -EINTR)
698 goto retry;
699
abcecb49 700 bch2_trans_iter_put(&trans, iter);
d69f41d6 701 return bch2_trans_exit(&trans) ?: ret;
1c6fdbd8
KO
702}
703
704/*
705 * Walk xattrs: verify that they all have a corresponding inode
706 */
707noinline_for_stack
708static int check_xattrs(struct bch_fs *c)
709{
710 struct inode_walker w = inode_walker_init();
7ac2c55e 711 struct bch_hash_info hash_info;
d69f41d6
KO
712 struct btree_trans trans;
713 struct btree_iter *iter;
1c6fdbd8
KO
714 struct bkey_s_c k;
715 int ret = 0;
716
717 bch_verbose(c, "checking xattrs");
718
20bceecb 719 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
d69f41d6 720
41f8b09e 721 iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs,
d69f41d6 722 POS(BCACHEFS_ROOT_INO, 0), 0);
6bd13057 723retry:
abcecb49
KO
724 while ((k = bch2_btree_iter_peek(iter)).k &&
725 !(ret = bkey_err(k))) {
6bd13057 726 ret = walk_inode(&trans, &w, k.k->p.inode);
1c6fdbd8
KO
727 if (ret)
728 break;
729
730 if (fsck_err_on(!w.have_inode, c,
731 "xattr for missing inode %llu",
732 k.k->p.inode)) {
0564b167 733 ret = bch2_btree_delete_at(&trans, iter, 0);
1c6fdbd8 734 if (ret)
abcecb49 735 break;
1c6fdbd8
KO
736 continue;
737 }
738
739 if (w.first_this_inode && w.have_inode)
7ac2c55e 740 hash_info = bch2_hash_info_init(c, &w.inode);
1c6fdbd8 741
424eb881 742 ret = hash_check_key(&trans, bch2_xattr_hash_desc,
7ac2c55e 743 &hash_info, iter, k);
1c6fdbd8 744 if (ret)
abcecb49
KO
745 break;
746
e0ba3b64 747 bch2_btree_iter_advance(iter);
1c6fdbd8 748 }
1c6fdbd8 749fsck_err:
6bd13057
KO
750 if (ret == -EINTR)
751 goto retry;
abcecb49 752
abcecb49 753 bch2_trans_iter_put(&trans, iter);
d69f41d6 754 return bch2_trans_exit(&trans) ?: ret;
1c6fdbd8
KO
755}
756
757/* Get root directory, create if it doesn't exist: */
758static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode)
759{
760 struct bkey_inode_buf packed;
761 int ret;
762
763 bch_verbose(c, "checking root directory");
764
fe38b720
KO
765 ret = bch2_trans_do(c, NULL, NULL, 0,
766 __bch2_inode_find_by_inum_trans(&trans, BCACHEFS_ROOT_INO,
767 root_inode, 0));
1c6fdbd8
KO
768 if (ret && ret != -ENOENT)
769 return ret;
770
771 if (fsck_err_on(ret, c, "root directory missing"))
772 goto create_root;
773
774 if (fsck_err_on(!S_ISDIR(root_inode->bi_mode), c,
775 "root inode not a directory"))
776 goto create_root;
777
778 return 0;
779fsck_err:
780 return ret;
781create_root:
96385742 782 bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755,
1c6fdbd8
KO
783 0, NULL);
784 root_inode->bi_inum = BCACHEFS_ROOT_INO;
785
a3e72262 786 bch2_inode_pack(c, &packed, root_inode);
1c6fdbd8 787
41f8b09e 788 return bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
9d455b24
KO
789 NULL, NULL,
790 BTREE_INSERT_NOFAIL|
791 BTREE_INSERT_LAZY_RW);
1c6fdbd8
KO
792}
793
794/* Get lost+found, create if it doesn't exist: */
795static int check_lostfound(struct bch_fs *c,
796 struct bch_inode_unpacked *root_inode,
797 struct bch_inode_unpacked *lostfound_inode)
798{
799 struct qstr lostfound = QSTR("lost+found");
800 struct bch_hash_info root_hash_info =
801 bch2_hash_info_init(c, root_inode);
1c6fdbd8
KO
802 u64 inum;
803 int ret;
804
805 bch_verbose(c, "checking lost+found");
806
807 inum = bch2_dirent_lookup(c, BCACHEFS_ROOT_INO, &root_hash_info,
808 &lostfound);
809 if (!inum) {
810 bch_notice(c, "creating lost+found");
811 goto create_lostfound;
812 }
813
fe38b720
KO
814 ret = bch2_trans_do(c, NULL, NULL, 0,
815 __bch2_inode_find_by_inum_trans(&trans, inum, lostfound_inode, 0));
1c6fdbd8
KO
816 if (ret && ret != -ENOENT)
817 return ret;
818
819 if (fsck_err_on(ret, c, "lost+found missing"))
820 goto create_lostfound;
821
822 if (fsck_err_on(!S_ISDIR(lostfound_inode->bi_mode), c,
823 "lost+found inode not a directory"))
824 goto create_lostfound;
825
826 return 0;
827fsck_err:
828 return ret;
829create_lostfound:
96385742
KO
830 bch2_inode_init_early(c, lostfound_inode);
831
b1fd23df 832 ret = bch2_trans_do(c, NULL, NULL,
96385742
KO
833 BTREE_INSERT_NOFAIL|
834 BTREE_INSERT_LAZY_RW,
835 bch2_create_trans(&trans,
836 BCACHEFS_ROOT_INO, root_inode,
837 lostfound_inode, &lostfound,
b627c7d8 838 0, 0, S_IFDIR|0700, 0, NULL, NULL));
1c6fdbd8 839 if (ret)
96385742 840 bch_err(c, "error creating lost+found: %i", ret);
1c6fdbd8 841
96385742 842 return ret;
1c6fdbd8
KO
843}
844
fe458476 845typedef GENRADIX(unsigned long) inode_bitmap;
1c6fdbd8 846
fe458476 847static inline bool inode_bitmap_test(inode_bitmap *b, size_t nr)
1c6fdbd8 848{
fe458476
KO
849 unsigned long *w = genradix_ptr(b, nr / BITS_PER_LONG);
850 return w ? test_bit(nr & (BITS_PER_LONG - 1), w) : false;
1c6fdbd8
KO
851}
852
fe458476 853static inline int inode_bitmap_set(inode_bitmap *b, size_t nr)
1c6fdbd8 854{
fe458476 855 unsigned long *w = genradix_ptr_alloc(b, nr / BITS_PER_LONG, GFP_KERNEL);
1c6fdbd8 856
fe458476
KO
857 if (!w)
858 return -ENOMEM;
1c6fdbd8 859
fe458476 860 *w |= 1UL << (nr & (BITS_PER_LONG - 1));
1c6fdbd8
KO
861 return 0;
862}
863
864struct pathbuf {
865 size_t nr;
866 size_t size;
867
868 struct pathbuf_entry {
869 u64 inum;
870 u64 offset;
871 } *entries;
872};
873
874static int path_down(struct pathbuf *p, u64 inum)
875{
876 if (p->nr == p->size) {
877 size_t new_size = max_t(size_t, 256UL, p->size * 2);
878 void *n = krealloc(p->entries,
879 new_size * sizeof(p->entries[0]),
880 GFP_KERNEL);
881 if (!n)
882 return -ENOMEM;
883
884 p->entries = n;
885 p->size = new_size;
886 };
887
888 p->entries[p->nr++] = (struct pathbuf_entry) {
889 .inum = inum,
890 .offset = 0,
891 };
892 return 0;
893}
894
895noinline_for_stack
896static int check_directory_structure(struct bch_fs *c,
897 struct bch_inode_unpacked *lostfound_inode)
898{
fe458476 899 inode_bitmap dirs_done;
1c6fdbd8
KO
900 struct pathbuf path = { 0, 0, NULL };
901 struct pathbuf_entry *e;
424eb881
KO
902 struct btree_trans trans;
903 struct btree_iter *iter;
1c6fdbd8
KO
904 struct bkey_s_c k;
905 struct bkey_s_c_dirent dirent;
906 bool had_unreachable;
907 u64 d_inum;
908 int ret = 0;
909
20bceecb 910 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 911
1c6fdbd8
KO
912 bch_verbose(c, "checking directory structure");
913
914 /* DFS: */
915restart_dfs:
fe458476 916 genradix_init(&dirs_done);
1c6fdbd8
KO
917 had_unreachable = false;
918
919 ret = inode_bitmap_set(&dirs_done, BCACHEFS_ROOT_INO);
920 if (ret) {
921 bch_err(c, "memory allocation failure in inode_bitmap_set()");
922 goto err;
923 }
924
925 ret = path_down(&path, BCACHEFS_ROOT_INO);
6bd13057
KO
926 if (ret)
927 goto err;
1c6fdbd8
KO
928
929 while (path.nr) {
930next:
931 e = &path.entries[path.nr - 1];
932
933 if (e->offset == U64_MAX)
934 goto up;
935
41f8b09e 936 for_each_btree_key(&trans, iter, BTREE_ID_dirents,
94f651e2 937 POS(e->inum, e->offset + 1), 0, k, ret) {
1c6fdbd8
KO
938 if (k.k->p.inode != e->inum)
939 break;
940
941 e->offset = k.k->p.offset;
942
26609b61 943 if (k.k->type != KEY_TYPE_dirent)
1c6fdbd8
KO
944 continue;
945
946 dirent = bkey_s_c_to_dirent(k);
947
948 if (dirent.v->d_type != DT_DIR)
949 continue;
950
951 d_inum = le64_to_cpu(dirent.v->d_inum);
952
953 if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
954 "directory %llu has multiple hardlinks",
955 d_inum)) {
0f238367 956 ret = remove_dirent(&trans, dirent);
1c6fdbd8
KO
957 if (ret)
958 goto err;
959 continue;
960 }
961
962 ret = inode_bitmap_set(&dirs_done, d_inum);
963 if (ret) {
964 bch_err(c, "memory allocation failure in inode_bitmap_set()");
965 goto err;
966 }
967
968 ret = path_down(&path, d_inum);
969 if (ret) {
970 goto err;
971 }
972
424eb881
KO
973 ret = bch2_trans_iter_free(&trans, iter);
974 if (ret) {
975 bch_err(c, "btree error %i in fsck", ret);
976 goto err;
977 }
1c6fdbd8
KO
978 goto next;
979 }
94f651e2 980 ret = bch2_trans_iter_free(&trans, iter) ?: ret;
1c6fdbd8
KO
981 if (ret) {
982 bch_err(c, "btree error %i in fsck", ret);
983 goto err;
984 }
985up:
986 path.nr--;
987 }
988
41f8b09e 989 iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS_MIN, 0);
6bd13057 990retry:
ef9f95ba 991 for_each_btree_key_continue(iter, 0, k, ret) {
26609b61 992 if (k.k->type != KEY_TYPE_inode)
1c6fdbd8
KO
993 continue;
994
995 if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode)))
996 continue;
997
6bd13057
KO
998 ret = bch2_empty_dir_trans(&trans, k.k->p.inode);
999 if (ret == -EINTR)
1000 goto retry;
1001 if (!ret)
1c6fdbd8
KO
1002 continue;
1003
39fb2983 1004 if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.offset), c,
1c6fdbd8 1005 "unreachable directory found (inum %llu)",
39fb2983 1006 k.k->p.offset)) {
58fbf808 1007 bch2_trans_unlock(&trans);
1c6fdbd8 1008
39fb2983 1009 ret = reattach_inode(c, lostfound_inode, k.k->p.offset);
1c6fdbd8
KO
1010 if (ret) {
1011 goto err;
1012 }
1013
1014 had_unreachable = true;
1015 }
1016 }
ef9f95ba 1017 bch2_trans_iter_free(&trans, iter);
1c6fdbd8
KO
1018 if (ret)
1019 goto err;
1020
1021 if (had_unreachable) {
1022 bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
fe458476 1023 genradix_free(&dirs_done);
1c6fdbd8
KO
1024 kfree(path.entries);
1025 memset(&dirs_done, 0, sizeof(dirs_done));
1026 memset(&path, 0, sizeof(path));
1027 goto restart_dfs;
1028 }
1c6fdbd8
KO
1029err:
1030fsck_err:
424eb881 1031 ret = bch2_trans_exit(&trans) ?: ret;
fe458476 1032 genradix_free(&dirs_done);
6bd13057
KO
1033 kfree(path.entries);
1034 return ret;
1c6fdbd8
KO
1035}
1036
1037struct nlink {
1038 u32 count;
1039 u32 dir_count;
1040};
1041
1042typedef GENRADIX(struct nlink) nlink_table;
1043
1044static void inc_link(struct bch_fs *c, nlink_table *links,
1045 u64 range_start, u64 *range_end,
1046 u64 inum, bool dir)
1047{
1048 struct nlink *link;
1049
1050 if (inum < range_start || inum >= *range_end)
1051 return;
1052
2bb748a6
KO
1053 if (inum - range_start >= SIZE_MAX / sizeof(struct nlink)) {
1054 *range_end = inum;
1055 return;
1056 }
1057
1c6fdbd8
KO
1058 link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
1059 if (!link) {
619f5bee 1060 bch_verbose(c, "allocation failed during fsck - will need another pass");
1c6fdbd8
KO
1061 *range_end = inum;
1062 return;
1063 }
1064
1065 if (dir)
1066 link->dir_count++;
1067 else
1068 link->count++;
1069}
1070
1071noinline_for_stack
1072static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links,
1073 u64 range_start, u64 *range_end)
1074{
424eb881
KO
1075 struct btree_trans trans;
1076 struct btree_iter *iter;
1c6fdbd8
KO
1077 struct bkey_s_c k;
1078 struct bkey_s_c_dirent d;
1079 u64 d_inum;
1080 int ret;
1081
20bceecb 1082 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 1083
1c6fdbd8
KO
1084 inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false);
1085
41f8b09e 1086 for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN, 0, k, ret) {
1c6fdbd8 1087 switch (k.k->type) {
26609b61 1088 case KEY_TYPE_dirent:
1c6fdbd8
KO
1089 d = bkey_s_c_to_dirent(k);
1090 d_inum = le64_to_cpu(d.v->d_inum);
1091
1092 if (d.v->d_type == DT_DIR)
1093 inc_link(c, links, range_start, range_end,
1094 d.k->p.inode, true);
1095
1096 inc_link(c, links, range_start, range_end,
1097 d_inum, false);
1098
1099 break;
1100 }
1101
424eb881 1102 bch2_trans_cond_resched(&trans);
1c6fdbd8 1103 }
abcecb49
KO
1104 bch2_trans_iter_put(&trans, iter);
1105
94f651e2 1106 ret = bch2_trans_exit(&trans) ?: ret;
1c6fdbd8 1107 if (ret)
619f5bee 1108 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1c6fdbd8
KO
1109
1110 return ret;
1111}
1112
5c16add5 1113static int check_inode_nlink(struct btree_trans *trans,
1c6fdbd8 1114 struct bch_inode_unpacked *lostfound_inode,
5c16add5
KO
1115 struct btree_iter *iter,
1116 struct bkey_s_c_inode inode,
1117 struct nlink *link)
1c6fdbd8 1118{
5c16add5
KO
1119 struct bch_fs *c = trans->c;
1120 struct bch_inode_unpacked u;
1121 u32 i_nlink, real_i_nlink;
1c6fdbd8
KO
1122 int ret = 0;
1123
5c16add5
KO
1124 ret = bch2_inode_unpack(inode, &u);
1125 /* Should never happen, checked by bch2_inode_invalid: */
1126 if (bch2_fs_inconsistent_on(ret, c,
1127 "error unpacking inode %llu in fsck",
1128 inode.k->p.inode))
1129 return ret;
1130
1131 i_nlink = bch2_inode_nlink_get(&u);
1132 real_i_nlink = link->count * nlink_bias(u.bi_mode) + link->dir_count;
1133
1c6fdbd8
KO
1134 /*
1135 * These should have been caught/fixed by earlier passes, we don't
1136 * repair them here:
1137 */
5c16add5 1138 if (S_ISDIR(u.bi_mode) && link->count > 1) {
1c6fdbd8 1139 need_fsck_err(c, "directory %llu with multiple hardlinks: %u",
5c16add5 1140 u.bi_inum, link->count);
1c6fdbd8
KO
1141 return 0;
1142 }
1143
5c16add5 1144 if (S_ISDIR(u.bi_mode) && !link->count) {
1c6fdbd8 1145 need_fsck_err(c, "unreachable directory found (inum %llu)",
5c16add5 1146 u.bi_inum);
1c6fdbd8
KO
1147 return 0;
1148 }
1149
5c16add5 1150 if (!S_ISDIR(u.bi_mode) && link->dir_count) {
9ef846a7 1151 need_fsck_err(c, "non directory with subdirectories (inum %llu)",
5c16add5 1152 u.bi_inum);
1c6fdbd8
KO
1153 return 0;
1154 }
1155
88c07f73 1156 if (!link->count &&
5c16add5 1157 !(u.bi_flags & BCH_INODE_UNLINKED) &&
1c3ff72c 1158 (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) {
88c07f73 1159 if (fsck_err(c, "unreachable inode %llu not marked as unlinked (type %u)",
5c16add5 1160 u.bi_inum, mode_to_type(u.bi_mode)) ==
88c07f73
KO
1161 FSCK_ERR_IGNORE)
1162 return 0;
1163
5c16add5 1164 ret = reattach_inode(c, lostfound_inode, u.bi_inum);
88c07f73
KO
1165 if (ret)
1166 return ret;
1167
1168 link->count = 1;
5c16add5 1169 real_i_nlink = nlink_bias(u.bi_mode) + link->dir_count;
88c07f73
KO
1170 goto set_i_nlink;
1171 }
1172
1c6fdbd8
KO
1173 if (i_nlink < link->count) {
1174 if (fsck_err(c, "inode %llu i_link too small (%u < %u, type %i)",
5c16add5
KO
1175 u.bi_inum, i_nlink, link->count,
1176 mode_to_type(u.bi_mode)) == FSCK_ERR_IGNORE)
1c6fdbd8
KO
1177 return 0;
1178 goto set_i_nlink;
1179 }
1180
1181 if (i_nlink != real_i_nlink &&
1182 c->sb.clean) {
1183 if (fsck_err(c, "filesystem marked clean, "
1184 "but inode %llu has wrong i_nlink "
1185 "(type %u i_nlink %u, should be %u)",
5c16add5 1186 u.bi_inum, mode_to_type(u.bi_mode),
1c6fdbd8
KO
1187 i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1188 return 0;
1189 goto set_i_nlink;
1190 }
1191
88c07f73 1192 if (i_nlink != real_i_nlink &&
1c3ff72c 1193 (c->sb.features & (1 << BCH_FEATURE_atomic_nlink))) {
88c07f73
KO
1194 if (fsck_err(c, "inode %llu has wrong i_nlink "
1195 "(type %u i_nlink %u, should be %u)",
5c16add5 1196 u.bi_inum, mode_to_type(u.bi_mode),
88c07f73
KO
1197 i_nlink, real_i_nlink) == FSCK_ERR_IGNORE)
1198 return 0;
1199 goto set_i_nlink;
1200 }
1201
1c6fdbd8
KO
1202 if (real_i_nlink && i_nlink != real_i_nlink)
1203 bch_verbose(c, "setting inode %llu nlink from %u to %u",
5c16add5 1204 u.bi_inum, i_nlink, real_i_nlink);
1c6fdbd8
KO
1205set_i_nlink:
1206 if (i_nlink != real_i_nlink) {
1c6fdbd8
KO
1207 struct bkey_inode_buf p;
1208
5c16add5 1209 bch2_inode_nlink_set(&u, real_i_nlink);
a3e72262 1210 bch2_inode_pack(c, &p, &u);
e751c01a 1211 p.inode.k.p = iter->pos;
1c6fdbd8 1212
b1fd23df 1213 ret = __bch2_trans_do(trans, NULL, NULL,
b1fd23df
KO
1214 BTREE_INSERT_NOFAIL|
1215 BTREE_INSERT_LAZY_RW,
2d594dfb 1216 (bch2_trans_update(trans, iter, &p.inode.k_i, 0), 0));
b1fd23df 1217 if (ret)
5c16add5 1218 bch_err(c, "error in fsck: error %i updating inode", ret);
1c6fdbd8
KO
1219 }
1220fsck_err:
1221 return ret;
1222}
1223
1224noinline_for_stack
1225static int bch2_gc_walk_inodes(struct bch_fs *c,
1226 struct bch_inode_unpacked *lostfound_inode,
1227 nlink_table *links,
1228 u64 range_start, u64 range_end)
1229{
0564b167
KO
1230 struct btree_trans trans;
1231 struct btree_iter *iter;
1c6fdbd8
KO
1232 struct bkey_s_c k;
1233 struct nlink *link, zero_links = { 0, 0 };
1234 struct genradix_iter nlinks_iter;
1235 int ret = 0, ret2 = 0;
1236 u64 nlinks_pos;
1237
20bceecb 1238 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
0564b167 1239
41f8b09e 1240 iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes,
1d60b999 1241 POS(0, range_start), 0);
1c6fdbd8
KO
1242 nlinks_iter = genradix_iter_init(links, 0);
1243
0564b167 1244 while ((k = bch2_btree_iter_peek(iter)).k &&
2bb748a6
KO
1245 !(ret2 = bkey_err(k)) &&
1246 iter->pos.offset < range_end) {
1c6fdbd8
KO
1247peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
1248
1d60b999 1249 if (!link && (!k.k || iter->pos.offset >= range_end))
1c6fdbd8
KO
1250 break;
1251
1252 nlinks_pos = range_start + nlinks_iter.pos;
2bb748a6
KO
1253
1254 if (link && nlinks_pos < iter->pos.offset) {
1c6fdbd8 1255 /* Should have been caught by dirents pass: */
2bb748a6 1256 need_fsck_err_on(link->count, c,
1c6fdbd8
KO
1257 "missing inode %llu (nlink %u)",
1258 nlinks_pos, link->count);
1259 genradix_iter_advance(&nlinks_iter, links);
1260 goto peek_nlinks;
1261 }
1262
2bb748a6 1263 if (!link || nlinks_pos > iter->pos.offset)
1c6fdbd8
KO
1264 link = &zero_links;
1265
26609b61 1266 if (k.k && k.k->type == KEY_TYPE_inode) {
5c16add5
KO
1267 ret = check_inode_nlink(&trans, lostfound_inode, iter,
1268 bkey_s_c_to_inode(k), link);
1c6fdbd8
KO
1269 BUG_ON(ret == -EINTR);
1270 if (ret)
1271 break;
1c6fdbd8
KO
1272 } else {
1273 /* Should have been caught by dirents pass: */
1274 need_fsck_err_on(link->count, c,
1275 "missing inode %llu (nlink %u)",
1276 nlinks_pos, link->count);
1277 }
1278
1d60b999 1279 if (nlinks_pos == iter->pos.offset)
1c6fdbd8
KO
1280 genradix_iter_advance(&nlinks_iter, links);
1281
e0ba3b64 1282 bch2_btree_iter_advance(iter);
424eb881 1283 bch2_trans_cond_resched(&trans);
1c6fdbd8
KO
1284 }
1285fsck_err:
abcecb49 1286 bch2_trans_iter_put(&trans, iter);
0564b167
KO
1287 bch2_trans_exit(&trans);
1288
1c6fdbd8 1289 if (ret2)
619f5bee 1290 bch_err(c, "error in fsck: btree error %i while walking inodes", ret2);
1c6fdbd8
KO
1291
1292 return ret ?: ret2;
1293}
1294
1295noinline_for_stack
5c16add5 1296static int check_nlinks(struct bch_fs *c,
1c6fdbd8
KO
1297 struct bch_inode_unpacked *lostfound_inode)
1298{
1299 nlink_table links;
1300 u64 this_iter_range_start, next_iter_range_start = 0;
1301 int ret = 0;
1302
1303 bch_verbose(c, "checking inode nlinks");
1304
1305 genradix_init(&links);
1306
1307 do {
1308 this_iter_range_start = next_iter_range_start;
1309 next_iter_range_start = U64_MAX;
1310
1311 ret = bch2_gc_walk_dirents(c, &links,
1312 this_iter_range_start,
1313 &next_iter_range_start);
1314 if (ret)
1315 break;
1316
1317 ret = bch2_gc_walk_inodes(c, lostfound_inode, &links,
1318 this_iter_range_start,
1319 next_iter_range_start);
1320 if (ret)
1321 break;
1322
1323 genradix_free(&links);
1324 } while (next_iter_range_start != U64_MAX);
1325
1326 genradix_free(&links);
1327
1328 return ret;
1329}
1330
1c6fdbd8
KO
1331/*
1332 * Checks for inconsistencies that shouldn't happen, unless we have a bug.
1333 * Doesn't fix them yet, mainly because they haven't yet been observed:
1334 */
619f5bee 1335int bch2_fsck_full(struct bch_fs *c)
1c6fdbd8
KO
1336{
1337 struct bch_inode_unpacked root_inode, lostfound_inode;
1c6fdbd8 1338
5c16add5
KO
1339 return check_inodes(c, true) ?:
1340 check_extents(c) ?:
1c6fdbd8
KO
1341 check_dirents(c) ?:
1342 check_xattrs(c) ?:
1343 check_root(c, &root_inode) ?:
1344 check_lostfound(c, &root_inode, &lostfound_inode) ?:
1345 check_directory_structure(c, &lostfound_inode) ?:
5c16add5 1346 check_nlinks(c, &lostfound_inode);
1c6fdbd8
KO
1347}
1348
619f5bee 1349int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1c6fdbd8 1350{
5c16add5 1351 return check_inodes(c, false);
1c6fdbd8 1352}