fs/bcachefs/fsck.c - as of "bcachefs: Fix bch2_btree_iter_advance()" (linux-block.git)
1c6fdbd8
KO
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
07a1006a 4#include "bkey_buf.h"
1c6fdbd8
KO
5#include "btree_update.h"
6#include "dirent.h"
7#include "error.h"
96385742 8#include "fs-common.h"
1c6fdbd8
KO
9#include "fsck.h"
10#include "inode.h"
11#include "keylist.h"
14b393ee 12#include "subvolume.h"
1c6fdbd8
KO
13#include "super.h"
14#include "xattr.h"
15
fc51b041 16#include <linux/bsearch.h>
1c6fdbd8 17#include <linux/dcache.h> /* struct qstr */
1c6fdbd8
KO
18
19#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
20
ef1669ff
KO
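/*
 * Sum the sizes of all allocated extents for @inum in @snapshot; used by the
 * i_sectors checks to recompute an inode's sector count:
 */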
21static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
22 u32 snapshot)
424eb881 23{
67e0dd8f 24 struct btree_iter iter;
424eb881
KO
25 struct bkey_s_c k;
26 u64 sectors = 0;
94f651e2 27 int ret;
424eb881 28
41f8b09e 29 for_each_btree_key(trans, iter, BTREE_ID_extents,
ef1669ff 30 SPOS(inum, 0, snapshot), 0, k, ret) {
424eb881
KO
31 if (k.k->p.inode != inum)
32 break;
33
34 if (bkey_extent_is_allocation(k.k))
35 sectors += k.k->size;
36 }
37
67e0dd8f 38 bch2_trans_iter_exit(trans, &iter);
94f651e2
KO
39
40 return ret ?: sectors;
424eb881
KO
41}
42
ef1669ff
KO
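/*
 * Count the DT_DIR dirents in directory @inum in @snapshot; used to verify a
 * directory's i_nlink (subdirectory count):
 */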
43static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
44 u32 snapshot)
45{
46 struct btree_iter iter;
47 struct bkey_s_c k;
48 struct bkey_s_c_dirent d;
49 u64 subdirs = 0;
50 int ret;
51
52 for_each_btree_key(trans, iter, BTREE_ID_dirents,
53 SPOS(inum, 0, snapshot), 0, k, ret) {
54 if (k.k->p.inode != inum)
55 break;
56
57 if (k.k->type != KEY_TYPE_dirent)
58 continue;
59
60 d = bkey_s_c_to_dirent(k);
61 if (d.v->d_type == DT_DIR)
62 subdirs++;
63 }
64
65 bch2_trans_iter_exit(trans, &iter);
66
67 return ret ?: subdirs;
68}
69
81ed9ce3
KO
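/* Look up the subvolume that snapshot ID @snapshot belongs to: */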
70static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
71 u32 *subvol)
72{
73 struct btree_iter iter;
74 struct bkey_s_c k;
75 int ret;
76
77 bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
78 POS(0, snapshot), 0);
79 k = bch2_btree_iter_peek_slot(&iter);
80 ret = bkey_err(k);
81 if (ret)
82 goto err;
83
84 if (k.k->type != KEY_TYPE_snapshot) {
85 bch_err(trans->c, "snapshot %u not found", snapshot);
86 ret = -ENOENT;
87 goto err;
88 }
89
90 *subvol = le32_to_cpu(bkey_s_c_to_snapshot(k).v->subvol);
91err:
92 bch2_trans_iter_exit(trans, &iter);
93 return ret;
94
95}
96
97static int snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
98 u32 *subvol)
99{
100 return lockrestart_do(trans, __snapshot_lookup_subvol(trans, snapshot, subvol));
101}
102
ef1669ff
KO
103static int __subvol_lookup(struct btree_trans *trans, u32 subvol,
104 u32 *snapshot, u64 *inum)
81ed9ce3 105{
97996ddf 106 struct bch_subvolume s;
81ed9ce3
KO
107 int ret;
108
97996ddf 109 ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
81ed9ce3 110
97996ddf
KO
111 *snapshot = le32_to_cpu(s.snapshot);
112 *inum = le64_to_cpu(s.inode);
81ed9ce3 113 return ret;
81ed9ce3
KO
114}
115
ef1669ff
KO
116static int subvol_lookup(struct btree_trans *trans, u32 subvol,
117 u32 *snapshot, u64 *inum)
81ed9ce3 118{
ef1669ff 119 return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum));
81ed9ce3
KO
120}
121
58686a25
KO
122static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
123 struct bch_inode_unpacked *inode,
124 u32 *snapshot)
8a85b20c 125{
67e0dd8f 126 struct btree_iter iter;
8a85b20c
KO
127 struct bkey_s_c k;
128 int ret;
129
67e0dd8f 130 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
ef1669ff 131 SPOS(0, inode_nr, *snapshot), 0);
67e0dd8f 132 k = bch2_btree_iter_peek_slot(&iter);
8a85b20c
KO
133 ret = bkey_err(k);
134 if (ret)
135 goto err;
136
8a85b20c
KO
137 ret = k.k->type == KEY_TYPE_inode
138 ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
139 : -ENOENT;
4db65027
KO
140 if (!ret)
141 *snapshot = iter.pos.snapshot;
8a85b20c 142err:
67e0dd8f 143 bch2_trans_iter_exit(trans, &iter);
8a85b20c
KO
144 return ret;
145}
146
58686a25
KO
147static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
148 struct bch_inode_unpacked *inode,
149 u32 *snapshot)
150{
151 return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
152}
153
ef1669ff
KO
154static int __lookup_dirent(struct btree_trans *trans,
155 struct bch_hash_info hash_info,
156 subvol_inum dir, struct qstr *name,
157 u64 *target, unsigned *type)
158{
159 struct btree_iter iter;
160 struct bkey_s_c_dirent d;
161 int ret;
162
163 ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
164 &hash_info, dir, name, 0);
165 if (ret)
166 return ret;
167
168 d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
169 *target = le64_to_cpu(d.v->d_inum);
170 *type = d.v->d_type;
171 bch2_trans_iter_exit(trans, &iter);
172 return 0;
173}
174
175static int lookup_dirent(struct btree_trans *trans,
176 struct bch_hash_info hash_info,
177 subvol_inum dir, struct qstr *name,
178 u64 *target, unsigned *type)
179{
180 return lockrestart_do(trans,
181 __lookup_dirent(trans, hash_info, dir, name, target, type));
182}
183
58686a25
KO
184static int __write_inode(struct btree_trans *trans,
185 struct bch_inode_unpacked *inode,
186 u32 snapshot)
8a85b20c 187{
67e0dd8f
KO
188 struct btree_iter iter;
189 int ret;
190
191 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
192 SPOS(0, inode->bi_inum, snapshot),
193 BTREE_ITER_INTENT);
194
195 ret = bch2_btree_iter_traverse(&iter) ?:
196 bch2_inode_write(trans, &iter, inode);
197 bch2_trans_iter_exit(trans, &iter);
58686a25
KO
198 return ret;
199}
200
201static int write_inode(struct btree_trans *trans,
202 struct bch_inode_unpacked *inode,
203 u32 snapshot)
204{
8a85b20c
KO
205 int ret = __bch2_trans_do(trans, NULL, NULL,
206 BTREE_INSERT_NOFAIL|
207 BTREE_INSERT_LAZY_RW,
58686a25 208 __write_inode(trans, inode, snapshot));
8a85b20c
KO
209 if (ret)
210 bch_err(trans->c, "error in fsck: error %i updating inode", ret);
211 return ret;
212}
213
ef1669ff
KO
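/*
 * Delete an unlinked inode: drop its extents, dirents and xattrs, delete the
 * subvolume if it was a subvolume root, then replace the inode key with a
 * bkey_inode_generation so the inode number can be reused:
 */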
214static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot)
215{
216 struct btree_iter iter = { NULL };
217 struct bkey_i_inode_generation delete;
218 struct bch_inode_unpacked inode_u;
219 struct bkey_s_c k;
220 int ret;
221
222 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
223 SPOS(inum, 0, snapshot),
224 SPOS(inum, U64_MAX, snapshot),
225 0, NULL) ?:
226 bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
227 SPOS(inum, 0, snapshot),
228 SPOS(inum, U64_MAX, snapshot),
229 0, NULL) ?:
230 bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
231 SPOS(inum, 0, snapshot),
232 SPOS(inum, U64_MAX, snapshot),
233 0, NULL);
234 if (ret)
235 goto err;
236retry:
237 bch2_trans_begin(trans);
238
239 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
240 SPOS(0, inum, snapshot), BTREE_ITER_INTENT);
241 k = bch2_btree_iter_peek_slot(&iter);
242
243 ret = bkey_err(k);
244 if (ret)
245 goto err;
246
247 if (k.k->type != KEY_TYPE_inode) {
248 bch2_fs_inconsistent(trans->c,
249 "inode %llu:%u not found when deleting",
250 inum, snapshot);
251 ret = -EIO;
252 goto err;
253 }
254
255 bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
256
257 /* Subvolume root? */
258 if (inode_u.bi_subvol) {
2027875b 259 ret = bch2_subvolume_delete(trans, inode_u.bi_subvol);
ef1669ff
KO
260 if (ret)
261 goto err;
262 }
263
264 bkey_inode_generation_init(&delete.k_i);
265 delete.k.p = iter.pos;
266 delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
267
268 ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
269 bch2_trans_commit(trans, NULL, NULL,
270 BTREE_INSERT_NOFAIL);
271err:
272 bch2_trans_iter_exit(trans, &iter);
273 if (ret == -EINTR)
274 goto retry;
275
276 return ret;
277}
278
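/* Delete the dirent at @pos, using the parent directory's hash info: */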
ae8bbb9f 279static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
1c6fdbd8 280{
0f238367 281 struct bch_fs *c = trans->c;
67e0dd8f 282 struct btree_iter iter;
1c6fdbd8
KO
283 struct bch_inode_unpacked dir_inode;
284 struct bch_hash_info dir_hash_info;
1c6fdbd8 285 int ret;
1c6fdbd8 286
ae8bbb9f 287 ret = lookup_inode(trans, pos.inode, &dir_inode, NULL);
b1fd23df
KO
288 if (ret)
289 return ret;
1c6fdbd8
KO
290
291 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
292
67e0dd8f 293 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
b1fd23df 294
ae8bbb9f 295 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
42d23732 296 &dir_hash_info, &iter, 0);
67e0dd8f 297 bch2_trans_iter_exit(trans, &iter);
ae8bbb9f 298 return ret;
b1fd23df
KO
299}
300
ae8bbb9f 301static int remove_dirent(struct btree_trans *trans, struct bpos pos)
b1fd23df 302{
ae8bbb9f
KO
303 int ret = __bch2_trans_do(trans, NULL, NULL,
304 BTREE_INSERT_NOFAIL|
305 BTREE_INSERT_LAZY_RW,
306 __remove_dirent(trans, pos));
307 if (ret)
308 bch_err(trans->c, "remove_dirent: err %i deleting dirent", ret);
309 return ret;
1c6fdbd8
KO
310}
311
58686a25 312/* Get lost+found, create if it doesn't exist: */
ef1669ff 313static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
58686a25 314 struct bch_inode_unpacked *lostfound)
1c6fdbd8 315{
58686a25
KO
316 struct bch_fs *c = trans->c;
317 struct bch_inode_unpacked root;
318 struct bch_hash_info root_hash_info;
319 struct qstr lostfound_str = QSTR("lost+found");
ef1669ff
KO
320 subvol_inum root_inum = { .subvol = subvol };
321 u64 inum = 0;
322 unsigned d_type = 0;
58686a25
KO
323 u32 snapshot;
324 int ret;
325
ef1669ff
KO
326 ret = subvol_lookup(trans, subvol, &snapshot, &root_inum.inum);
327 if (ret)
328 return ret;
81ed9ce3 329
ef1669ff
KO
330 ret = lookup_inode(trans, root_inum.inum, &root, &snapshot);
331 if (ret) {
332 bch_err(c, "error fetching subvol root: %i", ret);
58686a25 333 return ret;
ef1669ff 334 }
58686a25
KO
335
336 root_hash_info = bch2_hash_info_init(c, &root);
ef1669ff
KO
337
338 ret = lookup_dirent(trans, root_hash_info, root_inum,
339 &lostfound_str, &inum, &d_type);
340 if (ret == -ENOENT) {
58686a25
KO
341 bch_notice(c, "creating lost+found");
342 goto create_lostfound;
343 }
344
ef1669ff
KO
345 if (ret) {
346 bch_err(c, "error looking up lost+found: %i", ret);
347 return ret;
348 }
349
350 if (d_type != DT_DIR) {
351 bch_err(c, "error looking up lost+found: not a directory");
352 return -ENOENT;
353
354 }
355
58686a25
KO
356 ret = lookup_inode(trans, inum, lostfound, &snapshot);
357 if (ret && ret != -ENOENT) {
358 /*
359 * The check_dirents pass has already run, dangling dirents
360 * shouldn't exist here:
361 */
362 bch_err(c, "error looking up lost+found: %i", ret);
363 return ret;
364 }
365
366 if (ret == -ENOENT) {
367create_lostfound:
368 bch2_inode_init_early(c, lostfound);
369
370 ret = __bch2_trans_do(trans, NULL, NULL,
371 BTREE_INSERT_NOFAIL|
372 BTREE_INSERT_LAZY_RW,
ef1669ff
KO
373 bch2_create_trans(trans, root_inum, &root,
374 lostfound, &lostfound_str,
42d23732
KO
375 0, 0, S_IFDIR|0700, 0, NULL, NULL,
376 (subvol_inum) { }, 0));
58686a25
KO
377 if (ret)
378 bch_err(c, "error creating lost+found: %i", ret);
379 }
380
381 return ret;
382}
383
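/*
 * Move an unreachable inode into lost+found, naming it after its inode number,
 * and update its backpointer:
 */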
384static int reattach_inode(struct btree_trans *trans,
81ed9ce3 385 struct bch_inode_unpacked *inode,
ef1669ff 386 u32 inode_snapshot)
58686a25
KO
387{
388 struct bch_hash_info dir_hash;
389 struct bch_inode_unpacked lostfound;
1c6fdbd8
KO
390 char name_buf[20];
391 struct qstr name;
176cf4bf 392 u64 dir_offset = 0;
81ed9ce3 393 u32 subvol;
1c6fdbd8
KO
394 int ret;
395
ef1669ff 396 ret = snapshot_lookup_subvol(trans, inode_snapshot, &subvol);
81ed9ce3
KO
397 if (ret)
398 return ret;
399
400 ret = lookup_lostfound(trans, subvol, &lostfound);
176cf4bf 401 if (ret)
d3ff7fec 402 return ret;
176cf4bf 403
58686a25
KO
404 if (S_ISDIR(inode->bi_mode)) {
405 lostfound.bi_nlink++;
176cf4bf 406
58686a25 407 ret = write_inode(trans, &lostfound, U32_MAX);
176cf4bf 408 if (ret)
d3ff7fec 409 return ret;
176cf4bf
KO
410 }
411
58686a25 412 dir_hash = bch2_hash_info_init(trans->c, &lostfound);
176cf4bf 413
58686a25
KO
414 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
415 name = (struct qstr) QSTR(name_buf);
176cf4bf 416
58686a25 417 ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
ef1669ff
KO
418 bch2_dirent_create(trans,
419 (subvol_inum) {
420 .subvol = subvol,
421 .inum = lostfound.bi_inum,
422 },
423 &dir_hash,
424 mode_to_type(inode->bi_mode),
425 &name, inode->bi_inum, &dir_offset,
426 BCH_HASH_SET_MUST_CREATE));
58686a25
KO
427 if (ret) {
428 bch_err(trans->c, "error %i reattaching inode %llu",
429 ret, inode->bi_inum);
430 return ret;
431 }
176cf4bf 432
58686a25
KO
433 inode->bi_dir = lostfound.bi_inum;
434 inode->bi_dir_offset = dir_offset;
1c6fdbd8 435
ef1669ff 436 return write_inode(trans, inode, inode_snapshot);
1c6fdbd8
KO
437}
438
d3ff7fec
KO
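/* Delete the dirent that @inode's bi_dir/bi_dir_offset backpointer refers to: */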
439static int remove_backpointer(struct btree_trans *trans,
440 struct bch_inode_unpacked *inode)
441{
67e0dd8f 442 struct btree_iter iter;
d3ff7fec
KO
443 struct bkey_s_c k;
444 int ret;
445
67e0dd8f
KO
446 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
447 POS(inode->bi_dir, inode->bi_dir_offset), 0);
448 k = bch2_btree_iter_peek_slot(&iter);
d3ff7fec
KO
449 ret = bkey_err(k);
450 if (ret)
451 goto out;
452 if (k.k->type != KEY_TYPE_dirent) {
453 ret = -ENOENT;
454 goto out;
455 }
456
ae8bbb9f 457 ret = remove_dirent(trans, k.k->p);
d3ff7fec 458out:
67e0dd8f 459 bch2_trans_iter_exit(trans, &iter);
d3ff7fec
KO
460 return ret;
461}
462
ef1669ff
KO
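/*
 * Note that a key was seen at @pos: resets the seen list when the position
 * changes, and records @pos.snapshot (deduplicating repeated calls caused by
 * lock restarts):
 */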
463static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos)
464{
465 pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
466
467 if (bkey_cmp(s->pos, pos))
468 s->nr = 0;
469 s->pos = pos;
470
ef1669ff
KO
471 /* Might get called multiple times due to lock restarts */
472 if (s->nr && s->d[s->nr - 1] == pos.snapshot)
473 return 0;
474
18443cb9 475 return snapshots_seen_add(c, s, pos.snapshot);
ef1669ff
KO
476}
477
478/**
479 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
480 * and @ancestor hasn't been overwritten in @seen
481 *
482 * That is, returns whether key in @ancestor snapshot is visible in @id snapshot
483 */
484static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
485 u32 id, u32 ancestor)
486{
487 ssize_t i;
488
489 BUG_ON(id > ancestor);
490
491 id = snapshot_t(c, id)->equiv;
492 ancestor = snapshot_t(c, ancestor)->equiv;
493
494 /* @ancestor should be the snapshot most recently added to @seen */
495 BUG_ON(!seen->nr || seen->d[seen->nr - 1] != ancestor);
496 BUG_ON(seen->pos.snapshot != ancestor);
497
498 if (id == ancestor)
499 return true;
500
501 if (!bch2_snapshot_is_ancestor(c, id, ancestor))
502 return false;
503
504 for (i = seen->nr - 2;
505 i >= 0 && seen->d[i] >= id;
506 --i)
507 if (bch2_snapshot_is_ancestor(c, id, seen->d[i]) &&
508 bch2_snapshot_is_ancestor(c, seen->d[i], ancestor))
509 return false;
510
511 return true;
512}
513
514/**
515 * ref_visible - given a key with snapshot id @src that points to a key with
516 * snapshot id @dst, test whether there is some snapshot in which @dst is
517 * visible.
518 *
519 * This assumes we're visiting @src keys in natural key order.
520 *
521 * @s - list of snapshot IDs already seen at @src
522 * @src - snapshot ID of src key
523 * @dst - snapshot ID of dst key
524 */
525static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
526 u32 src, u32 dst)
527{
528 return dst <= src
529 ? key_visible_in_snapshot(c, s, dst, src)
530 : bch2_snapshot_is_ancestor(c, src, dst);
531}
532
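/*
 * Iterate over the inode versions in @_w whose snapshot is visible from
 * @_snapshot:
 */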
533#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
534 for (_i = (_w)->d; _i < (_w)->d + (_w)->nr && (_i)->snapshot <= (_snapshot); _i++)\
535 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
536
1c6fdbd8 537struct inode_walker {
ef1669ff
KO
538 bool first_this_inode;
539 u64 cur_inum;
540
541 size_t nr;
542 size_t size;
543 struct inode_walker_entry {
544 struct bch_inode_unpacked inode;
545 u32 snapshot;
546 u64 count;
547 } *d;
1c6fdbd8
KO
548};
549
ef1669ff
KO
550static void inode_walker_exit(struct inode_walker *w)
551{
552 kfree(w->d);
553 w->d = NULL;
554}
555
1c6fdbd8
KO
556static struct inode_walker inode_walker_init(void)
557{
ef1669ff
KO
558 return (struct inode_walker) { 0, };
559}
560
561static int inode_walker_realloc(struct inode_walker *w)
562{
563 if (w->nr == w->size) {
564 size_t new_size = max_t(size_t, 8UL, w->size * 2);
565 void *d = krealloc(w->d, new_size * sizeof(w->d[0]),
566 GFP_KERNEL);
567 if (!d)
568 return -ENOMEM;
569
570 w->d = d;
571 w->size = new_size;
572 }
573
574 return 0;
575}
576
577static int add_inode(struct bch_fs *c, struct inode_walker *w,
578 struct bkey_s_c_inode inode)
579{
580 struct bch_inode_unpacked u;
581 int ret;
582
583 ret = inode_walker_realloc(w);
584 if (ret)
585 return ret;
586
587 BUG_ON(bch2_inode_unpack(inode, &u));
588
589 w->d[w->nr++] = (struct inode_walker_entry) {
590 .inode = u,
591 .snapshot = snapshot_t(c, inode.k->p.snapshot)->equiv,
1c6fdbd8 592 };
ef1669ff
KO
593
594 return 0;
1c6fdbd8
KO
595}
596
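/*
 * Load all versions of inode @pos.inode (one per snapshot) into @w if we
 * haven't already, then return the index of the version visible in
 * @pos.snapshot (inserting a new entry for that snapshot if necessary), or
 * INT_MAX if no such inode exists:
 */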
914f2786 597static int __walk_inode(struct btree_trans *trans,
ef1669ff 598 struct inode_walker *w, struct bpos pos)
1c6fdbd8 599{
ef1669ff
KO
600 struct bch_fs *c = trans->c;
601 struct btree_iter iter;
602 struct bkey_s_c k;
603 unsigned i, ancestor_pos;
604 int ret;
1c6fdbd8 605
ef1669ff 606 pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
1c6fdbd8 607
ef1669ff 608 if (pos.inode == w->cur_inum) {
6bd13057 609 w->first_this_inode = false;
ef1669ff 610 goto lookup_snapshot;
1c6fdbd8
KO
611 }
612
ef1669ff
KO
613 w->nr = 0;
614
615 for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, pos.inode),
616 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
617 if (k.k->p.offset != pos.inode)
618 break;
619
620 if (k.k->type == KEY_TYPE_inode)
621 add_inode(c, w, bkey_s_c_to_inode(k));
622 }
623 bch2_trans_iter_exit(trans, &iter);
624
625 if (ret)
626 return ret;
627
628 w->cur_inum = pos.inode;
629 w->first_this_inode = true;
630lookup_snapshot:
631 for (i = 0; i < w->nr; i++)
632 if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->d[i].snapshot))
633 goto found;
634 return INT_MAX;
635found:
636 BUG_ON(pos.snapshot > w->d[i].snapshot);
637
638 if (pos.snapshot != w->d[i].snapshot) {
639 ancestor_pos = i;
640
641 while (i && w->d[i - 1].snapshot > pos.snapshot)
642 --i;
643
644 ret = inode_walker_realloc(w);
645 if (ret)
646 return ret;
647
648 array_insert_item(w->d, w->nr, i, w->d[ancestor_pos]);
649 w->d[i].snapshot = pos.snapshot;
650 w->d[i].count = 0;
651 }
652
653 return i;
1c6fdbd8
KO
654}
655
914f2786 656static int walk_inode(struct btree_trans *trans,
ef1669ff 657 struct inode_walker *w, struct bpos pos)
914f2786 658{
ef1669ff
KO
659 return lockrestart_do(trans, __walk_inode(trans, w, pos));
660}
661
662static int __get_visible_inodes(struct btree_trans *trans,
663 struct inode_walker *w,
664 struct snapshots_seen *s,
665 u64 inum)
666{
667 struct bch_fs *c = trans->c;
668 struct btree_iter iter;
669 struct bkey_s_c k;
670 int ret;
671
672 w->nr = 0;
673
674 for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
675 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
676 if (k.k->p.offset != inum)
677 break;
678
679 if (k.k->type != KEY_TYPE_inode)
680 continue;
681
682 if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) {
683 add_inode(c, w, bkey_s_c_to_inode(k));
684 if (k.k->p.snapshot >= s->pos.snapshot)
685 break;
686 }
687 }
688 bch2_trans_iter_exit(trans, &iter);
689
690 return ret;
691}
692
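/* Delete keys that refer to a snapshot ID that no longer exists: */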
693static int check_key_has_snapshot(struct btree_trans *trans,
694 struct btree_iter *iter,
695 struct bkey_s_c k)
696{
697 struct bch_fs *c = trans->c;
698 char buf[200];
699 int ret = 0;
700
701 if (fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c,
702 "key in missing snapshot: %s",
703 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) {
704 ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
705 bch2_btree_delete_at(trans, iter,
706 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
707 return ret ?: -EINTR;
708 }
709fsck_err:
710 return ret;
914f2786
KO
711}
712
7ac2c55e
KO
713static int hash_redo_key(struct btree_trans *trans,
714 const struct bch_hash_desc desc,
715 struct bch_hash_info *hash_info,
716 struct btree_iter *k_iter, struct bkey_s_c k)
1c6fdbd8 717{
ef1669ff
KO
718 bch_err(trans->c, "hash_redo_key() not implemented yet");
719 return -EINVAL;
720#if 0
e3b4b48c 721 struct bkey_i *delete;
1c6fdbd8 722 struct bkey_i *tmp;
1c6fdbd8 723
e3b4b48c
KO
724 delete = bch2_trans_kmalloc(trans, sizeof(*delete));
725 if (IS_ERR(delete))
726 return PTR_ERR(delete);
727
b1fd23df
KO
728 tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
729 if (IS_ERR(tmp))
730 return PTR_ERR(tmp);
1c6fdbd8
KO
731
732 bkey_reassemble(tmp, k);
733
e3b4b48c
KO
734 bkey_init(&delete->k);
735 delete->k.p = k_iter->pos;
8c3f6da9
KO
736 return bch2_btree_iter_traverse(k_iter) ?:
737 bch2_trans_update(trans, k_iter, delete, 0) ?:
bc3f8b25 738 bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
ef1669ff 739#endif
1c6fdbd8
KO
740}
741
424eb881
KO
742static int fsck_hash_delete_at(struct btree_trans *trans,
743 const struct bch_hash_desc desc,
1c6fdbd8 744 struct bch_hash_info *info,
424eb881 745 struct btree_iter *iter)
1c6fdbd8 746{
1c6fdbd8 747 int ret;
1c6fdbd8 748retry:
42d23732 749 ret = bch2_hash_delete_at(trans, desc, info, iter, 0) ?:
424eb881 750 bch2_trans_commit(trans, NULL, NULL,
134915f3
KO
751 BTREE_INSERT_NOFAIL|
752 BTREE_INSERT_LAZY_RW);
424eb881
KO
753 if (ret == -EINTR) {
754 ret = bch2_btree_iter_traverse(iter);
755 if (!ret)
756 goto retry;
757 }
1c6fdbd8 758
1c6fdbd8
KO
759 return ret;
760}
761
7ac2c55e
KO
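/*
 * Verify that a hashed key (dirent/xattr) is reachable from the slot its hash
 * maps to: if it's a duplicate of a key already in the chain it is deleted
 * (returns 1), and if it's at an unreachable offset it is rehashed via
 * hash_redo_key():
 */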
762static int hash_check_key(struct btree_trans *trans,
763 const struct bch_hash_desc desc,
764 struct bch_hash_info *hash_info,
765 struct btree_iter *k_iter, struct bkey_s_c hash_k)
d69f41d6 766{
424eb881 767 struct bch_fs *c = trans->c;
67e0dd8f 768 struct btree_iter iter = { NULL };
d69f41d6 769 char buf[200];
7ac2c55e
KO
770 struct bkey_s_c k;
771 u64 hash;
d69f41d6
KO
772 int ret = 0;
773
7ac2c55e
KO
774 if (hash_k.k->type != desc.key_type)
775 return 0;
776
777 hash = desc.hash_bkey(hash_info, hash_k);
778
779 if (likely(hash == hash_k.k->p.offset))
d69f41d6
KO
780 return 0;
781
7ac2c55e
KO
782 if (hash_k.k->p.offset < hash)
783 goto bad_hash;
d69f41d6 784
7ac2c55e
KO
785 for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash),
786 BTREE_ITER_SLOTS, k, ret) {
787 if (!bkey_cmp(k.k->p, hash_k.k->p))
d69f41d6
KO
788 break;
789
7ac2c55e
KO
790 if (fsck_err_on(k.k->type == desc.key_type &&
791 !desc.cmp_bkey(k, hash_k), c,
d69f41d6 792 "duplicate hash table keys:\n%s",
319f9ac3 793 (bch2_bkey_val_to_text(&PBUF(buf), c,
7ac2c55e
KO
794 hash_k), buf))) {
795 ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter);
d69f41d6
KO
796 if (ret)
797 return ret;
798 ret = 1;
799 break;
800 }
d69f41d6 801
7ac2c55e 802 if (bkey_deleted(k.k)) {
67e0dd8f 803 bch2_trans_iter_exit(trans, &iter);
7ac2c55e 804 goto bad_hash;
1c6fdbd8 805 }
1c6fdbd8 806
7ac2c55e 807 }
67e0dd8f 808 bch2_trans_iter_exit(trans, &iter);
1c6fdbd8 809 return ret;
7ac2c55e
KO
810bad_hash:
811 if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
e3b4b48c
KO
812 "hashed to %llu\n%s",
813 desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash,
7ac2c55e 814 (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE)
741daa5b
KO
815 return 0;
816
7ac2c55e
KO
817 ret = __bch2_trans_do(trans, NULL, NULL,
818 BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
819 hash_redo_key(trans, desc, hash_info, k_iter, hash_k));
820 if (ret) {
821 bch_err(c, "hash_redo_key err %i", ret);
822 return ret;
741daa5b 823 }
7ac2c55e 824 return -EINTR;
741daa5b 825fsck_err:
741daa5b 826 return ret;
741daa5b
KO
827}
828
5c16add5
KO
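/*
 * Check/repair a single inode: remove unlinked inodes, finish truncates and
 * i_sectors recalculations left incomplete by an unclean shutdown, and clear
 * untrusted backpointers:
 */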
829static int check_inode(struct btree_trans *trans,
830 struct btree_iter *iter,
ef1669ff
KO
831 struct bch_inode_unpacked *prev,
832 struct bch_inode_unpacked u)
5c16add5
KO
833{
834 struct bch_fs *c = trans->c;
5c16add5
KO
835 bool do_update = false;
836 int ret = 0;
837
ef1669ff
KO
838 if (fsck_err_on(prev &&
839 (prev->bi_hash_seed != u.bi_hash_seed ||
840 mode_to_type(prev->bi_mode) != mode_to_type(u.bi_mode)), c,
841 "inodes in different snapshots don't match")) {
842 bch_err(c, "repair not implemented yet");
843 return -EINVAL;
844 }
5c16add5
KO
845
846 if (u.bi_flags & BCH_INODE_UNLINKED &&
847 (!c->sb.clean ||
848 fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
849 u.bi_inum))) {
5c16add5
KO
850 bch2_trans_unlock(trans);
851 bch2_fs_lazy_rw(c);
852
ef1669ff 853 ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
5c16add5
KO
854 if (ret)
855 bch_err(c, "error in fsck: error %i while deleting inode", ret);
856 return ret;
857 }
858
859 if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
860 (!c->sb.clean ||
861 fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
862 u.bi_inum))) {
863 bch_verbose(c, "truncating inode %llu", u.bi_inum);
864
865 bch2_trans_unlock(trans);
866 bch2_fs_lazy_rw(c);
867
868 /*
869 * XXX: need to truncate partial blocks too here - or ideally
870 * just switch units to bytes and that issue goes away
871 */
872 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
ef1669ff
KO
873 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
874 iter->pos.snapshot),
5c16add5 875 POS(u.bi_inum, U64_MAX),
ef1669ff 876 0, NULL);
5c16add5
KO
877 if (ret) {
878 bch_err(c, "error in fsck: error %i truncating inode", ret);
879 return ret;
880 }
881
882 /*
883 * We truncated without our normal sector accounting hook, just
884 * make sure we recalculate it:
885 */
886 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
887
888 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
889 do_update = true;
890 }
891
892 if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
893 (!c->sb.clean ||
894 fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
895 u.bi_inum))) {
896 s64 sectors;
897
898 bch_verbose(c, "recounting sectors for inode %llu",
899 u.bi_inum);
900
ef1669ff 901 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
5c16add5
KO
902 if (sectors < 0) {
903 bch_err(c, "error in fsck: error %i recounting inode sectors",
904 (int) sectors);
905 return sectors;
906 }
907
908 u.bi_sectors = sectors;
909 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
910 do_update = true;
911 }
912
d3ff7fec
KO
913 if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
914 u.bi_dir = 0;
915 u.bi_dir_offset = 0;
916 u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
5c16add5
KO
917 do_update = true;
918 }
919
920 if (do_update) {
ef1669ff 921 ret = write_inode(trans, &u, iter->pos.snapshot);
5c16add5
KO
922 if (ret)
923 bch_err(c, "error in fsck: error %i "
924 "updating inode", ret);
925 }
926fsck_err:
927 return ret;
928}
929
930noinline_for_stack
931static int check_inodes(struct bch_fs *c, bool full)
932{
933 struct btree_trans trans;
67e0dd8f 934 struct btree_iter iter;
5c16add5
KO
935 struct bkey_s_c k;
936 struct bkey_s_c_inode inode;
ef1669ff 937 struct bch_inode_unpacked prev, u;
5c16add5
KO
938 int ret;
939
ef1669ff
KO
940 memset(&prev, 0, sizeof(prev));
941
5c16add5
KO
942 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
943
909004d2
KO
944 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
945 BTREE_ITER_INTENT|
ef1669ff
KO
946 BTREE_ITER_PREFETCH|
947 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
948 ret = check_key_has_snapshot(&trans, &iter, k);
949 if (ret)
950 break;
951
952 /*
953 * if snapshot id isn't a leaf node, skip it - deletion in
954 * particular is not atomic, so on the internal snapshot nodes
955 * we can see inodes marked for deletion after a clean shutdown
956 */
957 if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
958 continue;
959
5c16add5
KO
960 if (k.k->type != KEY_TYPE_inode)
961 continue;
962
963 inode = bkey_s_c_to_inode(k);
964
ef1669ff
KO
965 if (!full &&
966 !(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
967 BCH_INODE_I_SECTORS_DIRTY|
968 BCH_INODE_UNLINKED)))
969 continue;
970
971 BUG_ON(bch2_inode_unpack(inode, &u));
972
973 ret = check_inode(&trans, &iter,
974 full && prev.bi_inum == u.bi_inum
975 ? &prev : NULL, u);
976 if (ret)
977 break;
978
979 prev = u;
5c16add5 980 }
67e0dd8f 981 bch2_trans_iter_exit(&trans, &iter);
5c16add5
KO
982
983 BUG_ON(ret == -EINTR);
984
9a796fdb
KO
985 bch2_trans_exit(&trans);
986 return ret;
5c16add5
KO
987}
988
ef1669ff
KO
989noinline_for_stack
990static int check_subvols(struct bch_fs *c)
991{
992 struct btree_trans trans;
993 struct btree_iter iter;
994 struct bkey_s_c k;
2027875b 995 struct bkey_s_c_subvolume subvol;
ef1669ff
KO
996 int ret;
997
998 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
999
1000 for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN,
1001 0, k, ret) {
2027875b
KO
1002 if (k.k->type != KEY_TYPE_subvolume)
1003 continue;
1004
1005 subvol = bkey_s_c_to_subvolume(k);
1006
1007 if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
1008 ret = __bch2_trans_do(&trans, NULL, NULL,
1009 BTREE_INSERT_LAZY_RW,
1010 bch2_subvolume_delete(&trans, iter.pos.offset));
1011 if (ret) {
1012 bch_err(c, "error deleting subvolume %llu: %i",
1013 iter.pos.offset, ret);
1014 break;
1015 }
1016 }
ef1669ff
KO
1017 }
1018 bch2_trans_iter_exit(&trans, &iter);
1019
1020 bch2_trans_exit(&trans);
1021 return ret;
1022}
1023
1024/*
1025 * Checking for overlapping extents needs to be reimplemented
1026 */
1027#if 0
abcecb49 1028static int fix_overlapping_extent(struct btree_trans *trans,
e3e464ac
KO
1029 struct bkey_s_c k, struct bpos cut_at)
1030{
67e0dd8f 1031 struct btree_iter iter;
e3e464ac
KO
1032 struct bkey_i *u;
1033 int ret;
1034
1035 u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
1036 ret = PTR_ERR_OR_ZERO(u);
1037 if (ret)
1038 return ret;
1039
1040 bkey_reassemble(u, k);
1041 bch2_cut_front(cut_at, u);
1042
e3e464ac 1043
ef1669ff
KO
1044 /*
1045 * We don't want to go through the extent_handle_overwrites path:
1046 *
1047 * XXX: this is going to screw up disk accounting, extent triggers
1048 * assume things about extent overwrites - we should be running the
1049 * triggers manually here
1050 */
1051 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
1052 BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
1053
1054 BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
1055 ret = bch2_btree_iter_traverse(&iter) ?:
1056 bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
1057 bch2_trans_commit(trans, NULL, NULL,
1058 BTREE_INSERT_NOFAIL|
1059 BTREE_INSERT_LAZY_RW);
1060 bch2_trans_iter_exit(trans, &iter);
1061 return ret;
1062}
1063#endif
1064
4db65027
KO
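/*
 * Peek the dirent at @pos; on success the iterator is left pointing at it and
 * the caller must call bch2_trans_iter_exit():
 */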
1065static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
1066 struct btree_iter *iter,
1067 struct bpos pos)
1068{
1069 struct bkey_s_c k;
1070 int ret;
1071
1072 bch2_trans_iter_init(trans, iter, BTREE_ID_dirents, pos, 0);
1073 k = bch2_btree_iter_peek_slot(iter);
1074 ret = bkey_err(k);
1075 if (!ret && k.k->type != KEY_TYPE_dirent)
1076 ret = -ENOENT;
1077 if (ret) {
1078 bch2_trans_iter_exit(trans, iter);
1079 return (struct bkey_s_c_dirent) { .k = ERR_PTR(ret) };
1080 }
1081
1082 return bkey_s_c_to_dirent(k);
1083}
1084
1085static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
1086 struct bkey_s_c_dirent d)
1087{
1088 return inode->bi_dir == d.k->p.inode &&
1089 inode->bi_dir_offset == d.k->p.offset;
1090}
1091
1092static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
1093 struct bch_inode_unpacked *inode)
1094{
1095 return d.v->d_type == DT_SUBVOL
1096 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
1097 : le64_to_cpu(d.v->d_inum) == inode->bi_inum;
1098}
1099
ef1669ff
KO
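/*
 * Returns true if the dirent that @inode's backpointer points to exists and
 * refers back to @inode:
 */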
1100static int inode_backpointer_exists(struct btree_trans *trans,
1101 struct bch_inode_unpacked *inode,
1102 u32 snapshot)
1103{
1104 struct btree_iter iter;
4db65027 1105 struct bkey_s_c_dirent d;
ef1669ff
KO
1106 int ret;
1107
4db65027
KO
1108 d = dirent_get_by_pos(trans, &iter,
1109 SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot));
1110 ret = bkey_err(d.s_c);
ef1669ff 1111 if (ret)
4db65027 1112 return ret;
488f9776 1113
4db65027 1114 ret = dirent_points_to_inode(d, inode);
ef1669ff
KO
1115 bch2_trans_iter_exit(trans, &iter);
1116 return ret;
1117}
1118
ef1669ff
KO
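/*
 * After walking a file's extents, compare the sectors counted for each
 * snapshot against that inode version's i_sectors and repair any mismatch:
 */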
1119static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
1120{
1121 struct bch_fs *c = trans->c;
1122 struct inode_walker_entry *i;
1123 int ret = 0, ret2 = 0;
1124 s64 count2;
1125
1126 for (i = w->d; i < w->d + w->nr; i++) {
1127 if (i->inode.bi_sectors == i->count)
1128 continue;
1129
1130 count2 = lockrestart_do(trans,
1131 bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot));
1132
1133 if (i->count != count2) {
1134 bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu",
1135 i->count, count2);
1136 i->count = count2;
1137 if (i->inode.bi_sectors == i->count)
1138 continue;
1139 }
1140
1141 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c,
1142 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
1143 w->cur_inum, i->snapshot,
1144 i->inode.bi_sectors, i->count) == FSCK_ERR_IGNORE)
1145 continue;
1146
1147 i->inode.bi_sectors = i->count;
1148 ret = write_inode(trans, &i->inode, i->snapshot);
1149 if (ret)
1150 break;
1151 ret2 = -EINTR;
1152 }
1153fsck_err:
1154 return ret ?: ret2;
1155}
1156
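/*
 * Check a single extent: the inode it belongs to must exist and be a regular
 * file or symlink, the extent must not extend past i_size, and its size is
 * accumulated for the i_sectors check:
 */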
1157static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
1158 struct inode_walker *inode,
1159 struct snapshots_seen *s)
1160{
1161 struct bch_fs *c = trans->c;
1162 struct bkey_s_c k;
1163 struct inode_walker_entry *i;
1164 char buf[200];
1165 int ret = 0;
1166
1167 k = bch2_btree_iter_peek(iter);
1168 if (!k.k)
1169 return 0;
1170
1171 ret = bkey_err(k);
1172 if (ret)
1173 return ret;
1174
1175 ret = check_key_has_snapshot(trans, iter, k);
1176 if (ret)
1177 return ret;
1178
1179 ret = snapshots_seen_update(c, s, k.k->p);
1180 if (ret)
1181 return ret;
1182
1183 if (k.k->type == KEY_TYPE_whiteout)
1184 return 0;
1185
1186 if (inode->cur_inum != k.k->p.inode) {
1187 ret = check_i_sectors(trans, inode);
1188 if (ret)
1189 return ret;
1190 }
1191#if 0
1192 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
1193 char buf1[200];
1194 char buf2[200];
1195
1196 bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
1197 bch2_bkey_val_to_text(&PBUF(buf2), c, k);
1198
1199 if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2))
1200 return fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR;
1201 }
1202#endif
1203 ret = __walk_inode(trans, inode, k.k->p);
1204 if (ret < 0)
1205 return ret;
1206
1207 if (fsck_err_on(ret == INT_MAX, c,
1208 "extent in missing inode:\n %s",
1209 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
1210 return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
1211 bch2_btree_delete_at(trans, iter,
1212 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
1213
1214 if (ret == INT_MAX)
1215 return 0;
e3e464ac 1216
ef1669ff
KO
1217 i = inode->d + ret;
1218 ret = 0;
e3e464ac 1219
ef1669ff
KO
1220 if (fsck_err_on(!S_ISREG(i->inode.bi_mode) &&
1221 !S_ISLNK(i->inode.bi_mode), c,
1222 "extent in non regular inode mode %o:\n %s",
1223 i->inode.bi_mode,
1224 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
1225 return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
1226 bch2_btree_delete_at(trans, iter,
1227 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
1228
1229 if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) {
1230 for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) {
1231 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
1232 k.k->type != KEY_TYPE_reservation &&
1233 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
1234 "extent type %u offset %llu past end of inode %llu, i_size %llu",
1235 k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) {
1236 bch2_fs_lazy_rw(c);
1237 return bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
1238 SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9,
1239 k.k->p.snapshot),
1240 POS(k.k->p.inode, U64_MAX),
1241 0, NULL) ?: -EINTR;
1242 }
1243 }
1244 }
8a85b20c 1245
ef1669ff
KO
1246 if (bkey_extent_is_allocation(k.k))
1247 for_each_visible_inode(c, s, inode, k.k->p.snapshot, i)
1248 i->count += k.k->size;
1249#if 0
1250 bch2_bkey_buf_reassemble(&prev, c, k);
1251#endif
8a85b20c 1252
ef1669ff 1253fsck_err:
8a85b20c
KO
1254 return ret;
1255}
1256
1c6fdbd8
KO
1257/*
1258 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
1259 * that i_size and i_sectors are consistent
1260 */
1261noinline_for_stack
1262static int check_extents(struct bch_fs *c)
1263{
1264 struct inode_walker w = inode_walker_init();
ef1669ff 1265 struct snapshots_seen s;
424eb881 1266 struct btree_trans trans;
67e0dd8f 1267 struct btree_iter iter;
1c6fdbd8
KO
1268 int ret = 0;
1269
ef1669ff
KO
1270#if 0
1271 struct bkey_buf prev;
07a1006a 1272 bch2_bkey_buf_init(&prev);
f7005e01 1273 prev.k->k = KEY(0, 0, 0);
ef1669ff
KO
1274#endif
1275 snapshots_seen_init(&s);
20bceecb 1276 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 1277
1c6fdbd8
KO
1278 bch_verbose(c, "checking extents");
1279
67e0dd8f
KO
1280 bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
1281 POS(BCACHEFS_ROOT_INO, 0),
1282 BTREE_ITER_INTENT|
ef1669ff
KO
1283 BTREE_ITER_PREFETCH|
1284 BTREE_ITER_ALL_SNAPSHOTS);
1285
1286 do {
1287 ret = lockrestart_do(&trans,
1288 check_extent(&trans, &iter, &w, &s));
1289 if (ret)
1290 break;
1291 } while (bch2_btree_iter_advance(&iter));
1292 bch2_trans_iter_exit(&trans, &iter);
1293#if 0
1294 bch2_bkey_buf_exit(&prev, c);
1295#endif
1296 inode_walker_exit(&w);
1297 bch2_trans_exit(&trans);
1298 snapshots_seen_exit(&s);
1299
1300 return ret;
1301}
1302
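/*
 * After walking a directory's dirents, compare the subdirectory count for each
 * snapshot against that inode version's i_nlink and repair any mismatch:
 */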
1303static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
1304{
1305 struct bch_fs *c = trans->c;
1306 struct inode_walker_entry *i;
1307 int ret = 0, ret2 = 0;
1308 s64 count2;
1309
1310 for (i = w->d; i < w->d + w->nr; i++) {
1311 if (i->inode.bi_nlink == i->count)
1312 continue;
1313
1314 count2 = lockrestart_do(trans,
1315 bch2_count_subdirs(trans, w->cur_inum, i->snapshot));
1316
1317 if (i->count != count2) {
1318 bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
1319 i->count, count2);
1320 i->count = count2;
1321 if (i->inode.bi_nlink == i->count)
1322 continue;
1323 }
1324
1325 if (fsck_err_on(i->inode.bi_nlink != i->count, c,
1326 "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
1327 w->cur_inum, i->snapshot, i->inode.bi_nlink, i->count)) {
1328 i->inode.bi_nlink = i->count;
1329 ret = write_inode(trans, &i->inode, i->snapshot);
abcecb49
KO
1330 if (ret)
1331 break;
ef1669ff 1332 ret2 = -EINTR;
abcecb49 1333 }
ef1669ff
KO
1334 }
1335fsck_err:
1336 return ret ?: ret2;
1337}
abcecb49 1338
ef1669ff
KO
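/*
 * Verify that the inode (or subvolume root) a dirent points to has a matching
 * backpointer and that d_type/d_parent_subvol are correct, repairing whichever
 * side is wrong:
 */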
1339static int check_dirent_target(struct btree_trans *trans,
1340 struct btree_iter *iter,
1341 struct bkey_s_c_dirent d,
1342 struct bch_inode_unpacked *target,
1343 u32 target_snapshot)
1344{
1345 struct bch_fs *c = trans->c;
1346 bool backpointer_exists = true;
1347 char buf[200];
1348 int ret = 0;
1349
1350 if (!target->bi_dir &&
1351 !target->bi_dir_offset) {
1352 target->bi_dir = d.k->p.inode;
1353 target->bi_dir_offset = d.k->p.offset;
1354
1355 ret = write_inode(trans, target, target_snapshot);
1356 if (ret)
1357 goto err;
1358 }
1359
4db65027 1360 if (!inode_points_to_dirent(target, d)) {
ef1669ff
KO
1361 ret = inode_backpointer_exists(trans, target, d.k->p.snapshot);
1362 if (ret < 0)
1363 goto err;
e3e464ac 1364
ef1669ff
KO
1365 backpointer_exists = ret;
1366 ret = 0;
e3e464ac 1367
ef1669ff
KO
1368 if (fsck_err_on(S_ISDIR(target->bi_mode) &&
1369 backpointer_exists, c,
1370 "directory %llu with multiple links",
1371 target->bi_inum)) {
1372 ret = remove_dirent(trans, d.k->p);
1373 if (ret)
1374 goto err;
1375 return 0;
e3e464ac 1376 }
e3e464ac 1377
ef1669ff
KO
1378 if (fsck_err_on(backpointer_exists &&
1379 !target->bi_nlink, c,
1380 "inode %llu has multiple links but i_nlink 0",
1381 target->bi_inum)) {
1382 target->bi_nlink++;
1383 target->bi_flags &= ~BCH_INODE_UNLINKED;
1c6fdbd8 1384
ef1669ff
KO
1385 ret = write_inode(trans, target, target_snapshot);
1386 if (ret)
1387 goto err;
1c6fdbd8
KO
1388 }
1389
ef1669ff 1390 if (fsck_err_on(!backpointer_exists, c,
6e0c886d 1391 "inode %llu:%u has wrong backpointer:\n"
ef1669ff
KO
1392 "got %llu:%llu\n"
1393 "should be %llu:%llu",
6e0c886d 1394 target->bi_inum, target_snapshot,
ef1669ff
KO
1395 target->bi_dir,
1396 target->bi_dir_offset,
1397 d.k->p.inode,
1398 d.k->p.offset)) {
1399 target->bi_dir = d.k->p.inode;
1400 target->bi_dir_offset = d.k->p.offset;
1401
1402 ret = write_inode(trans, target, target_snapshot);
1403 if (ret)
1404 goto err;
1c6fdbd8 1405 }
ef1669ff 1406 }
1c6fdbd8 1407
ef1669ff
KO
1408 if (fsck_err_on(vfs_d_type(d.v->d_type) != mode_to_type(target->bi_mode), c,
1409 "incorrect d_type: should be %u:\n%s",
1410 mode_to_type(target->bi_mode),
1411 (bch2_bkey_val_to_text(&PBUF(buf), c, d.s_c), buf))) {
1412 struct bkey_i_dirent *n;
1c6fdbd8 1413
ef1669ff
KO
1414 n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
1415 if (!n) {
1416 ret = -ENOMEM;
1417 goto err;
1418 }
1419
1420 bkey_reassemble(&n->k_i, d.s_c);
1421 n->v.d_type = mode_to_type(target->bi_mode);
1422
1423 ret = __bch2_trans_do(trans, NULL, NULL,
1424 BTREE_INSERT_NOFAIL|
1425 BTREE_INSERT_LAZY_RW,
1426 bch2_trans_update(trans, iter, &n->k_i, 0));
1427 kfree(n);
4db65027
KO
1428
1429 return ret ?: -EINTR;
1430 }
1431
1432 if (d.v->d_type == DT_SUBVOL &&
1433 target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) &&
1434 (c->sb.version < bcachefs_metadata_version_subvol_dirent ||
1435 fsck_err(c, "dirent has wrong d_parent_subvol field: got %u, should be %u",
1436 le32_to_cpu(d.v->d_parent_subvol),
1437 target->bi_parent_subvol))) {
1438 struct bkey_i_dirent *n;
1439
1440 n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
1441 if (!n) {
1442 ret = -ENOMEM;
ef1669ff 1443 goto err;
4db65027
KO
1444 }
1445
1446 bkey_reassemble(&n->k_i, d.s_c);
1447 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
1448
1449 ret = __bch2_trans_do(trans, NULL, NULL,
1450 BTREE_INSERT_NOFAIL|
1451 BTREE_INSERT_LAZY_RW,
1452 bch2_trans_update(trans, iter, &n->k_i, 0));
1453 kfree(n);
1454
1455 return ret ?: -EINTR;
1c6fdbd8 1456 }
ef1669ff 1457err:
1c6fdbd8 1458fsck_err:
ef1669ff 1459 return ret;
1c6fdbd8
KO
1460}
1461
914f2786
KO
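/*
 * Check a single dirent: the parent must be an existing directory, the hash
 * must be correct, the target inode or subvolume must exist and point back at
 * us, and subdirectory counts are accumulated for check_subdir_count():
 */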
1462static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
1463 struct bch_hash_info *hash_info,
ef1669ff
KO
1464 struct inode_walker *dir,
1465 struct inode_walker *target,
1466 struct snapshots_seen *s)
1c6fdbd8 1467{
914f2786 1468 struct bch_fs *c = trans->c;
1c6fdbd8 1469 struct bkey_s_c k;
914f2786 1470 struct bkey_s_c_dirent d;
ef1669ff 1471 struct inode_walker_entry *i;
1c6fdbd8 1472 char buf[200];
914f2786 1473 int ret;
1c6fdbd8 1474
914f2786
KO
1475 k = bch2_btree_iter_peek(iter);
1476 if (!k.k)
38200544 1477 return 0;
1c6fdbd8 1478
914f2786
KO
1479 ret = bkey_err(k);
1480 if (ret)
1481 return ret;
d69f41d6 1482
ef1669ff
KO
1483 ret = check_key_has_snapshot(trans, iter, k);
1484 if (ret)
1485 return ret;
1c6fdbd8 1486
ef1669ff 1487 ret = snapshots_seen_update(c, s, k.k->p);
914f2786
KO
1488 if (ret)
1489 return ret;
8a85b20c 1490
ef1669ff
KO
1491 if (k.k->type == KEY_TYPE_whiteout)
1492 return 0;
1493
1494 if (dir->cur_inum != k.k->p.inode) {
1495 ret = check_subdir_count(trans, dir);
1496 if (ret)
1497 return ret;
1498 }
1499
1500 ret = __walk_inode(trans, dir, k.k->p);
1501 if (ret < 0)
1502 return ret;
1c6fdbd8 1503
ef1669ff 1504 if (fsck_err_on(ret == INT_MAX, c,
914f2786 1505 "dirent in nonexistent directory:\n%s",
ef1669ff
KO
1506 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
1507 return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW,
1508 bch2_btree_delete_at(trans, iter,
1509 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
1510
1511 if (ret == INT_MAX)
1512 return 0;
1513
1514 i = dir->d + ret;
1515 ret = 0;
1516
1517 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c,
914f2786 1518 "dirent in non directory inode type %u:\n%s",
ef1669ff 1519 mode_to_type(i->inode.bi_mode),
914f2786
KO
1520 (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))
1521 return __bch2_trans_do(trans, NULL, NULL, 0,
1522 bch2_btree_delete_at(trans, iter, 0));
8a85b20c 1523
ef1669ff
KO
1524 if (dir->first_this_inode)
1525 *hash_info = bch2_hash_info_init(c, &dir->d[0].inode);
1c6fdbd8 1526
914f2786
KO
1527 ret = hash_check_key(trans, bch2_dirent_hash_desc,
1528 hash_info, iter, k);
1529 if (ret < 0)
1530 return ret;
1531 if (ret) /* dirent has been deleted */
1532 return 0;
7ac2c55e 1533
914f2786
KO
1534 if (k.k->type != KEY_TYPE_dirent)
1535 return 0;
1c6fdbd8 1536
914f2786 1537 d = bkey_s_c_to_dirent(k);
1c6fdbd8 1538
4db65027
KO
1539 if (d.v->d_type == DT_SUBVOL) {
1540 struct bch_inode_unpacked subvol_root;
1541 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
1542 u32 target_snapshot;
1543 u64 target_inum;
b9e1adf5 1544
4db65027
KO
1545 ret = __subvol_lookup(trans, target_subvol,
1546 &target_snapshot, &target_inum);
1547 if (ret && ret != -ENOENT)
1548 return ret;
1c6fdbd8 1549
4db65027
KO
1550 if (fsck_err_on(ret, c,
1551 "dirent points to missing subvolume %u",
1552 target_subvol))
1553 return remove_dirent(trans, d.k->p);
914f2786 1554
ef1669ff
KO
1555 ret = __lookup_inode(trans, target_inum,
1556 &subvol_root, &target_snapshot);
1557 if (ret && ret != -ENOENT)
1558 return ret;
914f2786 1559
ef1669ff
KO
1560 if (fsck_err_on(ret, c,
1561 "subvolume %u points to missing subvolume root %llu",
1562 target_subvol,
1563 target_inum)) {
1564 bch_err(c, "repair not implemented yet");
1565 return -EINVAL;
1566 }
914f2786 1567
ef1669ff
KO
1568 if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c,
1569 "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
1570 target_inum,
1571 subvol_root.bi_subvol, target_subvol)) {
1572 subvol_root.bi_subvol = target_subvol;
1573 ret = write_inode(trans, &subvol_root, target_snapshot);
1574 if (ret)
1575 return ret;
1576 }
914f2786 1577
ef1669ff
KO
1578 ret = check_dirent_target(trans, iter, d, &subvol_root,
1579 target_snapshot);
914f2786
KO
1580 if (ret)
1581 return ret;
ef1669ff 1582 } else {
4db65027 1583 ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
ef1669ff 1584 if (ret)
914f2786 1585 return ret;
1c6fdbd8 1586
ef1669ff
KO
1587 if (fsck_err_on(!target->nr, c,
1588 "dirent points to missing inode:\n%s",
1589 (bch2_bkey_val_to_text(&PBUF(buf), c,
1590 k), buf))) {
1591 ret = remove_dirent(trans, d.k->p);
1592 if (ret)
1593 return ret;
1c6fdbd8
KO
1594 }
1595
ef1669ff
KO
1596 for (i = target->d; i < target->d + target->nr; i++) {
1597 ret = check_dirent_target(trans, iter, d,
1598 &i->inode, i->snapshot);
1599 if (ret)
1600 return ret;
d3ff7fec 1601 }
914f2786 1602 }
d3ff7fec 1603
ef1669ff
KO
1604 if (d.v->d_type == DT_DIR)
1605 for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
1606 i->count++;
1c6fdbd8 1607
914f2786
KO
1608fsck_err:
1609 return ret;
1610}
1c6fdbd8 1611
914f2786
KO
1612/*
1613 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
1614 * validate d_type
1615 */
1616noinline_for_stack
1617static int check_dirents(struct bch_fs *c)
1618{
ef1669ff
KO
1619 struct inode_walker dir = inode_walker_init();
1620 struct inode_walker target = inode_walker_init();
1621 struct snapshots_seen s;
914f2786
KO
1622 struct bch_hash_info hash_info;
1623 struct btree_trans trans;
67e0dd8f 1624 struct btree_iter iter;
914f2786 1625 int ret = 0;
1c6fdbd8 1626
914f2786
KO
1627 bch_verbose(c, "checking dirents");
1628
ef1669ff 1629 snapshots_seen_init(&s);
914f2786 1630 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1c6fdbd8 1631
67e0dd8f
KO
1632 bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents,
1633 POS(BCACHEFS_ROOT_INO, 0),
1634 BTREE_ITER_INTENT|
ef1669ff
KO
1635 BTREE_ITER_PREFETCH|
1636 BTREE_ITER_ALL_SNAPSHOTS);
914f2786 1637
38200544 1638 do {
914f2786 1639 ret = lockrestart_do(&trans,
ef1669ff
KO
1640 check_dirent(&trans, &iter, &hash_info,
1641 &dir, &target, &s));
914f2786
KO
1642 if (ret)
1643 break;
67e0dd8f
KO
1644 } while (bch2_btree_iter_advance(&iter));
1645 bch2_trans_iter_exit(&trans, &iter);
914f2786 1646
ef1669ff
KO
1647 bch2_trans_exit(&trans);
1648 snapshots_seen_exit(&s);
1649 inode_walker_exit(&dir);
1650 inode_walker_exit(&target);
1651 return ret;
1c6fdbd8
KO
1652}
1653
1654/*
1655 * Walk xattrs: verify that they all have a corresponding inode
1656 */
1657noinline_for_stack
1658static int check_xattrs(struct bch_fs *c)
1659{
1660 struct inode_walker w = inode_walker_init();
7ac2c55e 1661 struct bch_hash_info hash_info;
d69f41d6 1662 struct btree_trans trans;
67e0dd8f 1663 struct btree_iter iter;
1c6fdbd8
KO
1664 struct bkey_s_c k;
1665 int ret = 0;
1666
1667 bch_verbose(c, "checking xattrs");
1668
20bceecb 1669 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
d69f41d6 1670
67e0dd8f
KO
1671 bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
1672 POS(BCACHEFS_ROOT_INO, 0),
1673 BTREE_ITER_INTENT|
ef1669ff
KO
1674 BTREE_ITER_PREFETCH|
1675 BTREE_ITER_ALL_SNAPSHOTS);
6bd13057 1676retry:
ef1669ff
KO
1677 bch2_trans_begin(&trans);
1678
67e0dd8f 1679 while ((k = bch2_btree_iter_peek(&iter)).k &&
abcecb49 1680 !(ret = bkey_err(k))) {
ef1669ff 1681 ret = check_key_has_snapshot(&trans, &iter, k);
1c6fdbd8
KO
1682 if (ret)
1683 break;
1684
ef1669ff
KO
1685 ret = walk_inode(&trans, &w, k.k->p);
1686 if (ret < 0)
1687 break;
1688
1689 if (fsck_err_on(ret == INT_MAX, c,
1c6fdbd8
KO
1690 "xattr for missing inode %llu",
1691 k.k->p.inode)) {
67e0dd8f 1692 ret = bch2_btree_delete_at(&trans, &iter, 0);
1c6fdbd8 1693 if (ret)
abcecb49 1694 break;
1c6fdbd8
KO
1695 continue;
1696 }
1697
ef1669ff
KO
1698 if (ret == INT_MAX)
1699 goto next;
1700 ret = 0;
1701
1702 if (w.first_this_inode)
1703 hash_info = bch2_hash_info_init(c, &w.d[0].inode);
1c6fdbd8 1704
424eb881 1705 ret = hash_check_key(&trans, bch2_xattr_hash_desc,
67e0dd8f 1706 &hash_info, &iter, k);
1c6fdbd8 1707 if (ret)
abcecb49 1708 break;
ef1669ff 1709next:
67e0dd8f 1710 bch2_btree_iter_advance(&iter);
1c6fdbd8 1711 }
1c6fdbd8 1712fsck_err:
6bd13057
KO
1713 if (ret == -EINTR)
1714 goto retry;
abcecb49 1715
67e0dd8f 1716 bch2_trans_iter_exit(&trans, &iter);
9a796fdb
KO
1717 bch2_trans_exit(&trans);
1718 return ret;
1c6fdbd8
KO
1719}
1720
1721/* Get root directory, create if it doesn't exist: */
ef1669ff 1722static int check_root(struct bch_fs *c)
1c6fdbd8 1723{
ef1669ff
KO
1724 struct btree_trans trans;
1725 struct bch_inode_unpacked root_inode;
d3ff7fec 1726 u32 snapshot;
ef1669ff 1727 u64 inum;
1c6fdbd8
KO
1728 int ret;
1729
ef1669ff
KO
1730 bch2_trans_init(&trans, c, 0, 0);
1731
1c6fdbd8
KO
1732 bch_verbose(c, "checking root directory");
1733
ef1669ff 1734 ret = subvol_lookup(&trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
1c6fdbd8
KO
1735 if (ret && ret != -ENOENT)
1736 goto err;
1737
ef1669ff
KO
1738 if (mustfix_fsck_err_on(ret, c, "root subvol missing")) {
1739 struct bkey_i_subvolume root_subvol;
1c6fdbd8 1740
ef1669ff
KO
1741 snapshot = U32_MAX;
1742 inum = BCACHEFS_ROOT_INO;
1c6fdbd8 1743
ef1669ff
KO
1744 bkey_subvolume_init(&root_subvol.k_i);
1745 root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
1746 root_subvol.v.flags = 0;
1747 root_subvol.v.snapshot = cpu_to_le32(snapshot);
1748 root_subvol.v.inode = cpu_to_le64(inum);
1749 ret = __bch2_trans_do(&trans, NULL, NULL,
1750 BTREE_INSERT_NOFAIL|
1751 BTREE_INSERT_LAZY_RW,
1752 __bch2_btree_insert(&trans, BTREE_ID_subvolumes, &root_subvol.k_i));
1753 if (ret) {
1754 bch_err(c, "error writing root subvol: %i", ret);
1755 goto err;
1756 }
1757
1758 }
1759
1760 ret = lookup_inode(&trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
1761 if (ret && ret != -ENOENT)
1762 goto err;
1c6fdbd8 1763
ef1669ff
KO
1764 if (mustfix_fsck_err_on(ret, c, "root directory missing") ||
1765 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c,
1766 "root inode not a directory")) {
1767 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
1768 0, NULL);
1769 root_inode.bi_inum = inum;
1c6fdbd8 1770
ef1669ff
KO
1771 ret = write_inode(&trans, &root_inode, snapshot);
1772 if (ret)
1773 bch_err(c, "error writing root inode: %i", ret);
1774 }
1775err:
1776fsck_err:
1777 bch2_trans_exit(&trans);
1778 return ret;
1c6fdbd8
KO
1779}
1780
1c6fdbd8
KO
1781struct pathbuf {
1782 size_t nr;
1783 size_t size;
1784
1785 struct pathbuf_entry {
1786 u64 inum;
6e0c886d 1787 u32 snapshot;
1c6fdbd8
KO
1788 } *entries;
1789};
1790
6e0c886d
KO
1791static bool path_is_dup(struct pathbuf *p, u64 inum, u32 snapshot)
1792{
1793 struct pathbuf_entry *i;
1794
1795 for (i = p->entries; i < p->entries + p->nr; i++)
1796 if (i->inum == inum &&
1797 i->snapshot == snapshot)
1798 return true;
1799
1800 return false;
1801}
1802
1803static int path_down(struct pathbuf *p, u64 inum, u32 snapshot)
1c6fdbd8
KO
1804{
1805 if (p->nr == p->size) {
1806 size_t new_size = max_t(size_t, 256UL, p->size * 2);
1807 void *n = krealloc(p->entries,
1808 new_size * sizeof(p->entries[0]),
1809 GFP_KERNEL);
d3ff7fec 1810 if (!n) {
1c6fdbd8 1811 return -ENOMEM;
d3ff7fec 1812 }
1c6fdbd8
KO
1813
1814 p->entries = n;
1815 p->size = new_size;
1816 }
1817
1818 p->entries[p->nr++] = (struct pathbuf_entry) {
6e0c886d
KO
1819 .inum = inum,
1820 .snapshot = snapshot,
1c6fdbd8
KO
1821 };
1822 return 0;
1823}
1824
6e0c886d
KO
1825/*
1826 * Check that a given inode is reachable from the root:
1827 *
1828 * XXX: we should also be verifying that inodes are in the right subvolumes
1829 */
d3ff7fec 1830static int check_path(struct btree_trans *trans,
d3ff7fec 1831 struct pathbuf *p,
81ed9ce3
KO
1832 struct bch_inode_unpacked *inode,
1833 u32 snapshot)
1c6fdbd8 1834{
d3ff7fec 1835 struct bch_fs *c = trans->c;
1c6fdbd8
KO
1836 int ret = 0;
1837
ef1669ff 1838 snapshot = snapshot_t(c, snapshot)->equiv;
d3ff7fec 1839 p->nr = 0;
424eb881 1840
488f9776
KO
1841 while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
1842 inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
4db65027
KO
1843 struct btree_iter dirent_iter;
1844 struct bkey_s_c_dirent d;
6e0c886d
KO
1845 u32 parent_snapshot = snapshot;
1846
4db65027 1847 if (inode->bi_subvol) {
488f9776
KO
1848 u64 inum;
1849
1850 ret = subvol_lookup(trans, inode->bi_parent_subvol,
6e0c886d 1851 &parent_snapshot, &inum);
488f9776
KO
1852 if (ret)
1853 break;
1854 }
1855
d3ff7fec 1856 ret = lockrestart_do(trans,
4db65027
KO
1857 PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter,
1858 SPOS(inode->bi_dir, inode->bi_dir_offset,
1859 parent_snapshot))).k));
1860 if (ret && ret != -ENOENT)
d3ff7fec 1861 break;
1c6fdbd8 1862
4db65027
KO
1863 if (!ret && !dirent_points_to_inode(d, inode)) {
1864 bch2_trans_iter_exit(trans, &dirent_iter);
1865 ret = -ENOENT;
1866 }
1867
1868 if (ret == -ENOENT) {
ef1669ff
KO
1869 if (fsck_err(c, "unreachable inode %llu:%u, type %u nlink %u backptr %llu:%llu",
1870 inode->bi_inum, snapshot,
d3ff7fec
KO
1871 mode_to_type(inode->bi_mode),
1872 inode->bi_nlink,
1873 inode->bi_dir,
1874 inode->bi_dir_offset))
81ed9ce3 1875 ret = reattach_inode(trans, inode, snapshot);
d3ff7fec
KO
1876 break;
1877 }
4db65027
KO
1878
1879 bch2_trans_iter_exit(trans, &dirent_iter);
1c6fdbd8 1880
d3ff7fec
KO
1881 if (!S_ISDIR(inode->bi_mode))
1882 break;
1c6fdbd8 1883
6e0c886d 1884 ret = path_down(p, inode->bi_inum, snapshot);
d3ff7fec
KO
1885 if (ret) {
1886 bch_err(c, "memory allocation failure");
1887 return ret;
1888 }
1c6fdbd8 1889
6e0c886d
KO
1890 snapshot = parent_snapshot;
1891
1892 ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
1893 if (ret) {
1894 /* Should have been caught in dirents pass */
1895 bch_err(c, "error looking up parent directory: %i", ret);
1896 break;
1897 }
1898
1899 if (path_is_dup(p, inode->bi_inum, snapshot)) {
1900 struct pathbuf_entry *i;
1c6fdbd8 1901
d3ff7fec 1902 /* XXX print path */
6e0c886d
KO
1903 bch_err(c, "directory structure loop");
1904
1905 for (i = p->entries; i < p->entries + p->nr; i++)
1906 pr_err("%llu:%u", i->inum, i->snapshot);
1907 pr_err("%llu:%u", inode->bi_inum, snapshot);
1908
d3ff7fec
KO
1909 if (!fsck_err(c, "directory structure loop"))
1910 return 0;
1c6fdbd8 1911
d3ff7fec 1912 ret = lockrestart_do(trans,
81ed9ce3 1913 remove_backpointer(trans, inode));
1c6fdbd8 1914 if (ret) {
d3ff7fec
KO
1915 bch_err(c, "error removing dirent: %i", ret);
1916 break;
1c6fdbd8
KO
1917 }
1918
81ed9ce3 1919 ret = reattach_inode(trans, inode, snapshot);
1c6fdbd8 1920 }
1c6fdbd8 1921 }
d3ff7fec
KO
1922fsck_err:
1923 if (ret)
1924 bch_err(c, "%s: err %i", __func__, ret);
1925 return ret;
1926}
1c6fdbd8 1927
d3ff7fec
KO
1928/*
1929 * Check for unreachable inodes, as well as loops in the directory structure:
1930 * After check_dirents(), if an inode backpointer doesn't exist that means it's
1931 * unreachable:
1932 */
58686a25 1933static int check_directory_structure(struct bch_fs *c)
d3ff7fec
KO
1934{
1935 struct btree_trans trans;
67e0dd8f 1936 struct btree_iter iter;
d3ff7fec
KO
1937 struct bkey_s_c k;
1938 struct bch_inode_unpacked u;
1939 struct pathbuf path = { 0, 0, NULL };
1940 int ret;
1c6fdbd8 1941
d3ff7fec 1942 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1c6fdbd8 1943
909004d2
KO
1944 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
1945 BTREE_ITER_INTENT|
ef1669ff
KO
1946 BTREE_ITER_PREFETCH|
1947 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
d3ff7fec 1948 if (k.k->type != KEY_TYPE_inode)
1c6fdbd8
KO
1949 continue;
1950
d3ff7fec
KO
1951 ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
1952 if (ret) {
1953 /* Should have been caught earlier in fsck: */
1954 bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
1955 break;
1c6fdbd8 1956 }
1c6fdbd8 1957
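 /*
  * An unlinked inode is pending deletion and isn't expected to be
  * reachable from the root; skip it:
  */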
ef1669ff
KO
1958 if (u.bi_flags & BCH_INODE_UNLINKED)
1959 continue;
1960
81ed9ce3 1961 ret = check_path(&trans, &path, &u, iter.pos.snapshot);
d3ff7fec
KO
1962 if (ret)
1963 break;
1c6fdbd8 1964 }
67e0dd8f 1965 bch2_trans_iter_exit(&trans, &iter);
d3ff7fec
KO
1966
1967 BUG_ON(ret == -EINTR);
1968
f24fab9c
KO
1969 kfree(path.entries);
1970
9a796fdb
KO
1971 bch2_trans_exit(&trans);
1972 return ret;
1c6fdbd8
KO
1973}
1974
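/*
 * Hardlink counting: check_nlinks() builds an in-memory table of the inodes
 * that may be hardlinked, walks dirents counting the links that point at each
 * one, then rewrites any inode whose i_nlink doesn't match. Inodes are scanned
 * in key order, so the table stays sorted by (inum, snapshot) and inc_link()
 * can binary search it:
 */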
fc51b041
KO
1975struct nlink_table {
1976 size_t nr;
1977 size_t size;
1c6fdbd8 1978
fc51b041
KO
1979 struct nlink {
1980 u64 inum;
1981 u32 snapshot;
1982 u32 count;
1983 } *d;
1984};
1c6fdbd8 1985
fc51b041 1986static int add_nlink(struct nlink_table *t, u64 inum, u32 snapshot)
1c6fdbd8 1987{
fc51b041
KO
1988 if (t->nr == t->size) {
1989 size_t new_size = max_t(size_t, 128UL, t->size * 2);
1990 void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
 1991 if (!d)
 1992 return -ENOMEM;
1c6fdbd8 1994
82355e28
KO
1995 if (t->d)
1996 memcpy(d, t->d, t->size * sizeof(t->d[0]));
fc51b041 1997 kvfree(t->d);
1c6fdbd8 1998
fc51b041
KO
1999 t->d = d;
2000 t->size = new_size;
2bb748a6
KO
2001 }
2002
fc51b041
KO
2003
2004 t->d[t->nr++] = (struct nlink) {
2005 .inum = inum,
2006 .snapshot = snapshot,
2007 };
2008
2009 return 0;
2010}
2011
2012static int nlink_cmp(const void *_l, const void *_r)
2013{
2014 const struct nlink *l = _l;
2015 const struct nlink *r = _r;
2016
2017 return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
2018}
2019
ef1669ff
KO
2020static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
2021 struct nlink_table *links,
2022 u64 range_start, u64 range_end, u64 inum, u32 snapshot)
fc51b041
KO
2023{
2024 struct nlink *link, key = {
2025 .inum = inum, .snapshot = U32_MAX,
2026 };
2027
2028 if (inum < range_start || inum >= range_end)
1c6fdbd8 2029 return;
fc51b041
KO
2030
2031 link = __inline_bsearch(&key, links->d, links->nr,
2032 sizeof(links->d[0]), nlink_cmp);
ef1669ff
KO
2033 if (!link)
2034 return;
2035
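 /*
  * bsearch() may land on any entry with this inum: back up to the
  * first one, then count this link against each snapshot version of
  * the inode it's visible to:
  */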
2036 while (link > links->d && link[0].inum == link[-1].inum)
2037 --link;
2038
2039 for (; link < links->d + links->nr && link->inum == inum; link++)
2040 if (ref_visible(c, s, snapshot, link->snapshot)) {
2041 link->count++;
2042 if (link->snapshot >= snapshot)
2043 break;
2044 }
fc51b041
KO
2045}
2046
2047noinline_for_stack
2048static int check_nlinks_find_hardlinks(struct bch_fs *c,
2049 struct nlink_table *t,
2050 u64 start, u64 *end)
2051{
2052 struct btree_trans trans;
67e0dd8f 2053 struct btree_iter iter;
fc51b041
KO
2054 struct bkey_s_c k;
2055 struct bkey_s_c_inode inode;
2056 struct bch_inode_unpacked u;
2057 int ret = 0;
2058
2059 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
2060
2061 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
909004d2
KO
2062 POS(0, start),
2063 BTREE_ITER_INTENT|
ef1669ff
KO
2064 BTREE_ITER_PREFETCH|
2065 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
fc51b041
KO
2066 if (k.k->type != KEY_TYPE_inode)
2067 continue;
2068
2069 inode = bkey_s_c_to_inode(k);
2070
2071 /*
2072 * Backpointer and directory structure checks are sufficient for
2073 * directories, since they can't have hardlinks:
2074 */
2075 if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
2076 continue;
2077
2078 /* Should never fail, checked by bch2_inode_invalid: */
2079 BUG_ON(bch2_inode_unpack(inode, &u));
2080
2081 if (!u.bi_nlink)
2082 continue;
2083
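 /*
  * If we can't grow the table any further (add_nlink() fails), remember
  * where we got to; check_nlinks() will do another pass starting from
  * here:
  */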
2084 ret = add_nlink(t, k.k->p.offset, k.k->p.snapshot);
2085 if (ret) {
2086 *end = k.k->p.offset;
2087 ret = 0;
2088 break;
2089 }
2090
1c6fdbd8 2091 }
67e0dd8f 2092 bch2_trans_iter_exit(&trans, &iter);
fc51b041
KO
2093 bch2_trans_exit(&trans);
2094
2095 if (ret)
2096 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1c6fdbd8 2097
fc51b041 2098 return ret;
1c6fdbd8
KO
2099}
2100
2101noinline_for_stack
fc51b041
KO
2102static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
2103 u64 range_start, u64 range_end)
1c6fdbd8 2104{
424eb881 2105 struct btree_trans trans;
ef1669ff 2106 struct snapshots_seen s;
67e0dd8f 2107 struct btree_iter iter;
1c6fdbd8
KO
2108 struct bkey_s_c k;
2109 struct bkey_s_c_dirent d;
1c6fdbd8
KO
2110 int ret;
2111
ef1669ff
KO
2112 snapshots_seen_init(&s);
2113
20bceecb 2114 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 2115
909004d2
KO
2116 for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
2117 BTREE_ITER_INTENT|
ef1669ff
KO
2118 BTREE_ITER_PREFETCH|
2119 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
2120 ret = snapshots_seen_update(c, &s, k.k->p);
2121 if (ret)
2122 break;
2123
1c6fdbd8 2124 switch (k.k->type) {
26609b61 2125 case KEY_TYPE_dirent:
1c6fdbd8 2126 d = bkey_s_c_to_dirent(k);
1c6fdbd8 2127
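 /*
  * Directories and subvolumes can't be hardlinked; only count
  * links for everything else:
  */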
ef1669ff
KO
2128 if (d.v->d_type != DT_DIR &&
2129 d.v->d_type != DT_SUBVOL)
2130 inc_link(c, &s, links, range_start, range_end,
2131 le64_to_cpu(d.v->d_inum),
2132 d.k->p.snapshot);
1c6fdbd8
KO
2133 break;
2134 }
1c6fdbd8 2135 }
67e0dd8f 2136 bch2_trans_iter_exit(&trans, &iter);
abcecb49 2137
1c6fdbd8 2138 if (ret)
619f5bee 2139 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1c6fdbd8 2140
ef1669ff
KO
2141 bch2_trans_exit(&trans);
2142 snapshots_seen_exit(&s);
1c6fdbd8
KO
2143 return ret;
2144}
2145
1c6fdbd8 2146noinline_for_stack
fc51b041
KO
2147static int check_nlinks_update_hardlinks(struct bch_fs *c,
2148 struct nlink_table *links,
1c6fdbd8
KO
2149 u64 range_start, u64 range_end)
2150{
0564b167 2151 struct btree_trans trans;
67e0dd8f 2152 struct btree_iter iter;
1c6fdbd8 2153 struct bkey_s_c k;
fc51b041
KO
2154 struct bkey_s_c_inode inode;
2155 struct bch_inode_unpacked u;
2156 struct nlink *link = links->d;
b906aadd 2157 int ret = 0;
1c6fdbd8 2158
20bceecb 2159 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
0564b167 2160
b906aadd 2161 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
909004d2
KO
2162 POS(0, range_start),
2163 BTREE_ITER_INTENT|
ef1669ff
KO
2164 BTREE_ITER_PREFETCH|
2165 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
fc51b041 2166 if (k.k->p.offset >= range_end)
1c6fdbd8
KO
2167 break;
2168
b906aadd
KO
2169 if (k.k->type != KEY_TYPE_inode)
2170 continue;
1c6fdbd8 2171
fc51b041
KO
2172 inode = bkey_s_c_to_inode(k);
2173 if (S_ISDIR(le16_to_cpu(inode.v->bi_mode)))
2174 continue;
2175
2176 BUG_ON(bch2_inode_unpack(inode, &u));
1c6fdbd8 2177
fc51b041
KO
2178 if (!u.bi_nlink)
2179 continue;
2180
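 /*
  * links->d is in the same (inum, snapshot) order we're iterating the
  * inodes in, so just advance the cursor until it lines up with the
  * current inode:
  */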
ef1669ff
KO
2181 while ((cmp_int(link->inum, k.k->p.offset) ?:
2182 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
fc51b041
KO
2183 link++;
2184 BUG_ON(link >= links->d + links->nr);
2185 }
2186
2187 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
2188 "inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)",
2189 u.bi_inum, mode_to_type(u.bi_mode),
2190 bch2_inode_nlink_get(&u), link->count)) {
2191 bch2_inode_nlink_set(&u, link->count);
2192
ea0531f8 2193 ret = write_inode(&trans, &u, k.k->p.snapshot);
fc51b041
KO
2194 if (ret)
2195 bch_err(c, "error in fsck: error %i updating inode", ret);
2196 }
1c6fdbd8 2197 }
fc51b041 2198fsck_err:
67e0dd8f 2199 bch2_trans_iter_exit(&trans, &iter);
0564b167
KO
2200 bch2_trans_exit(&trans);
2201
b906aadd
KO
2202 if (ret)
2203 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1c6fdbd8 2204
b906aadd 2205 return ret;
1c6fdbd8
KO
2206}
2207
2208noinline_for_stack
58686a25 2209static int check_nlinks(struct bch_fs *c)
1c6fdbd8 2210{
fc51b041 2211 struct nlink_table links = { 0 };
1c6fdbd8
KO
2212 u64 this_iter_range_start, next_iter_range_start = 0;
2213 int ret = 0;
2214
2215 bch_verbose(c, "checking inode nlinks");
2216
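 /*
  * The nlink table may not fit in memory all at once, so do as many
  * passes as needed, each pass covering the range of inodes the table
  * could hold:
  */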
1c6fdbd8
KO
2217 do {
2218 this_iter_range_start = next_iter_range_start;
2219 next_iter_range_start = U64_MAX;
2220
fc51b041
KO
 2221 ret = check_nlinks_find_hardlinks(c, &links,
 2222 this_iter_range_start,
 2223 &next_iter_range_start);
 if (ret)
 break;
 2224
2225 ret = check_nlinks_walk_dirents(c, &links,
1c6fdbd8 2226 this_iter_range_start,
fc51b041 2227 next_iter_range_start);
1c6fdbd8
KO
2228 if (ret)
2229 break;
2230
fc51b041 2231 ret = check_nlinks_update_hardlinks(c, &links,
1c6fdbd8
KO
2232 this_iter_range_start,
2233 next_iter_range_start);
2234 if (ret)
2235 break;
2236
fc51b041 2237 links.nr = 0;
1c6fdbd8
KO
2238 } while (next_iter_range_start != U64_MAX);
2239
fc51b041 2240 kvfree(links.d);
1c6fdbd8
KO
2241
2242 return ret;
2243}
2244
bfe88863
KO
2245static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
2246{
2247 struct bkey_s_c k;
2248 struct bkey_s_c_reflink_p p;
2249 struct bkey_i_reflink_p *u;
2250 int ret;
2251
2252 k = bch2_btree_iter_peek(iter);
2253 if (!k.k)
2254 return 0;
2255
2256 ret = bkey_err(k);
2257 if (ret)
2258 return ret;
2259
2260 if (k.k->type != KEY_TYPE_reflink_p)
2261 return 0;
2262
2263 p = bkey_s_c_to_reflink_p(k);
2264
6d76aefe 2265 if (!p.v->front_pad && !p.v->back_pad)
bfe88863
KO
2266 return 0;
2267
2268 u = bch2_trans_kmalloc(trans, sizeof(*u));
2269 ret = PTR_ERR_OR_ZERO(u);
2270 if (ret)
2271 return ret;
2272
2273 bkey_reassemble(&u->k_i, k);
6d76aefe
KO
2274 u->v.front_pad = 0;
2275 u->v.back_pad = 0;
bfe88863 2276
6b3d8b89 2277 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN);
bfe88863
KO
2278}
2279
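/*
 * On filesystems from before the reflink_p fix, reflink pointers may have
 * nonzero front_pad/back_pad fields; rewrite any such keys with the padding
 * cleared:
 */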
2280static int fix_reflink_p(struct bch_fs *c)
2281{
2282 struct btree_trans trans;
2283 struct btree_iter iter;
2284 struct bkey_s_c k;
2285 int ret;
2286
2287 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
2288 return 0;
2289
2290 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
2291
2292 for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN,
2293 BTREE_ITER_INTENT|
2294 BTREE_ITER_PREFETCH|
2295 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
2296 if (k.k->type == KEY_TYPE_reflink_p) {
2297 ret = __bch2_trans_do(&trans, NULL, NULL,
2298 BTREE_INSERT_NOFAIL|
2299 BTREE_INSERT_LAZY_RW,
2300 fix_reflink_p_key(&trans, &iter));
2301 if (ret)
2302 break;
2303 }
2304 }
2305 bch2_trans_iter_exit(&trans, &iter);
2306
2307 bch2_trans_exit(&trans);
2308 return ret;
2309}
2310
1c6fdbd8
KO
2311/*
 2312 * Checks for inconsistencies that shouldn't happen, unless we have a bug;
 2313 * runs every fsck pass and repairs what it can:
2314 */
619f5bee 2315int bch2_fsck_full(struct bch_fs *c)
1c6fdbd8 2316{
14b393ee
KO
2317 return bch2_fs_snapshots_check(c) ?:
2318 check_inodes(c, true) ?:
ef1669ff 2319 check_subvols(c) ?:
5c16add5 2320 check_extents(c) ?:
1c6fdbd8
KO
2321 check_dirents(c) ?:
2322 check_xattrs(c) ?:
ef1669ff 2323 check_root(c) ?:
58686a25 2324 check_directory_structure(c) ?:
bfe88863
KO
2325 check_nlinks(c) ?:
2326 fix_reflink_p(c);
1c6fdbd8
KO
2327}
2328
619f5bee 2329int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1c6fdbd8 2330{
5c16add5 2331 return check_inodes(c, false);
1c6fdbd8 2332}