bcachefs: When fsck finds redundant snapshot keys, trigger snapshots cleanup
[linux-block.git] / fs / bcachefs / fsck.c
CommitLineData
1c6fdbd8
KO
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
07a1006a 4#include "bkey_buf.h"
1c6fdbd8 5#include "btree_update.h"
91d961ba 6#include "darray.h"
1c6fdbd8
KO
7#include "dirent.h"
8#include "error.h"
96385742 9#include "fs-common.h"
1c6fdbd8
KO
10#include "fsck.h"
11#include "inode.h"
12#include "keylist.h"
14b393ee 13#include "subvolume.h"
1c6fdbd8
KO
14#include "super.h"
15#include "xattr.h"
16
fc51b041 17#include <linux/bsearch.h>
1c6fdbd8 18#include <linux/dcache.h> /* struct qstr */
1c6fdbd8
KO
19
20#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
21
ef1669ff
KO
22static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
23 u32 snapshot)
424eb881 24{
67e0dd8f 25 struct btree_iter iter;
424eb881
KO
26 struct bkey_s_c k;
27 u64 sectors = 0;
94f651e2 28 int ret;
424eb881 29
41f8b09e 30 for_each_btree_key(trans, iter, BTREE_ID_extents,
ef1669ff 31 SPOS(inum, 0, snapshot), 0, k, ret) {
424eb881
KO
32 if (k.k->p.inode != inum)
33 break;
34
35 if (bkey_extent_is_allocation(k.k))
36 sectors += k.k->size;
37 }
38
67e0dd8f 39 bch2_trans_iter_exit(trans, &iter);
94f651e2
KO
40
41 return ret ?: sectors;
424eb881
KO
42}
43
ef1669ff
KO
44static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
45 u32 snapshot)
46{
47 struct btree_iter iter;
48 struct bkey_s_c k;
49 struct bkey_s_c_dirent d;
50 u64 subdirs = 0;
51 int ret;
52
53 for_each_btree_key(trans, iter, BTREE_ID_dirents,
54 SPOS(inum, 0, snapshot), 0, k, ret) {
55 if (k.k->p.inode != inum)
56 break;
57
58 if (k.k->type != KEY_TYPE_dirent)
59 continue;
60
61 d = bkey_s_c_to_dirent(k);
62 if (d.v->d_type == DT_DIR)
63 subdirs++;
64 }
65
66 bch2_trans_iter_exit(trans, &iter);
67
68 return ret ?: subdirs;
69}
70
81ed9ce3
KO
71static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
72 u32 *subvol)
73{
74 struct btree_iter iter;
75 struct bkey_s_c k;
76 int ret;
77
78 bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
79 POS(0, snapshot), 0);
80 k = bch2_btree_iter_peek_slot(&iter);
81 ret = bkey_err(k);
82 if (ret)
83 goto err;
84
85 if (k.k->type != KEY_TYPE_snapshot) {
86 bch_err(trans->c, "snapshot %u not fonud", snapshot);
87 ret = -ENOENT;
88 goto err;
89 }
90
91 *subvol = le32_to_cpu(bkey_s_c_to_snapshot(k).v->subvol);
92err:
93 bch2_trans_iter_exit(trans, &iter);
94 return ret;
95
96}
97
ef1669ff
KO
98static int __subvol_lookup(struct btree_trans *trans, u32 subvol,
99 u32 *snapshot, u64 *inum)
81ed9ce3 100{
97996ddf 101 struct bch_subvolume s;
81ed9ce3
KO
102 int ret;
103
97996ddf 104 ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
81ed9ce3 105
97996ddf
KO
106 *snapshot = le32_to_cpu(s.snapshot);
107 *inum = le64_to_cpu(s.inode);
81ed9ce3 108 return ret;
81ed9ce3
KO
109}
110
ef1669ff
KO
111static int subvol_lookup(struct btree_trans *trans, u32 subvol,
112 u32 *snapshot, u64 *inum)
81ed9ce3 113{
ef1669ff 114 return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum));
81ed9ce3
KO
115}
116
c27314b4
KO
117static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
118 struct bch_inode_unpacked *inode)
119{
120 struct btree_iter iter;
121 struct bkey_s_c k;
122 int ret;
123
124 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
125 POS(0, inode_nr),
126 BTREE_ITER_ALL_SNAPSHOTS);
127 k = bch2_btree_iter_peek(&iter);
128 ret = bkey_err(k);
129 if (ret)
130 goto err;
131
132 if (!k.k || bkey_cmp(k.k->p, POS(0, inode_nr))) {
133 ret = -ENOENT;
134 goto err;
135 }
136
3e52c222 137 ret = bch2_inode_unpack(k, inode);
c27314b4
KO
138err:
139 if (ret && ret != -EINTR)
140 bch_err(trans->c, "error %i fetching inode %llu",
141 ret, inode_nr);
142 bch2_trans_iter_exit(trans, &iter);
143 return ret;
144}
145
58686a25
KO
146static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
147 struct bch_inode_unpacked *inode,
148 u32 *snapshot)
8a85b20c 149{
67e0dd8f 150 struct btree_iter iter;
8a85b20c
KO
151 struct bkey_s_c k;
152 int ret;
153
67e0dd8f 154 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
ef1669ff 155 SPOS(0, inode_nr, *snapshot), 0);
67e0dd8f 156 k = bch2_btree_iter_peek_slot(&iter);
8a85b20c
KO
157 ret = bkey_err(k);
158 if (ret)
159 goto err;
160
3e52c222
KO
161 ret = bkey_is_inode(k.k)
162 ? bch2_inode_unpack(k, inode)
8a85b20c 163 : -ENOENT;
4db65027
KO
164 if (!ret)
165 *snapshot = iter.pos.snapshot;
8a85b20c 166err:
285b181a
KO
167 if (ret && ret != -EINTR)
168 bch_err(trans->c, "error %i fetching inode %llu:%u",
169 ret, inode_nr, *snapshot);
67e0dd8f 170 bch2_trans_iter_exit(trans, &iter);
8a85b20c
KO
171 return ret;
172}
173
58686a25
KO
174static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
175 struct bch_inode_unpacked *inode,
176 u32 *snapshot)
177{
178 return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
179}
180
ef1669ff
KO
181static int __lookup_dirent(struct btree_trans *trans,
182 struct bch_hash_info hash_info,
183 subvol_inum dir, struct qstr *name,
184 u64 *target, unsigned *type)
185{
186 struct btree_iter iter;
187 struct bkey_s_c_dirent d;
188 int ret;
189
190 ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
191 &hash_info, dir, name, 0);
192 if (ret)
193 return ret;
194
195 d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
196 *target = le64_to_cpu(d.v->d_inum);
197 *type = d.v->d_type;
198 bch2_trans_iter_exit(trans, &iter);
199 return 0;
200}
201
58686a25
KO
202static int __write_inode(struct btree_trans *trans,
203 struct bch_inode_unpacked *inode,
204 u32 snapshot)
8a85b20c 205{
67e0dd8f
KO
206 struct btree_iter iter;
207 int ret;
208
209 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
210 SPOS(0, inode->bi_inum, snapshot),
211 BTREE_ITER_INTENT);
212
213 ret = bch2_btree_iter_traverse(&iter) ?:
214 bch2_inode_write(trans, &iter, inode);
215 bch2_trans_iter_exit(trans, &iter);
58686a25
KO
216 return ret;
217}
218
219static int write_inode(struct btree_trans *trans,
220 struct bch_inode_unpacked *inode,
221 u32 snapshot)
222{
e68914ca 223 int ret = commit_do(trans, NULL, NULL,
8a85b20c
KO
224 BTREE_INSERT_NOFAIL|
225 BTREE_INSERT_LAZY_RW,
58686a25 226 __write_inode(trans, inode, snapshot));
8a85b20c
KO
227 if (ret)
228 bch_err(trans->c, "error in fsck: error %i updating inode", ret);
229 return ret;
230}
231
ef1669ff
KO
232static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot)
233{
e4085b70 234 struct bch_fs *c = trans->c;
ef1669ff
KO
235 struct btree_iter iter = { NULL };
236 struct bkey_i_inode_generation delete;
237 struct bch_inode_unpacked inode_u;
238 struct bkey_s_c k;
239 int ret;
240
241 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
242 SPOS(inum, 0, snapshot),
243 SPOS(inum, U64_MAX, snapshot),
244 0, NULL) ?:
245 bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
246 SPOS(inum, 0, snapshot),
247 SPOS(inum, U64_MAX, snapshot),
248 0, NULL) ?:
249 bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
250 SPOS(inum, 0, snapshot),
251 SPOS(inum, U64_MAX, snapshot),
252 0, NULL);
253 if (ret)
254 goto err;
255retry:
256 bch2_trans_begin(trans);
257
258 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
259 SPOS(0, inum, snapshot), BTREE_ITER_INTENT);
260 k = bch2_btree_iter_peek_slot(&iter);
261
262 ret = bkey_err(k);
263 if (ret)
264 goto err;
265
3e52c222 266 if (!bkey_is_inode(k.k)) {
e4085b70 267 bch2_fs_inconsistent(c,
ef1669ff
KO
268 "inode %llu:%u not found when deleting",
269 inum, snapshot);
270 ret = -EIO;
271 goto err;
272 }
273
3e52c222 274 bch2_inode_unpack(k, &inode_u);
ef1669ff
KO
275
276 /* Subvolume root? */
e4085b70
KO
277 if (inode_u.bi_subvol)
278 bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum);
ef1669ff
KO
279
280 bkey_inode_generation_init(&delete.k_i);
281 delete.k.p = iter.pos;
282 delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
283
284 ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
285 bch2_trans_commit(trans, NULL, NULL,
286 BTREE_INSERT_NOFAIL);
287err:
288 bch2_trans_iter_exit(trans, &iter);
289 if (ret == -EINTR)
290 goto retry;
291
292 return ret;
293}
294
ae8bbb9f 295static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
1c6fdbd8 296{
0f238367 297 struct bch_fs *c = trans->c;
67e0dd8f 298 struct btree_iter iter;
1c6fdbd8
KO
299 struct bch_inode_unpacked dir_inode;
300 struct bch_hash_info dir_hash_info;
1c6fdbd8 301 int ret;
1c6fdbd8 302
c27314b4 303 ret = lookup_first_inode(trans, pos.inode, &dir_inode);
b1fd23df 304 if (ret)
e492e7b6 305 goto err;
1c6fdbd8
KO
306
307 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
308
67e0dd8f 309 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
b1fd23df 310
ae8bbb9f 311 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
41fc8622
KO
312 &dir_hash_info, &iter,
313 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
67e0dd8f 314 bch2_trans_iter_exit(trans, &iter);
e492e7b6
KO
315err:
316 if (ret && ret != -EINTR)
317 bch_err(c, "error %i from __remove_dirent()", ret);
ae8bbb9f 318 return ret;
b1fd23df
KO
319}
320
58686a25 321/* Get lost+found, create if it doesn't exist: */
ef1669ff 322static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
58686a25 323 struct bch_inode_unpacked *lostfound)
1c6fdbd8 324{
58686a25
KO
325 struct bch_fs *c = trans->c;
326 struct bch_inode_unpacked root;
327 struct bch_hash_info root_hash_info;
328 struct qstr lostfound_str = QSTR("lost+found");
ef1669ff
KO
329 subvol_inum root_inum = { .subvol = subvol };
330 u64 inum = 0;
331 unsigned d_type = 0;
58686a25
KO
332 u32 snapshot;
333 int ret;
334
285b181a 335 ret = __subvol_lookup(trans, subvol, &snapshot, &root_inum.inum);
ef1669ff
KO
336 if (ret)
337 return ret;
81ed9ce3 338
285b181a
KO
339 ret = __lookup_inode(trans, root_inum.inum, &root, &snapshot);
340 if (ret)
58686a25
KO
341 return ret;
342
343 root_hash_info = bch2_hash_info_init(c, &root);
ef1669ff 344
285b181a 345 ret = __lookup_dirent(trans, root_hash_info, root_inum,
ef1669ff
KO
346 &lostfound_str, &inum, &d_type);
347 if (ret == -ENOENT) {
58686a25
KO
348 bch_notice(c, "creating lost+found");
349 goto create_lostfound;
350 }
351
285b181a 352 if (ret && ret != -EINTR)
ef1669ff 353 bch_err(c, "error looking up lost+found: %i", ret);
285b181a 354 if (ret)
ef1669ff 355 return ret;
ef1669ff
KO
356
357 if (d_type != DT_DIR) {
358 bch_err(c, "error looking up lost+found: not a directory");
359 return ret;
ef1669ff
KO
360 }
361
285b181a
KO
362 /*
363 * The check_dirents pass has already run, dangling dirents
364 * shouldn't exist here:
365 */
366 return __lookup_inode(trans, inum, lostfound, &snapshot);
58686a25 367
58686a25 368create_lostfound:
285b181a
KO
369 bch2_inode_init_early(c, lostfound);
370
371 ret = bch2_create_trans(trans, root_inum, &root,
372 lostfound, &lostfound_str,
373 0, 0, S_IFDIR|0700, 0, NULL, NULL,
374 (subvol_inum) { }, 0);
375 if (ret && ret != -EINTR)
376 bch_err(c, "error creating lost+found: %i", ret);
377 return ret;
58686a25
KO
378}
379
285b181a 380static int __reattach_inode(struct btree_trans *trans,
81ed9ce3 381 struct bch_inode_unpacked *inode,
ef1669ff 382 u32 inode_snapshot)
58686a25
KO
383{
384 struct bch_hash_info dir_hash;
385 struct bch_inode_unpacked lostfound;
1c6fdbd8
KO
386 char name_buf[20];
387 struct qstr name;
176cf4bf 388 u64 dir_offset = 0;
81ed9ce3 389 u32 subvol;
1c6fdbd8
KO
390 int ret;
391
285b181a 392 ret = __snapshot_lookup_subvol(trans, inode_snapshot, &subvol);
81ed9ce3
KO
393 if (ret)
394 return ret;
395
396 ret = lookup_lostfound(trans, subvol, &lostfound);
176cf4bf 397 if (ret)
d3ff7fec 398 return ret;
176cf4bf 399
58686a25
KO
400 if (S_ISDIR(inode->bi_mode)) {
401 lostfound.bi_nlink++;
176cf4bf 402
285b181a 403 ret = __write_inode(trans, &lostfound, U32_MAX);
176cf4bf 404 if (ret)
d3ff7fec 405 return ret;
176cf4bf
KO
406 }
407
58686a25 408 dir_hash = bch2_hash_info_init(trans->c, &lostfound);
176cf4bf 409
58686a25
KO
410 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
411 name = (struct qstr) QSTR(name_buf);
176cf4bf 412
285b181a
KO
413 ret = bch2_dirent_create(trans,
414 (subvol_inum) {
415 .subvol = subvol,
416 .inum = lostfound.bi_inum,
417 },
418 &dir_hash,
419 inode_d_type(inode),
420 &name, inode->bi_inum, &dir_offset,
421 BCH_HASH_SET_MUST_CREATE);
422 if (ret)
423 return ret;
424
425 inode->bi_dir = lostfound.bi_inum;
426 inode->bi_dir_offset = dir_offset;
427
428 return __write_inode(trans, inode, inode_snapshot);
429}
430
431static int reattach_inode(struct btree_trans *trans,
432 struct bch_inode_unpacked *inode,
433 u32 inode_snapshot)
434{
e68914ca 435 int ret = commit_do(trans, NULL, NULL,
285b181a
KO
436 BTREE_INSERT_LAZY_RW|
437 BTREE_INSERT_NOFAIL,
438 __reattach_inode(trans, inode, inode_snapshot));
58686a25
KO
439 if (ret) {
440 bch_err(trans->c, "error %i reattaching inode %llu",
441 ret, inode->bi_inum);
442 return ret;
443 }
176cf4bf 444
285b181a 445 return ret;
1c6fdbd8
KO
446}
447
d3ff7fec
KO
448static int remove_backpointer(struct btree_trans *trans,
449 struct bch_inode_unpacked *inode)
450{
67e0dd8f 451 struct btree_iter iter;
d3ff7fec
KO
452 struct bkey_s_c k;
453 int ret;
454
67e0dd8f
KO
455 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
456 POS(inode->bi_dir, inode->bi_dir_offset), 0);
457 k = bch2_btree_iter_peek_slot(&iter);
d3ff7fec
KO
458 ret = bkey_err(k);
459 if (ret)
460 goto out;
461 if (k.k->type != KEY_TYPE_dirent) {
462 ret = -ENOENT;
463 goto out;
464 }
465
285b181a 466 ret = __remove_dirent(trans, k.k->p);
d3ff7fec 467out:
67e0dd8f 468 bch2_trans_iter_exit(trans, &iter);
d3ff7fec
KO
469 return ret;
470}
471
49124d8a
KO
472struct snapshots_seen_entry {
473 u32 id;
474 u32 equiv;
475};
476
477struct snapshots_seen {
478 struct bpos pos;
479 DARRAY(struct snapshots_seen_entry) ids;
480};
481
482static inline void snapshots_seen_exit(struct snapshots_seen *s)
483{
484 darray_exit(&s->ids);
485}
486
487static inline void snapshots_seen_init(struct snapshots_seen *s)
488{
489 memset(s, 0, sizeof(*s));
490}
491
492static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
493 enum btree_id btree_id, struct bpos pos)
ef1669ff 494{
49124d8a
KO
495 struct snapshots_seen_entry *i, n = {
496 .id = pos.snapshot,
497 .equiv = bch2_snapshot_equiv(c, pos.snapshot),
498 };
499 int ret;
ef1669ff
KO
500
501 if (bkey_cmp(s->pos, pos))
91d961ba 502 s->ids.nr = 0;
49124d8a
KO
503
504 pos.snapshot = n.equiv;
ef1669ff
KO
505 s->pos = pos;
506
49124d8a
KO
507 darray_for_each(s->ids, i)
508 if (i->equiv == n.equiv) {
509 if (i->id != n.id) {
510 bch_err(c, "snapshot deletion did not run correctly:\n"
511 " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
512 bch2_btree_ids[btree_id],
513 pos.inode, pos.offset,
514 i->id, n.id, n.equiv);
c7a09cb1 515 return -NEED_SNAPSHOT_CLEANUP;
49124d8a 516 }
ef1669ff 517
49124d8a
KO
518 return 0;
519 }
520
521 ret = darray_push(&s->ids, n);
522 if (ret)
523 bch_err(c, "error reallocating snapshots_seen table (size %zu)",
524 s->ids.size);
525 return ret;
ef1669ff
KO
526}
527
528/**
529 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
530 * and @ancestor hasn't been overwritten in @seen
531 *
532 * That is, returns whether key in @ancestor snapshot is visible in @id snapshot
533 */
534static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
535 u32 id, u32 ancestor)
536{
537 ssize_t i;
49124d8a 538 u32 top = seen->ids.nr ? seen->ids.data[seen->ids.nr - 1].equiv : 0;
ef1669ff
KO
539
540 BUG_ON(id > ancestor);
49124d8a
KO
541 BUG_ON(!bch2_snapshot_is_equiv(c, id));
542 BUG_ON(!bch2_snapshot_is_equiv(c, ancestor));
ef1669ff
KO
543
544 /* @ancestor should be the snapshot most recently added to @seen */
49124d8a
KO
545 BUG_ON(ancestor != seen->pos.snapshot);
546 BUG_ON(ancestor != top);
ef1669ff
KO
547
548 if (id == ancestor)
549 return true;
550
551 if (!bch2_snapshot_is_ancestor(c, id, ancestor))
552 return false;
553
91d961ba 554 for (i = seen->ids.nr - 2;
49124d8a 555 i >= 0 && seen->ids.data[i].equiv >= id;
ef1669ff 556 --i)
49124d8a
KO
557 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv) &&
558 bch2_snapshot_is_ancestor(c, seen->ids.data[i].equiv, ancestor))
ef1669ff
KO
559 return false;
560
561 return true;
562}
563
564/**
565 * ref_visible - given a key with snapshot id @src that points to a key with
566 * snapshot id @dst, test whether there is some snapshot in which @dst is
567 * visible.
568 *
569 * This assumes we're visiting @src keys in natural key order.
570 *
571 * @s - list of snapshot IDs already seen at @src
572 * @src - snapshot ID of src key
573 * @dst - snapshot ID of dst key
574 */
575static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
576 u32 src, u32 dst)
577{
578 return dst <= src
579 ? key_visible_in_snapshot(c, s, dst, src)
580 : bch2_snapshot_is_ancestor(c, src, dst);
581}
582
49124d8a
KO
583#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
584 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
585 (_i)->snapshot <= (_snapshot); _i++) \
ef1669ff
KO
586 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
587
91d961ba
KO
588struct inode_walker_entry {
589 struct bch_inode_unpacked inode;
590 u32 snapshot;
591 u64 count;
592};
593
1c6fdbd8 594struct inode_walker {
ef1669ff
KO
595 bool first_this_inode;
596 u64 cur_inum;
597
91d961ba 598 DARRAY(struct inode_walker_entry) inodes;
1c6fdbd8
KO
599};
600
ef1669ff
KO
601static void inode_walker_exit(struct inode_walker *w)
602{
91d961ba 603 darray_exit(&w->inodes);
ef1669ff
KO
604}
605
1c6fdbd8
KO
606static struct inode_walker inode_walker_init(void)
607{
ef1669ff
KO
608 return (struct inode_walker) { 0, };
609}
610
ef1669ff 611static int add_inode(struct bch_fs *c, struct inode_walker *w,
3e52c222 612 struct bkey_s_c inode)
ef1669ff
KO
613{
614 struct bch_inode_unpacked u;
ef1669ff
KO
615
616 BUG_ON(bch2_inode_unpack(inode, &u));
617
91d961ba 618 return darray_push(&w->inodes, ((struct inode_walker_entry) {
ef1669ff 619 .inode = u,
49124d8a 620 .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot),
91d961ba 621 }));
1c6fdbd8
KO
622}
623
914f2786 624static int __walk_inode(struct btree_trans *trans,
ef1669ff 625 struct inode_walker *w, struct bpos pos)
1c6fdbd8 626{
ef1669ff
KO
627 struct bch_fs *c = trans->c;
628 struct btree_iter iter;
629 struct bkey_s_c k;
49124d8a 630 unsigned i;
ef1669ff 631 int ret;
1c6fdbd8 632
49124d8a 633 pos.snapshot = bch2_snapshot_equiv(c, pos.snapshot);
1c6fdbd8 634
ef1669ff 635 if (pos.inode == w->cur_inum) {
6bd13057 636 w->first_this_inode = false;
ef1669ff 637 goto lookup_snapshot;
1c6fdbd8
KO
638 }
639
91d961ba 640 w->inodes.nr = 0;
ef1669ff
KO
641
642 for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, pos.inode),
643 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
644 if (k.k->p.offset != pos.inode)
645 break;
646
3e52c222
KO
647 if (bkey_is_inode(k.k))
648 add_inode(c, w, k);
ef1669ff
KO
649 }
650 bch2_trans_iter_exit(trans, &iter);
651
652 if (ret)
653 return ret;
654
655 w->cur_inum = pos.inode;
656 w->first_this_inode = true;
657lookup_snapshot:
91d961ba
KO
658 for (i = 0; i < w->inodes.nr; i++)
659 if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->inodes.data[i].snapshot))
ef1669ff
KO
660 goto found;
661 return INT_MAX;
662found:
91d961ba 663 BUG_ON(pos.snapshot > w->inodes.data[i].snapshot);
ef1669ff 664
91d961ba 665 if (pos.snapshot != w->inodes.data[i].snapshot) {
49124d8a
KO
666 struct inode_walker_entry e = w->inodes.data[i];
667
668 e.snapshot = pos.snapshot;
669 e.count = 0;
670
671 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
672 pos.inode, pos.snapshot, w->inodes.data[i].snapshot);
ef1669ff 673
91d961ba 674 while (i && w->inodes.data[i - 1].snapshot > pos.snapshot)
ef1669ff
KO
675 --i;
676
49124d8a 677 ret = darray_insert_item(&w->inodes, i, e);
ef1669ff
KO
678 if (ret)
679 return ret;
ef1669ff
KO
680 }
681
682 return i;
1c6fdbd8
KO
683}
684
ef1669ff
KO
685static int __get_visible_inodes(struct btree_trans *trans,
686 struct inode_walker *w,
687 struct snapshots_seen *s,
688 u64 inum)
689{
690 struct bch_fs *c = trans->c;
691 struct btree_iter iter;
692 struct bkey_s_c k;
693 int ret;
694
91d961ba 695 w->inodes.nr = 0;
ef1669ff
KO
696
697 for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
698 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
49124d8a
KO
699 u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
700
ef1669ff
KO
701 if (k.k->p.offset != inum)
702 break;
703
49124d8a 704 if (!ref_visible(c, s, s->pos.snapshot, equiv))
ef1669ff
KO
705 continue;
706
49124d8a 707 if (bkey_is_inode(k.k))
3e52c222 708 add_inode(c, w, k);
49124d8a
KO
709
710 if (equiv >= s->pos.snapshot)
711 break;
ef1669ff
KO
712 }
713 bch2_trans_iter_exit(trans, &iter);
714
715 return ret;
716}
717
718static int check_key_has_snapshot(struct btree_trans *trans,
719 struct btree_iter *iter,
720 struct bkey_s_c k)
721{
722 struct bch_fs *c = trans->c;
fa8e94fa 723 struct printbuf buf = PRINTBUF;
ef1669ff
KO
724 int ret = 0;
725
49124d8a 726 if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
ef1669ff 727 "key in missing snapshot: %s",
fa8e94fa
KO
728 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
729 ret = bch2_btree_delete_at(trans, iter,
285b181a 730 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1;
ef1669ff 731fsck_err:
fa8e94fa 732 printbuf_exit(&buf);
ef1669ff 733 return ret;
914f2786
KO
734}
735
7ac2c55e
KO
736static int hash_redo_key(struct btree_trans *trans,
737 const struct bch_hash_desc desc,
738 struct bch_hash_info *hash_info,
739 struct btree_iter *k_iter, struct bkey_s_c k)
1c6fdbd8 740{
ef1669ff
KO
741 bch_err(trans->c, "hash_redo_key() not implemented yet");
742 return -EINVAL;
743#if 0
e3b4b48c 744 struct bkey_i *delete;
1c6fdbd8 745 struct bkey_i *tmp;
1c6fdbd8 746
e3b4b48c
KO
747 delete = bch2_trans_kmalloc(trans, sizeof(*delete));
748 if (IS_ERR(delete))
749 return PTR_ERR(delete);
750
b1fd23df
KO
751 tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
752 if (IS_ERR(tmp))
753 return PTR_ERR(tmp);
1c6fdbd8
KO
754
755 bkey_reassemble(tmp, k);
756
e3b4b48c
KO
757 bkey_init(&delete->k);
758 delete->k.p = k_iter->pos;
8c3f6da9
KO
759 return bch2_btree_iter_traverse(k_iter) ?:
760 bch2_trans_update(trans, k_iter, delete, 0) ?:
bc3f8b25 761 bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
ef1669ff 762#endif
1c6fdbd8
KO
763}
764
7ac2c55e
KO
765static int hash_check_key(struct btree_trans *trans,
766 const struct bch_hash_desc desc,
767 struct bch_hash_info *hash_info,
768 struct btree_iter *k_iter, struct bkey_s_c hash_k)
d69f41d6 769{
424eb881 770 struct bch_fs *c = trans->c;
67e0dd8f 771 struct btree_iter iter = { NULL };
fa8e94fa 772 struct printbuf buf = PRINTBUF;
7ac2c55e
KO
773 struct bkey_s_c k;
774 u64 hash;
d69f41d6
KO
775 int ret = 0;
776
7ac2c55e
KO
777 if (hash_k.k->type != desc.key_type)
778 return 0;
779
780 hash = desc.hash_bkey(hash_info, hash_k);
781
782 if (likely(hash == hash_k.k->p.offset))
d69f41d6
KO
783 return 0;
784
7ac2c55e
KO
785 if (hash_k.k->p.offset < hash)
786 goto bad_hash;
d69f41d6 787
d8f31407
KO
788 for_each_btree_key_norestart(trans, iter, desc.btree_id,
789 POS(hash_k.k->p.inode, hash),
790 BTREE_ITER_SLOTS, k, ret) {
7ac2c55e 791 if (!bkey_cmp(k.k->p, hash_k.k->p))
d69f41d6
KO
792 break;
793
7ac2c55e
KO
794 if (fsck_err_on(k.k->type == desc.key_type &&
795 !desc.cmp_bkey(k, hash_k), c,
d69f41d6 796 "duplicate hash table keys:\n%s",
fa8e94fa
KO
797 (printbuf_reset(&buf),
798 bch2_bkey_val_to_text(&buf, c, hash_k),
799 buf.buf))) {
285b181a 800 ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
d69f41d6
KO
801 break;
802 }
d69f41d6 803
7ac2c55e 804 if (bkey_deleted(k.k)) {
67e0dd8f 805 bch2_trans_iter_exit(trans, &iter);
7ac2c55e 806 goto bad_hash;
1c6fdbd8 807 }
7ac2c55e 808 }
fa8e94fa 809out:
67e0dd8f 810 bch2_trans_iter_exit(trans, &iter);
fa8e94fa 811 printbuf_exit(&buf);
1c6fdbd8 812 return ret;
7ac2c55e 813bad_hash:
d8f31407 814 if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, "
e3b4b48c 815 "hashed to %llu\n%s",
d8f31407 816 bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash,
fa8e94fa
KO
817 (printbuf_reset(&buf),
818 bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)) == FSCK_ERR_IGNORE)
741daa5b
KO
819 return 0;
820
285b181a 821 ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
7ac2c55e
KO
822 if (ret) {
823 bch_err(c, "hash_redo_key err %i", ret);
824 return ret;
741daa5b 825 }
fa8e94fa 826 ret = -EINTR;
741daa5b 827fsck_err:
fa8e94fa 828 goto out;
741daa5b
KO
829}
830
5c16add5
KO
831static int check_inode(struct btree_trans *trans,
832 struct btree_iter *iter,
ef1669ff 833 struct bch_inode_unpacked *prev,
49124d8a 834 struct snapshots_seen *s,
285b181a 835 bool full)
5c16add5
KO
836{
837 struct bch_fs *c = trans->c;
285b181a 838 struct bkey_s_c k;
285b181a 839 struct bch_inode_unpacked u;
5c16add5 840 bool do_update = false;
285b181a
KO
841 int ret;
842
843 k = bch2_btree_iter_peek(iter);
844 if (!k.k)
845 return 0;
846
847 ret = bkey_err(k);
848 if (ret)
849 return ret;
5c16add5 850
285b181a 851 ret = check_key_has_snapshot(trans, iter, k);
e492e7b6
KO
852 if (ret < 0)
853 goto err;
285b181a 854 if (ret)
e492e7b6 855 return 0;
285b181a 856
49124d8a
KO
857 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
858 if (ret)
859 goto err;
860
285b181a
KO
861 /*
862 * if snapshot id isn't a leaf node, skip it - deletion in
863 * particular is not atomic, so on the internal snapshot nodes
864 * we can see inodes marked for deletion after a clean shutdown
865 */
866 if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
867 return 0;
868
3e52c222 869 if (!bkey_is_inode(k.k))
285b181a
KO
870 return 0;
871
3e52c222 872 BUG_ON(bch2_inode_unpack(k, &u));
285b181a
KO
873
874 if (!full &&
3e52c222
KO
875 !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|
876 BCH_INODE_I_SECTORS_DIRTY|
877 BCH_INODE_UNLINKED)))
285b181a
KO
878 return 0;
879
285b181a
KO
880 if (prev->bi_inum != u.bi_inum)
881 *prev = u;
882
883 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed ||
884 inode_d_type(prev) != inode_d_type(&u), c,
ef1669ff
KO
885 "inodes in different snapshots don't match")) {
886 bch_err(c, "repair not implemented yet");
887 return -EINVAL;
888 }
5c16add5
KO
889
890 if (u.bi_flags & BCH_INODE_UNLINKED &&
891 (!c->sb.clean ||
892 fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
893 u.bi_inum))) {
5c16add5
KO
894 bch2_trans_unlock(trans);
895 bch2_fs_lazy_rw(c);
896
ef1669ff 897 ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
5c16add5
KO
898 if (ret)
899 bch_err(c, "error in fsck: error %i while deleting inode", ret);
900 return ret;
901 }
902
903 if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
904 (!c->sb.clean ||
905 fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
906 u.bi_inum))) {
907 bch_verbose(c, "truncating inode %llu", u.bi_inum);
908
909 bch2_trans_unlock(trans);
910 bch2_fs_lazy_rw(c);
911
912 /*
913 * XXX: need to truncate partial blocks too here - or ideally
914 * just switch units to bytes and that issue goes away
915 */
916 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
ef1669ff
KO
917 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
918 iter->pos.snapshot),
5c16add5 919 POS(u.bi_inum, U64_MAX),
ef1669ff 920 0, NULL);
5c16add5
KO
921 if (ret) {
922 bch_err(c, "error in fsck: error %i truncating inode", ret);
923 return ret;
924 }
925
926 /*
927 * We truncated without our normal sector accounting hook, just
928 * make sure we recalculate it:
929 */
930 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
931
932 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
933 do_update = true;
934 }
935
936 if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
937 (!c->sb.clean ||
938 fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
939 u.bi_inum))) {
940 s64 sectors;
941
942 bch_verbose(c, "recounting sectors for inode %llu",
943 u.bi_inum);
944
ef1669ff 945 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
5c16add5
KO
946 if (sectors < 0) {
947 bch_err(c, "error in fsck: error %i recounting inode sectors",
948 (int) sectors);
949 return sectors;
950 }
951
952 u.bi_sectors = sectors;
953 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
954 do_update = true;
955 }
956
d3ff7fec
KO
957 if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
958 u.bi_dir = 0;
959 u.bi_dir_offset = 0;
960 u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
5c16add5
KO
961 do_update = true;
962 }
963
964 if (do_update) {
ef1669ff 965 ret = write_inode(trans, &u, iter->pos.snapshot);
5c16add5
KO
966 if (ret)
967 bch_err(c, "error in fsck: error %i "
968 "updating inode", ret);
969 }
e492e7b6 970err:
5c16add5 971fsck_err:
e492e7b6
KO
972 if (ret)
973 bch_err(c, "error %i from check_inode()", ret);
5c16add5
KO
974 return ret;
975}
976
977noinline_for_stack
978static int check_inodes(struct bch_fs *c, bool full)
979{
980 struct btree_trans trans;
67e0dd8f 981 struct btree_iter iter;
285b181a 982 struct bch_inode_unpacked prev = { 0 };
49124d8a 983 struct snapshots_seen s;
5c16add5
KO
984 int ret;
985
49124d8a 986 snapshots_seen_init(&s);
5c16add5
KO
987 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
988
47f80bbf 989 bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, POS_MIN,
285b181a
KO
990 BTREE_ITER_INTENT|
991 BTREE_ITER_PREFETCH|
992 BTREE_ITER_ALL_SNAPSHOTS);
993
994 do {
e68914ca 995 ret = commit_do(&trans, NULL, NULL,
285b181a
KO
996 BTREE_INSERT_LAZY_RW|
997 BTREE_INSERT_NOFAIL,
49124d8a 998 check_inode(&trans, &iter, &prev, &s, full));
ef1669ff
KO
999 if (ret)
1000 break;
285b181a
KO
1001 } while (bch2_btree_iter_advance(&iter));
1002 bch2_trans_iter_exit(&trans, &iter);
ef1669ff 1003
285b181a 1004 bch2_trans_exit(&trans);
49124d8a 1005 snapshots_seen_exit(&s);
e492e7b6
KO
1006 if (ret)
1007 bch_err(c, "error %i from check_inodes()", ret);
285b181a
KO
1008 return ret;
1009}
ef1669ff 1010
ef1669ff
KO
1011/*
1012 * Checking for overlapping extents needs to be reimplemented
1013 */
1014#if 0
abcecb49 1015static int fix_overlapping_extent(struct btree_trans *trans,
e3e464ac
KO
1016 struct bkey_s_c k, struct bpos cut_at)
1017{
67e0dd8f 1018 struct btree_iter iter;
e3e464ac
KO
1019 struct bkey_i *u;
1020 int ret;
1021
1022 u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
1023 ret = PTR_ERR_OR_ZERO(u);
1024 if (ret)
1025 return ret;
1026
1027 bkey_reassemble(u, k);
1028 bch2_cut_front(cut_at, u);
1029
e3e464ac 1030
ef1669ff
KO
1031 /*
1032 * We don't want to go through the extent_handle_overwrites path:
1033 *
1034 * XXX: this is going to screw up disk accounting, extent triggers
1035 * assume things about extent overwrites - we should be running the
1036 * triggers manually here
1037 */
1038 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
1039 BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
1040
1041 BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
1042 ret = bch2_btree_iter_traverse(&iter) ?:
1043 bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
1044 bch2_trans_commit(trans, NULL, NULL,
1045 BTREE_INSERT_NOFAIL|
1046 BTREE_INSERT_LAZY_RW);
1047 bch2_trans_iter_exit(trans, &iter);
1048 return ret;
1049}
1050#endif
1051
4db65027
KO
1052static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
1053 struct btree_iter *iter,
1054 struct bpos pos)
1055{
1056 struct bkey_s_c k;
1057 int ret;
1058
1059 bch2_trans_iter_init(trans, iter, BTREE_ID_dirents, pos, 0);
1060 k = bch2_btree_iter_peek_slot(iter);
1061 ret = bkey_err(k);
1062 if (!ret && k.k->type != KEY_TYPE_dirent)
1063 ret = -ENOENT;
1064 if (ret) {
1065 bch2_trans_iter_exit(trans, iter);
1066 return (struct bkey_s_c_dirent) { .k = ERR_PTR(ret) };
1067 }
1068
1069 return bkey_s_c_to_dirent(k);
1070}
1071
1072static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
1073 struct bkey_s_c_dirent d)
1074{
1075 return inode->bi_dir == d.k->p.inode &&
1076 inode->bi_dir_offset == d.k->p.offset;
1077}
1078
1079static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
1080 struct bch_inode_unpacked *inode)
1081{
1082 return d.v->d_type == DT_SUBVOL
1083 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
1084 : le64_to_cpu(d.v->d_inum) == inode->bi_inum;
1085}
1086
ef1669ff
KO
1087static int inode_backpointer_exists(struct btree_trans *trans,
1088 struct bch_inode_unpacked *inode,
1089 u32 snapshot)
1090{
1091 struct btree_iter iter;
4db65027 1092 struct bkey_s_c_dirent d;
ef1669ff
KO
1093 int ret;
1094
4db65027
KO
1095 d = dirent_get_by_pos(trans, &iter,
1096 SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot));
1097 ret = bkey_err(d.s_c);
ef1669ff 1098 if (ret)
e296b1f9 1099 return ret == -ENOENT ? 0 : ret;
488f9776 1100
4db65027 1101 ret = dirent_points_to_inode(d, inode);
ef1669ff
KO
1102 bch2_trans_iter_exit(trans, &iter);
1103 return ret;
1104}
1105
ef1669ff
KO
1106static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
1107{
1108 struct bch_fs *c = trans->c;
1109 struct inode_walker_entry *i;
1110 int ret = 0, ret2 = 0;
1111 s64 count2;
1112
91d961ba 1113 darray_for_each(w->inodes, i) {
ef1669ff
KO
1114 if (i->inode.bi_sectors == i->count)
1115 continue;
1116
1117 count2 = lockrestart_do(trans,
1118 bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot));
1119
1120 if (i->count != count2) {
1121 bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu",
1122 i->count, count2);
1123 i->count = count2;
1124 if (i->inode.bi_sectors == i->count)
1125 continue;
1126 }
1127
1128 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c,
1129 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
1130 w->cur_inum, i->snapshot,
1131 i->inode.bi_sectors, i->count) == FSCK_ERR_IGNORE)
1132 continue;
1133
1134 i->inode.bi_sectors = i->count;
1135 ret = write_inode(trans, &i->inode, i->snapshot);
1136 if (ret)
1137 break;
1138 ret2 = -EINTR;
1139 }
1140fsck_err:
e492e7b6
KO
1141 if (ret)
1142 bch_err(c, "error %i from check_i_sectors()", ret);
ef1669ff
KO
1143 return ret ?: ret2;
1144}
1145
1146static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
1147 struct inode_walker *inode,
1148 struct snapshots_seen *s)
1149{
1150 struct bch_fs *c = trans->c;
1151 struct bkey_s_c k;
1152 struct inode_walker_entry *i;
fa8e94fa 1153 struct printbuf buf = PRINTBUF;
49124d8a 1154 struct bpos equiv;
ef1669ff 1155 int ret = 0;
292dea86 1156peek:
ef1669ff
KO
1157 k = bch2_btree_iter_peek(iter);
1158 if (!k.k)
fa8e94fa 1159 goto out;
ef1669ff
KO
1160
1161 ret = bkey_err(k);
1162 if (ret)
fa8e94fa 1163 goto err;
ef1669ff
KO
1164
1165 ret = check_key_has_snapshot(trans, iter, k);
fa8e94fa
KO
1166 if (ret) {
1167 ret = ret < 0 ? ret : 0;
1168 goto out;
1169 }
ef1669ff 1170
49124d8a
KO
1171 equiv = k.k->p;
1172 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
1173
1174 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
ef1669ff 1175 if (ret)
fa8e94fa 1176 goto err;
ef1669ff
KO
1177
1178 if (k.k->type == KEY_TYPE_whiteout)
fa8e94fa 1179 goto out;
ef1669ff
KO
1180
1181 if (inode->cur_inum != k.k->p.inode) {
1182 ret = check_i_sectors(trans, inode);
1183 if (ret)
fa8e94fa 1184 goto err;
ef1669ff 1185 }
292dea86
KO
1186
1187 if (!iter->path->should_be_locked) {
1188 /*
1189 * hack: check_i_sectors may have handled a transaction restart,
1190 * it shouldn't be but we need to fix the new i_sectors check
1191 * code and delete the old bch2_count_inode_sectors() first
1192 */
1193 goto peek;
1194 }
ef1669ff
KO
1195#if 0
1196 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
1197 char buf1[200];
1198 char buf2[200];
1199
1200 bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
1201 bch2_bkey_val_to_text(&PBUF(buf2), c, k);
1202
fa8e94fa
KO
1203 if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
1204 ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR;
1205 goto out;
1206 }
ef1669ff
KO
1207 }
1208#endif
49124d8a 1209 ret = __walk_inode(trans, inode, equiv);
ef1669ff 1210 if (ret < 0)
fa8e94fa 1211 goto err;
ef1669ff
KO
1212
1213 if (fsck_err_on(ret == INT_MAX, c,
1214 "extent in missing inode:\n %s",
fa8e94fa
KO
1215 (printbuf_reset(&buf),
1216 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1217 ret = bch2_btree_delete_at(trans, iter,
285b181a 1218 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
fa8e94fa
KO
1219 goto out;
1220 }
ef1669ff 1221
fa8e94fa
KO
1222 if (ret == INT_MAX) {
1223 ret = 0;
1224 goto out;
1225 }
e3e464ac 1226
91d961ba 1227 i = inode->inodes.data + ret;
ef1669ff 1228 ret = 0;
e3e464ac 1229
ef1669ff
KO
1230 if (fsck_err_on(!S_ISREG(i->inode.bi_mode) &&
1231 !S_ISLNK(i->inode.bi_mode), c,
1232 "extent in non regular inode mode %o:\n %s",
1233 i->inode.bi_mode,
fa8e94fa
KO
1234 (printbuf_reset(&buf),
1235 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1236 ret = bch2_btree_delete_at(trans, iter,
285b181a 1237 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
fa8e94fa
KO
1238 goto out;
1239 }
ef1669ff 1240
49124d8a
KO
1241 if (!bch2_snapshot_internal_node(c, equiv.snapshot)) {
1242 for_each_visible_inode(c, s, inode, equiv.snapshot, i) {
ef1669ff
KO
1243 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
1244 k.k->type != KEY_TYPE_reservation &&
1245 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
1246 "extent type %u offset %llu past end of inode %llu, i_size %llu",
1247 k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) {
1248 bch2_fs_lazy_rw(c);
fa8e94fa 1249 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
ef1669ff 1250 SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9,
49124d8a 1251 equiv.snapshot),
ef1669ff
KO
1252 POS(k.k->p.inode, U64_MAX),
1253 0, NULL) ?: -EINTR;
fa8e94fa 1254 goto out;
ef1669ff
KO
1255 }
1256 }
1257 }
8a85b20c 1258
ef1669ff 1259 if (bkey_extent_is_allocation(k.k))
49124d8a 1260 for_each_visible_inode(c, s, inode, equiv.snapshot, i)
ef1669ff
KO
1261 i->count += k.k->size;
1262#if 0
1263 bch2_bkey_buf_reassemble(&prev, c, k);
1264#endif
8a85b20c 1265
fa8e94fa
KO
1266out:
1267err:
ef1669ff 1268fsck_err:
fa8e94fa 1269 printbuf_exit(&buf);
e492e7b6
KO
1270
1271 if (ret && ret != -EINTR)
1272 bch_err(c, "error %i from check_extent()", ret);
8a85b20c
KO
1273 return ret;
1274}
1275
1c6fdbd8
KO
1276/*
1277 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
1278 * that i_size an i_sectors are consistent
1279 */
1280noinline_for_stack
1281static int check_extents(struct bch_fs *c)
1282{
1283 struct inode_walker w = inode_walker_init();
ef1669ff 1284 struct snapshots_seen s;
424eb881 1285 struct btree_trans trans;
67e0dd8f 1286 struct btree_iter iter;
1c6fdbd8
KO
1287 int ret = 0;
1288
ef1669ff
KO
1289#if 0
1290 struct bkey_buf prev;
07a1006a 1291 bch2_bkey_buf_init(&prev);
f7005e01 1292 prev.k->k = KEY(0, 0, 0);
ef1669ff
KO
1293#endif
1294 snapshots_seen_init(&s);
20bceecb 1295 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 1296
1c6fdbd8
KO
1297 bch_verbose(c, "checking extents");
1298
67e0dd8f
KO
1299 bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
1300 POS(BCACHEFS_ROOT_INO, 0),
1301 BTREE_ITER_INTENT|
ef1669ff
KO
1302 BTREE_ITER_PREFETCH|
1303 BTREE_ITER_ALL_SNAPSHOTS);
1304
1305 do {
e68914ca 1306 ret = commit_do(&trans, NULL, NULL,
285b181a
KO
1307 BTREE_INSERT_LAZY_RW|
1308 BTREE_INSERT_NOFAIL,
ef1669ff
KO
1309 check_extent(&trans, &iter, &w, &s));
1310 if (ret)
1311 break;
1312 } while (bch2_btree_iter_advance(&iter));
1313 bch2_trans_iter_exit(&trans, &iter);
1314#if 0
1315 bch2_bkey_buf_exit(&prev, c);
1316#endif
1317 inode_walker_exit(&w);
1318 bch2_trans_exit(&trans);
1319 snapshots_seen_exit(&s);
1320
e492e7b6
KO
1321 if (ret)
1322 bch_err(c, "error %i from check_extents()", ret);
ef1669ff
KO
1323 return ret;
1324}
1325
1326static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
1327{
1328 struct bch_fs *c = trans->c;
1329 struct inode_walker_entry *i;
1330 int ret = 0, ret2 = 0;
1331 s64 count2;
1332
91d961ba 1333 darray_for_each(w->inodes, i) {
ef1669ff
KO
1334 if (i->inode.bi_nlink == i->count)
1335 continue;
1336
9e343161
KO
1337 count2 = bch2_count_subdirs(trans, w->cur_inum, i->snapshot);
1338 if (count2 < 0)
1339 return count2;
ef1669ff
KO
1340
1341 if (i->count != count2) {
1342 bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
1343 i->count, count2);
1344 i->count = count2;
1345 if (i->inode.bi_nlink == i->count)
1346 continue;
1347 }
1348
1349 if (fsck_err_on(i->inode.bi_nlink != i->count, c,
1350 "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
1351 w->cur_inum, i->snapshot, i->inode.bi_nlink, i->count)) {
1352 i->inode.bi_nlink = i->count;
1353 ret = write_inode(trans, &i->inode, i->snapshot);
abcecb49
KO
1354 if (ret)
1355 break;
ef1669ff 1356 ret2 = -EINTR;
abcecb49 1357 }
ef1669ff
KO
1358 }
1359fsck_err:
e492e7b6
KO
1360 if (ret)
1361 bch_err(c, "error %i from check_subdir_count()", ret);
ef1669ff
KO
1362 return ret ?: ret2;
1363}
abcecb49 1364
ef1669ff
KO
1365static int check_dirent_target(struct btree_trans *trans,
1366 struct btree_iter *iter,
1367 struct bkey_s_c_dirent d,
1368 struct bch_inode_unpacked *target,
1369 u32 target_snapshot)
1370{
1371 struct bch_fs *c = trans->c;
285b181a 1372 struct bkey_i_dirent *n;
ef1669ff 1373 bool backpointer_exists = true;
fa8e94fa 1374 struct printbuf buf = PRINTBUF;
ef1669ff
KO
1375 int ret = 0;
1376
1377 if (!target->bi_dir &&
1378 !target->bi_dir_offset) {
1379 target->bi_dir = d.k->p.inode;
1380 target->bi_dir_offset = d.k->p.offset;
1381
285b181a 1382 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1383 if (ret)
1384 goto err;
1385 }
1386
4db65027 1387 if (!inode_points_to_dirent(target, d)) {
ef1669ff
KO
1388 ret = inode_backpointer_exists(trans, target, d.k->p.snapshot);
1389 if (ret < 0)
1390 goto err;
e3e464ac 1391
ef1669ff
KO
1392 backpointer_exists = ret;
1393 ret = 0;
e3e464ac 1394
ef1669ff
KO
1395 if (fsck_err_on(S_ISDIR(target->bi_mode) &&
1396 backpointer_exists, c,
1397 "directory %llu with multiple links",
1398 target->bi_inum)) {
285b181a 1399 ret = __remove_dirent(trans, d.k->p);
fa8e94fa 1400 goto out;
e3e464ac 1401 }
e3e464ac 1402
ef1669ff
KO
1403 if (fsck_err_on(backpointer_exists &&
1404 !target->bi_nlink, c,
0095aa94
KO
1405 "inode %llu type %s has multiple links but i_nlink 0",
1406 target->bi_inum, bch2_d_types[d.v->d_type])) {
ef1669ff
KO
1407 target->bi_nlink++;
1408 target->bi_flags &= ~BCH_INODE_UNLINKED;
1c6fdbd8 1409
285b181a 1410 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1411 if (ret)
1412 goto err;
1c6fdbd8
KO
1413 }
1414
ef1669ff 1415 if (fsck_err_on(!backpointer_exists, c,
6e0c886d 1416 "inode %llu:%u has wrong backpointer:\n"
ef1669ff
KO
1417 "got %llu:%llu\n"
1418 "should be %llu:%llu",
6e0c886d 1419 target->bi_inum, target_snapshot,
ef1669ff
KO
1420 target->bi_dir,
1421 target->bi_dir_offset,
1422 d.k->p.inode,
1423 d.k->p.offset)) {
1424 target->bi_dir = d.k->p.inode;
1425 target->bi_dir_offset = d.k->p.offset;
1426
285b181a 1427 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1428 if (ret)
1429 goto err;
1c6fdbd8 1430 }
ef1669ff 1431 }
1c6fdbd8 1432
285b181a
KO
1433 if (fsck_err_on(d.v->d_type != inode_d_type(target), c,
1434 "incorrect d_type: got %s, should be %s:\n%s",
1435 bch2_d_type_str(d.v->d_type),
1436 bch2_d_type_str(inode_d_type(target)),
fa8e94fa
KO
1437 (printbuf_reset(&buf),
1438 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
285b181a
KO
1439 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
1440 ret = PTR_ERR_OR_ZERO(n);
1441 if (ret)
fa8e94fa 1442 goto err;
ef1669ff
KO
1443
1444 bkey_reassemble(&n->k_i, d.s_c);
285b181a 1445 n->v.d_type = inode_d_type(target);
ef1669ff 1446
285b181a
KO
1447 ret = bch2_trans_update(trans, iter, &n->k_i, 0);
1448 if (ret)
fa8e94fa 1449 goto err;
4db65027 1450
285b181a 1451 d = dirent_i_to_s_c(n);
4db65027
KO
1452 }
1453
1454 if (d.v->d_type == DT_SUBVOL &&
1455 target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) &&
1456 (c->sb.version < bcachefs_metadata_version_subvol_dirent ||
1457 fsck_err(c, "dirent has wrong d_parent_subvol field: got %u, should be %u",
1458 le32_to_cpu(d.v->d_parent_subvol),
1459 target->bi_parent_subvol))) {
285b181a
KO
1460 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
1461 ret = PTR_ERR_OR_ZERO(n);
1462 if (ret)
fa8e94fa 1463 goto err;
4db65027
KO
1464
1465 bkey_reassemble(&n->k_i, d.s_c);
1466 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
1467
285b181a
KO
1468 ret = bch2_trans_update(trans, iter, &n->k_i, 0);
1469 if (ret)
fa8e94fa 1470 goto err;
4db65027 1471
285b181a 1472 d = dirent_i_to_s_c(n);
1c6fdbd8 1473 }
fa8e94fa 1474out:
ef1669ff 1475err:
1c6fdbd8 1476fsck_err:
fa8e94fa 1477 printbuf_exit(&buf);
e492e7b6
KO
1478
1479 if (ret && ret != -EINTR)
1480 bch_err(c, "error %i from check_target()", ret);
ef1669ff 1481 return ret;
1c6fdbd8
KO
1482}
1483
914f2786
KO
1484static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
1485 struct bch_hash_info *hash_info,
ef1669ff
KO
1486 struct inode_walker *dir,
1487 struct inode_walker *target,
1488 struct snapshots_seen *s)
1c6fdbd8 1489{
914f2786 1490 struct bch_fs *c = trans->c;
1c6fdbd8 1491 struct bkey_s_c k;
914f2786 1492 struct bkey_s_c_dirent d;
ef1669ff 1493 struct inode_walker_entry *i;
fa8e94fa 1494 struct printbuf buf = PRINTBUF;
49124d8a 1495 struct bpos equiv;
fa8e94fa 1496 int ret = 0;
292dea86 1497peek:
914f2786
KO
1498 k = bch2_btree_iter_peek(iter);
1499 if (!k.k)
fa8e94fa 1500 goto out;
1c6fdbd8 1501
914f2786
KO
1502 ret = bkey_err(k);
1503 if (ret)
fa8e94fa 1504 goto err;
d69f41d6 1505
ef1669ff 1506 ret = check_key_has_snapshot(trans, iter, k);
fa8e94fa
KO
1507 if (ret) {
1508 ret = ret < 0 ? ret : 0;
1509 goto out;
1510 }
1c6fdbd8 1511
49124d8a
KO
1512 equiv = k.k->p;
1513 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
1514
1515 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
914f2786 1516 if (ret)
fa8e94fa 1517 goto err;
8a85b20c 1518
ef1669ff 1519 if (k.k->type == KEY_TYPE_whiteout)
fa8e94fa 1520 goto out;
ef1669ff
KO
1521
1522 if (dir->cur_inum != k.k->p.inode) {
1523 ret = check_subdir_count(trans, dir);
1524 if (ret)
fa8e94fa 1525 goto err;
ef1669ff
KO
1526 }
1527
292dea86
KO
1528 if (!iter->path->should_be_locked) {
1529 /* hack: see check_extent() */
1530 goto peek;
1531 }
1532
49124d8a 1533 ret = __walk_inode(trans, dir, equiv);
ef1669ff 1534 if (ret < 0)
fa8e94fa 1535 goto err;
1c6fdbd8 1536
ef1669ff 1537 if (fsck_err_on(ret == INT_MAX, c,
914f2786 1538 "dirent in nonexisting directory:\n%s",
fa8e94fa
KO
1539 (printbuf_reset(&buf),
1540 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1541 ret = bch2_btree_delete_at(trans, iter,
285b181a 1542 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
fa8e94fa
KO
1543 goto out;
1544 }
ef1669ff 1545
fa8e94fa
KO
1546 if (ret == INT_MAX) {
1547 ret = 0;
1548 goto out;
1549 }
ef1669ff 1550
91d961ba 1551 i = dir->inodes.data + ret;
ef1669ff
KO
1552 ret = 0;
1553
1554 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c,
285b181a
KO
1555 "dirent in non directory inode type %s:\n%s",
1556 bch2_d_type_str(inode_d_type(&i->inode)),
fa8e94fa
KO
1557 (printbuf_reset(&buf),
1558 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1559 ret = bch2_btree_delete_at(trans, iter, 0);
1560 goto out;
1561 }
8a85b20c 1562
ef1669ff 1563 if (dir->first_this_inode)
91d961ba 1564 *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
1c6fdbd8 1565
914f2786
KO
1566 ret = hash_check_key(trans, bch2_dirent_hash_desc,
1567 hash_info, iter, k);
1568 if (ret < 0)
fa8e94fa
KO
1569 goto err;
1570 if (ret) {
1571 /* dirent has been deleted */
1572 ret = 0;
1573 goto out;
1574 }
7ac2c55e 1575
914f2786 1576 if (k.k->type != KEY_TYPE_dirent)
fa8e94fa 1577 goto out;
1c6fdbd8 1578
914f2786 1579 d = bkey_s_c_to_dirent(k);
1c6fdbd8 1580
4db65027
KO
1581 if (d.v->d_type == DT_SUBVOL) {
1582 struct bch_inode_unpacked subvol_root;
1583 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
1584 u32 target_snapshot;
1585 u64 target_inum;
b9e1adf5 1586
4db65027
KO
1587 ret = __subvol_lookup(trans, target_subvol,
1588 &target_snapshot, &target_inum);
1589 if (ret && ret != -ENOENT)
fa8e94fa 1590 goto err;
1c6fdbd8 1591
4db65027
KO
1592 if (fsck_err_on(ret, c,
1593 "dirent points to missing subvolume %llu",
fa8e94fa
KO
1594 le64_to_cpu(d.v->d_child_subvol))) {
1595 ret = __remove_dirent(trans, d.k->p);
1596 goto err;
1597 }
914f2786 1598
ef1669ff
KO
1599 ret = __lookup_inode(trans, target_inum,
1600 &subvol_root, &target_snapshot);
1601 if (ret && ret != -ENOENT)
fa8e94fa 1602 goto err;
914f2786 1603
ef1669ff
KO
1604 if (fsck_err_on(ret, c,
1605 "subvolume %u points to missing subvolume root %llu",
1606 target_subvol,
1607 target_inum)) {
1608 bch_err(c, "repair not implemented yet");
fa8e94fa
KO
1609 ret = -EINVAL;
1610 goto err;
ef1669ff 1611 }
914f2786 1612
ef1669ff
KO
1613 if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c,
1614 "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
1615 target_inum,
1616 subvol_root.bi_subvol, target_subvol)) {
1617 subvol_root.bi_subvol = target_subvol;
285b181a 1618 ret = __write_inode(trans, &subvol_root, target_snapshot);
ef1669ff 1619 if (ret)
fa8e94fa 1620 goto err;
ef1669ff 1621 }
914f2786 1622
ef1669ff
KO
1623 ret = check_dirent_target(trans, iter, d, &subvol_root,
1624 target_snapshot);
914f2786 1625 if (ret)
fa8e94fa 1626 goto err;
ef1669ff 1627 } else {
4db65027 1628 ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
ef1669ff 1629 if (ret)
fa8e94fa 1630 goto err;
1c6fdbd8 1631
91d961ba 1632 if (fsck_err_on(!target->inodes.nr, c,
49124d8a
KO
1633 "dirent points to missing inode: (equiv %u)\n%s",
1634 equiv.snapshot,
fa8e94fa
KO
1635 (printbuf_reset(&buf),
1636 bch2_bkey_val_to_text(&buf, c, k),
1637 buf.buf))) {
285b181a 1638 ret = __remove_dirent(trans, d.k->p);
ef1669ff 1639 if (ret)
fa8e94fa 1640 goto err;
1c6fdbd8
KO
1641 }
1642
91d961ba 1643 darray_for_each(target->inodes, i) {
ef1669ff
KO
1644 ret = check_dirent_target(trans, iter, d,
1645 &i->inode, i->snapshot);
1646 if (ret)
fa8e94fa 1647 goto err;
d3ff7fec 1648 }
914f2786 1649 }
d3ff7fec 1650
ef1669ff 1651 if (d.v->d_type == DT_DIR)
49124d8a 1652 for_each_visible_inode(c, s, dir, equiv.snapshot, i)
ef1669ff 1653 i->count++;
1c6fdbd8 1654
fa8e94fa
KO
1655out:
1656err:
914f2786 1657fsck_err:
fa8e94fa 1658 printbuf_exit(&buf);
e492e7b6
KO
1659
1660 if (ret && ret != -EINTR)
1661 bch_err(c, "error %i from check_dirent()", ret);
914f2786
KO
1662 return ret;
1663}
1c6fdbd8 1664
914f2786
KO
1665/*
1666 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
1667 * validate d_type
1668 */
1669noinline_for_stack
1670static int check_dirents(struct bch_fs *c)
1671{
ef1669ff
KO
1672 struct inode_walker dir = inode_walker_init();
1673 struct inode_walker target = inode_walker_init();
1674 struct snapshots_seen s;
914f2786
KO
1675 struct bch_hash_info hash_info;
1676 struct btree_trans trans;
67e0dd8f 1677 struct btree_iter iter;
914f2786 1678 int ret = 0;
1c6fdbd8 1679
914f2786
KO
1680 bch_verbose(c, "checking dirents");
1681
ef1669ff 1682 snapshots_seen_init(&s);
914f2786 1683 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1c6fdbd8 1684
67e0dd8f
KO
1685 bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents,
1686 POS(BCACHEFS_ROOT_INO, 0),
1687 BTREE_ITER_INTENT|
ef1669ff
KO
1688 BTREE_ITER_PREFETCH|
1689 BTREE_ITER_ALL_SNAPSHOTS);
914f2786 1690
38200544 1691 do {
e68914ca 1692 ret = commit_do(&trans, NULL, NULL,
285b181a
KO
1693 BTREE_INSERT_LAZY_RW|
1694 BTREE_INSERT_NOFAIL,
ef1669ff
KO
1695 check_dirent(&trans, &iter, &hash_info,
1696 &dir, &target, &s));
914f2786
KO
1697 if (ret)
1698 break;
67e0dd8f
KO
1699 } while (bch2_btree_iter_advance(&iter));
1700 bch2_trans_iter_exit(&trans, &iter);
914f2786 1701
ef1669ff
KO
1702 bch2_trans_exit(&trans);
1703 snapshots_seen_exit(&s);
1704 inode_walker_exit(&dir);
1705 inode_walker_exit(&target);
e492e7b6
KO
1706
1707 if (ret)
1708 bch_err(c, "error %i from check_dirents()", ret);
ef1669ff 1709 return ret;
1c6fdbd8
KO
1710}
1711
285b181a
KO
1712static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
1713 struct bch_hash_info *hash_info,
1714 struct inode_walker *inode)
1715{
1716 struct bch_fs *c = trans->c;
1717 struct bkey_s_c k;
1718 int ret;
1719
1720 k = bch2_btree_iter_peek(iter);
1721 if (!k.k)
1722 return 0;
1723
1724 ret = bkey_err(k);
1725 if (ret)
1726 return ret;
1727
1728 ret = check_key_has_snapshot(trans, iter, k);
1729 if (ret)
1730 return ret;
1731
1732 ret = __walk_inode(trans, inode, k.k->p);
1733 if (ret < 0)
1734 return ret;
1735
1736 if (fsck_err_on(ret == INT_MAX, c,
1737 "xattr for missing inode %llu",
1738 k.k->p.inode))
1739 return bch2_btree_delete_at(trans, iter, 0);
1740
1741 if (ret == INT_MAX)
1742 return 0;
1743
1744 ret = 0;
1745
1746 if (inode->first_this_inode)
91d961ba 1747 *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode);
285b181a
KO
1748
1749 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
1750fsck_err:
e492e7b6
KO
1751 if (ret && ret != -EINTR)
1752 bch_err(c, "error %i from check_xattr()", ret);
285b181a
KO
1753 return ret;
1754}
1755
1c6fdbd8
KO
1756/*
1757 * Walk xattrs: verify that they all have a corresponding inode
1758 */
1759noinline_for_stack
1760static int check_xattrs(struct bch_fs *c)
1761{
285b181a 1762 struct inode_walker inode = inode_walker_init();
7ac2c55e 1763 struct bch_hash_info hash_info;
d69f41d6 1764 struct btree_trans trans;
67e0dd8f 1765 struct btree_iter iter;
1c6fdbd8
KO
1766 int ret = 0;
1767
1768 bch_verbose(c, "checking xattrs");
1769
20bceecb 1770 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
d69f41d6 1771
67e0dd8f
KO
1772 bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
1773 POS(BCACHEFS_ROOT_INO, 0),
1774 BTREE_ITER_INTENT|
ef1669ff
KO
1775 BTREE_ITER_PREFETCH|
1776 BTREE_ITER_ALL_SNAPSHOTS);
1c6fdbd8 1777
285b181a 1778 do {
e68914ca 1779 ret = commit_do(&trans, NULL, NULL,
285b181a
KO
1780 BTREE_INSERT_LAZY_RW|
1781 BTREE_INSERT_NOFAIL,
1782 check_xattr(&trans, &iter, &hash_info,
1783 &inode));
1c6fdbd8 1784 if (ret)
abcecb49 1785 break;
285b181a 1786 } while (bch2_btree_iter_advance(&iter));
67e0dd8f 1787 bch2_trans_iter_exit(&trans, &iter);
285b181a 1788
9a796fdb 1789 bch2_trans_exit(&trans);
e492e7b6
KO
1790
1791 if (ret)
1792 bch_err(c, "error %i from check_xattrs()", ret);
9a796fdb 1793 return ret;
1c6fdbd8
KO
1794}
1795
285b181a 1796static int check_root_trans(struct btree_trans *trans)
1c6fdbd8 1797{
285b181a 1798 struct bch_fs *c = trans->c;
ef1669ff 1799 struct bch_inode_unpacked root_inode;
d3ff7fec 1800 u32 snapshot;
ef1669ff 1801 u64 inum;
1c6fdbd8
KO
1802 int ret;
1803
285b181a 1804 ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
1c6fdbd8
KO
1805 if (ret && ret != -ENOENT)
1806 return ret;
1807
ef1669ff
KO
1808 if (mustfix_fsck_err_on(ret, c, "root subvol missing")) {
1809 struct bkey_i_subvolume root_subvol;
1c6fdbd8 1810
ef1669ff
KO
1811 snapshot = U32_MAX;
1812 inum = BCACHEFS_ROOT_INO;
1c6fdbd8 1813
ef1669ff
KO
1814 bkey_subvolume_init(&root_subvol.k_i);
1815 root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
1816 root_subvol.v.flags = 0;
1817 root_subvol.v.snapshot = cpu_to_le32(snapshot);
1818 root_subvol.v.inode = cpu_to_le64(inum);
e68914ca 1819 ret = commit_do(trans, NULL, NULL,
ef1669ff
KO
1820 BTREE_INSERT_NOFAIL|
1821 BTREE_INSERT_LAZY_RW,
285b181a 1822 __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i));
ef1669ff
KO
1823 if (ret) {
1824 bch_err(c, "error writing root subvol: %i", ret);
1825 goto err;
1826 }
1827
1828 }
1829
285b181a 1830 ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
ef1669ff
KO
1831 if (ret && ret != -ENOENT)
1832 return ret;
1c6fdbd8 1833
ef1669ff
KO
1834 if (mustfix_fsck_err_on(ret, c, "root directory missing") ||
1835 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c,
1836 "root inode not a directory")) {
1837 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
1838 0, NULL);
1839 root_inode.bi_inum = inum;
1c6fdbd8 1840
285b181a 1841 ret = __write_inode(trans, &root_inode, snapshot);
ef1669ff
KO
1842 if (ret)
1843 bch_err(c, "error writing root inode: %i", ret);
1844 }
1845err:
1846fsck_err:
ef1669ff 1847 return ret;
1c6fdbd8
KO
1848}
1849
285b181a
KO
1850/* Get root directory, create if it doesn't exist: */
1851noinline_for_stack
1852static int check_root(struct bch_fs *c)
1853{
1854 bch_verbose(c, "checking root directory");
1855
1856 return bch2_trans_do(c, NULL, NULL,
1857 BTREE_INSERT_NOFAIL|
1858 BTREE_INSERT_LAZY_RW,
1859 check_root_trans(&trans));
1860}
1861
91d961ba
KO
1862struct pathbuf_entry {
1863 u64 inum;
1864 u32 snapshot;
1c6fdbd8
KO
1865};
1866
91d961ba
KO
1867typedef DARRAY(struct pathbuf_entry) pathbuf;
1868
1869static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
6e0c886d
KO
1870{
1871 struct pathbuf_entry *i;
1872
91d961ba 1873 darray_for_each(*p, i)
6e0c886d
KO
1874 if (i->inum == inum &&
1875 i->snapshot == snapshot)
1876 return true;
1877
1878 return false;
1879}
1880
91d961ba 1881static int path_down(struct bch_fs *c, pathbuf *p,
f0f41a6d 1882 u64 inum, u32 snapshot)
1c6fdbd8 1883{
91d961ba 1884 int ret = darray_push(p, ((struct pathbuf_entry) {
6e0c886d
KO
1885 .inum = inum,
1886 .snapshot = snapshot,
91d961ba
KO
1887 }));
1888
1889 if (ret)
1890 bch_err(c, "fsck: error allocating memory for pathbuf, size %zu",
1891 p->size);
1892 return ret;
1c6fdbd8
KO
1893}
1894
6e0c886d
KO
1895/*
1896 * Check that a given inode is reachable from the root:
1897 *
1898 * XXX: we should also be verifying that inodes are in the right subvolumes
1899 */
d3ff7fec 1900static int check_path(struct btree_trans *trans,
91d961ba 1901 pathbuf *p,
81ed9ce3
KO
1902 struct bch_inode_unpacked *inode,
1903 u32 snapshot)
1c6fdbd8 1904{
d3ff7fec 1905 struct bch_fs *c = trans->c;
1c6fdbd8
KO
1906 int ret = 0;
1907
49124d8a 1908 snapshot = bch2_snapshot_equiv(c, snapshot);
d3ff7fec 1909 p->nr = 0;
424eb881 1910
488f9776
KO
1911 while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
1912 inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
4db65027
KO
1913 struct btree_iter dirent_iter;
1914 struct bkey_s_c_dirent d;
6e0c886d
KO
1915 u32 parent_snapshot = snapshot;
1916
4db65027 1917 if (inode->bi_subvol) {
488f9776
KO
1918 u64 inum;
1919
1920 ret = subvol_lookup(trans, inode->bi_parent_subvol,
6e0c886d 1921 &parent_snapshot, &inum);
488f9776
KO
1922 if (ret)
1923 break;
1924 }
1925
d3ff7fec 1926 ret = lockrestart_do(trans,
4db65027
KO
1927 PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter,
1928 SPOS(inode->bi_dir, inode->bi_dir_offset,
1929 parent_snapshot))).k));
1930 if (ret && ret != -ENOENT)
d3ff7fec 1931 break;
1c6fdbd8 1932
4db65027
KO
1933 if (!ret && !dirent_points_to_inode(d, inode)) {
1934 bch2_trans_iter_exit(trans, &dirent_iter);
1935 ret = -ENOENT;
1936 }
1937
1938 if (ret == -ENOENT) {
285b181a 1939 if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
ef1669ff 1940 inode->bi_inum, snapshot,
285b181a 1941 bch2_d_type_str(inode_d_type(inode)),
d3ff7fec
KO
1942 inode->bi_nlink,
1943 inode->bi_dir,
1944 inode->bi_dir_offset))
81ed9ce3 1945 ret = reattach_inode(trans, inode, snapshot);
d3ff7fec
KO
1946 break;
1947 }
4db65027
KO
1948
1949 bch2_trans_iter_exit(trans, &dirent_iter);
1c6fdbd8 1950
d3ff7fec
KO
1951 if (!S_ISDIR(inode->bi_mode))
1952 break;
1c6fdbd8 1953
f0f41a6d 1954 ret = path_down(c, p, inode->bi_inum, snapshot);
d3ff7fec
KO
1955 if (ret) {
1956 bch_err(c, "memory allocation failure");
1957 return ret;
1958 }
1c6fdbd8 1959
6e0c886d
KO
1960 snapshot = parent_snapshot;
1961
1962 ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
1963 if (ret) {
1964 /* Should have been caught in dirents pass */
1965 bch_err(c, "error looking up parent directory: %i", ret);
1966 break;
1967 }
1968
1969 if (path_is_dup(p, inode->bi_inum, snapshot)) {
1970 struct pathbuf_entry *i;
1c6fdbd8 1971
d3ff7fec 1972 /* XXX print path */
6e0c886d
KO
1973 bch_err(c, "directory structure loop");
1974
91d961ba 1975 darray_for_each(*p, i)
6e0c886d
KO
1976 pr_err("%llu:%u", i->inum, i->snapshot);
1977 pr_err("%llu:%u", inode->bi_inum, snapshot);
1978
d3ff7fec
KO
1979 if (!fsck_err(c, "directory structure loop"))
1980 return 0;
1c6fdbd8 1981
e68914ca 1982 ret = commit_do(trans, NULL, NULL,
285b181a
KO
1983 BTREE_INSERT_NOFAIL|
1984 BTREE_INSERT_LAZY_RW,
81ed9ce3 1985 remove_backpointer(trans, inode));
1c6fdbd8 1986 if (ret) {
d3ff7fec
KO
1987 bch_err(c, "error removing dirent: %i", ret);
1988 break;
1c6fdbd8
KO
1989 }
1990
81ed9ce3 1991 ret = reattach_inode(trans, inode, snapshot);
1c6fdbd8 1992 }
1c6fdbd8 1993 }
d3ff7fec
KO
1994fsck_err:
1995 if (ret)
1996 bch_err(c, "%s: err %i", __func__, ret);
1997 return ret;
1998}
1c6fdbd8 1999
d3ff7fec
KO
2000/*
2001 * Check for unreachable inodes, as well as loops in the directory structure:
2002 * After check_dirents(), if an inode backpointer doesn't exist that means it's
2003 * unreachable:
2004 */
285b181a 2005noinline_for_stack
58686a25 2006static int check_directory_structure(struct bch_fs *c)
d3ff7fec
KO
2007{
2008 struct btree_trans trans;
67e0dd8f 2009 struct btree_iter iter;
d3ff7fec
KO
2010 struct bkey_s_c k;
2011 struct bch_inode_unpacked u;
91d961ba 2012 pathbuf path = { 0, };
d3ff7fec 2013 int ret;
1c6fdbd8 2014
d3ff7fec 2015 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1c6fdbd8 2016
909004d2
KO
2017 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
2018 BTREE_ITER_INTENT|
ef1669ff
KO
2019 BTREE_ITER_PREFETCH|
2020 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
3e52c222 2021 if (!bkey_is_inode(k.k))
1c6fdbd8
KO
2022 continue;
2023
3e52c222 2024 ret = bch2_inode_unpack(k, &u);
d3ff7fec
KO
2025 if (ret) {
2026 /* Should have been caught earlier in fsck: */
2027 bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
2028 break;
1c6fdbd8 2029 }
1c6fdbd8 2030
ef1669ff
KO
2031 if (u.bi_flags & BCH_INODE_UNLINKED)
2032 continue;
2033
81ed9ce3 2034 ret = check_path(&trans, &path, &u, iter.pos.snapshot);
d3ff7fec
KO
2035 if (ret)
2036 break;
1c6fdbd8 2037 }
67e0dd8f 2038 bch2_trans_iter_exit(&trans, &iter);
d3ff7fec
KO
2039
2040 BUG_ON(ret == -EINTR);
2041
91d961ba 2042 darray_exit(&path);
f24fab9c 2043
9a796fdb
KO
2044 bch2_trans_exit(&trans);
2045 return ret;
1c6fdbd8
KO
2046}
2047
fc51b041
KO
2048struct nlink_table {
2049 size_t nr;
2050 size_t size;
1c6fdbd8 2051
fc51b041
KO
2052 struct nlink {
2053 u64 inum;
2054 u32 snapshot;
2055 u32 count;
2056 } *d;
2057};
1c6fdbd8 2058
f0f41a6d
KO
2059static int add_nlink(struct bch_fs *c, struct nlink_table *t,
2060 u64 inum, u32 snapshot)
1c6fdbd8 2061{
fc51b041
KO
2062 if (t->nr == t->size) {
2063 size_t new_size = max_t(size_t, 128UL, t->size * 2);
2064 void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
2065 if (!d) {
f0f41a6d
KO
2066 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
2067 new_size);
fc51b041
KO
2068 return -ENOMEM;
2069 }
1c6fdbd8 2070
82355e28
KO
2071 if (t->d)
2072 memcpy(d, t->d, t->size * sizeof(t->d[0]));
fc51b041 2073 kvfree(t->d);
1c6fdbd8 2074
fc51b041
KO
2075 t->d = d;
2076 t->size = new_size;
2bb748a6
KO
2077 }
2078
fc51b041
KO
2079
2080 t->d[t->nr++] = (struct nlink) {
2081 .inum = inum,
2082 .snapshot = snapshot,
2083 };
2084
2085 return 0;
2086}
2087
2088static int nlink_cmp(const void *_l, const void *_r)
2089{
2090 const struct nlink *l = _l;
2091 const struct nlink *r = _r;
2092
2093 return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
2094}
2095
ef1669ff
KO
2096static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
2097 struct nlink_table *links,
2098 u64 range_start, u64 range_end, u64 inum, u32 snapshot)
fc51b041
KO
2099{
2100 struct nlink *link, key = {
2101 .inum = inum, .snapshot = U32_MAX,
2102 };
2103
2104 if (inum < range_start || inum >= range_end)
1c6fdbd8 2105 return;
fc51b041
KO
2106
2107 link = __inline_bsearch(&key, links->d, links->nr,
2108 sizeof(links->d[0]), nlink_cmp);
ef1669ff
KO
2109 if (!link)
2110 return;
2111
2112 while (link > links->d && link[0].inum == link[-1].inum)
2113 --link;
2114
2115 for (; link < links->d + links->nr && link->inum == inum; link++)
2116 if (ref_visible(c, s, snapshot, link->snapshot)) {
2117 link->count++;
2118 if (link->snapshot >= snapshot)
2119 break;
2120 }
fc51b041
KO
2121}
2122
2123noinline_for_stack
2124static int check_nlinks_find_hardlinks(struct bch_fs *c,
2125 struct nlink_table *t,
2126 u64 start, u64 *end)
2127{
2128 struct btree_trans trans;
67e0dd8f 2129 struct btree_iter iter;
fc51b041 2130 struct bkey_s_c k;
fc51b041
KO
2131 struct bch_inode_unpacked u;
2132 int ret = 0;
2133
2134 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
2135
2136 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
909004d2
KO
2137 POS(0, start),
2138 BTREE_ITER_INTENT|
ef1669ff
KO
2139 BTREE_ITER_PREFETCH|
2140 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
3e52c222 2141 if (!bkey_is_inode(k.k))
fc51b041
KO
2142 continue;
2143
3e52c222
KO
2144 /* Should never fail, checked by bch2_inode_invalid: */
2145 BUG_ON(bch2_inode_unpack(k, &u));
fc51b041
KO
2146
2147 /*
2148 * Backpointer and directory structure checks are sufficient for
2149 * directories, since they can't have hardlinks:
2150 */
3e52c222 2151 if (S_ISDIR(le16_to_cpu(u.bi_mode)))
fc51b041
KO
2152 continue;
2153
fc51b041
KO
2154 if (!u.bi_nlink)
2155 continue;
2156
f0f41a6d 2157 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
fc51b041
KO
2158 if (ret) {
2159 *end = k.k->p.offset;
2160 ret = 0;
2161 break;
2162 }
2163
1c6fdbd8 2164 }
67e0dd8f 2165 bch2_trans_iter_exit(&trans, &iter);
fc51b041
KO
2166 bch2_trans_exit(&trans);
2167
2168 if (ret)
2169 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1c6fdbd8 2170
fc51b041 2171 return ret;
1c6fdbd8
KO
2172}
2173
2174noinline_for_stack
fc51b041
KO
2175static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
2176 u64 range_start, u64 range_end)
1c6fdbd8 2177{
424eb881 2178 struct btree_trans trans;
ef1669ff 2179 struct snapshots_seen s;
67e0dd8f 2180 struct btree_iter iter;
1c6fdbd8
KO
2181 struct bkey_s_c k;
2182 struct bkey_s_c_dirent d;
1c6fdbd8
KO
2183 int ret;
2184
ef1669ff
KO
2185 snapshots_seen_init(&s);
2186
20bceecb 2187 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 2188
909004d2
KO
2189 for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
2190 BTREE_ITER_INTENT|
ef1669ff
KO
2191 BTREE_ITER_PREFETCH|
2192 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
49124d8a 2193 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
ef1669ff
KO
2194 if (ret)
2195 break;
2196
1c6fdbd8 2197 switch (k.k->type) {
26609b61 2198 case KEY_TYPE_dirent:
1c6fdbd8 2199 d = bkey_s_c_to_dirent(k);
1c6fdbd8 2200
ef1669ff
KO
2201 if (d.v->d_type != DT_DIR &&
2202 d.v->d_type != DT_SUBVOL)
2203 inc_link(c, &s, links, range_start, range_end,
2204 le64_to_cpu(d.v->d_inum),
49124d8a 2205 bch2_snapshot_equiv(c, d.k->p.snapshot));
1c6fdbd8
KO
2206 break;
2207 }
1c6fdbd8 2208 }
67e0dd8f 2209 bch2_trans_iter_exit(&trans, &iter);
abcecb49 2210
1c6fdbd8 2211 if (ret)
619f5bee 2212 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1c6fdbd8 2213
ef1669ff
KO
2214 bch2_trans_exit(&trans);
2215 snapshots_seen_exit(&s);
1c6fdbd8
KO
2216 return ret;
2217}
2218
1c6fdbd8 2219noinline_for_stack
fc51b041
KO
2220static int check_nlinks_update_hardlinks(struct bch_fs *c,
2221 struct nlink_table *links,
1c6fdbd8
KO
2222 u64 range_start, u64 range_end)
2223{
0564b167 2224 struct btree_trans trans;
67e0dd8f 2225 struct btree_iter iter;
1c6fdbd8 2226 struct bkey_s_c k;
fc51b041
KO
2227 struct bch_inode_unpacked u;
2228 struct nlink *link = links->d;
b906aadd 2229 int ret = 0;
1c6fdbd8 2230
20bceecb 2231 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
0564b167 2232
b906aadd 2233 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
909004d2
KO
2234 POS(0, range_start),
2235 BTREE_ITER_INTENT|
ef1669ff
KO
2236 BTREE_ITER_PREFETCH|
2237 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
fc51b041 2238 if (k.k->p.offset >= range_end)
1c6fdbd8
KO
2239 break;
2240
3e52c222 2241 if (!bkey_is_inode(k.k))
b906aadd 2242 continue;
1c6fdbd8 2243
3e52c222 2244 BUG_ON(bch2_inode_unpack(k, &u));
fc51b041 2245
3e52c222
KO
2246 if (S_ISDIR(le16_to_cpu(u.bi_mode)))
2247 continue;
1c6fdbd8 2248
fc51b041
KO
2249 if (!u.bi_nlink)
2250 continue;
2251
ef1669ff
KO
2252 while ((cmp_int(link->inum, k.k->p.offset) ?:
2253 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
fc51b041
KO
2254 link++;
2255 BUG_ON(link >= links->d + links->nr);
2256 }
2257
2258 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
0095aa94
KO
2259 "inode %llu type %s has wrong i_nlink (%u, should be %u)",
2260 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
fc51b041
KO
2261 bch2_inode_nlink_get(&u), link->count)) {
2262 bch2_inode_nlink_set(&u, link->count);
2263
ea0531f8 2264 ret = write_inode(&trans, &u, k.k->p.snapshot);
fc51b041
KO
2265 if (ret)
2266 bch_err(c, "error in fsck: error %i updating inode", ret);
2267 }
1c6fdbd8 2268 }
fc51b041 2269fsck_err:
67e0dd8f 2270 bch2_trans_iter_exit(&trans, &iter);
0564b167
KO
2271 bch2_trans_exit(&trans);
2272
b906aadd
KO
2273 if (ret)
2274 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1c6fdbd8 2275
b906aadd 2276 return ret;
1c6fdbd8
KO
2277}
2278
2279noinline_for_stack
58686a25 2280static int check_nlinks(struct bch_fs *c)
1c6fdbd8 2281{
fc51b041 2282 struct nlink_table links = { 0 };
1c6fdbd8
KO
2283 u64 this_iter_range_start, next_iter_range_start = 0;
2284 int ret = 0;
2285
2286 bch_verbose(c, "checking inode nlinks");
2287
1c6fdbd8
KO
2288 do {
2289 this_iter_range_start = next_iter_range_start;
2290 next_iter_range_start = U64_MAX;
2291
fc51b041
KO
2292 ret = check_nlinks_find_hardlinks(c, &links,
2293 this_iter_range_start,
2294 &next_iter_range_start);
2295
2296 ret = check_nlinks_walk_dirents(c, &links,
1c6fdbd8 2297 this_iter_range_start,
fc51b041 2298 next_iter_range_start);
1c6fdbd8
KO
2299 if (ret)
2300 break;
2301
fc51b041 2302 ret = check_nlinks_update_hardlinks(c, &links,
1c6fdbd8
KO
2303 this_iter_range_start,
2304 next_iter_range_start);
2305 if (ret)
2306 break;
2307
fc51b041 2308 links.nr = 0;
1c6fdbd8
KO
2309 } while (next_iter_range_start != U64_MAX);
2310
fc51b041 2311 kvfree(links.d);
1c6fdbd8
KO
2312
2313 return ret;
2314}
2315
bfe88863
KO
2316static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
2317{
2318 struct bkey_s_c k;
2319 struct bkey_s_c_reflink_p p;
2320 struct bkey_i_reflink_p *u;
2321 int ret;
2322
2323 k = bch2_btree_iter_peek(iter);
2324 if (!k.k)
2325 return 0;
2326
2327 ret = bkey_err(k);
2328 if (ret)
2329 return ret;
2330
2331 if (k.k->type != KEY_TYPE_reflink_p)
2332 return 0;
2333
2334 p = bkey_s_c_to_reflink_p(k);
2335
6d76aefe 2336 if (!p.v->front_pad && !p.v->back_pad)
bfe88863
KO
2337 return 0;
2338
2339 u = bch2_trans_kmalloc(trans, sizeof(*u));
2340 ret = PTR_ERR_OR_ZERO(u);
2341 if (ret)
2342 return ret;
2343
2344 bkey_reassemble(&u->k_i, k);
6d76aefe
KO
2345 u->v.front_pad = 0;
2346 u->v.back_pad = 0;
bfe88863 2347
6b3d8b89 2348 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN);
bfe88863
KO
2349}
2350
285b181a 2351noinline_for_stack
bfe88863
KO
2352static int fix_reflink_p(struct bch_fs *c)
2353{
2354 struct btree_trans trans;
2355 struct btree_iter iter;
2356 struct bkey_s_c k;
2357 int ret;
2358
2359 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
2360 return 0;
2361
285b181a
KO
2362 bch_verbose(c, "fixing reflink_p keys");
2363
bfe88863
KO
2364 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
2365
2366 for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN,
2367 BTREE_ITER_INTENT|
2368 BTREE_ITER_PREFETCH|
2369 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
2370 if (k.k->type == KEY_TYPE_reflink_p) {
e68914ca 2371 ret = commit_do(&trans, NULL, NULL,
bfe88863
KO
2372 BTREE_INSERT_NOFAIL|
2373 BTREE_INSERT_LAZY_RW,
2374 fix_reflink_p_key(&trans, &iter));
2375 if (ret)
2376 break;
2377 }
2378 }
2379 bch2_trans_iter_exit(&trans, &iter);
2380
2381 bch2_trans_exit(&trans);
2382 return ret;
2383}
2384
1c6fdbd8
KO
2385/*
2386 * Checks for inconsistencies that shouldn't happen, unless we have a bug.
2387 * Doesn't fix them yet, mainly because they haven't yet been observed:
2388 */
619f5bee 2389int bch2_fsck_full(struct bch_fs *c)
1c6fdbd8 2390{
c7a09cb1
KO
2391 int ret;
2392again:
2393 ret = bch2_fs_check_snapshots(c) ?:
4ab35c34
KO
2394 bch2_fs_check_subvols(c) ?:
2395 bch2_delete_dead_snapshots(c) ?:
14b393ee 2396 check_inodes(c, true) ?:
5c16add5 2397 check_extents(c) ?:
1c6fdbd8
KO
2398 check_dirents(c) ?:
2399 check_xattrs(c) ?:
ef1669ff 2400 check_root(c) ?:
58686a25 2401 check_directory_structure(c) ?:
bfe88863
KO
2402 check_nlinks(c) ?:
2403 fix_reflink_p(c);
c7a09cb1
KO
2404
2405 if (ret == -NEED_SNAPSHOT_CLEANUP) {
2406 set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
2407 goto again;
2408 }
2409
2410 return ret;
1c6fdbd8
KO
2411}
2412
619f5bee 2413int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1c6fdbd8 2414{
35f1a503
KO
2415 return bch2_fs_check_snapshots(c) ?:
2416 bch2_fs_check_subvols(c) ?:
2417 bch2_delete_dead_snapshots(c) ?:
2418 check_inodes(c, false);
1c6fdbd8 2419}