bcachefs: Kill bch2_verify_bucket_evacuated()
[linux-block.git] / fs / bcachefs / fsck.c
CommitLineData
1c6fdbd8
KO
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
07a1006a 4#include "bkey_buf.h"
1c6fdbd8 5#include "btree_update.h"
e3dc75eb 6#include "buckets.h"
91d961ba 7#include "darray.h"
1c6fdbd8
KO
8#include "dirent.h"
9#include "error.h"
96385742 10#include "fs-common.h"
1c6fdbd8
KO
11#include "fsck.h"
12#include "inode.h"
13#include "keylist.h"
14b393ee 14#include "subvolume.h"
1c6fdbd8
KO
15#include "super.h"
16#include "xattr.h"
17
fc51b041 18#include <linux/bsearch.h>
1c6fdbd8 19#include <linux/dcache.h> /* struct qstr */
1c6fdbd8
KO
20
21#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
22
42590b53
KO
23/*
24 * XXX: this is handling transaction restarts without returning
25 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
26 */
ef1669ff
KO
27static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
28 u32 snapshot)
424eb881 29{
67e0dd8f 30 struct btree_iter iter;
424eb881
KO
31 struct bkey_s_c k;
32 u64 sectors = 0;
94f651e2 33 int ret;
424eb881 34
c72f687a
KO
35 for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
36 SPOS(inum, 0, snapshot),
37 POS(inum, U64_MAX),
38 0, k, ret)
424eb881
KO
39 if (bkey_extent_is_allocation(k.k))
40 sectors += k.k->size;
424eb881 41
67e0dd8f 42 bch2_trans_iter_exit(trans, &iter);
94f651e2
KO
43
44 return ret ?: sectors;
424eb881
KO
45}
46
ef1669ff
KO
47static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
48 u32 snapshot)
49{
50 struct btree_iter iter;
51 struct bkey_s_c k;
52 struct bkey_s_c_dirent d;
53 u64 subdirs = 0;
54 int ret;
55
c72f687a
KO
56 for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
57 SPOS(inum, 0, snapshot),
58 POS(inum, U64_MAX),
59 0, k, ret) {
ef1669ff
KO
60 if (k.k->type != KEY_TYPE_dirent)
61 continue;
62
63 d = bkey_s_c_to_dirent(k);
64 if (d.v->d_type == DT_DIR)
65 subdirs++;
66 }
ef1669ff
KO
67 bch2_trans_iter_exit(trans, &iter);
68
69 return ret ?: subdirs;
70}
71
81ed9ce3
KO
72static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
73 u32 *subvol)
74{
75 struct btree_iter iter;
76 struct bkey_s_c k;
77 int ret;
78
79 bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
80 POS(0, snapshot), 0);
81 k = bch2_btree_iter_peek_slot(&iter);
82 ret = bkey_err(k);
83 if (ret)
84 goto err;
85
86 if (k.k->type != KEY_TYPE_snapshot) {
87 bch_err(trans->c, "snapshot %u not fonud", snapshot);
88 ret = -ENOENT;
89 goto err;
90 }
91
92 *subvol = le32_to_cpu(bkey_s_c_to_snapshot(k).v->subvol);
93err:
94 bch2_trans_iter_exit(trans, &iter);
95 return ret;
96
97}
98
ef1669ff
KO
99static int __subvol_lookup(struct btree_trans *trans, u32 subvol,
100 u32 *snapshot, u64 *inum)
81ed9ce3 101{
97996ddf 102 struct bch_subvolume s;
81ed9ce3
KO
103 int ret;
104
97996ddf 105 ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
81ed9ce3 106
97996ddf
KO
107 *snapshot = le32_to_cpu(s.snapshot);
108 *inum = le64_to_cpu(s.inode);
81ed9ce3 109 return ret;
81ed9ce3
KO
110}
111
ef1669ff
KO
112static int subvol_lookup(struct btree_trans *trans, u32 subvol,
113 u32 *snapshot, u64 *inum)
81ed9ce3 114{
ef1669ff 115 return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum));
81ed9ce3
KO
116}
117
c27314b4
KO
118static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
119 struct bch_inode_unpacked *inode)
120{
121 struct btree_iter iter;
122 struct bkey_s_c k;
123 int ret;
124
125 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
126 POS(0, inode_nr),
127 BTREE_ITER_ALL_SNAPSHOTS);
128 k = bch2_btree_iter_peek(&iter);
129 ret = bkey_err(k);
130 if (ret)
131 goto err;
132
e88a75eb 133 if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
c27314b4
KO
134 ret = -ENOENT;
135 goto err;
136 }
137
3e52c222 138 ret = bch2_inode_unpack(k, inode);
c27314b4 139err:
549d173c 140 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec
KO
141 bch_err(trans->c, "error fetching inode %llu: %s",
142 inode_nr, bch2_err_str(ret));
c27314b4
KO
143 bch2_trans_iter_exit(trans, &iter);
144 return ret;
145}
146
58686a25
KO
147static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
148 struct bch_inode_unpacked *inode,
149 u32 *snapshot)
8a85b20c 150{
67e0dd8f 151 struct btree_iter iter;
8a85b20c
KO
152 struct bkey_s_c k;
153 int ret;
154
67e0dd8f 155 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
ef1669ff 156 SPOS(0, inode_nr, *snapshot), 0);
67e0dd8f 157 k = bch2_btree_iter_peek_slot(&iter);
8a85b20c
KO
158 ret = bkey_err(k);
159 if (ret)
160 goto err;
161
3e52c222
KO
162 ret = bkey_is_inode(k.k)
163 ? bch2_inode_unpack(k, inode)
8a85b20c 164 : -ENOENT;
4db65027
KO
165 if (!ret)
166 *snapshot = iter.pos.snapshot;
8a85b20c 167err:
549d173c 168 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec
KO
169 bch_err(trans->c, "error fetching inode %llu:%u: %s",
170 inode_nr, *snapshot, bch2_err_str(ret));
67e0dd8f 171 bch2_trans_iter_exit(trans, &iter);
8a85b20c
KO
172 return ret;
173}
174
58686a25
KO
175static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
176 struct bch_inode_unpacked *inode,
177 u32 *snapshot)
178{
179 return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
180}
181
ef1669ff
KO
182static int __lookup_dirent(struct btree_trans *trans,
183 struct bch_hash_info hash_info,
184 subvol_inum dir, struct qstr *name,
185 u64 *target, unsigned *type)
186{
187 struct btree_iter iter;
188 struct bkey_s_c_dirent d;
189 int ret;
190
191 ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
192 &hash_info, dir, name, 0);
193 if (ret)
194 return ret;
195
196 d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
197 *target = le64_to_cpu(d.v->d_inum);
198 *type = d.v->d_type;
199 bch2_trans_iter_exit(trans, &iter);
200 return 0;
201}
202
58686a25
KO
203static int __write_inode(struct btree_trans *trans,
204 struct bch_inode_unpacked *inode,
205 u32 snapshot)
8a85b20c 206{
67e0dd8f
KO
207 struct btree_iter iter;
208 int ret;
209
210 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
211 SPOS(0, inode->bi_inum, snapshot),
212 BTREE_ITER_INTENT);
213
214 ret = bch2_btree_iter_traverse(&iter) ?:
215 bch2_inode_write(trans, &iter, inode);
216 bch2_trans_iter_exit(trans, &iter);
58686a25
KO
217 return ret;
218}
219
220static int write_inode(struct btree_trans *trans,
221 struct bch_inode_unpacked *inode,
222 u32 snapshot)
223{
e68914ca 224 int ret = commit_do(trans, NULL, NULL,
8a85b20c
KO
225 BTREE_INSERT_NOFAIL|
226 BTREE_INSERT_LAZY_RW,
58686a25 227 __write_inode(trans, inode, snapshot));
8a85b20c 228 if (ret)
d4bf5eec
KO
229 bch_err(trans->c, "error in fsck: error updating inode: %s",
230 bch2_err_str(ret));
8a85b20c
KO
231 return ret;
232}
233
ef1669ff
KO
234static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot)
235{
e4085b70 236 struct bch_fs *c = trans->c;
ef1669ff
KO
237 struct btree_iter iter = { NULL };
238 struct bkey_i_inode_generation delete;
239 struct bch_inode_unpacked inode_u;
240 struct bkey_s_c k;
241 int ret;
242
42590b53
KO
243 do {
244 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
245 SPOS(inum, 0, snapshot),
246 SPOS(inum, U64_MAX, snapshot),
247 0, NULL) ?:
248 bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
249 SPOS(inum, 0, snapshot),
250 SPOS(inum, U64_MAX, snapshot),
251 0, NULL) ?:
252 bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
253 SPOS(inum, 0, snapshot),
254 SPOS(inum, U64_MAX, snapshot),
255 0, NULL);
256 } while (ret == -BCH_ERR_transaction_restart_nested);
ef1669ff
KO
257 if (ret)
258 goto err;
259retry:
260 bch2_trans_begin(trans);
261
262 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
263 SPOS(0, inum, snapshot), BTREE_ITER_INTENT);
264 k = bch2_btree_iter_peek_slot(&iter);
265
266 ret = bkey_err(k);
267 if (ret)
268 goto err;
269
3e52c222 270 if (!bkey_is_inode(k.k)) {
e4085b70 271 bch2_fs_inconsistent(c,
ef1669ff
KO
272 "inode %llu:%u not found when deleting",
273 inum, snapshot);
274 ret = -EIO;
275 goto err;
276 }
277
3e52c222 278 bch2_inode_unpack(k, &inode_u);
ef1669ff
KO
279
280 /* Subvolume root? */
e4085b70
KO
281 if (inode_u.bi_subvol)
282 bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum);
ef1669ff
KO
283
284 bkey_inode_generation_init(&delete.k_i);
285 delete.k.p = iter.pos;
286 delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
287
288 ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
289 bch2_trans_commit(trans, NULL, NULL,
290 BTREE_INSERT_NOFAIL);
291err:
292 bch2_trans_iter_exit(trans, &iter);
549d173c 293 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ef1669ff
KO
294 goto retry;
295
efd0d038 296 return ret ?: -BCH_ERR_transaction_restart_nested;
ef1669ff
KO
297}
298
ae8bbb9f 299static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
1c6fdbd8 300{
0f238367 301 struct bch_fs *c = trans->c;
67e0dd8f 302 struct btree_iter iter;
1c6fdbd8
KO
303 struct bch_inode_unpacked dir_inode;
304 struct bch_hash_info dir_hash_info;
1c6fdbd8 305 int ret;
1c6fdbd8 306
c27314b4 307 ret = lookup_first_inode(trans, pos.inode, &dir_inode);
b1fd23df 308 if (ret)
e492e7b6 309 goto err;
1c6fdbd8
KO
310
311 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
312
67e0dd8f 313 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
b1fd23df 314
ae8bbb9f 315 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
41fc8622
KO
316 &dir_hash_info, &iter,
317 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
67e0dd8f 318 bch2_trans_iter_exit(trans, &iter);
e492e7b6 319err:
549d173c 320 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
df6a24f8 321 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
ae8bbb9f 322 return ret;
b1fd23df
KO
323}
324
58686a25 325/* Get lost+found, create if it doesn't exist: */
ef1669ff 326static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
58686a25 327 struct bch_inode_unpacked *lostfound)
1c6fdbd8 328{
58686a25
KO
329 struct bch_fs *c = trans->c;
330 struct bch_inode_unpacked root;
331 struct bch_hash_info root_hash_info;
332 struct qstr lostfound_str = QSTR("lost+found");
ef1669ff
KO
333 subvol_inum root_inum = { .subvol = subvol };
334 u64 inum = 0;
335 unsigned d_type = 0;
58686a25
KO
336 u32 snapshot;
337 int ret;
338
285b181a 339 ret = __subvol_lookup(trans, subvol, &snapshot, &root_inum.inum);
ef1669ff
KO
340 if (ret)
341 return ret;
81ed9ce3 342
285b181a
KO
343 ret = __lookup_inode(trans, root_inum.inum, &root, &snapshot);
344 if (ret)
58686a25
KO
345 return ret;
346
347 root_hash_info = bch2_hash_info_init(c, &root);
ef1669ff 348
285b181a 349 ret = __lookup_dirent(trans, root_hash_info, root_inum,
ef1669ff
KO
350 &lostfound_str, &inum, &d_type);
351 if (ret == -ENOENT) {
58686a25
KO
352 bch_notice(c, "creating lost+found");
353 goto create_lostfound;
354 }
355
549d173c 356 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec 357 bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret));
285b181a 358 if (ret)
ef1669ff 359 return ret;
ef1669ff
KO
360
361 if (d_type != DT_DIR) {
362 bch_err(c, "error looking up lost+found: not a directory");
363 return ret;
ef1669ff
KO
364 }
365
285b181a
KO
366 /*
367 * The check_dirents pass has already run, dangling dirents
368 * shouldn't exist here:
369 */
370 return __lookup_inode(trans, inum, lostfound, &snapshot);
58686a25 371
58686a25 372create_lostfound:
285b181a
KO
373 bch2_inode_init_early(c, lostfound);
374
375 ret = bch2_create_trans(trans, root_inum, &root,
376 lostfound, &lostfound_str,
377 0, 0, S_IFDIR|0700, 0, NULL, NULL,
378 (subvol_inum) { }, 0);
549d173c 379 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec 380 bch_err(c, "error creating lost+found: %s", bch2_err_str(ret));
285b181a 381 return ret;
58686a25
KO
382}
383
285b181a 384static int __reattach_inode(struct btree_trans *trans,
81ed9ce3 385 struct bch_inode_unpacked *inode,
ef1669ff 386 u32 inode_snapshot)
58686a25
KO
387{
388 struct bch_hash_info dir_hash;
389 struct bch_inode_unpacked lostfound;
1c6fdbd8
KO
390 char name_buf[20];
391 struct qstr name;
176cf4bf 392 u64 dir_offset = 0;
81ed9ce3 393 u32 subvol;
1c6fdbd8
KO
394 int ret;
395
285b181a 396 ret = __snapshot_lookup_subvol(trans, inode_snapshot, &subvol);
81ed9ce3
KO
397 if (ret)
398 return ret;
399
400 ret = lookup_lostfound(trans, subvol, &lostfound);
176cf4bf 401 if (ret)
d3ff7fec 402 return ret;
176cf4bf 403
58686a25
KO
404 if (S_ISDIR(inode->bi_mode)) {
405 lostfound.bi_nlink++;
176cf4bf 406
285b181a 407 ret = __write_inode(trans, &lostfound, U32_MAX);
176cf4bf 408 if (ret)
d3ff7fec 409 return ret;
176cf4bf
KO
410 }
411
58686a25 412 dir_hash = bch2_hash_info_init(trans->c, &lostfound);
176cf4bf 413
58686a25
KO
414 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
415 name = (struct qstr) QSTR(name_buf);
176cf4bf 416
285b181a
KO
417 ret = bch2_dirent_create(trans,
418 (subvol_inum) {
419 .subvol = subvol,
420 .inum = lostfound.bi_inum,
421 },
422 &dir_hash,
423 inode_d_type(inode),
424 &name, inode->bi_inum, &dir_offset,
425 BCH_HASH_SET_MUST_CREATE);
426 if (ret)
427 return ret;
428
429 inode->bi_dir = lostfound.bi_inum;
430 inode->bi_dir_offset = dir_offset;
431
432 return __write_inode(trans, inode, inode_snapshot);
433}
434
435static int reattach_inode(struct btree_trans *trans,
436 struct bch_inode_unpacked *inode,
437 u32 inode_snapshot)
438{
e68914ca 439 int ret = commit_do(trans, NULL, NULL,
285b181a
KO
440 BTREE_INSERT_LAZY_RW|
441 BTREE_INSERT_NOFAIL,
442 __reattach_inode(trans, inode, inode_snapshot));
58686a25 443 if (ret) {
d4bf5eec
KO
444 bch_err(trans->c, "error reattaching inode %llu: %s",
445 inode->bi_inum, bch2_err_str(ret));
58686a25
KO
446 return ret;
447 }
176cf4bf 448
285b181a 449 return ret;
1c6fdbd8
KO
450}
451
d3ff7fec
KO
452static int remove_backpointer(struct btree_trans *trans,
453 struct bch_inode_unpacked *inode)
454{
67e0dd8f 455 struct btree_iter iter;
d3ff7fec
KO
456 struct bkey_s_c k;
457 int ret;
458
67e0dd8f
KO
459 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
460 POS(inode->bi_dir, inode->bi_dir_offset), 0);
461 k = bch2_btree_iter_peek_slot(&iter);
d3ff7fec
KO
462 ret = bkey_err(k);
463 if (ret)
464 goto out;
465 if (k.k->type != KEY_TYPE_dirent) {
466 ret = -ENOENT;
467 goto out;
468 }
469
285b181a 470 ret = __remove_dirent(trans, k.k->p);
d3ff7fec 471out:
67e0dd8f 472 bch2_trans_iter_exit(trans, &iter);
d3ff7fec
KO
473 return ret;
474}
475
49124d8a
KO
476struct snapshots_seen_entry {
477 u32 id;
478 u32 equiv;
479};
480
481struct snapshots_seen {
482 struct bpos pos;
483 DARRAY(struct snapshots_seen_entry) ids;
484};
485
486static inline void snapshots_seen_exit(struct snapshots_seen *s)
487{
488 darray_exit(&s->ids);
489}
490
491static inline void snapshots_seen_init(struct snapshots_seen *s)
492{
493 memset(s, 0, sizeof(*s));
494}
495
0d06b4ec
KO
496static int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
497{
498 struct snapshots_seen_entry *i, n = { id, id };
499 int ret;
500
501 darray_for_each(s->ids, i) {
502 if (n.equiv < i->equiv)
503 break;
504
505 if (i->equiv == n.equiv) {
df6a24f8 506 bch_err(c, "%s(): adding duplicate snapshot", __func__);
0d06b4ec
KO
507 return -EINVAL;
508 }
509 }
510
511 ret = darray_insert_item(&s->ids, i - s->ids.data, n);
512 if (ret)
513 bch_err(c, "error reallocating snapshots_seen table (size %zu)",
514 s->ids.size);
515 return ret;
516}
517
49124d8a
KO
518static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
519 enum btree_id btree_id, struct bpos pos)
ef1669ff 520{
49124d8a
KO
521 struct snapshots_seen_entry *i, n = {
522 .id = pos.snapshot,
523 .equiv = bch2_snapshot_equiv(c, pos.snapshot),
524 };
c7be3cb5 525 int ret = 0;
ef1669ff 526
e88a75eb 527 if (!bkey_eq(s->pos, pos))
91d961ba 528 s->ids.nr = 0;
49124d8a
KO
529
530 pos.snapshot = n.equiv;
ef1669ff
KO
531 s->pos = pos;
532
49124d8a
KO
533 darray_for_each(s->ids, i)
534 if (i->equiv == n.equiv) {
c7be3cb5
KO
535 if (fsck_err_on(i->id != n.id, c,
536 "snapshot deletion did not run correctly:\n"
49124d8a
KO
537 " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
538 bch2_btree_ids[btree_id],
539 pos.inode, pos.offset,
c7be3cb5 540 i->id, n.id, n.equiv))
615f867c 541 return -BCH_ERR_need_snapshot_cleanup;
ef1669ff 542
49124d8a
KO
543 return 0;
544 }
545
546 ret = darray_push(&s->ids, n);
547 if (ret)
548 bch_err(c, "error reallocating snapshots_seen table (size %zu)",
549 s->ids.size);
c7be3cb5 550fsck_err:
49124d8a 551 return ret;
ef1669ff
KO
552}
553
554/**
555 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
556 * and @ancestor hasn't been overwritten in @seen
557 *
558 * That is, returns whether key in @ancestor snapshot is visible in @id snapshot
559 */
560static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
561 u32 id, u32 ancestor)
562{
563 ssize_t i;
49124d8a 564 u32 top = seen->ids.nr ? seen->ids.data[seen->ids.nr - 1].equiv : 0;
ef1669ff
KO
565
566 BUG_ON(id > ancestor);
49124d8a
KO
567 BUG_ON(!bch2_snapshot_is_equiv(c, id));
568 BUG_ON(!bch2_snapshot_is_equiv(c, ancestor));
ef1669ff
KO
569
570 /* @ancestor should be the snapshot most recently added to @seen */
49124d8a
KO
571 BUG_ON(ancestor != seen->pos.snapshot);
572 BUG_ON(ancestor != top);
ef1669ff
KO
573
574 if (id == ancestor)
575 return true;
576
577 if (!bch2_snapshot_is_ancestor(c, id, ancestor))
578 return false;
579
91d961ba 580 for (i = seen->ids.nr - 2;
49124d8a 581 i >= 0 && seen->ids.data[i].equiv >= id;
ef1669ff 582 --i)
49124d8a
KO
583 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv) &&
584 bch2_snapshot_is_ancestor(c, seen->ids.data[i].equiv, ancestor))
ef1669ff
KO
585 return false;
586
587 return true;
588}
589
590/**
591 * ref_visible - given a key with snapshot id @src that points to a key with
592 * snapshot id @dst, test whether there is some snapshot in which @dst is
593 * visible.
594 *
595 * This assumes we're visiting @src keys in natural key order.
596 *
597 * @s - list of snapshot IDs already seen at @src
598 * @src - snapshot ID of src key
599 * @dst - snapshot ID of dst key
600 */
601static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
602 u32 src, u32 dst)
603{
604 return dst <= src
605 ? key_visible_in_snapshot(c, s, dst, src)
606 : bch2_snapshot_is_ancestor(c, src, dst);
607}
608
c58029ec
DH
609static int ref_visible2(struct bch_fs *c,
610 u32 src, struct snapshots_seen *src_seen,
611 u32 dst, struct snapshots_seen *dst_seen)
612{
613 src = bch2_snapshot_equiv(c, src);
614 dst = bch2_snapshot_equiv(c, dst);
615
616 if (dst > src) {
617 swap(dst, src);
618 swap(dst_seen, src_seen);
619 }
620 return key_visible_in_snapshot(c, src_seen, dst, src);
621}
622
49124d8a
KO
623#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
624 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
625 (_i)->snapshot <= (_snapshot); _i++) \
ef1669ff
KO
626 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
627
91d961ba
KO
628struct inode_walker_entry {
629 struct bch_inode_unpacked inode;
630 u32 snapshot;
631 u64 count;
632};
633
1c6fdbd8 634struct inode_walker {
ef1669ff
KO
635 bool first_this_inode;
636 u64 cur_inum;
637
91d961ba 638 DARRAY(struct inode_walker_entry) inodes;
1c6fdbd8
KO
639};
640
ef1669ff
KO
641static void inode_walker_exit(struct inode_walker *w)
642{
91d961ba 643 darray_exit(&w->inodes);
ef1669ff
KO
644}
645
1c6fdbd8
KO
646static struct inode_walker inode_walker_init(void)
647{
ef1669ff
KO
648 return (struct inode_walker) { 0, };
649}
650
ef1669ff 651static int add_inode(struct bch_fs *c, struct inode_walker *w,
3e52c222 652 struct bkey_s_c inode)
ef1669ff
KO
653{
654 struct bch_inode_unpacked u;
ef1669ff
KO
655
656 BUG_ON(bch2_inode_unpack(inode, &u));
657
91d961ba 658 return darray_push(&w->inodes, ((struct inode_walker_entry) {
ef1669ff 659 .inode = u,
49124d8a 660 .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot),
91d961ba 661 }));
1c6fdbd8
KO
662}
663
914f2786 664static int __walk_inode(struct btree_trans *trans,
ef1669ff 665 struct inode_walker *w, struct bpos pos)
1c6fdbd8 666{
ef1669ff
KO
667 struct bch_fs *c = trans->c;
668 struct btree_iter iter;
669 struct bkey_s_c k;
0763c552 670 u32 restart_count = trans->restart_count;
49124d8a 671 unsigned i;
ef1669ff 672 int ret;
1c6fdbd8 673
49124d8a 674 pos.snapshot = bch2_snapshot_equiv(c, pos.snapshot);
1c6fdbd8 675
ef1669ff 676 if (pos.inode == w->cur_inum) {
6bd13057 677 w->first_this_inode = false;
ef1669ff 678 goto lookup_snapshot;
1c6fdbd8
KO
679 }
680
91d961ba 681 w->inodes.nr = 0;
ef1669ff
KO
682
683 for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, pos.inode),
684 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
685 if (k.k->p.offset != pos.inode)
686 break;
687
3e52c222
KO
688 if (bkey_is_inode(k.k))
689 add_inode(c, w, k);
ef1669ff
KO
690 }
691 bch2_trans_iter_exit(trans, &iter);
692
693 if (ret)
694 return ret;
695
696 w->cur_inum = pos.inode;
697 w->first_this_inode = true;
0763c552
KO
698
699 if (trans_was_restarted(trans, restart_count))
700 return -BCH_ERR_transaction_restart_nested;
701
ef1669ff 702lookup_snapshot:
91d961ba
KO
703 for (i = 0; i < w->inodes.nr; i++)
704 if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->inodes.data[i].snapshot))
ef1669ff
KO
705 goto found;
706 return INT_MAX;
707found:
91d961ba 708 BUG_ON(pos.snapshot > w->inodes.data[i].snapshot);
ef1669ff 709
91d961ba 710 if (pos.snapshot != w->inodes.data[i].snapshot) {
49124d8a
KO
711 struct inode_walker_entry e = w->inodes.data[i];
712
713 e.snapshot = pos.snapshot;
714 e.count = 0;
715
716 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
717 pos.inode, pos.snapshot, w->inodes.data[i].snapshot);
ef1669ff 718
91d961ba 719 while (i && w->inodes.data[i - 1].snapshot > pos.snapshot)
ef1669ff
KO
720 --i;
721
49124d8a 722 ret = darray_insert_item(&w->inodes, i, e);
ef1669ff
KO
723 if (ret)
724 return ret;
ef1669ff
KO
725 }
726
727 return i;
1c6fdbd8
KO
728}
729
ef1669ff
KO
730static int __get_visible_inodes(struct btree_trans *trans,
731 struct inode_walker *w,
732 struct snapshots_seen *s,
733 u64 inum)
734{
735 struct bch_fs *c = trans->c;
736 struct btree_iter iter;
737 struct bkey_s_c k;
738 int ret;
739
91d961ba 740 w->inodes.nr = 0;
ef1669ff 741
12043cf1 742 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
ef1669ff 743 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
49124d8a
KO
744 u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
745
ef1669ff
KO
746 if (k.k->p.offset != inum)
747 break;
748
49124d8a 749 if (!ref_visible(c, s, s->pos.snapshot, equiv))
ef1669ff
KO
750 continue;
751
49124d8a 752 if (bkey_is_inode(k.k))
3e52c222 753 add_inode(c, w, k);
49124d8a
KO
754
755 if (equiv >= s->pos.snapshot)
756 break;
ef1669ff
KO
757 }
758 bch2_trans_iter_exit(trans, &iter);
759
760 return ret;
761}
762
763static int check_key_has_snapshot(struct btree_trans *trans,
764 struct btree_iter *iter,
765 struct bkey_s_c k)
766{
767 struct bch_fs *c = trans->c;
fa8e94fa 768 struct printbuf buf = PRINTBUF;
ef1669ff
KO
769 int ret = 0;
770
49124d8a 771 if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
ef1669ff 772 "key in missing snapshot: %s",
fa8e94fa
KO
773 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
774 ret = bch2_btree_delete_at(trans, iter,
285b181a 775 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1;
ef1669ff 776fsck_err:
fa8e94fa 777 printbuf_exit(&buf);
ef1669ff 778 return ret;
914f2786
KO
779}
780
7ac2c55e
KO
781static int hash_redo_key(struct btree_trans *trans,
782 const struct bch_hash_desc desc,
783 struct bch_hash_info *hash_info,
784 struct btree_iter *k_iter, struct bkey_s_c k)
1c6fdbd8 785{
e3b4b48c 786 struct bkey_i *delete;
1c6fdbd8 787 struct bkey_i *tmp;
1c6fdbd8 788
e3b4b48c
KO
789 delete = bch2_trans_kmalloc(trans, sizeof(*delete));
790 if (IS_ERR(delete))
791 return PTR_ERR(delete);
792
994ba475 793 tmp = bch2_bkey_make_mut(trans, k);
b1fd23df
KO
794 if (IS_ERR(tmp))
795 return PTR_ERR(tmp);
1c6fdbd8 796
e3b4b48c
KO
797 bkey_init(&delete->k);
798 delete->k.p = k_iter->pos;
8c3f6da9
KO
799 return bch2_btree_iter_traverse(k_iter) ?:
800 bch2_trans_update(trans, k_iter, delete, 0) ?:
5877d887
KO
801 bch2_hash_set_snapshot(trans, desc, hash_info,
802 (subvol_inum) { 0, k.k->p.inode },
803 k.k->p.snapshot, tmp,
804 BCH_HASH_SET_MUST_CREATE,
805 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
806 bch2_trans_commit(trans, NULL, NULL,
807 BTREE_INSERT_NOFAIL|
808 BTREE_INSERT_LAZY_RW);
1c6fdbd8
KO
809}
810
7ac2c55e
KO
811static int hash_check_key(struct btree_trans *trans,
812 const struct bch_hash_desc desc,
813 struct bch_hash_info *hash_info,
814 struct btree_iter *k_iter, struct bkey_s_c hash_k)
d69f41d6 815{
424eb881 816 struct bch_fs *c = trans->c;
67e0dd8f 817 struct btree_iter iter = { NULL };
fa8e94fa 818 struct printbuf buf = PRINTBUF;
7ac2c55e
KO
819 struct bkey_s_c k;
820 u64 hash;
d69f41d6
KO
821 int ret = 0;
822
7ac2c55e
KO
823 if (hash_k.k->type != desc.key_type)
824 return 0;
825
826 hash = desc.hash_bkey(hash_info, hash_k);
827
828 if (likely(hash == hash_k.k->p.offset))
d69f41d6
KO
829 return 0;
830
7ac2c55e
KO
831 if (hash_k.k->p.offset < hash)
832 goto bad_hash;
d69f41d6 833
d8f31407 834 for_each_btree_key_norestart(trans, iter, desc.btree_id,
419fc65f 835 SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
d8f31407 836 BTREE_ITER_SLOTS, k, ret) {
e88a75eb 837 if (bkey_eq(k.k->p, hash_k.k->p))
d69f41d6
KO
838 break;
839
7ac2c55e
KO
840 if (fsck_err_on(k.k->type == desc.key_type &&
841 !desc.cmp_bkey(k, hash_k), c,
d69f41d6 842 "duplicate hash table keys:\n%s",
fa8e94fa
KO
843 (printbuf_reset(&buf),
844 bch2_bkey_val_to_text(&buf, c, hash_k),
845 buf.buf))) {
285b181a 846 ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
d69f41d6
KO
847 break;
848 }
d69f41d6 849
7ac2c55e 850 if (bkey_deleted(k.k)) {
67e0dd8f 851 bch2_trans_iter_exit(trans, &iter);
7ac2c55e 852 goto bad_hash;
1c6fdbd8 853 }
7ac2c55e 854 }
fa8e94fa 855out:
67e0dd8f 856 bch2_trans_iter_exit(trans, &iter);
fa8e94fa 857 printbuf_exit(&buf);
1c6fdbd8 858 return ret;
7ac2c55e 859bad_hash:
a1019576 860 if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
d8f31407 861 bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash,
fa8e94fa 862 (printbuf_reset(&buf),
1ed0a5d2
KO
863 bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
864 ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
865 if (ret) {
866 bch_err(c, "hash_redo_key err %s", bch2_err_str(ret));
867 return ret;
868 }
869 ret = -BCH_ERR_transaction_restart_nested;
741daa5b 870 }
741daa5b 871fsck_err:
fa8e94fa 872 goto out;
741daa5b
KO
873}
874
5c16add5
KO
875static int check_inode(struct btree_trans *trans,
876 struct btree_iter *iter,
a1783320 877 struct bkey_s_c k,
ef1669ff 878 struct bch_inode_unpacked *prev,
49124d8a 879 struct snapshots_seen *s,
285b181a 880 bool full)
5c16add5
KO
881{
882 struct bch_fs *c = trans->c;
285b181a 883 struct bch_inode_unpacked u;
5c16add5 884 bool do_update = false;
285b181a
KO
885 int ret;
886
285b181a 887 ret = check_key_has_snapshot(trans, iter, k);
e492e7b6
KO
888 if (ret < 0)
889 goto err;
285b181a 890 if (ret)
e492e7b6 891 return 0;
285b181a 892
49124d8a
KO
893 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
894 if (ret)
895 goto err;
896
285b181a
KO
897 /*
898 * if snapshot id isn't a leaf node, skip it - deletion in
899 * particular is not atomic, so on the internal snapshot nodes
900 * we can see inodes marked for deletion after a clean shutdown
901 */
902 if (bch2_snapshot_internal_node(c, k.k->p.snapshot))
903 return 0;
904
3e52c222 905 if (!bkey_is_inode(k.k))
285b181a
KO
906 return 0;
907
3e52c222 908 BUG_ON(bch2_inode_unpack(k, &u));
285b181a
KO
909
910 if (!full &&
3e52c222
KO
911 !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|
912 BCH_INODE_I_SECTORS_DIRTY|
913 BCH_INODE_UNLINKED)))
285b181a
KO
914 return 0;
915
285b181a
KO
916 if (prev->bi_inum != u.bi_inum)
917 *prev = u;
918
919 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed ||
920 inode_d_type(prev) != inode_d_type(&u), c,
ef1669ff
KO
921 "inodes in different snapshots don't match")) {
922 bch_err(c, "repair not implemented yet");
923 return -EINVAL;
924 }
5c16add5
KO
925
926 if (u.bi_flags & BCH_INODE_UNLINKED &&
927 (!c->sb.clean ||
928 fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
929 u.bi_inum))) {
5c16add5
KO
930 bch2_trans_unlock(trans);
931 bch2_fs_lazy_rw(c);
932
ef1669ff 933 ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
efd0d038 934 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec
KO
935 bch_err(c, "error in fsck: error while deleting inode: %s",
936 bch2_err_str(ret));
5c16add5
KO
937 return ret;
938 }
939
940 if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
941 (!c->sb.clean ||
942 fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
943 u.bi_inum))) {
944 bch_verbose(c, "truncating inode %llu", u.bi_inum);
945
946 bch2_trans_unlock(trans);
947 bch2_fs_lazy_rw(c);
948
949 /*
950 * XXX: need to truncate partial blocks too here - or ideally
951 * just switch units to bytes and that issue goes away
952 */
953 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
ef1669ff
KO
954 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
955 iter->pos.snapshot),
5c16add5 956 POS(u.bi_inum, U64_MAX),
ef1669ff 957 0, NULL);
51fe0332 958 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec
KO
959 bch_err(c, "error in fsck: error truncating inode: %s",
960 bch2_err_str(ret));
51fe0332 961 if (ret)
5c16add5 962 return ret;
5c16add5
KO
963
964 /*
965 * We truncated without our normal sector accounting hook, just
966 * make sure we recalculate it:
967 */
968 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
969
970 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
971 do_update = true;
972 }
973
974 if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
975 (!c->sb.clean ||
976 fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
977 u.bi_inum))) {
978 s64 sectors;
979
980 bch_verbose(c, "recounting sectors for inode %llu",
981 u.bi_inum);
982
ef1669ff 983 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
5c16add5 984 if (sectors < 0) {
d4bf5eec
KO
985 bch_err(c, "error in fsck: error recounting inode sectors: %s",
986 bch2_err_str(sectors));
5c16add5
KO
987 return sectors;
988 }
989
990 u.bi_sectors = sectors;
991 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
992 do_update = true;
993 }
994
d3ff7fec
KO
995 if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
996 u.bi_dir = 0;
997 u.bi_dir_offset = 0;
998 u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
5c16add5
KO
999 do_update = true;
1000 }
1001
1002 if (do_update) {
a1783320 1003 ret = __write_inode(trans, &u, iter->pos.snapshot);
5c16add5 1004 if (ret)
d4bf5eec
KO
1005 bch_err(c, "error in fsck: error updating inode: %s",
1006 bch2_err_str(ret));
5c16add5 1007 }
e492e7b6 1008err:
5c16add5 1009fsck_err:
e492e7b6 1010 if (ret)
df6a24f8 1011 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
5c16add5
KO
1012 return ret;
1013}
1014
1015noinline_for_stack
1016static int check_inodes(struct bch_fs *c, bool full)
1017{
1018 struct btree_trans trans;
67e0dd8f 1019 struct btree_iter iter;
285b181a 1020 struct bch_inode_unpacked prev = { 0 };
49124d8a 1021 struct snapshots_seen s;
a1783320 1022 struct bkey_s_c k;
5c16add5
KO
1023 int ret;
1024
49124d8a 1025 snapshots_seen_init(&s);
5c16add5
KO
1026 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1027
a1783320
KO
1028 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
1029 POS_MIN,
eace11a7
KO
1030 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1031 NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
a1783320 1032 check_inode(&trans, &iter, k, &prev, &s, full));
ef1669ff 1033
285b181a 1034 bch2_trans_exit(&trans);
49124d8a 1035 snapshots_seen_exit(&s);
e492e7b6 1036 if (ret)
df6a24f8 1037 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
285b181a
KO
1038 return ret;
1039}
ef1669ff 1040
ef1669ff
KO
1041/*
1042 * Checking for overlapping extents needs to be reimplemented
1043 */
1044#if 0
abcecb49 1045static int fix_overlapping_extent(struct btree_trans *trans,
e3e464ac
KO
1046 struct bkey_s_c k, struct bpos cut_at)
1047{
67e0dd8f 1048 struct btree_iter iter;
e3e464ac
KO
1049 struct bkey_i *u;
1050 int ret;
1051
1052 u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
1053 ret = PTR_ERR_OR_ZERO(u);
1054 if (ret)
1055 return ret;
1056
1057 bkey_reassemble(u, k);
1058 bch2_cut_front(cut_at, u);
1059
e3e464ac 1060
ef1669ff
KO
1061 /*
1062 * We don't want to go through the extent_handle_overwrites path:
1063 *
1064 * XXX: this is going to screw up disk accounting, extent triggers
1065 * assume things about extent overwrites - we should be running the
1066 * triggers manually here
1067 */
1068 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
1069 BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
1070
1071 BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
1072 ret = bch2_btree_iter_traverse(&iter) ?:
1073 bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
1074 bch2_trans_commit(trans, NULL, NULL,
1075 BTREE_INSERT_NOFAIL|
1076 BTREE_INSERT_LAZY_RW);
1077 bch2_trans_iter_exit(trans, &iter);
1078 return ret;
1079}
1080#endif
1081
4db65027
KO
1082static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
1083 struct btree_iter *iter,
1084 struct bpos pos)
1085{
1086 struct bkey_s_c k;
1087 int ret;
1088
1089 bch2_trans_iter_init(trans, iter, BTREE_ID_dirents, pos, 0);
1090 k = bch2_btree_iter_peek_slot(iter);
1091 ret = bkey_err(k);
1092 if (!ret && k.k->type != KEY_TYPE_dirent)
1093 ret = -ENOENT;
1094 if (ret) {
1095 bch2_trans_iter_exit(trans, iter);
1096 return (struct bkey_s_c_dirent) { .k = ERR_PTR(ret) };
1097 }
1098
1099 return bkey_s_c_to_dirent(k);
1100}
1101
1102static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
1103 struct bkey_s_c_dirent d)
1104{
1105 return inode->bi_dir == d.k->p.inode &&
1106 inode->bi_dir_offset == d.k->p.offset;
1107}
1108
1109static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
1110 struct bch_inode_unpacked *inode)
1111{
1112 return d.v->d_type == DT_SUBVOL
1113 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
1114 : le64_to_cpu(d.v->d_inum) == inode->bi_inum;
1115}
1116
ef1669ff
KO
1117static int inode_backpointer_exists(struct btree_trans *trans,
1118 struct bch_inode_unpacked *inode,
1119 u32 snapshot)
1120{
1121 struct btree_iter iter;
4db65027 1122 struct bkey_s_c_dirent d;
ef1669ff
KO
1123 int ret;
1124
4db65027
KO
1125 d = dirent_get_by_pos(trans, &iter,
1126 SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot));
1127 ret = bkey_err(d.s_c);
ef1669ff 1128 if (ret)
e296b1f9 1129 return ret == -ENOENT ? 0 : ret;
488f9776 1130
4db65027 1131 ret = dirent_points_to_inode(d, inode);
ef1669ff
KO
1132 bch2_trans_iter_exit(trans, &iter);
1133 return ret;
1134}
1135
ef1669ff
KO
1136static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
1137{
1138 struct bch_fs *c = trans->c;
1139 struct inode_walker_entry *i;
0763c552
KO
1140 u32 restart_count = trans->restart_count;
1141 int ret = 0;
ef1669ff
KO
1142 s64 count2;
1143
91d961ba 1144 darray_for_each(w->inodes, i) {
ef1669ff
KO
1145 if (i->inode.bi_sectors == i->count)
1146 continue;
1147
7903e3d2 1148 count2 = bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot);
ef1669ff
KO
1149
1150 if (i->count != count2) {
1151 bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu",
1152 i->count, count2);
1153 i->count = count2;
1154 if (i->inode.bi_sectors == i->count)
1155 continue;
1156 }
1157
1158 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c,
1159 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
1160 w->cur_inum, i->snapshot,
1ed0a5d2
KO
1161 i->inode.bi_sectors, i->count)) {
1162 i->inode.bi_sectors = i->count;
1163 ret = write_inode(trans, &i->inode, i->snapshot);
1164 if (ret)
1165 break;
1ed0a5d2 1166 }
ef1669ff
KO
1167 }
1168fsck_err:
c497df8b 1169 if (ret)
df6a24f8 1170 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
c497df8b
KO
1171 if (!ret && trans_was_restarted(trans, restart_count))
1172 ret = -BCH_ERR_transaction_restart_nested;
1173 return ret;
ef1669ff
KO
1174}
1175
c58029ec
DH
1176struct extent_end {
1177 u32 snapshot;
1178 u64 offset;
1179 struct snapshots_seen seen;
1180};
1181
1182typedef DARRAY(struct extent_end) extent_ends;
1183
1184static int check_overlapping_extents(struct btree_trans *trans,
1185 struct snapshots_seen *seen,
1186 extent_ends *extent_ends,
1187 struct bkey_s_c k,
1188 struct btree_iter *iter)
1189{
1190 struct bch_fs *c = trans->c;
1191 struct extent_end *i;
1192 struct printbuf buf = PRINTBUF;
1193 int ret = 0;
1194
1195 darray_for_each(*extent_ends, i) {
1196 /* duplicate, due to transaction restart: */
1197 if (i->offset == k.k->p.offset &&
1198 i->snapshot == k.k->p.snapshot)
1199 continue;
1200
1201 if (!ref_visible2(c,
1202 k.k->p.snapshot, seen,
1203 i->snapshot, &i->seen))
1204 continue;
1205
1206 if (fsck_err_on(i->offset > bkey_start_offset(k.k), c,
1207 "overlapping extents: extent in snapshot %u ends at %llu overlaps with\n%s",
1208 i->snapshot,
1209 i->offset,
1210 (printbuf_reset(&buf),
1211 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1212 struct bkey_i *update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
1213 if ((ret = PTR_ERR_OR_ZERO(update)))
1214 goto err;
1215 bkey_reassemble(update, k);
1216 ret = bch2_trans_update_extent(trans, iter, update, 0);
1217 if (!ret)
1218 goto err;
1219 }
1220 }
1221err:
1222fsck_err:
1223 printbuf_exit(&buf);
1224 return ret;
1225}
1226
1227static int extent_ends_at(extent_ends *extent_ends,
1228 struct snapshots_seen *seen,
1229 struct bkey_s_c k)
1230{
1231 struct extent_end *i, n = (struct extent_end) {
1232 .snapshot = k.k->p.snapshot,
1233 .offset = k.k->p.offset,
1234 .seen = *seen,
1235 };
1236
1237 n.seen.ids.data = kmemdup(seen->ids.data,
1238 sizeof(seen->ids.data[0]) * seen->ids.size,
1239 GFP_KERNEL);
1240 if (!n.seen.ids.data)
65d48e35 1241 return -BCH_ERR_ENOMEM_fsck_extent_ends_at;
c58029ec
DH
1242
1243 darray_for_each(*extent_ends, i) {
1244 if (i->snapshot == k.k->p.snapshot) {
1245 snapshots_seen_exit(&i->seen);
1246 *i = n;
1247 return 0;
1248 }
1249
1250 if (i->snapshot >= k.k->p.snapshot)
1251 break;
1252 }
1253
1254 return darray_insert_item(extent_ends, i - extent_ends->data, n);
1255}
1256
1257static void extent_ends_reset(extent_ends *extent_ends)
1258{
1259 struct extent_end *i;
1260
1261 darray_for_each(*extent_ends, i)
1262 snapshots_seen_exit(&i->seen);
1263
1264 extent_ends->nr = 0;
1265}
1266
ef1669ff 1267static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
a1783320 1268 struct bkey_s_c k,
ef1669ff 1269 struct inode_walker *inode,
c58029ec
DH
1270 struct snapshots_seen *s,
1271 extent_ends *extent_ends)
ef1669ff
KO
1272{
1273 struct bch_fs *c = trans->c;
ef1669ff 1274 struct inode_walker_entry *i;
fa8e94fa 1275 struct printbuf buf = PRINTBUF;
49124d8a 1276 struct bpos equiv;
ef1669ff 1277 int ret = 0;
ef1669ff
KO
1278
1279 ret = check_key_has_snapshot(trans, iter, k);
fa8e94fa
KO
1280 if (ret) {
1281 ret = ret < 0 ? ret : 0;
1282 goto out;
1283 }
ef1669ff 1284
49124d8a
KO
1285 equiv = k.k->p;
1286 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
1287
1288 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
ef1669ff 1289 if (ret)
fa8e94fa 1290 goto err;
ef1669ff
KO
1291
1292 if (k.k->type == KEY_TYPE_whiteout)
fa8e94fa 1293 goto out;
ef1669ff
KO
1294
1295 if (inode->cur_inum != k.k->p.inode) {
1296 ret = check_i_sectors(trans, inode);
1297 if (ret)
fa8e94fa 1298 goto err;
c58029ec
DH
1299
1300 extent_ends_reset(extent_ends);
ef1669ff 1301 }
292dea86 1302
0763c552 1303 BUG_ON(!iter->path->should_be_locked);
ef1669ff 1304
c58029ec
DH
1305 ret = check_overlapping_extents(trans, s, extent_ends, k, iter);
1306 if (ret)
1307 goto err;
1308
1309 ret = extent_ends_at(extent_ends, s, k);
1310 if (ret)
1311 goto err;
ef1669ff 1312
49124d8a 1313 ret = __walk_inode(trans, inode, equiv);
ef1669ff 1314 if (ret < 0)
fa8e94fa 1315 goto err;
ef1669ff
KO
1316
1317 if (fsck_err_on(ret == INT_MAX, c,
1318 "extent in missing inode:\n %s",
fa8e94fa
KO
1319 (printbuf_reset(&buf),
1320 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1321 ret = bch2_btree_delete_at(trans, iter,
285b181a 1322 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
fa8e94fa
KO
1323 goto out;
1324 }
ef1669ff 1325
fa8e94fa
KO
1326 if (ret == INT_MAX) {
1327 ret = 0;
1328 goto out;
1329 }
e3e464ac 1330
91d961ba 1331 i = inode->inodes.data + ret;
ef1669ff 1332 ret = 0;
e3e464ac 1333
ef1669ff
KO
1334 if (fsck_err_on(!S_ISREG(i->inode.bi_mode) &&
1335 !S_ISLNK(i->inode.bi_mode), c,
1336 "extent in non regular inode mode %o:\n %s",
1337 i->inode.bi_mode,
fa8e94fa
KO
1338 (printbuf_reset(&buf),
1339 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1340 ret = bch2_btree_delete_at(trans, iter,
285b181a 1341 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
fa8e94fa
KO
1342 goto out;
1343 }
ef1669ff 1344
0d06b4ec
KO
1345 /*
1346 * Check inodes in reverse order, from oldest snapshots to newest, so
1347 * that we emit the fewest number of whiteouts necessary:
1348 */
1349 for (i = inode->inodes.data + inode->inodes.nr - 1;
1350 i >= inode->inodes.data;
1351 --i) {
1352 if (i->snapshot > equiv.snapshot ||
1353 !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot))
1354 continue;
1355
1356 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
79203111
KO
1357 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
1358 !bkey_extent_is_reservation(k), c,
0d06b4ec
KO
1359 "extent type past end of inode %llu:%u, i_size %llu\n %s",
1360 i->inode.bi_inum, i->snapshot, i->inode.bi_size,
1361 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1362 struct btree_iter iter2;
1363
1364 bch2_trans_copy_iter(&iter2, iter);
1365 bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
1366 ret = bch2_btree_iter_traverse(&iter2) ?:
1367 bch2_btree_delete_at(trans, &iter2,
1368 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1369 bch2_trans_iter_exit(trans, &iter2);
1370 if (ret)
1371 goto err;
1372
1373 if (i->snapshot != equiv.snapshot) {
1374 ret = snapshots_seen_add(c, s, i->snapshot);
1375 if (ret)
1376 goto err;
ef1669ff
KO
1377 }
1378 }
1379 }
8a85b20c 1380
ef1669ff 1381 if (bkey_extent_is_allocation(k.k))
49124d8a 1382 for_each_visible_inode(c, s, inode, equiv.snapshot, i)
ef1669ff
KO
1383 i->count += k.k->size;
1384#if 0
1385 bch2_bkey_buf_reassemble(&prev, c, k);
1386#endif
8a85b20c 1387
fa8e94fa
KO
1388out:
1389err:
ef1669ff 1390fsck_err:
fa8e94fa 1391 printbuf_exit(&buf);
e492e7b6 1392
549d173c 1393 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
df6a24f8 1394 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
8a85b20c
KO
1395 return ret;
1396}
1397
1c6fdbd8
KO
1398/*
1399 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
1400 * that i_size an i_sectors are consistent
1401 */
1402noinline_for_stack
1403static int check_extents(struct bch_fs *c)
1404{
1405 struct inode_walker w = inode_walker_init();
ef1669ff 1406 struct snapshots_seen s;
424eb881 1407 struct btree_trans trans;
67e0dd8f 1408 struct btree_iter iter;
a1783320 1409 struct bkey_s_c k;
c58029ec 1410 extent_ends extent_ends = { 0 };
e3dc75eb 1411 struct disk_reservation res = { 0 };
1c6fdbd8
KO
1412 int ret = 0;
1413
ef1669ff 1414 snapshots_seen_init(&s);
20bceecb 1415 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 1416
1c6fdbd8
KO
1417 bch_verbose(c, "checking extents");
1418
a1783320
KO
1419 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents,
1420 POS(BCACHEFS_ROOT_INO, 0),
1421 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
e3dc75eb
KO
1422 &res, NULL,
1423 BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
1424 bch2_disk_reservation_put(c, &res);
1425 check_extent(&trans, &iter, k, &w, &s, &extent_ends);
1426 }));
c58029ec 1427
e3dc75eb 1428 bch2_disk_reservation_put(c, &res);
c58029ec
DH
1429 extent_ends_reset(&extent_ends);
1430 darray_exit(&extent_ends);
ef1669ff
KO
1431 inode_walker_exit(&w);
1432 bch2_trans_exit(&trans);
1433 snapshots_seen_exit(&s);
1434
e492e7b6 1435 if (ret)
df6a24f8 1436 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
ef1669ff
KO
1437 return ret;
1438}
1439
1440static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
1441{
1442 struct bch_fs *c = trans->c;
1443 struct inode_walker_entry *i;
0763c552
KO
1444 u32 restart_count = trans->restart_count;
1445 int ret = 0;
ef1669ff
KO
1446 s64 count2;
1447
91d961ba 1448 darray_for_each(w->inodes, i) {
ef1669ff
KO
1449 if (i->inode.bi_nlink == i->count)
1450 continue;
1451
9e343161
KO
1452 count2 = bch2_count_subdirs(trans, w->cur_inum, i->snapshot);
1453 if (count2 < 0)
1454 return count2;
ef1669ff
KO
1455
1456 if (i->count != count2) {
1457 bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
1458 i->count, count2);
1459 i->count = count2;
1460 if (i->inode.bi_nlink == i->count)
1461 continue;
1462 }
1463
1464 if (fsck_err_on(i->inode.bi_nlink != i->count, c,
1465 "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
1466 w->cur_inum, i->snapshot, i->inode.bi_nlink, i->count)) {
1467 i->inode.bi_nlink = i->count;
1468 ret = write_inode(trans, &i->inode, i->snapshot);
abcecb49
KO
1469 if (ret)
1470 break;
1471 }
ef1669ff
KO
1472 }
1473fsck_err:
12043cf1 1474 if (ret)
df6a24f8 1475 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
12043cf1
KO
1476 if (!ret && trans_was_restarted(trans, restart_count))
1477 ret = -BCH_ERR_transaction_restart_nested;
1478 return ret;
ef1669ff 1479}
abcecb49 1480
ef1669ff
KO
1481static int check_dirent_target(struct btree_trans *trans,
1482 struct btree_iter *iter,
1483 struct bkey_s_c_dirent d,
1484 struct bch_inode_unpacked *target,
1485 u32 target_snapshot)
1486{
1487 struct bch_fs *c = trans->c;
285b181a 1488 struct bkey_i_dirent *n;
ef1669ff 1489 bool backpointer_exists = true;
fa8e94fa 1490 struct printbuf buf = PRINTBUF;
ef1669ff
KO
1491 int ret = 0;
1492
1493 if (!target->bi_dir &&
1494 !target->bi_dir_offset) {
1495 target->bi_dir = d.k->p.inode;
1496 target->bi_dir_offset = d.k->p.offset;
1497
285b181a 1498 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1499 if (ret)
1500 goto err;
1501 }
1502
4db65027 1503 if (!inode_points_to_dirent(target, d)) {
ef1669ff
KO
1504 ret = inode_backpointer_exists(trans, target, d.k->p.snapshot);
1505 if (ret < 0)
1506 goto err;
e3e464ac 1507
ef1669ff
KO
1508 backpointer_exists = ret;
1509 ret = 0;
e3e464ac 1510
ef1669ff
KO
1511 if (fsck_err_on(S_ISDIR(target->bi_mode) &&
1512 backpointer_exists, c,
1513 "directory %llu with multiple links",
1514 target->bi_inum)) {
285b181a 1515 ret = __remove_dirent(trans, d.k->p);
fa8e94fa 1516 goto out;
e3e464ac 1517 }
e3e464ac 1518
ef1669ff
KO
1519 if (fsck_err_on(backpointer_exists &&
1520 !target->bi_nlink, c,
0095aa94
KO
1521 "inode %llu type %s has multiple links but i_nlink 0",
1522 target->bi_inum, bch2_d_types[d.v->d_type])) {
ef1669ff
KO
1523 target->bi_nlink++;
1524 target->bi_flags &= ~BCH_INODE_UNLINKED;
1c6fdbd8 1525
285b181a 1526 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1527 if (ret)
1528 goto err;
1c6fdbd8
KO
1529 }
1530
ef1669ff 1531 if (fsck_err_on(!backpointer_exists, c,
6e0c886d 1532 "inode %llu:%u has wrong backpointer:\n"
ef1669ff
KO
1533 "got %llu:%llu\n"
1534 "should be %llu:%llu",
6e0c886d 1535 target->bi_inum, target_snapshot,
ef1669ff
KO
1536 target->bi_dir,
1537 target->bi_dir_offset,
1538 d.k->p.inode,
1539 d.k->p.offset)) {
1540 target->bi_dir = d.k->p.inode;
1541 target->bi_dir_offset = d.k->p.offset;
1542
285b181a 1543 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1544 if (ret)
1545 goto err;
1c6fdbd8 1546 }
ef1669ff 1547 }
1c6fdbd8 1548
285b181a
KO
1549 if (fsck_err_on(d.v->d_type != inode_d_type(target), c,
1550 "incorrect d_type: got %s, should be %s:\n%s",
1551 bch2_d_type_str(d.v->d_type),
1552 bch2_d_type_str(inode_d_type(target)),
fa8e94fa
KO
1553 (printbuf_reset(&buf),
1554 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
285b181a
KO
1555 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
1556 ret = PTR_ERR_OR_ZERO(n);
1557 if (ret)
fa8e94fa 1558 goto err;
ef1669ff
KO
1559
1560 bkey_reassemble(&n->k_i, d.s_c);
285b181a 1561 n->v.d_type = inode_d_type(target);
ef1669ff 1562
285b181a
KO
1563 ret = bch2_trans_update(trans, iter, &n->k_i, 0);
1564 if (ret)
fa8e94fa 1565 goto err;
4db65027 1566
285b181a 1567 d = dirent_i_to_s_c(n);
4db65027
KO
1568 }
1569
1570 if (d.v->d_type == DT_SUBVOL &&
1571 target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) &&
1572 (c->sb.version < bcachefs_metadata_version_subvol_dirent ||
1573 fsck_err(c, "dirent has wrong d_parent_subvol field: got %u, should be %u",
1574 le32_to_cpu(d.v->d_parent_subvol),
1575 target->bi_parent_subvol))) {
285b181a
KO
1576 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
1577 ret = PTR_ERR_OR_ZERO(n);
1578 if (ret)
fa8e94fa 1579 goto err;
4db65027
KO
1580
1581 bkey_reassemble(&n->k_i, d.s_c);
1582 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
1583
285b181a
KO
1584 ret = bch2_trans_update(trans, iter, &n->k_i, 0);
1585 if (ret)
fa8e94fa 1586 goto err;
4db65027 1587
285b181a 1588 d = dirent_i_to_s_c(n);
1c6fdbd8 1589 }
fa8e94fa 1590out:
ef1669ff 1591err:
1c6fdbd8 1592fsck_err:
fa8e94fa 1593 printbuf_exit(&buf);
e492e7b6 1594
549d173c 1595 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
df6a24f8 1596 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
ef1669ff 1597 return ret;
1c6fdbd8
KO
1598}
1599
914f2786 1600static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
a1783320 1601 struct bkey_s_c k,
914f2786 1602 struct bch_hash_info *hash_info,
ef1669ff
KO
1603 struct inode_walker *dir,
1604 struct inode_walker *target,
1605 struct snapshots_seen *s)
1c6fdbd8 1606{
914f2786 1607 struct bch_fs *c = trans->c;
914f2786 1608 struct bkey_s_c_dirent d;
ef1669ff 1609 struct inode_walker_entry *i;
fa8e94fa 1610 struct printbuf buf = PRINTBUF;
49124d8a 1611 struct bpos equiv;
fa8e94fa 1612 int ret = 0;
d69f41d6 1613
ef1669ff 1614 ret = check_key_has_snapshot(trans, iter, k);
fa8e94fa
KO
1615 if (ret) {
1616 ret = ret < 0 ? ret : 0;
1617 goto out;
1618 }
1c6fdbd8 1619
49124d8a
KO
1620 equiv = k.k->p;
1621 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
1622
1623 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
914f2786 1624 if (ret)
fa8e94fa 1625 goto err;
8a85b20c 1626
ef1669ff 1627 if (k.k->type == KEY_TYPE_whiteout)
fa8e94fa 1628 goto out;
ef1669ff
KO
1629
1630 if (dir->cur_inum != k.k->p.inode) {
1631 ret = check_subdir_count(trans, dir);
1632 if (ret)
fa8e94fa 1633 goto err;
ef1669ff
KO
1634 }
1635
0763c552 1636 BUG_ON(!iter->path->should_be_locked);
292dea86 1637
49124d8a 1638 ret = __walk_inode(trans, dir, equiv);
ef1669ff 1639 if (ret < 0)
fa8e94fa 1640 goto err;
1c6fdbd8 1641
ef1669ff 1642 if (fsck_err_on(ret == INT_MAX, c,
914f2786 1643 "dirent in nonexisting directory:\n%s",
fa8e94fa
KO
1644 (printbuf_reset(&buf),
1645 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1646 ret = bch2_btree_delete_at(trans, iter,
285b181a 1647 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
fa8e94fa
KO
1648 goto out;
1649 }
ef1669ff 1650
fa8e94fa
KO
1651 if (ret == INT_MAX) {
1652 ret = 0;
1653 goto out;
1654 }
ef1669ff 1655
91d961ba 1656 i = dir->inodes.data + ret;
ef1669ff
KO
1657 ret = 0;
1658
1659 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c,
285b181a
KO
1660 "dirent in non directory inode type %s:\n%s",
1661 bch2_d_type_str(inode_d_type(&i->inode)),
fa8e94fa
KO
1662 (printbuf_reset(&buf),
1663 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1664 ret = bch2_btree_delete_at(trans, iter, 0);
1665 goto out;
1666 }
8a85b20c 1667
ef1669ff 1668 if (dir->first_this_inode)
91d961ba 1669 *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
1c6fdbd8 1670
914f2786
KO
1671 ret = hash_check_key(trans, bch2_dirent_hash_desc,
1672 hash_info, iter, k);
1673 if (ret < 0)
fa8e94fa
KO
1674 goto err;
1675 if (ret) {
1676 /* dirent has been deleted */
1677 ret = 0;
1678 goto out;
1679 }
7ac2c55e 1680
914f2786 1681 if (k.k->type != KEY_TYPE_dirent)
fa8e94fa 1682 goto out;
1c6fdbd8 1683
914f2786 1684 d = bkey_s_c_to_dirent(k);
1c6fdbd8 1685
4db65027
KO
1686 if (d.v->d_type == DT_SUBVOL) {
1687 struct bch_inode_unpacked subvol_root;
1688 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
1689 u32 target_snapshot;
1690 u64 target_inum;
b9e1adf5 1691
4db65027
KO
1692 ret = __subvol_lookup(trans, target_subvol,
1693 &target_snapshot, &target_inum);
1694 if (ret && ret != -ENOENT)
fa8e94fa 1695 goto err;
1c6fdbd8 1696
4db65027
KO
1697 if (fsck_err_on(ret, c,
1698 "dirent points to missing subvolume %llu",
fa8e94fa
KO
1699 le64_to_cpu(d.v->d_child_subvol))) {
1700 ret = __remove_dirent(trans, d.k->p);
1701 goto err;
1702 }
914f2786 1703
ef1669ff
KO
1704 ret = __lookup_inode(trans, target_inum,
1705 &subvol_root, &target_snapshot);
1706 if (ret && ret != -ENOENT)
fa8e94fa 1707 goto err;
914f2786 1708
ef1669ff
KO
1709 if (fsck_err_on(ret, c,
1710 "subvolume %u points to missing subvolume root %llu",
1711 target_subvol,
1712 target_inum)) {
1713 bch_err(c, "repair not implemented yet");
fa8e94fa
KO
1714 ret = -EINVAL;
1715 goto err;
ef1669ff 1716 }
914f2786 1717
ef1669ff
KO
1718 if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c,
1719 "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
1720 target_inum,
1721 subvol_root.bi_subvol, target_subvol)) {
1722 subvol_root.bi_subvol = target_subvol;
285b181a 1723 ret = __write_inode(trans, &subvol_root, target_snapshot);
ef1669ff 1724 if (ret)
fa8e94fa 1725 goto err;
ef1669ff 1726 }
914f2786 1727
ef1669ff
KO
1728 ret = check_dirent_target(trans, iter, d, &subvol_root,
1729 target_snapshot);
914f2786 1730 if (ret)
fa8e94fa 1731 goto err;
ef1669ff 1732 } else {
4db65027 1733 ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
ef1669ff 1734 if (ret)
fa8e94fa 1735 goto err;
1c6fdbd8 1736
91d961ba 1737 if (fsck_err_on(!target->inodes.nr, c,
49124d8a
KO
1738 "dirent points to missing inode: (equiv %u)\n%s",
1739 equiv.snapshot,
fa8e94fa
KO
1740 (printbuf_reset(&buf),
1741 bch2_bkey_val_to_text(&buf, c, k),
1742 buf.buf))) {
285b181a 1743 ret = __remove_dirent(trans, d.k->p);
ef1669ff 1744 if (ret)
fa8e94fa 1745 goto err;
1c6fdbd8
KO
1746 }
1747
91d961ba 1748 darray_for_each(target->inodes, i) {
ef1669ff
KO
1749 ret = check_dirent_target(trans, iter, d,
1750 &i->inode, i->snapshot);
1751 if (ret)
fa8e94fa 1752 goto err;
d3ff7fec 1753 }
914f2786 1754 }
d3ff7fec 1755
ef1669ff 1756 if (d.v->d_type == DT_DIR)
49124d8a 1757 for_each_visible_inode(c, s, dir, equiv.snapshot, i)
ef1669ff 1758 i->count++;
1c6fdbd8 1759
fa8e94fa
KO
1760out:
1761err:
914f2786 1762fsck_err:
fa8e94fa 1763 printbuf_exit(&buf);
e492e7b6 1764
549d173c 1765 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
df6a24f8 1766 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
914f2786
KO
1767 return ret;
1768}
1c6fdbd8 1769
914f2786
KO
1770/*
1771 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
1772 * validate d_type
1773 */
1774noinline_for_stack
1775static int check_dirents(struct bch_fs *c)
1776{
ef1669ff
KO
1777 struct inode_walker dir = inode_walker_init();
1778 struct inode_walker target = inode_walker_init();
1779 struct snapshots_seen s;
914f2786
KO
1780 struct bch_hash_info hash_info;
1781 struct btree_trans trans;
67e0dd8f 1782 struct btree_iter iter;
a1783320 1783 struct bkey_s_c k;
914f2786 1784 int ret = 0;
1c6fdbd8 1785
914f2786
KO
1786 bch_verbose(c, "checking dirents");
1787
ef1669ff 1788 snapshots_seen_init(&s);
914f2786 1789 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1c6fdbd8 1790
a1783320
KO
1791 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_dirents,
1792 POS(BCACHEFS_ROOT_INO, 0),
1793 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
1794 k,
1795 NULL, NULL,
1796 BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
1797 check_dirent(&trans, &iter, k, &hash_info, &dir, &target, &s));
914f2786 1798
ef1669ff
KO
1799 bch2_trans_exit(&trans);
1800 snapshots_seen_exit(&s);
1801 inode_walker_exit(&dir);
1802 inode_walker_exit(&target);
e492e7b6
KO
1803
1804 if (ret)
df6a24f8 1805 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
ef1669ff 1806 return ret;
1c6fdbd8
KO
1807}
1808
285b181a 1809static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
a1783320 1810 struct bkey_s_c k,
285b181a
KO
1811 struct bch_hash_info *hash_info,
1812 struct inode_walker *inode)
1813{
1814 struct bch_fs *c = trans->c;
285b181a
KO
1815 int ret;
1816
285b181a
KO
1817 ret = check_key_has_snapshot(trans, iter, k);
1818 if (ret)
1819 return ret;
1820
1821 ret = __walk_inode(trans, inode, k.k->p);
1822 if (ret < 0)
1823 return ret;
1824
1825 if (fsck_err_on(ret == INT_MAX, c,
1826 "xattr for missing inode %llu",
1827 k.k->p.inode))
1828 return bch2_btree_delete_at(trans, iter, 0);
1829
1830 if (ret == INT_MAX)
1831 return 0;
1832
1833 ret = 0;
1834
1835 if (inode->first_this_inode)
91d961ba 1836 *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode);
285b181a
KO
1837
1838 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
1839fsck_err:
549d173c 1840 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
df6a24f8 1841 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
285b181a
KO
1842 return ret;
1843}
1844
1c6fdbd8
KO
1845/*
1846 * Walk xattrs: verify that they all have a corresponding inode
1847 */
1848noinline_for_stack
1849static int check_xattrs(struct bch_fs *c)
1850{
285b181a 1851 struct inode_walker inode = inode_walker_init();
7ac2c55e 1852 struct bch_hash_info hash_info;
d69f41d6 1853 struct btree_trans trans;
67e0dd8f 1854 struct btree_iter iter;
a1783320 1855 struct bkey_s_c k;
1c6fdbd8
KO
1856 int ret = 0;
1857
1858 bch_verbose(c, "checking xattrs");
1859
20bceecb 1860 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
d69f41d6 1861
a1783320
KO
1862 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
1863 POS(BCACHEFS_ROOT_INO, 0),
1864 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
1865 k,
1866 NULL, NULL,
1867 BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
1868 check_xattr(&trans, &iter, k, &hash_info, &inode));
285b181a 1869
9a796fdb 1870 bch2_trans_exit(&trans);
e492e7b6
KO
1871
1872 if (ret)
df6a24f8 1873 bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
9a796fdb 1874 return ret;
1c6fdbd8
KO
1875}
1876
285b181a 1877static int check_root_trans(struct btree_trans *trans)
1c6fdbd8 1878{
285b181a 1879 struct bch_fs *c = trans->c;
ef1669ff 1880 struct bch_inode_unpacked root_inode;
d3ff7fec 1881 u32 snapshot;
ef1669ff 1882 u64 inum;
1c6fdbd8
KO
1883 int ret;
1884
285b181a 1885 ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
1c6fdbd8
KO
1886 if (ret && ret != -ENOENT)
1887 return ret;
1888
ef1669ff
KO
1889 if (mustfix_fsck_err_on(ret, c, "root subvol missing")) {
1890 struct bkey_i_subvolume root_subvol;
1c6fdbd8 1891
ef1669ff
KO
1892 snapshot = U32_MAX;
1893 inum = BCACHEFS_ROOT_INO;
1c6fdbd8 1894
ef1669ff
KO
1895 bkey_subvolume_init(&root_subvol.k_i);
1896 root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
1897 root_subvol.v.flags = 0;
1898 root_subvol.v.snapshot = cpu_to_le32(snapshot);
1899 root_subvol.v.inode = cpu_to_le64(inum);
e68914ca 1900 ret = commit_do(trans, NULL, NULL,
ef1669ff
KO
1901 BTREE_INSERT_NOFAIL|
1902 BTREE_INSERT_LAZY_RW,
8ffa11a2
DH
1903 __bch2_btree_insert(trans, BTREE_ID_subvolumes,
1904 &root_subvol.k_i, 0));
ef1669ff 1905 if (ret) {
d4bf5eec 1906 bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
ef1669ff
KO
1907 goto err;
1908 }
1909
1910 }
1911
285b181a 1912 ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
ef1669ff
KO
1913 if (ret && ret != -ENOENT)
1914 return ret;
1c6fdbd8 1915
ef1669ff
KO
1916 if (mustfix_fsck_err_on(ret, c, "root directory missing") ||
1917 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c,
1918 "root inode not a directory")) {
1919 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
1920 0, NULL);
1921 root_inode.bi_inum = inum;
1c6fdbd8 1922
285b181a 1923 ret = __write_inode(trans, &root_inode, snapshot);
ef1669ff 1924 if (ret)
d4bf5eec 1925 bch_err(c, "error writing root inode: %s", bch2_err_str(ret));
ef1669ff
KO
1926 }
1927err:
1928fsck_err:
ef1669ff 1929 return ret;
1c6fdbd8
KO
1930}
1931
285b181a
KO
1932/* Get root directory, create if it doesn't exist: */
1933noinline_for_stack
1934static int check_root(struct bch_fs *c)
1935{
1936 bch_verbose(c, "checking root directory");
1937
1938 return bch2_trans_do(c, NULL, NULL,
1939 BTREE_INSERT_NOFAIL|
1940 BTREE_INSERT_LAZY_RW,
1941 check_root_trans(&trans));
1942}
1943
91d961ba
KO
1944struct pathbuf_entry {
1945 u64 inum;
1946 u32 snapshot;
1c6fdbd8
KO
1947};
1948
91d961ba
KO
1949typedef DARRAY(struct pathbuf_entry) pathbuf;
1950
1951static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
6e0c886d
KO
1952{
1953 struct pathbuf_entry *i;
1954
91d961ba 1955 darray_for_each(*p, i)
6e0c886d
KO
1956 if (i->inum == inum &&
1957 i->snapshot == snapshot)
1958 return true;
1959
1960 return false;
1961}
1962
91d961ba 1963static int path_down(struct bch_fs *c, pathbuf *p,
f0f41a6d 1964 u64 inum, u32 snapshot)
1c6fdbd8 1965{
91d961ba 1966 int ret = darray_push(p, ((struct pathbuf_entry) {
6e0c886d
KO
1967 .inum = inum,
1968 .snapshot = snapshot,
91d961ba
KO
1969 }));
1970
1971 if (ret)
1972 bch_err(c, "fsck: error allocating memory for pathbuf, size %zu",
1973 p->size);
1974 return ret;
1c6fdbd8
KO
1975}
1976
6e0c886d
KO
1977/*
1978 * Check that a given inode is reachable from the root:
1979 *
1980 * XXX: we should also be verifying that inodes are in the right subvolumes
1981 */
d3ff7fec 1982static int check_path(struct btree_trans *trans,
91d961ba 1983 pathbuf *p,
81ed9ce3
KO
1984 struct bch_inode_unpacked *inode,
1985 u32 snapshot)
1c6fdbd8 1986{
d3ff7fec 1987 struct bch_fs *c = trans->c;
1c6fdbd8
KO
1988 int ret = 0;
1989
49124d8a 1990 snapshot = bch2_snapshot_equiv(c, snapshot);
d3ff7fec 1991 p->nr = 0;
424eb881 1992
488f9776
KO
1993 while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
1994 inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
4db65027
KO
1995 struct btree_iter dirent_iter;
1996 struct bkey_s_c_dirent d;
6e0c886d
KO
1997 u32 parent_snapshot = snapshot;
1998
4db65027 1999 if (inode->bi_subvol) {
488f9776
KO
2000 u64 inum;
2001
2002 ret = subvol_lookup(trans, inode->bi_parent_subvol,
6e0c886d 2003 &parent_snapshot, &inum);
488f9776
KO
2004 if (ret)
2005 break;
2006 }
2007
d3ff7fec 2008 ret = lockrestart_do(trans,
4db65027
KO
2009 PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter,
2010 SPOS(inode->bi_dir, inode->bi_dir_offset,
2011 parent_snapshot))).k));
2012 if (ret && ret != -ENOENT)
d3ff7fec 2013 break;
1c6fdbd8 2014
4db65027
KO
2015 if (!ret && !dirent_points_to_inode(d, inode)) {
2016 bch2_trans_iter_exit(trans, &dirent_iter);
2017 ret = -ENOENT;
2018 }
2019
2020 if (ret == -ENOENT) {
285b181a 2021 if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
ef1669ff 2022 inode->bi_inum, snapshot,
285b181a 2023 bch2_d_type_str(inode_d_type(inode)),
d3ff7fec
KO
2024 inode->bi_nlink,
2025 inode->bi_dir,
2026 inode->bi_dir_offset))
81ed9ce3 2027 ret = reattach_inode(trans, inode, snapshot);
d3ff7fec
KO
2028 break;
2029 }
4db65027
KO
2030
2031 bch2_trans_iter_exit(trans, &dirent_iter);
1c6fdbd8 2032
d3ff7fec
KO
2033 if (!S_ISDIR(inode->bi_mode))
2034 break;
1c6fdbd8 2035
f0f41a6d 2036 ret = path_down(c, p, inode->bi_inum, snapshot);
d3ff7fec
KO
2037 if (ret) {
2038 bch_err(c, "memory allocation failure");
2039 return ret;
2040 }
1c6fdbd8 2041
6e0c886d
KO
2042 snapshot = parent_snapshot;
2043
2044 ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
2045 if (ret) {
2046 /* Should have been caught in dirents pass */
2047 bch_err(c, "error looking up parent directory: %i", ret);
2048 break;
2049 }
2050
2051 if (path_is_dup(p, inode->bi_inum, snapshot)) {
2052 struct pathbuf_entry *i;
1c6fdbd8 2053
d3ff7fec 2054 /* XXX print path */
6e0c886d
KO
2055 bch_err(c, "directory structure loop");
2056
91d961ba 2057 darray_for_each(*p, i)
6e0c886d
KO
2058 pr_err("%llu:%u", i->inum, i->snapshot);
2059 pr_err("%llu:%u", inode->bi_inum, snapshot);
2060
d3ff7fec
KO
2061 if (!fsck_err(c, "directory structure loop"))
2062 return 0;
1c6fdbd8 2063
e68914ca 2064 ret = commit_do(trans, NULL, NULL,
285b181a
KO
2065 BTREE_INSERT_NOFAIL|
2066 BTREE_INSERT_LAZY_RW,
81ed9ce3 2067 remove_backpointer(trans, inode));
1c6fdbd8 2068 if (ret) {
d3ff7fec
KO
2069 bch_err(c, "error removing dirent: %i", ret);
2070 break;
1c6fdbd8
KO
2071 }
2072
81ed9ce3 2073 ret = reattach_inode(trans, inode, snapshot);
1c6fdbd8 2074 }
1c6fdbd8 2075 }
d3ff7fec
KO
2076fsck_err:
2077 if (ret)
d4bf5eec 2078 bch_err(c, "%s: err %s", __func__, bch2_err_str(ret));
d3ff7fec
KO
2079 return ret;
2080}
1c6fdbd8 2081
d3ff7fec
KO
2082/*
2083 * Check for unreachable inodes, as well as loops in the directory structure:
2084 * After check_dirents(), if an inode backpointer doesn't exist that means it's
2085 * unreachable:
2086 */
285b181a 2087noinline_for_stack
58686a25 2088static int check_directory_structure(struct bch_fs *c)
d3ff7fec
KO
2089{
2090 struct btree_trans trans;
67e0dd8f 2091 struct btree_iter iter;
d3ff7fec
KO
2092 struct bkey_s_c k;
2093 struct bch_inode_unpacked u;
91d961ba 2094 pathbuf path = { 0, };
d3ff7fec 2095 int ret;
1c6fdbd8 2096
d3ff7fec 2097 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1c6fdbd8 2098
909004d2
KO
2099 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
2100 BTREE_ITER_INTENT|
ef1669ff
KO
2101 BTREE_ITER_PREFETCH|
2102 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
3e52c222 2103 if (!bkey_is_inode(k.k))
1c6fdbd8
KO
2104 continue;
2105
3e52c222 2106 ret = bch2_inode_unpack(k, &u);
d3ff7fec
KO
2107 if (ret) {
2108 /* Should have been caught earlier in fsck: */
2109 bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
2110 break;
1c6fdbd8 2111 }
1c6fdbd8 2112
ef1669ff
KO
2113 if (u.bi_flags & BCH_INODE_UNLINKED)
2114 continue;
2115
81ed9ce3 2116 ret = check_path(&trans, &path, &u, iter.pos.snapshot);
d3ff7fec
KO
2117 if (ret)
2118 break;
1c6fdbd8 2119 }
67e0dd8f 2120 bch2_trans_iter_exit(&trans, &iter);
d3ff7fec 2121
91d961ba 2122 darray_exit(&path);
f24fab9c 2123
9a796fdb
KO
2124 bch2_trans_exit(&trans);
2125 return ret;
1c6fdbd8
KO
2126}
2127
fc51b041
KO
2128struct nlink_table {
2129 size_t nr;
2130 size_t size;
1c6fdbd8 2131
fc51b041
KO
2132 struct nlink {
2133 u64 inum;
2134 u32 snapshot;
2135 u32 count;
2136 } *d;
2137};
1c6fdbd8 2138
f0f41a6d
KO
2139static int add_nlink(struct bch_fs *c, struct nlink_table *t,
2140 u64 inum, u32 snapshot)
1c6fdbd8 2141{
fc51b041
KO
2142 if (t->nr == t->size) {
2143 size_t new_size = max_t(size_t, 128UL, t->size * 2);
3e3e02e6
KO
2144 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
2145
fc51b041 2146 if (!d) {
f0f41a6d
KO
2147 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
2148 new_size);
65d48e35 2149 return -BCH_ERR_ENOMEM_fsck_add_nlink;
fc51b041 2150 }
1c6fdbd8 2151
82355e28
KO
2152 if (t->d)
2153 memcpy(d, t->d, t->size * sizeof(t->d[0]));
fc51b041 2154 kvfree(t->d);
1c6fdbd8 2155
fc51b041
KO
2156 t->d = d;
2157 t->size = new_size;
2bb748a6
KO
2158 }
2159
fc51b041
KO
2160
2161 t->d[t->nr++] = (struct nlink) {
2162 .inum = inum,
2163 .snapshot = snapshot,
2164 };
2165
2166 return 0;
2167}
2168
2169static int nlink_cmp(const void *_l, const void *_r)
2170{
2171 const struct nlink *l = _l;
2172 const struct nlink *r = _r;
2173
2174 return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
2175}
2176
ef1669ff
KO
2177static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
2178 struct nlink_table *links,
2179 u64 range_start, u64 range_end, u64 inum, u32 snapshot)
fc51b041
KO
2180{
2181 struct nlink *link, key = {
2182 .inum = inum, .snapshot = U32_MAX,
2183 };
2184
2185 if (inum < range_start || inum >= range_end)
1c6fdbd8 2186 return;
fc51b041
KO
2187
2188 link = __inline_bsearch(&key, links->d, links->nr,
2189 sizeof(links->d[0]), nlink_cmp);
ef1669ff
KO
2190 if (!link)
2191 return;
2192
2193 while (link > links->d && link[0].inum == link[-1].inum)
2194 --link;
2195
2196 for (; link < links->d + links->nr && link->inum == inum; link++)
2197 if (ref_visible(c, s, snapshot, link->snapshot)) {
2198 link->count++;
2199 if (link->snapshot >= snapshot)
2200 break;
2201 }
fc51b041
KO
2202}
2203
2204noinline_for_stack
2205static int check_nlinks_find_hardlinks(struct bch_fs *c,
2206 struct nlink_table *t,
2207 u64 start, u64 *end)
2208{
2209 struct btree_trans trans;
67e0dd8f 2210 struct btree_iter iter;
fc51b041 2211 struct bkey_s_c k;
fc51b041
KO
2212 struct bch_inode_unpacked u;
2213 int ret = 0;
2214
2215 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
2216
2217 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
909004d2
KO
2218 POS(0, start),
2219 BTREE_ITER_INTENT|
ef1669ff
KO
2220 BTREE_ITER_PREFETCH|
2221 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
3e52c222 2222 if (!bkey_is_inode(k.k))
fc51b041
KO
2223 continue;
2224
3e52c222
KO
2225 /* Should never fail, checked by bch2_inode_invalid: */
2226 BUG_ON(bch2_inode_unpack(k, &u));
fc51b041
KO
2227
2228 /*
2229 * Backpointer and directory structure checks are sufficient for
2230 * directories, since they can't have hardlinks:
2231 */
3e52c222 2232 if (S_ISDIR(le16_to_cpu(u.bi_mode)))
fc51b041
KO
2233 continue;
2234
fc51b041
KO
2235 if (!u.bi_nlink)
2236 continue;
2237
f0f41a6d 2238 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
fc51b041
KO
2239 if (ret) {
2240 *end = k.k->p.offset;
2241 ret = 0;
2242 break;
2243 }
2244
1c6fdbd8 2245 }
67e0dd8f 2246 bch2_trans_iter_exit(&trans, &iter);
fc51b041
KO
2247 bch2_trans_exit(&trans);
2248
2249 if (ret)
2250 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1c6fdbd8 2251
fc51b041 2252 return ret;
1c6fdbd8
KO
2253}
2254
2255noinline_for_stack
fc51b041
KO
2256static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
2257 u64 range_start, u64 range_end)
1c6fdbd8 2258{
424eb881 2259 struct btree_trans trans;
ef1669ff 2260 struct snapshots_seen s;
67e0dd8f 2261 struct btree_iter iter;
1c6fdbd8
KO
2262 struct bkey_s_c k;
2263 struct bkey_s_c_dirent d;
1c6fdbd8
KO
2264 int ret;
2265
ef1669ff
KO
2266 snapshots_seen_init(&s);
2267
20bceecb 2268 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 2269
909004d2
KO
2270 for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
2271 BTREE_ITER_INTENT|
ef1669ff
KO
2272 BTREE_ITER_PREFETCH|
2273 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
49124d8a 2274 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
ef1669ff
KO
2275 if (ret)
2276 break;
2277
1c6fdbd8 2278 switch (k.k->type) {
26609b61 2279 case KEY_TYPE_dirent:
1c6fdbd8 2280 d = bkey_s_c_to_dirent(k);
1c6fdbd8 2281
ef1669ff
KO
2282 if (d.v->d_type != DT_DIR &&
2283 d.v->d_type != DT_SUBVOL)
2284 inc_link(c, &s, links, range_start, range_end,
2285 le64_to_cpu(d.v->d_inum),
49124d8a 2286 bch2_snapshot_equiv(c, d.k->p.snapshot));
1c6fdbd8
KO
2287 break;
2288 }
1c6fdbd8 2289 }
67e0dd8f 2290 bch2_trans_iter_exit(&trans, &iter);
abcecb49 2291
1c6fdbd8 2292 if (ret)
619f5bee 2293 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1c6fdbd8 2294
ef1669ff
KO
2295 bch2_trans_exit(&trans);
2296 snapshots_seen_exit(&s);
1c6fdbd8
KO
2297 return ret;
2298}
2299
eace11a7
KO
2300static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
2301 struct bkey_s_c k,
2302 struct nlink_table *links,
2303 size_t *idx, u64 range_end)
2304{
2305 struct bch_fs *c = trans->c;
2306 struct bch_inode_unpacked u;
2307 struct nlink *link = &links->d[*idx];
2308 int ret = 0;
2309
2310 if (k.k->p.offset >= range_end)
2311 return 1;
2312
2313 if (!bkey_is_inode(k.k))
2314 return 0;
2315
2316 BUG_ON(bch2_inode_unpack(k, &u));
2317
2318 if (S_ISDIR(le16_to_cpu(u.bi_mode)))
2319 return 0;
2320
2321 if (!u.bi_nlink)
2322 return 0;
2323
2324 while ((cmp_int(link->inum, k.k->p.offset) ?:
2325 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
2326 BUG_ON(*idx == links->nr);
2327 link = &links->d[++*idx];
2328 }
2329
2330 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
2331 "inode %llu type %s has wrong i_nlink (%u, should be %u)",
2332 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
2333 bch2_inode_nlink_get(&u), link->count)) {
2334 bch2_inode_nlink_set(&u, link->count);
2335 ret = __write_inode(trans, &u, k.k->p.snapshot);
2336 }
2337fsck_err:
2338 return ret;
2339}
2340
1c6fdbd8 2341noinline_for_stack
fc51b041
KO
2342static int check_nlinks_update_hardlinks(struct bch_fs *c,
2343 struct nlink_table *links,
1c6fdbd8
KO
2344 u64 range_start, u64 range_end)
2345{
0564b167 2346 struct btree_trans trans;
67e0dd8f 2347 struct btree_iter iter;
1c6fdbd8 2348 struct bkey_s_c k;
eace11a7 2349 size_t idx = 0;
b906aadd 2350 int ret = 0;
1c6fdbd8 2351
20bceecb 2352 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
0564b167 2353
eace11a7
KO
2354 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
2355 POS(0, range_start),
2356 BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
2357 NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
2358 check_nlinks_update_inode(&trans, &iter, k, links, &idx, range_end));
fc51b041 2359
0564b167
KO
2360 bch2_trans_exit(&trans);
2361
eace11a7 2362 if (ret < 0) {
b906aadd 2363 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
eace11a7
KO
2364 return ret;
2365 }
1c6fdbd8 2366
eace11a7 2367 return 0;
1c6fdbd8
KO
2368}
2369
2370noinline_for_stack
58686a25 2371static int check_nlinks(struct bch_fs *c)
1c6fdbd8 2372{
fc51b041 2373 struct nlink_table links = { 0 };
1c6fdbd8
KO
2374 u64 this_iter_range_start, next_iter_range_start = 0;
2375 int ret = 0;
2376
2377 bch_verbose(c, "checking inode nlinks");
2378
1c6fdbd8
KO
2379 do {
2380 this_iter_range_start = next_iter_range_start;
2381 next_iter_range_start = U64_MAX;
2382
fc51b041
KO
2383 ret = check_nlinks_find_hardlinks(c, &links,
2384 this_iter_range_start,
2385 &next_iter_range_start);
2386
2387 ret = check_nlinks_walk_dirents(c, &links,
1c6fdbd8 2388 this_iter_range_start,
fc51b041 2389 next_iter_range_start);
1c6fdbd8
KO
2390 if (ret)
2391 break;
2392
fc51b041 2393 ret = check_nlinks_update_hardlinks(c, &links,
1c6fdbd8
KO
2394 this_iter_range_start,
2395 next_iter_range_start);
2396 if (ret)
2397 break;
2398
fc51b041 2399 links.nr = 0;
1c6fdbd8
KO
2400 } while (next_iter_range_start != U64_MAX);
2401
fc51b041 2402 kvfree(links.d);
1c6fdbd8
KO
2403
2404 return ret;
2405}
2406
eace11a7
KO
2407static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter,
2408 struct bkey_s_c k)
bfe88863 2409{
bfe88863
KO
2410 struct bkey_s_c_reflink_p p;
2411 struct bkey_i_reflink_p *u;
2412 int ret;
2413
bfe88863
KO
2414 if (k.k->type != KEY_TYPE_reflink_p)
2415 return 0;
2416
2417 p = bkey_s_c_to_reflink_p(k);
2418
6d76aefe 2419 if (!p.v->front_pad && !p.v->back_pad)
bfe88863
KO
2420 return 0;
2421
2422 u = bch2_trans_kmalloc(trans, sizeof(*u));
2423 ret = PTR_ERR_OR_ZERO(u);
2424 if (ret)
2425 return ret;
2426
2427 bkey_reassemble(&u->k_i, k);
6d76aefe
KO
2428 u->v.front_pad = 0;
2429 u->v.back_pad = 0;
bfe88863 2430
6b3d8b89 2431 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN);
bfe88863
KO
2432}
2433
285b181a 2434noinline_for_stack
bfe88863
KO
2435static int fix_reflink_p(struct bch_fs *c)
2436{
2437 struct btree_trans trans;
2438 struct btree_iter iter;
2439 struct bkey_s_c k;
2440 int ret;
2441
2442 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
2443 return 0;
2444
285b181a
KO
2445 bch_verbose(c, "fixing reflink_p keys");
2446
bfe88863
KO
2447 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
2448
eace11a7
KO
2449 ret = for_each_btree_key_commit(&trans, iter,
2450 BTREE_ID_extents, POS_MIN,
2451 BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
2452 NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
2453 fix_reflink_p_key(&trans, &iter, k));
bfe88863
KO
2454
2455 bch2_trans_exit(&trans);
2456 return ret;
2457}
2458
1c6fdbd8
KO
2459/*
2460 * Checks for inconsistencies that shouldn't happen, unless we have a bug.
2461 * Doesn't fix them yet, mainly because they haven't yet been observed:
2462 */
619f5bee 2463int bch2_fsck_full(struct bch_fs *c)
1c6fdbd8 2464{
c7a09cb1
KO
2465 int ret;
2466again:
2467 ret = bch2_fs_check_snapshots(c) ?:
4ab35c34
KO
2468 bch2_fs_check_subvols(c) ?:
2469 bch2_delete_dead_snapshots(c) ?:
14b393ee 2470 check_inodes(c, true) ?:
5c16add5 2471 check_extents(c) ?:
1c6fdbd8
KO
2472 check_dirents(c) ?:
2473 check_xattrs(c) ?:
ef1669ff 2474 check_root(c) ?:
58686a25 2475 check_directory_structure(c) ?:
bfe88863
KO
2476 check_nlinks(c) ?:
2477 fix_reflink_p(c);
c7a09cb1 2478
615f867c 2479 if (bch2_err_matches(ret, BCH_ERR_need_snapshot_cleanup)) {
c7a09cb1
KO
2480 set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
2481 goto again;
2482 }
2483
2484 return ret;
1c6fdbd8
KO
2485}
2486
619f5bee 2487int bch2_fsck_walk_inodes_only(struct bch_fs *c)
1c6fdbd8 2488{
35f1a503
KO
2489 return bch2_fs_check_snapshots(c) ?:
2490 bch2_fs_check_subvols(c) ?:
2491 bch2_delete_dead_snapshots(c) ?:
2492 check_inodes(c, false);
1c6fdbd8 2493}