bcachefs: Refactor overlapping extent checks
[linux-block.git] / fs / bcachefs / fsck.c
1// SPDX-License-Identifier: GPL-2.0
2
3#include "bcachefs.h"
07a1006a 4#include "bkey_buf.h"
1c6fdbd8 5#include "btree_update.h"
e3dc75eb 6#include "buckets.h"
91d961ba 7#include "darray.h"
1c6fdbd8
KO
8#include "dirent.h"
9#include "error.h"
96385742 10#include "fs-common.h"
1c6fdbd8
KO
11#include "fsck.h"
12#include "inode.h"
13#include "keylist.h"
14b393ee 14#include "subvolume.h"
1c6fdbd8
KO
15#include "super.h"
16#include "xattr.h"
17
fc51b041 18#include <linux/bsearch.h>
1c6fdbd8 19#include <linux/dcache.h> /* struct qstr */
1c6fdbd8
KO
20
21#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
22
42590b53
KO
23/*
24 * XXX: this handles transaction restarts without returning
25 * -BCH_ERR_transaction_restart_nested; this is not how we do things anymore:
26 */
ef1669ff
KO
27static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
28 u32 snapshot)
424eb881 29{
67e0dd8f 30 struct btree_iter iter;
424eb881
KO
31 struct bkey_s_c k;
32 u64 sectors = 0;
94f651e2 33 int ret;
424eb881 34
c72f687a
KO
35 for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
36 SPOS(inum, 0, snapshot),
37 POS(inum, U64_MAX),
38 0, k, ret)
424eb881
KO
39 if (bkey_extent_is_allocation(k.k))
40 sectors += k.k->size;
424eb881 41
67e0dd8f 42 bch2_trans_iter_exit(trans, &iter);
94f651e2
KO
43
44 return ret ?: sectors;
424eb881
KO
45}
46
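/*
 * Count the subdirectories of @inum in @snapshot by walking its dirents and
 * counting entries with d_type == DT_DIR; used to validate i_nlink on
 * directories.
 */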
47static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
48 u32 snapshot)
49{
50 struct btree_iter iter;
51 struct bkey_s_c k;
52 struct bkey_s_c_dirent d;
53 u64 subdirs = 0;
54 int ret;
55
c72f687a
KO
56 for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
57 SPOS(inum, 0, snapshot),
58 POS(inum, U64_MAX),
59 0, k, ret) {
ef1669ff
KO
60 if (k.k->type != KEY_TYPE_dirent)
61 continue;
62
63 d = bkey_s_c_to_dirent(k);
64 if (d.v->d_type == DT_DIR)
65 subdirs++;
66 }
ef1669ff
KO
67 bch2_trans_iter_exit(trans, &iter);
68
69 return ret ?: subdirs;
70}
71
81ed9ce3
KO
72static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot,
73 u32 *subvol)
74{
bcb79a51
KO
75 struct bch_snapshot s;
76 int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots,
77 POS(0, snapshot), 0,
78 snapshot, &s);
79 if (!ret)
80 *subvol = le32_to_cpu(s.subvol);
e47a390a 81 else if (bch2_err_matches(ret, ENOENT))
81ed9ce3 82 bch_err(trans->c, "snapshot %u not found", snapshot);
81ed9ce3
KO
83 return ret;
84
85}
86
ef1669ff
KO
87static int __subvol_lookup(struct btree_trans *trans, u32 subvol,
88 u32 *snapshot, u64 *inum)
81ed9ce3 89{
97996ddf 90 struct bch_subvolume s;
81ed9ce3
KO
91 int ret;
92
97996ddf 93 ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
81ed9ce3 94
97996ddf
KO
95 *snapshot = le32_to_cpu(s.snapshot);
96 *inum = le64_to_cpu(s.inode);
81ed9ce3 97 return ret;
81ed9ce3
KO
98}
99
ef1669ff
KO
100static int subvol_lookup(struct btree_trans *trans, u32 subvol,
101 u32 *snapshot, u64 *inum)
81ed9ce3 102{
ef1669ff 103 return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum));
81ed9ce3
KO
104}
105
c27314b4
KO
106static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
107 struct bch_inode_unpacked *inode)
108{
109 struct btree_iter iter;
110 struct bkey_s_c k;
111 int ret;
112
113 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
114 POS(0, inode_nr),
115 BTREE_ITER_ALL_SNAPSHOTS);
116 k = bch2_btree_iter_peek(&iter);
117 ret = bkey_err(k);
118 if (ret)
119 goto err;
120
e88a75eb 121 if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
e47a390a 122 ret = -BCH_ERR_ENOENT_inode;
c27314b4
KO
123 goto err;
124 }
125
3e52c222 126 ret = bch2_inode_unpack(k, inode);
c27314b4 127err:
549d173c 128 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec
KO
129 bch_err(trans->c, "error fetching inode %llu: %s",
130 inode_nr, bch2_err_str(ret));
c27314b4
KO
131 bch2_trans_iter_exit(trans, &iter);
132 return ret;
133}
134
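/*
 * Look up the inode @inode_nr visible in snapshot *@snapshot; on success
 * *@snapshot is updated to the snapshot the lookup resolved to.
 */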
135static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
136 struct bch_inode_unpacked *inode,
137 u32 *snapshot)
8a85b20c 138{
67e0dd8f 139 struct btree_iter iter;
8a85b20c
KO
140 struct bkey_s_c k;
141 int ret;
142
bcb79a51
KO
143 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
144 SPOS(0, inode_nr, *snapshot), 0);
8a85b20c
KO
145 ret = bkey_err(k);
146 if (ret)
147 goto err;
148
3e52c222
KO
149 ret = bkey_is_inode(k.k)
150 ? bch2_inode_unpack(k, inode)
e47a390a 151 : -BCH_ERR_ENOENT_inode;
4db65027
KO
152 if (!ret)
153 *snapshot = iter.pos.snapshot;
8a85b20c 154err:
549d173c 155 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec
KO
156 bch_err(trans->c, "error fetching inode %llu:%u: %s",
157 inode_nr, *snapshot, bch2_err_str(ret));
67e0dd8f 158 bch2_trans_iter_exit(trans, &iter);
8a85b20c
KO
159 return ret;
160}
161
58686a25
KO
162static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
163 struct bch_inode_unpacked *inode,
164 u32 *snapshot)
165{
166 return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot));
167}
168
ef1669ff
KO
169static int __lookup_dirent(struct btree_trans *trans,
170 struct bch_hash_info hash_info,
171 subvol_inum dir, struct qstr *name,
172 u64 *target, unsigned *type)
173{
174 struct btree_iter iter;
175 struct bkey_s_c_dirent d;
176 int ret;
177
178 ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
179 &hash_info, dir, name, 0);
180 if (ret)
181 return ret;
182
183 d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
184 *target = le64_to_cpu(d.v->d_inum);
185 *type = d.v->d_type;
186 bch2_trans_iter_exit(trans, &iter);
187 return 0;
188}
189
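/*
 * Pack @inode and update the inodes btree at @snapshot; the caller is
 * responsible for committing (see write_inode() below for the committing
 * wrapper).
 */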
190static int __write_inode(struct btree_trans *trans,
191 struct bch_inode_unpacked *inode,
192 u32 snapshot)
8a85b20c 193{
75da9764
KO
194 struct bkey_inode_buf *inode_p =
195 bch2_trans_kmalloc(trans, sizeof(*inode_p));
67e0dd8f 196
75da9764
KO
197 if (IS_ERR(inode_p))
198 return PTR_ERR(inode_p);
67e0dd8f 199
75da9764
KO
200 bch2_inode_pack(inode_p, inode);
201 inode_p->inode.k.p.snapshot = snapshot;
202
203 return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
204 &inode_p->inode.k_i,
205 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
58686a25
KO
206}
207
208static int write_inode(struct btree_trans *trans,
209 struct bch_inode_unpacked *inode,
210 u32 snapshot)
211{
e68914ca 212 int ret = commit_do(trans, NULL, NULL,
8a85b20c
KO
213 BTREE_INSERT_NOFAIL|
214 BTREE_INSERT_LAZY_RW,
58686a25 215 __write_inode(trans, inode, snapshot));
8a85b20c 216 if (ret)
d4bf5eec
KO
217 bch_err(trans->c, "error in fsck: error updating inode: %s",
218 bch2_err_str(ret));
8a85b20c
KO
219 return ret;
220}
221
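/*
 * Delete an inode found to be unlinked: remove its extents, dirents and
 * xattrs in @snapshot, then replace the inode key with a
 * bkey_inode_generation carrying the bumped generation.
 */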
222static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot)
223{
e4085b70 224 struct bch_fs *c = trans->c;
ef1669ff
KO
225 struct btree_iter iter = { NULL };
226 struct bkey_i_inode_generation delete;
227 struct bch_inode_unpacked inode_u;
228 struct bkey_s_c k;
229 int ret;
230
42590b53
KO
231 do {
232 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
233 SPOS(inum, 0, snapshot),
234 SPOS(inum, U64_MAX, snapshot),
235 0, NULL) ?:
236 bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
237 SPOS(inum, 0, snapshot),
238 SPOS(inum, U64_MAX, snapshot),
239 0, NULL) ?:
240 bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
241 SPOS(inum, 0, snapshot),
242 SPOS(inum, U64_MAX, snapshot),
243 0, NULL);
244 } while (ret == -BCH_ERR_transaction_restart_nested);
ef1669ff
KO
245 if (ret)
246 goto err;
247retry:
248 bch2_trans_begin(trans);
249
bcb79a51
KO
250 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
251 SPOS(0, inum, snapshot), BTREE_ITER_INTENT);
ef1669ff
KO
252 ret = bkey_err(k);
253 if (ret)
254 goto err;
255
3e52c222 256 if (!bkey_is_inode(k.k)) {
e4085b70 257 bch2_fs_inconsistent(c,
ef1669ff
KO
258 "inode %llu:%u not found when deleting",
259 inum, snapshot);
260 ret = -EIO;
261 goto err;
262 }
263
3e52c222 264 bch2_inode_unpack(k, &inode_u);
ef1669ff
KO
265
266 /* Subvolume root? */
e4085b70
KO
267 if (inode_u.bi_subvol)
268 bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum);
ef1669ff
KO
269
270 bkey_inode_generation_init(&delete.k_i);
271 delete.k.p = iter.pos;
272 delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
273
274 ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
275 bch2_trans_commit(trans, NULL, NULL,
276 BTREE_INSERT_NOFAIL);
277err:
278 bch2_trans_iter_exit(trans, &iter);
549d173c 279 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ef1669ff
KO
280 goto retry;
281
efd0d038 282 return ret ?: -BCH_ERR_transaction_restart_nested;
ef1669ff
KO
283}
284
ae8bbb9f 285static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
1c6fdbd8 286{
0f238367 287 struct bch_fs *c = trans->c;
67e0dd8f 288 struct btree_iter iter;
1c6fdbd8
KO
289 struct bch_inode_unpacked dir_inode;
290 struct bch_hash_info dir_hash_info;
1c6fdbd8 291 int ret;
1c6fdbd8 292
c27314b4 293 ret = lookup_first_inode(trans, pos.inode, &dir_inode);
b1fd23df 294 if (ret)
e492e7b6 295 goto err;
1c6fdbd8
KO
296
297 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
298
67e0dd8f 299 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
b1fd23df 300
ae8bbb9f 301 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
41fc8622
KO
302 &dir_hash_info, &iter,
303 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
67e0dd8f 304 bch2_trans_iter_exit(trans, &iter);
e492e7b6 305err:
549d173c 306 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
1bb3c2a9 307 bch_err_fn(c, ret);
ae8bbb9f 308 return ret;
b1fd23df
KO
309}
310
58686a25 311/* Get lost+found, create if it doesn't exist: */
ef1669ff 312static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
58686a25 313 struct bch_inode_unpacked *lostfound)
1c6fdbd8 314{
58686a25
KO
315 struct bch_fs *c = trans->c;
316 struct bch_inode_unpacked root;
317 struct bch_hash_info root_hash_info;
318 struct qstr lostfound_str = QSTR("lost+found");
ef1669ff
KO
319 subvol_inum root_inum = { .subvol = subvol };
320 u64 inum = 0;
321 unsigned d_type = 0;
58686a25
KO
322 u32 snapshot;
323 int ret;
324
285b181a 325 ret = __subvol_lookup(trans, subvol, &snapshot, &root_inum.inum);
ef1669ff
KO
326 if (ret)
327 return ret;
81ed9ce3 328
285b181a
KO
329 ret = __lookup_inode(trans, root_inum.inum, &root, &snapshot);
330 if (ret)
58686a25
KO
331 return ret;
332
333 root_hash_info = bch2_hash_info_init(c, &root);
ef1669ff 334
285b181a 335 ret = __lookup_dirent(trans, root_hash_info, root_inum,
ef1669ff 336 &lostfound_str, &inum, &d_type);
e47a390a 337 if (bch2_err_matches(ret, ENOENT)) {
58686a25
KO
338 bch_notice(c, "creating lost+found");
339 goto create_lostfound;
340 }
341
549d173c 342 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec 343 bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret));
285b181a 344 if (ret)
ef1669ff 345 return ret;
ef1669ff
KO
346
347 if (d_type != DT_DIR) {
348 bch_err(c, "error looking up lost+found: not a directory");
349 return ret;
ef1669ff
KO
350 }
351
285b181a 352 /*
067d228b 353 * The bch2_check_dirents pass has already run, dangling dirents
285b181a
KO
354 * shouldn't exist here:
355 */
356 return __lookup_inode(trans, inum, lostfound, &snapshot);
58686a25 357
58686a25 358create_lostfound:
285b181a
KO
359 bch2_inode_init_early(c, lostfound);
360
361 ret = bch2_create_trans(trans, root_inum, &root,
362 lostfound, &lostfound_str,
363 0, 0, S_IFDIR|0700, 0, NULL, NULL,
364 (subvol_inum) { }, 0);
549d173c 365 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec 366 bch_err(c, "error creating lost+found: %s", bch2_err_str(ret));
285b181a 367 return ret;
58686a25
KO
368}
369
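/*
 * Reattach an unreachable inode: create a dirent for it in lost+found (named
 * after its inode number) and point the inode's backpointer fields at the new
 * dirent. Bumps lost+found's i_nlink when reattaching a directory.
 */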
285b181a 370static int __reattach_inode(struct btree_trans *trans,
81ed9ce3 371 struct bch_inode_unpacked *inode,
ef1669ff 372 u32 inode_snapshot)
58686a25
KO
373{
374 struct bch_hash_info dir_hash;
375 struct bch_inode_unpacked lostfound;
1c6fdbd8
KO
376 char name_buf[20];
377 struct qstr name;
176cf4bf 378 u64 dir_offset = 0;
81ed9ce3 379 u32 subvol;
1c6fdbd8
KO
380 int ret;
381
285b181a 382 ret = __snapshot_lookup_subvol(trans, inode_snapshot, &subvol);
81ed9ce3
KO
383 if (ret)
384 return ret;
385
386 ret = lookup_lostfound(trans, subvol, &lostfound);
176cf4bf 387 if (ret)
d3ff7fec 388 return ret;
176cf4bf 389
58686a25
KO
390 if (S_ISDIR(inode->bi_mode)) {
391 lostfound.bi_nlink++;
176cf4bf 392
285b181a 393 ret = __write_inode(trans, &lostfound, U32_MAX);
176cf4bf 394 if (ret)
d3ff7fec 395 return ret;
176cf4bf
KO
396 }
397
58686a25 398 dir_hash = bch2_hash_info_init(trans->c, &lostfound);
176cf4bf 399
58686a25
KO
400 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
401 name = (struct qstr) QSTR(name_buf);
176cf4bf 402
285b181a
KO
403 ret = bch2_dirent_create(trans,
404 (subvol_inum) {
405 .subvol = subvol,
406 .inum = lostfound.bi_inum,
407 },
408 &dir_hash,
409 inode_d_type(inode),
410 &name, inode->bi_inum, &dir_offset,
411 BCH_HASH_SET_MUST_CREATE);
412 if (ret)
413 return ret;
414
415 inode->bi_dir = lostfound.bi_inum;
416 inode->bi_dir_offset = dir_offset;
417
418 return __write_inode(trans, inode, inode_snapshot);
419}
420
421static int reattach_inode(struct btree_trans *trans,
422 struct bch_inode_unpacked *inode,
423 u32 inode_snapshot)
424{
e68914ca 425 int ret = commit_do(trans, NULL, NULL,
285b181a
KO
426 BTREE_INSERT_LAZY_RW|
427 BTREE_INSERT_NOFAIL,
428 __reattach_inode(trans, inode, inode_snapshot));
58686a25 429 if (ret) {
d4bf5eec
KO
430 bch_err(trans->c, "error reattaching inode %llu: %s",
431 inode->bi_inum, bch2_err_str(ret));
58686a25
KO
432 return ret;
433 }
176cf4bf 434
285b181a 435 return ret;
1c6fdbd8
KO
436}
437
d3ff7fec
KO
438static int remove_backpointer(struct btree_trans *trans,
439 struct bch_inode_unpacked *inode)
440{
67e0dd8f 441 struct btree_iter iter;
bcb79a51 442 struct bkey_s_c_dirent d;
d3ff7fec
KO
443 int ret;
444
bcb79a51
KO
445 d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
446 POS(inode->bi_dir, inode->bi_dir_offset), 0,
447 dirent);
448 ret = bkey_err(d) ?:
449 __remove_dirent(trans, d.k->p);
67e0dd8f 450 bch2_trans_iter_exit(trans, &iter);
d3ff7fec
KO
451 return ret;
452}
453
49124d8a
KO
454struct snapshots_seen_entry {
455 u32 id;
456 u32 equiv;
457};
458
459struct snapshots_seen {
460 struct bpos pos;
461 DARRAY(struct snapshots_seen_entry) ids;
462};
463
464static inline void snapshots_seen_exit(struct snapshots_seen *s)
465{
466 darray_exit(&s->ids);
467}
468
469static inline void snapshots_seen_init(struct snapshots_seen *s)
470{
471 memset(s, 0, sizeof(*s));
472}
473
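/*
 * Record that we've seen a key at @pos; keys are visited in natural key
 * order, so if the same position was already seen in an equivalent snapshot
 * with a different snapshot ID, snapshot deletion didn't finish and we return
 * an error asking for snapshot cleanup to be rerun.
 */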
474static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
475 enum btree_id btree_id, struct bpos pos)
ef1669ff 476{
49124d8a
KO
477 struct snapshots_seen_entry *i, n = {
478 .id = pos.snapshot,
479 .equiv = bch2_snapshot_equiv(c, pos.snapshot),
480 };
c7be3cb5 481 int ret = 0;
ef1669ff 482
e88a75eb 483 if (!bkey_eq(s->pos, pos))
91d961ba 484 s->ids.nr = 0;
49124d8a
KO
485
486 pos.snapshot = n.equiv;
ef1669ff
KO
487 s->pos = pos;
488
49124d8a
KO
489 darray_for_each(s->ids, i)
490 if (i->equiv == n.equiv) {
c7be3cb5
KO
491 if (fsck_err_on(i->id != n.id, c,
492 "snapshot deletion did not run correctly:\n"
49124d8a
KO
493 " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
494 bch2_btree_ids[btree_id],
495 pos.inode, pos.offset,
c7be3cb5 496 i->id, n.id, n.equiv))
615f867c 497 return -BCH_ERR_need_snapshot_cleanup;
ef1669ff 498
49124d8a
KO
499 return 0;
500 }
501
502 ret = darray_push(&s->ids, n);
503 if (ret)
504 bch_err(c, "error reallocating snapshots_seen table (size %zu)",
505 s->ids.size);
c7be3cb5 506fsck_err:
49124d8a 507 return ret;
ef1669ff
KO
508}
509
510/**
511 * key_visible_in_snapshot - returns true if @id is a descendant of @ancestor,
512 * and @ancestor hasn't been overwritten in @seen
513 *
514 * That is, returns whether a key in the @ancestor snapshot is visible in the @id snapshot
515 */
516static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
517 u32 id, u32 ancestor)
518{
519 ssize_t i;
49124d8a 520 u32 top = seen->ids.nr ? seen->ids.data[seen->ids.nr - 1].equiv : 0;
ef1669ff
KO
521
522 BUG_ON(id > ancestor);
49124d8a
KO
523 BUG_ON(!bch2_snapshot_is_equiv(c, id));
524 BUG_ON(!bch2_snapshot_is_equiv(c, ancestor));
ef1669ff
KO
525
526 /* @ancestor should be the snapshot most recently added to @seen */
49124d8a
KO
527 BUG_ON(ancestor != seen->pos.snapshot);
528 BUG_ON(ancestor != top);
ef1669ff
KO
529
530 if (id == ancestor)
531 return true;
532
533 if (!bch2_snapshot_is_ancestor(c, id, ancestor))
534 return false;
535
91d961ba 536 for (i = seen->ids.nr - 2;
49124d8a 537 i >= 0 && seen->ids.data[i].equiv >= id;
ef1669ff 538 --i)
49124d8a
KO
539 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv) &&
540 bch2_snapshot_is_ancestor(c, seen->ids.data[i].equiv, ancestor))
ef1669ff
KO
541 return false;
542
543 return true;
544}
545
546/**
547 * ref_visible - given a key with snapshot id @src that points to a key with
548 * snapshot id @dst, test whether there is some snapshot in which @dst is
549 * visible.
550 *
551 * This assumes we're visiting @src keys in natural key order.
552 *
553 * @s - list of snapshot IDs already seen at @src
554 * @src - snapshot ID of src key
555 * @dst - snapshot ID of dst key
556 */
557static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
558 u32 src, u32 dst)
559{
560 return dst <= src
561 ? key_visible_in_snapshot(c, s, dst, src)
562 : bch2_snapshot_is_ancestor(c, src, dst);
563}
564
c58029ec
DH
565static int ref_visible2(struct bch_fs *c,
566 u32 src, struct snapshots_seen *src_seen,
567 u32 dst, struct snapshots_seen *dst_seen)
568{
569 src = bch2_snapshot_equiv(c, src);
570 dst = bch2_snapshot_equiv(c, dst);
571
572 if (dst > src) {
573 swap(dst, src);
574 swap(dst_seen, src_seen);
575 }
576 return key_visible_in_snapshot(c, src_seen, dst, src);
577}
578
49124d8a
KO
579#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
580 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
581 (_i)->snapshot <= (_snapshot); _i++) \
ef1669ff
KO
582 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
583
91d961ba
KO
584struct inode_walker_entry {
585 struct bch_inode_unpacked inode;
586 u32 snapshot;
f9f52bc4 587 bool seen_this_pos;
91d961ba
KO
588 u64 count;
589};
590
1c6fdbd8 591struct inode_walker {
ef1669ff 592 bool first_this_inode;
43b81a4e 593 bool recalculate_sums;
f9f52bc4 594 struct bpos last_pos;
ef1669ff 595
91d961ba 596 DARRAY(struct inode_walker_entry) inodes;
1c6fdbd8
KO
597};
598
ef1669ff
KO
599static void inode_walker_exit(struct inode_walker *w)
600{
91d961ba 601 darray_exit(&w->inodes);
ef1669ff
KO
602}
603
1c6fdbd8
KO
604static struct inode_walker inode_walker_init(void)
605{
ef1669ff
KO
606 return (struct inode_walker) { 0, };
607}
608
ef1669ff 609static int add_inode(struct bch_fs *c, struct inode_walker *w,
3e52c222 610 struct bkey_s_c inode)
ef1669ff
KO
611{
612 struct bch_inode_unpacked u;
ef1669ff
KO
613
614 BUG_ON(bch2_inode_unpack(inode, &u));
615
91d961ba 616 return darray_push(&w->inodes, ((struct inode_walker_entry) {
ef1669ff 617 .inode = u,
49124d8a 618 .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot),
91d961ba 619 }));
1c6fdbd8
KO
620}
621
06dcca51
KO
622static int get_inodes_all_snapshots(struct btree_trans *trans,
623 struct inode_walker *w, u64 inum)
1c6fdbd8 624{
ef1669ff
KO
625 struct bch_fs *c = trans->c;
626 struct btree_iter iter;
627 struct bkey_s_c k;
0763c552 628 u32 restart_count = trans->restart_count;
ef1669ff 629 int ret;
1c6fdbd8 630
43b81a4e 631 w->recalculate_sums = false;
91d961ba 632 w->inodes.nr = 0;
ef1669ff 633
06dcca51 634 for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
ef1669ff 635 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
06dcca51 636 if (k.k->p.offset != inum)
ef1669ff
KO
637 break;
638
3e52c222
KO
639 if (bkey_is_inode(k.k))
640 add_inode(c, w, k);
ef1669ff
KO
641 }
642 bch2_trans_iter_exit(trans, &iter);
643
644 if (ret)
645 return ret;
646
f9f52bc4 647 w->first_this_inode = true;
0763c552
KO
648
649 if (trans_was_restarted(trans, restart_count))
650 return -BCH_ERR_transaction_restart_nested;
651
06dcca51
KO
652 return 0;
653}
654
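/*
 * Find the inode_walker_entry for @snapshot: the first entry whose snapshot
 * is an ancestor of (or equal to) @snapshot. If the key is not a whiteout and
 * the inode only exists in an ancestor snapshot, insert a new entry for
 * @snapshot so per-snapshot counts stay separate.
 */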
655static struct inode_walker_entry *
656lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w,
657 u32 snapshot, bool is_whiteout)
06dcca51
KO
658{
659 struct inode_walker_entry *i;
660
661 snapshot = bch2_snapshot_equiv(c, snapshot);
662
663 darray_for_each(w->inodes, i)
664 if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
ef1669ff 665 goto found;
06dcca51
KO
666
667 return NULL;
ef1669ff 668found:
06dcca51 669 BUG_ON(snapshot > i->snapshot);
ef1669ff 670
a57f4d61 671 if (snapshot != i->snapshot && !is_whiteout) {
06dcca51
KO
672 struct inode_walker_entry new = *i;
673 int ret;
49124d8a 674
06dcca51
KO
675 new.snapshot = snapshot;
676 new.count = 0;
49124d8a
KO
677
678 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
f9f52bc4 679 w->last_pos.inode, snapshot, i->snapshot);
ef1669ff 680
06dcca51 681 while (i > w->inodes.data && i[-1].snapshot > snapshot)
ef1669ff
KO
682 --i;
683
06dcca51 684 ret = darray_insert_item(&w->inodes, i - w->inodes.data, new);
ef1669ff 685 if (ret)
06dcca51 686 return ERR_PTR(ret);
ef1669ff
KO
687 }
688
689 return i;
1c6fdbd8
KO
690}
691
06dcca51 692static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
a57f4d61
KO
693 struct inode_walker *w, struct bpos pos,
694 bool is_whiteout)
06dcca51 695{
f9f52bc4
KO
696 if (w->last_pos.inode != pos.inode) {
697 int ret = get_inodes_all_snapshots(trans, w, pos.inode);
698 if (ret)
699 return ERR_PTR(ret);
700 } else if (bkey_cmp(w->last_pos, pos)) {
701 struct inode_walker_entry *i;
702
703 darray_for_each(w->inodes, i)
704 i->seen_this_pos = false;
705
706 }
707
708 w->last_pos = pos;
06dcca51 709
a57f4d61 710 return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout);
06dcca51
KO
711}
712
ef1669ff
KO
713static int __get_visible_inodes(struct btree_trans *trans,
714 struct inode_walker *w,
715 struct snapshots_seen *s,
716 u64 inum)
717{
718 struct bch_fs *c = trans->c;
719 struct btree_iter iter;
720 struct bkey_s_c k;
721 int ret;
722
91d961ba 723 w->inodes.nr = 0;
ef1669ff 724
12043cf1 725 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
ef1669ff 726 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
49124d8a
KO
727 u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
728
ef1669ff
KO
729 if (k.k->p.offset != inum)
730 break;
731
49124d8a 732 if (!ref_visible(c, s, s->pos.snapshot, equiv))
ef1669ff
KO
733 continue;
734
49124d8a 735 if (bkey_is_inode(k.k))
3e52c222 736 add_inode(c, w, k);
49124d8a
KO
737
738 if (equiv >= s->pos.snapshot)
739 break;
ef1669ff
KO
740 }
741 bch2_trans_iter_exit(trans, &iter);
742
743 return ret;
744}
745
746static int check_key_has_snapshot(struct btree_trans *trans,
747 struct btree_iter *iter,
748 struct bkey_s_c k)
749{
750 struct bch_fs *c = trans->c;
fa8e94fa 751 struct printbuf buf = PRINTBUF;
ef1669ff
KO
752 int ret = 0;
753
49124d8a 754 if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
ef1669ff 755 "key in missing snapshot: %s",
fa8e94fa
KO
756 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
757 ret = bch2_btree_delete_at(trans, iter,
285b181a 758 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1;
ef1669ff 759fsck_err:
fa8e94fa 760 printbuf_exit(&buf);
ef1669ff 761 return ret;
914f2786
KO
762}
763
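/*
 * Rehash a key that isn't at the offset its hash says it should be: delete it
 * at its current position and re-insert it through the hash table code so it
 * lands in the correct slot.
 */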
764static int hash_redo_key(struct btree_trans *trans,
765 const struct bch_hash_desc desc,
766 struct bch_hash_info *hash_info,
767 struct btree_iter *k_iter, struct bkey_s_c k)
1c6fdbd8 768{
e3b4b48c 769 struct bkey_i *delete;
1c6fdbd8 770 struct bkey_i *tmp;
1c6fdbd8 771
e3b4b48c
KO
772 delete = bch2_trans_kmalloc(trans, sizeof(*delete));
773 if (IS_ERR(delete))
774 return PTR_ERR(delete);
775
dbda63bb 776 tmp = bch2_bkey_make_mut_noupdate(trans, k);
b1fd23df
KO
777 if (IS_ERR(tmp))
778 return PTR_ERR(tmp);
1c6fdbd8 779
e3b4b48c
KO
780 bkey_init(&delete->k);
781 delete->k.p = k_iter->pos;
8c3f6da9
KO
782 return bch2_btree_iter_traverse(k_iter) ?:
783 bch2_trans_update(trans, k_iter, delete, 0) ?:
5877d887
KO
784 bch2_hash_set_snapshot(trans, desc, hash_info,
785 (subvol_inum) { 0, k.k->p.inode },
786 k.k->p.snapshot, tmp,
787 BCH_HASH_SET_MUST_CREATE,
788 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
789 bch2_trans_commit(trans, NULL, NULL,
790 BTREE_INSERT_NOFAIL|
791 BTREE_INSERT_LAZY_RW);
1c6fdbd8
KO
792}
793
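/*
 * Check that a hashed key (dirent or xattr) lives at the offset given by its
 * hash: duplicates are deleted (returning > 0 so the caller knows the key is
 * gone), and keys at the wrong offset are moved with hash_redo_key().
 */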
794static int hash_check_key(struct btree_trans *trans,
795 const struct bch_hash_desc desc,
796 struct bch_hash_info *hash_info,
797 struct btree_iter *k_iter, struct bkey_s_c hash_k)
d69f41d6 798{
424eb881 799 struct bch_fs *c = trans->c;
67e0dd8f 800 struct btree_iter iter = { NULL };
fa8e94fa 801 struct printbuf buf = PRINTBUF;
7ac2c55e
KO
802 struct bkey_s_c k;
803 u64 hash;
d69f41d6
KO
804 int ret = 0;
805
7ac2c55e
KO
806 if (hash_k.k->type != desc.key_type)
807 return 0;
808
809 hash = desc.hash_bkey(hash_info, hash_k);
810
811 if (likely(hash == hash_k.k->p.offset))
d69f41d6
KO
812 return 0;
813
7ac2c55e
KO
814 if (hash_k.k->p.offset < hash)
815 goto bad_hash;
d69f41d6 816
d8f31407 817 for_each_btree_key_norestart(trans, iter, desc.btree_id,
419fc65f 818 SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
d8f31407 819 BTREE_ITER_SLOTS, k, ret) {
e88a75eb 820 if (bkey_eq(k.k->p, hash_k.k->p))
d69f41d6
KO
821 break;
822
7ac2c55e
KO
823 if (fsck_err_on(k.k->type == desc.key_type &&
824 !desc.cmp_bkey(k, hash_k), c,
d69f41d6 825 "duplicate hash table keys:\n%s",
fa8e94fa
KO
826 (printbuf_reset(&buf),
827 bch2_bkey_val_to_text(&buf, c, hash_k),
828 buf.buf))) {
285b181a 829 ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
d69f41d6
KO
830 break;
831 }
d69f41d6 832
7ac2c55e 833 if (bkey_deleted(k.k)) {
67e0dd8f 834 bch2_trans_iter_exit(trans, &iter);
7ac2c55e 835 goto bad_hash;
1c6fdbd8 836 }
7ac2c55e 837 }
fa8e94fa 838out:
67e0dd8f 839 bch2_trans_iter_exit(trans, &iter);
fa8e94fa 840 printbuf_exit(&buf);
1c6fdbd8 841 return ret;
7ac2c55e 842bad_hash:
a1019576 843 if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
d8f31407 844 bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash,
fa8e94fa 845 (printbuf_reset(&buf),
1ed0a5d2
KO
846 bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
847 ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
c8d5b714 848 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
1ed0a5d2 849 bch_err(c, "hash_redo_key err %s", bch2_err_str(ret));
c8d5b714 850 if (ret)
1ed0a5d2 851 return ret;
1ed0a5d2 852 ret = -BCH_ERR_transaction_restart_nested;
741daa5b 853 }
741daa5b 854fsck_err:
fa8e94fa 855 goto out;
741daa5b
KO
856}
857
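/*
 * Check a single inode key: verify its snapshot, and finish work left over
 * from an unclean shutdown - delete unlinked inodes, truncate inodes with
 * I_SIZE_DIRTY set, and recount sectors for inodes with I_SECTORS_DIRTY set.
 */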
858static int check_inode(struct btree_trans *trans,
859 struct btree_iter *iter,
a1783320 860 struct bkey_s_c k,
ef1669ff 861 struct bch_inode_unpacked *prev,
49124d8a 862 struct snapshots_seen *s,
285b181a 863 bool full)
5c16add5
KO
864{
865 struct bch_fs *c = trans->c;
285b181a 866 struct bch_inode_unpacked u;
5c16add5 867 bool do_update = false;
285b181a
KO
868 int ret;
869
285b181a 870 ret = check_key_has_snapshot(trans, iter, k);
e492e7b6
KO
871 if (ret < 0)
872 goto err;
285b181a 873 if (ret)
e492e7b6 874 return 0;
285b181a 875
49124d8a
KO
876 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
877 if (ret)
878 goto err;
879
285b181a
KO
880 /*
881 * if snapshot id isn't a leaf node, skip it - deletion in
882 * particular is not atomic, so on the internal snapshot nodes
883 * we can see inodes marked for deletion after a clean shutdown
884 */
8479938d 885 if (bch2_snapshot_is_internal_node(c, k.k->p.snapshot))
285b181a
KO
886 return 0;
887
3e52c222 888 if (!bkey_is_inode(k.k))
285b181a
KO
889 return 0;
890
3e52c222 891 BUG_ON(bch2_inode_unpack(k, &u));
285b181a
KO
892
893 if (!full &&
3e52c222
KO
894 !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|
895 BCH_INODE_I_SECTORS_DIRTY|
896 BCH_INODE_UNLINKED)))
285b181a
KO
897 return 0;
898
285b181a
KO
899 if (prev->bi_inum != u.bi_inum)
900 *prev = u;
901
902 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed ||
903 inode_d_type(prev) != inode_d_type(&u), c,
ef1669ff
KO
904 "inodes in different snapshots don't match")) {
905 bch_err(c, "repair not implemented yet");
906 return -EINVAL;
907 }
5c16add5
KO
908
909 if (u.bi_flags & BCH_INODE_UNLINKED &&
910 (!c->sb.clean ||
911 fsck_err(c, "filesystem marked clean, but inode %llu unlinked",
912 u.bi_inum))) {
5c16add5
KO
913 bch2_trans_unlock(trans);
914 bch2_fs_lazy_rw(c);
915
ef1669ff 916 ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
efd0d038 917 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec
KO
918 bch_err(c, "error in fsck: error while deleting inode: %s",
919 bch2_err_str(ret));
5c16add5
KO
920 return ret;
921 }
922
923 if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
924 (!c->sb.clean ||
925 fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty",
926 u.bi_inum))) {
927 bch_verbose(c, "truncating inode %llu", u.bi_inum);
928
929 bch2_trans_unlock(trans);
930 bch2_fs_lazy_rw(c);
931
932 /*
933 * XXX: need to truncate partial blocks too here - or ideally
934 * just switch units to bytes and that issue goes away
935 */
936 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
ef1669ff
KO
937 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
938 iter->pos.snapshot),
5c16add5 939 POS(u.bi_inum, U64_MAX),
ef1669ff 940 0, NULL);
51fe0332 941 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
d4bf5eec
KO
942 bch_err(c, "error in fsck: error truncating inode: %s",
943 bch2_err_str(ret));
51fe0332 944 if (ret)
5c16add5 945 return ret;
5c16add5
KO
946
947 /*
948 * We truncated without our normal sector accounting hook, just
949 * make sure we recalculate it:
950 */
951 u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
952
953 u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
954 do_update = true;
955 }
956
957 if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
958 (!c->sb.clean ||
959 fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty",
960 u.bi_inum))) {
961 s64 sectors;
962
963 bch_verbose(c, "recounting sectors for inode %llu",
964 u.bi_inum);
965
ef1669ff 966 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
5c16add5 967 if (sectors < 0) {
d4bf5eec
KO
968 bch_err(c, "error in fsck: error recounting inode sectors: %s",
969 bch2_err_str(sectors));
5c16add5
KO
970 return sectors;
971 }
972
973 u.bi_sectors = sectors;
974 u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
975 do_update = true;
976 }
977
d3ff7fec
KO
978 if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
979 u.bi_dir = 0;
980 u.bi_dir_offset = 0;
981 u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
5c16add5
KO
982 do_update = true;
983 }
984
985 if (do_update) {
a1783320 986 ret = __write_inode(trans, &u, iter->pos.snapshot);
5c16add5 987 if (ret)
d4bf5eec
KO
988 bch_err(c, "error in fsck: error updating inode: %s",
989 bch2_err_str(ret));
5c16add5 990 }
e492e7b6 991err:
5c16add5 992fsck_err:
e492e7b6 993 if (ret)
1bb3c2a9 994 bch_err_fn(c, ret);
5c16add5
KO
995 return ret;
996}
997
998noinline_for_stack
067d228b 999int bch2_check_inodes(struct bch_fs *c)
5c16add5 1000{
067d228b 1001 bool full = c->opts.fsck;
5c16add5 1002 struct btree_trans trans;
67e0dd8f 1003 struct btree_iter iter;
285b181a 1004 struct bch_inode_unpacked prev = { 0 };
49124d8a 1005 struct snapshots_seen s;
a1783320 1006 struct bkey_s_c k;
5c16add5
KO
1007 int ret;
1008
49124d8a 1009 snapshots_seen_init(&s);
5c16add5
KO
1010 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1011
a1783320
KO
1012 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
1013 POS_MIN,
eace11a7
KO
1014 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
1015 NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
a1783320 1016 check_inode(&trans, &iter, k, &prev, &s, full));
ef1669ff 1017
285b181a 1018 bch2_trans_exit(&trans);
49124d8a 1019 snapshots_seen_exit(&s);
e492e7b6 1020 if (ret)
1bb3c2a9 1021 bch_err_fn(c, ret);
285b181a
KO
1022 return ret;
1023}
ef1669ff 1024
ef1669ff
KO
1025/*
1026 * Checking for overlapping extents needs to be reimplemented
1027 */
1028#if 0
abcecb49 1029static int fix_overlapping_extent(struct btree_trans *trans,
e3e464ac
KO
1030 struct bkey_s_c k, struct bpos cut_at)
1031{
67e0dd8f 1032 struct btree_iter iter;
e3e464ac
KO
1033 struct bkey_i *u;
1034 int ret;
1035
1036 u = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
1037 ret = PTR_ERR_OR_ZERO(u);
1038 if (ret)
1039 return ret;
1040
1041 bkey_reassemble(u, k);
1042 bch2_cut_front(cut_at, u);
1043
e3e464ac 1044
ef1669ff
KO
1045 /*
1046 * We don't want to go through the extent_handle_overwrites path:
1047 *
1048 * XXX: this is going to screw up disk accounting, extent triggers
1049 * assume things about extent overwrites - we should be running the
1050 * triggers manually here
1051 */
1052 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
1053 BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
1054
1055 BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
1056 ret = bch2_btree_iter_traverse(&iter) ?:
1057 bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
1058 bch2_trans_commit(trans, NULL, NULL,
1059 BTREE_INSERT_NOFAIL|
1060 BTREE_INSERT_LAZY_RW);
1061 bch2_trans_iter_exit(trans, &iter);
1062 return ret;
1063}
1064#endif
1065
4db65027
KO
1066static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
1067 struct btree_iter *iter,
1068 struct bpos pos)
1069{
bcb79a51 1070 return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
4db65027
KO
1071}
1072
1073static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
1074 struct bkey_s_c_dirent d)
1075{
1076 return inode->bi_dir == d.k->p.inode &&
1077 inode->bi_dir_offset == d.k->p.offset;
1078}
1079
1080static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
1081 struct bch_inode_unpacked *inode)
1082{
1083 return d.v->d_type == DT_SUBVOL
1084 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
1085 : le64_to_cpu(d.v->d_inum) == inode->bi_inum;
1086}
1087
ef1669ff
KO
1088static int inode_backpointer_exists(struct btree_trans *trans,
1089 struct bch_inode_unpacked *inode,
1090 u32 snapshot)
1091{
1092 struct btree_iter iter;
4db65027 1093 struct bkey_s_c_dirent d;
ef1669ff
KO
1094 int ret;
1095
4db65027
KO
1096 d = dirent_get_by_pos(trans, &iter,
1097 SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot));
ab158fce 1098 ret = bkey_err(d);
ef1669ff 1099 if (ret)
e47a390a 1100 return bch2_err_matches(ret, ENOENT) ? 0 : ret;
488f9776 1101
4db65027 1102 ret = dirent_points_to_inode(d, inode);
ef1669ff
KO
1103 bch2_trans_iter_exit(trans, &iter);
1104 return ret;
1105}
1106
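/*
 * Called when we move on to a new inode while walking extents: compare the
 * sector count accumulated for each snapshot version of the inode against
 * bi_sectors and repair any mismatches.
 */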
1107static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
1108{
1109 struct bch_fs *c = trans->c;
1110 struct inode_walker_entry *i;
0763c552
KO
1111 u32 restart_count = trans->restart_count;
1112 int ret = 0;
ef1669ff
KO
1113 s64 count2;
1114
91d961ba 1115 darray_for_each(w->inodes, i) {
ef1669ff
KO
1116 if (i->inode.bi_sectors == i->count)
1117 continue;
1118
f9f52bc4 1119 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);
ef1669ff 1120
43b81a4e 1121 if (w->recalculate_sums)
ef1669ff 1122 i->count = count2;
43b81a4e
KO
1123
1124 if (i->count != count2) {
1125 bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
1126 w->last_pos.inode, i->snapshot, i->count, count2);
1127 return -BCH_ERR_internal_fsck_err;
ef1669ff
KO
1128 }
1129
1130 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c,
1131 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
f9f52bc4 1132 w->last_pos.inode, i->snapshot,
1ed0a5d2
KO
1133 i->inode.bi_sectors, i->count)) {
1134 i->inode.bi_sectors = i->count;
1135 ret = write_inode(trans, &i->inode, i->snapshot);
1136 if (ret)
1137 break;
1ed0a5d2 1138 }
ef1669ff
KO
1139 }
1140fsck_err:
c497df8b 1141 if (ret)
1bb3c2a9 1142 bch_err_fn(c, ret);
c497df8b
KO
1143 if (!ret && trans_was_restarted(trans, restart_count))
1144 ret = -BCH_ERR_transaction_restart_nested;
1145 return ret;
ef1669ff
KO
1146}
1147
c58029ec
DH
1148struct extent_end {
1149 u32 snapshot;
1150 u64 offset;
1151 struct snapshots_seen seen;
1152};
1153
a397b8df
KO
1154struct extent_ends {
1155 struct bpos last_pos;
1156 DARRAY(struct extent_end) e;
1157};
1158
1159static void extent_ends_reset(struct extent_ends *extent_ends)
1160{
1161 struct extent_end *i;
1162
1163 darray_for_each(extent_ends->e, i)
1164 snapshots_seen_exit(&i->seen);
1165
1166 extent_ends->e.nr = 0;
1167}
1168
1169static void extent_ends_exit(struct extent_ends *extent_ends)
1170{
1171 extent_ends_reset(extent_ends);
1172 darray_exit(&extent_ends->e);
1173}
1174
1175static void extent_ends_init(struct extent_ends *extent_ends)
1176{
1177 memset(extent_ends, 0, sizeof(*extent_ends));
1178}
1179
1180static int extent_ends_at(struct bch_fs *c,
1181 struct extent_ends *extent_ends,
1182 struct snapshots_seen *seen,
1183 struct bkey_s_c k)
1184{
1185 struct extent_end *i, n = (struct extent_end) {
1186 .offset = k.k->p.offset,
1187 .snapshot = k.k->p.snapshot,
1188 .seen = *seen,
1189 };
1190
1191 n.seen.ids.data = kmemdup(seen->ids.data,
1192 sizeof(seen->ids.data[0]) * seen->ids.size,
1193 GFP_KERNEL);
1194 if (!n.seen.ids.data)
1195 return -BCH_ERR_ENOMEM_fsck_extent_ends_at;
1196
1197 darray_for_each(extent_ends->e, i) {
1198 if (i->snapshot == k.k->p.snapshot) {
1199 snapshots_seen_exit(&i->seen);
1200 *i = n;
1201 return 0;
1202 }
1203
1204 if (i->snapshot >= k.k->p.snapshot)
1205 break;
1206 }
1207
1208 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n);
1209}
c58029ec 1210
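/*
 * A pair of overlapping extents was detected while walking extents: re-walk
 * the range to find and print both keys, and if the user accepts the repair,
 * report back via @fixed so the caller knows i_sectors must be recalculated.
 */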
1211static int overlapping_extents_found(struct btree_trans *trans,
1212 enum btree_id btree,
1213 struct bpos pos1, struct bkey pos2,
1214 bool *fixed)
454377d8 1215{
43b81a4e
KO
1216 struct bch_fs *c = trans->c;
1217 struct printbuf buf = PRINTBUF;
454377d8
KO
1218 struct btree_iter iter;
1219 struct bkey_s_c k;
43b81a4e 1220 u32 snapshot = min(pos1.snapshot, pos2.p.snapshot);
454377d8
KO
1221 int ret;
1222
43b81a4e
KO
1223 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));
1224
1225 bch2_trans_iter_init(trans, &iter, btree, SPOS(pos1.inode, pos1.offset - 1, snapshot), 0);
1226 k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX));
454377d8
KO
1227 ret = bkey_err(k);
1228 if (ret)
43b81a4e
KO
1229 goto err;
1230
1231 prt_str(&buf, "\n ");
1232 bch2_bkey_val_to_text(&buf, c, k);
1233
1234 if (!bpos_eq(pos1, k.k->p)) {
1235 bch_err(c, "%s: error finding first overlapping extent when repairing%s",
1236 __func__, buf.buf);
1237 ret = -BCH_ERR_internal_fsck_err;
1238 goto err;
1239 }
1240
1241 while (1) {
1242 bch2_btree_iter_advance(&iter);
1243
1244 k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX));
1245 ret = bkey_err(k);
1246 if (ret)
1247 goto err;
1248
1249 if (bkey_ge(k.k->p, pos2.p))
1250 break;
1251
1252 }
1253
1254 prt_str(&buf, "\n ");
1255 bch2_bkey_val_to_text(&buf, c, k);
1256
1257 if (bkey_gt(k.k->p, pos2.p) ||
1258 pos2.size != k.k->size) {
1259 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
1260 __func__, buf.buf);
1261 ret = -BCH_ERR_internal_fsck_err;
1262 goto err;
1263 }
1264
1265 if (fsck_err(c, "overlapping extents%s", buf.buf)) {
1266 struct bpos update_pos = pos1.snapshot < pos2.p.snapshot ? pos1 : pos2.p;
1267 struct btree_iter update_iter;
454377d8 1268
43b81a4e
KO
1269 struct bkey_i *update = bch2_bkey_get_mut(trans, &update_iter,
1270 btree, update_pos,
1271 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1272 bch2_trans_iter_exit(trans, &update_iter);
1273 if ((ret = PTR_ERR_OR_ZERO(update)))
1274 goto err;
1275
1276 *fixed = true;
1277 }
1278fsck_err:
1279err:
454377d8 1280 bch2_trans_iter_exit(trans, &iter);
43b81a4e
KO
1281 printbuf_exit(&buf);
1282 return ret;
454377d8
KO
1283}
1284
c58029ec
DH
1285static int check_overlapping_extents(struct btree_trans *trans,
1286 struct snapshots_seen *seen,
a397b8df 1287 struct extent_ends *extent_ends,
c58029ec 1288 struct bkey_s_c k,
a397b8df 1289 u32 equiv,
c58029ec
DH
1290 struct btree_iter *iter)
1291{
1292 struct bch_fs *c = trans->c;
1293 struct extent_end *i;
43b81a4e 1294 bool fixed = false;
c58029ec
DH
1295 int ret = 0;
1296
a397b8df
KO
1297 /* transaction restart, running again */
1298 if (bpos_eq(extent_ends->last_pos, k.k->p))
1299 return 0;
1300
1301 if (extent_ends->last_pos.inode != k.k->p.inode)
1302 extent_ends_reset(extent_ends);
1303
1304 darray_for_each(extent_ends->e, i) {
1305 if (i->offset <= bkey_start_offset(k.k))
c58029ec
DH
1306 continue;
1307
1308 if (!ref_visible2(c,
1309 k.k->p.snapshot, seen,
1310 i->snapshot, &i->seen))
1311 continue;
1312
a397b8df
KO
1313 ret = overlapping_extents_found(trans, iter->btree_id,
1314 SPOS(iter->pos.inode,
1315 i->offset,
1316 i->snapshot),
1317 *k.k, &fixed);
1318 if (ret)
1319 goto err;
c58029ec
DH
1320 }
1321
a397b8df
KO
1322 ret = extent_ends_at(c, extent_ends, seen, k);
1323 if (ret)
1324 goto err;
c58029ec 1325
a397b8df
KO
1326 extent_ends->last_pos = k.k->p;
1327err:
1328 return ret ?: fixed;
c58029ec
DH
1329}
1330
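/*
 * Check one extent key: verify it has a valid snapshot and a corresponding
 * regular file (or symlink) inode, check for overlaps with previously seen
 * extents, delete extents past i_size, and accumulate per-snapshot sector
 * counts for the i_sectors check.
 */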
ef1669ff 1331static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
a1783320 1332 struct bkey_s_c k,
ef1669ff 1333 struct inode_walker *inode,
c58029ec 1334 struct snapshots_seen *s,
a397b8df 1335 struct extent_ends *extent_ends)
ef1669ff
KO
1336{
1337 struct bch_fs *c = trans->c;
ef1669ff 1338 struct inode_walker_entry *i;
fa8e94fa 1339 struct printbuf buf = PRINTBUF;
0d8f320d 1340 struct bpos equiv = k.k->p;
ef1669ff 1341 int ret = 0;
ef1669ff 1342
0d8f320d
KO
1343 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
1344
ef1669ff 1345 ret = check_key_has_snapshot(trans, iter, k);
fa8e94fa
KO
1346 if (ret) {
1347 ret = ret < 0 ? ret : 0;
1348 goto out;
1349 }
ef1669ff 1350
f9f52bc4 1351 if (inode->last_pos.inode != k.k->p.inode) {
ef1669ff
KO
1352 ret = check_i_sectors(trans, inode);
1353 if (ret)
fa8e94fa 1354 goto err;
ef1669ff 1355 }
292dea86 1356
a57f4d61
KO
1357 i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout);
1358 ret = PTR_ERR_OR_ZERO(i);
1359 if (ret)
1360 goto err;
1361
650eb16b 1362 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
c58029ec
DH
1363 if (ret)
1364 goto err;
ef1669ff 1365
650eb16b
KO
1366 if (k.k->type != KEY_TYPE_whiteout) {
1367 if (fsck_err_on(!i, c,
1368 "extent in missing inode:\n %s",
1369 (printbuf_reset(&buf),
1370 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
1371 goto delete;
1372
1373 if (fsck_err_on(i &&
1374 !S_ISREG(i->inode.bi_mode) &&
1375 !S_ISLNK(i->inode.bi_mode), c,
1376 "extent in non regular inode mode %o:\n %s",
1377 i->inode.bi_mode,
1378 (printbuf_reset(&buf),
1379 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
1380 goto delete;
ef1669ff 1381
a397b8df
KO
1382 ret = check_overlapping_extents(trans, s, extent_ends, k,
1383 equiv.snapshot, iter);
650eb16b
KO
1384 if (ret < 0)
1385 goto err;
1386
1387 if (ret)
1388 inode->recalculate_sums = true;
a397b8df 1389 ret = 0;
650eb16b 1390 }
ef1669ff 1391
0d06b4ec 1392 /*
a0076086
KO
1393 * Check inodes in reverse order, from oldest snapshots to newest,
1394 * starting from the inode that matches this extent's snapshot. If we
1395 * didn't have one, iterate over all inodes:
0d06b4ec 1396 */
a0076086
KO
1397 if (!i)
1398 i = inode->inodes.data + inode->inodes.nr - 1;
1399
1400 for (;
650eb16b 1401 inode->inodes.data && i >= inode->inodes.data;
0d06b4ec
KO
1402 --i) {
1403 if (i->snapshot > equiv.snapshot ||
1404 !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot))
1405 continue;
1406
650eb16b
KO
1407 if (k.k->type != KEY_TYPE_whiteout) {
1408 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
1409 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
1410 !bkey_extent_is_reservation(k), c,
1411 "extent type past end of inode %llu:%u, i_size %llu\n %s",
1412 i->inode.bi_inum, i->snapshot, i->inode.bi_size,
1413 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1414 struct btree_iter iter2;
1415
1416 bch2_trans_copy_iter(&iter2, iter);
1417 bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
1418 ret = bch2_btree_iter_traverse(&iter2) ?:
1419 bch2_btree_delete_at(trans, &iter2,
1420 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1421 bch2_trans_iter_exit(trans, &iter2);
0d06b4ec
KO
1422 if (ret)
1423 goto err;
650eb16b 1424
650eb16b 1425 iter->k.type = KEY_TYPE_whiteout;
ef1669ff 1426 }
a0076086
KO
1427
1428 if (bkey_extent_is_allocation(k.k))
1429 i->count += k.k->size;
ef1669ff 1430 }
8a85b20c 1431
a0076086
KO
1432 i->seen_this_pos = true;
1433 }
fa8e94fa
KO
1434out:
1435err:
ef1669ff 1436fsck_err:
fa8e94fa 1437 printbuf_exit(&buf);
e492e7b6 1438
549d173c 1439 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
1bb3c2a9 1440 bch_err_fn(c, ret);
8a85b20c 1441 return ret;
0d8f320d
KO
1442delete:
1443 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
1444 goto out;
8a85b20c
KO
1445}
1446
1c6fdbd8
KO
1447/*
1448 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
1449 * that i_size and i_sectors are consistent
1450 */
067d228b 1451int bch2_check_extents(struct bch_fs *c)
1c6fdbd8
KO
1452{
1453 struct inode_walker w = inode_walker_init();
ef1669ff 1454 struct snapshots_seen s;
424eb881 1455 struct btree_trans trans;
67e0dd8f 1456 struct btree_iter iter;
a1783320 1457 struct bkey_s_c k;
a397b8df 1458 struct extent_ends extent_ends;
e3dc75eb 1459 struct disk_reservation res = { 0 };
1c6fdbd8
KO
1460 int ret = 0;
1461
ef1669ff 1462 snapshots_seen_init(&s);
a397b8df 1463 extent_ends_init(&extent_ends);
20bceecb 1464 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 1465
a1783320
KO
1466 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents,
1467 POS(BCACHEFS_ROOT_INO, 0),
1468 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
e3dc75eb
KO
1469 &res, NULL,
1470 BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
1471 bch2_disk_reservation_put(c, &res);
1472 check_extent(&trans, &iter, k, &w, &s, &extent_ends);
5897505e
KO
1473 })) ?:
1474 check_i_sectors(&trans, &w);
c58029ec 1475
e3dc75eb 1476 bch2_disk_reservation_put(c, &res);
a397b8df 1477 extent_ends_exit(&extent_ends);
ef1669ff
KO
1478 inode_walker_exit(&w);
1479 bch2_trans_exit(&trans);
1480 snapshots_seen_exit(&s);
1481
e492e7b6 1482 if (ret)
1bb3c2a9 1483 bch_err_fn(c, ret);
ef1669ff
KO
1484 return ret;
1485}
1486
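/*
 * Called when we move on to a new directory while walking dirents: compare
 * the number of subdirectories counted for each snapshot version of the
 * directory against bi_nlink and repair any mismatches.
 */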
1487static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
1488{
1489 struct bch_fs *c = trans->c;
1490 struct inode_walker_entry *i;
0763c552
KO
1491 u32 restart_count = trans->restart_count;
1492 int ret = 0;
ef1669ff
KO
1493 s64 count2;
1494
91d961ba 1495 darray_for_each(w->inodes, i) {
ef1669ff
KO
1496 if (i->inode.bi_nlink == i->count)
1497 continue;
1498
f9f52bc4 1499 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
9e343161
KO
1500 if (count2 < 0)
1501 return count2;
ef1669ff
KO
1502
1503 if (i->count != count2) {
1504 bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
1505 i->count, count2);
1506 i->count = count2;
1507 if (i->inode.bi_nlink == i->count)
1508 continue;
1509 }
1510
1511 if (fsck_err_on(i->inode.bi_nlink != i->count, c,
1512 "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
f9f52bc4 1513 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
ef1669ff
KO
1514 i->inode.bi_nlink = i->count;
1515 ret = write_inode(trans, &i->inode, i->snapshot);
abcecb49
KO
1516 if (ret)
1517 break;
1518 }
ef1669ff
KO
1519 }
1520fsck_err:
12043cf1 1521 if (ret)
1bb3c2a9 1522 bch_err_fn(c, ret);
12043cf1
KO
1523 if (!ret && trans_was_restarted(trans, restart_count))
1524 ret = -BCH_ERR_transaction_restart_nested;
1525 return ret;
ef1669ff 1526}
abcecb49 1527
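/*
 * Check that dirent @d and the inode it points to agree: fix missing or
 * incorrect inode backpointers, directories with multiple links, wrong
 * d_type, and (for subvolume dirents) a wrong d_parent_subvol field.
 */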
1528static int check_dirent_target(struct btree_trans *trans,
1529 struct btree_iter *iter,
1530 struct bkey_s_c_dirent d,
1531 struct bch_inode_unpacked *target,
1532 u32 target_snapshot)
1533{
1534 struct bch_fs *c = trans->c;
285b181a 1535 struct bkey_i_dirent *n;
ef1669ff 1536 bool backpointer_exists = true;
fa8e94fa 1537 struct printbuf buf = PRINTBUF;
ef1669ff
KO
1538 int ret = 0;
1539
1540 if (!target->bi_dir &&
1541 !target->bi_dir_offset) {
1542 target->bi_dir = d.k->p.inode;
1543 target->bi_dir_offset = d.k->p.offset;
1544
285b181a 1545 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1546 if (ret)
1547 goto err;
1548 }
1549
4db65027 1550 if (!inode_points_to_dirent(target, d)) {
ef1669ff
KO
1551 ret = inode_backpointer_exists(trans, target, d.k->p.snapshot);
1552 if (ret < 0)
1553 goto err;
e3e464ac 1554
ef1669ff
KO
1555 backpointer_exists = ret;
1556 ret = 0;
e3e464ac 1557
ef1669ff
KO
1558 if (fsck_err_on(S_ISDIR(target->bi_mode) &&
1559 backpointer_exists, c,
1560 "directory %llu with multiple links",
1561 target->bi_inum)) {
285b181a 1562 ret = __remove_dirent(trans, d.k->p);
fa8e94fa 1563 goto out;
e3e464ac 1564 }
e3e464ac 1565
ef1669ff
KO
1566 if (fsck_err_on(backpointer_exists &&
1567 !target->bi_nlink, c,
0095aa94
KO
1568 "inode %llu type %s has multiple links but i_nlink 0",
1569 target->bi_inum, bch2_d_types[d.v->d_type])) {
ef1669ff
KO
1570 target->bi_nlink++;
1571 target->bi_flags &= ~BCH_INODE_UNLINKED;
1c6fdbd8 1572
285b181a 1573 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1574 if (ret)
1575 goto err;
1c6fdbd8
KO
1576 }
1577
ef1669ff 1578 if (fsck_err_on(!backpointer_exists, c,
6e0c886d 1579 "inode %llu:%u has wrong backpointer:\n"
ef1669ff
KO
1580 "got %llu:%llu\n"
1581 "should be %llu:%llu",
6e0c886d 1582 target->bi_inum, target_snapshot,
ef1669ff
KO
1583 target->bi_dir,
1584 target->bi_dir_offset,
1585 d.k->p.inode,
1586 d.k->p.offset)) {
1587 target->bi_dir = d.k->p.inode;
1588 target->bi_dir_offset = d.k->p.offset;
1589
285b181a 1590 ret = __write_inode(trans, target, target_snapshot);
ef1669ff
KO
1591 if (ret)
1592 goto err;
1c6fdbd8 1593 }
ef1669ff 1594 }
1c6fdbd8 1595
285b181a
KO
1596 if (fsck_err_on(d.v->d_type != inode_d_type(target), c,
1597 "incorrect d_type: got %s, should be %s:\n%s",
1598 bch2_d_type_str(d.v->d_type),
1599 bch2_d_type_str(inode_d_type(target)),
fa8e94fa
KO
1600 (printbuf_reset(&buf),
1601 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
285b181a
KO
1602 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
1603 ret = PTR_ERR_OR_ZERO(n);
1604 if (ret)
fa8e94fa 1605 goto err;
ef1669ff
KO
1606
1607 bkey_reassemble(&n->k_i, d.s_c);
285b181a 1608 n->v.d_type = inode_d_type(target);
ef1669ff 1609
285b181a
KO
1610 ret = bch2_trans_update(trans, iter, &n->k_i, 0);
1611 if (ret)
fa8e94fa 1612 goto err;
4db65027 1613
285b181a 1614 d = dirent_i_to_s_c(n);
4db65027
KO
1615 }
1616
1617 if (d.v->d_type == DT_SUBVOL &&
1618 target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) &&
1619 (c->sb.version < bcachefs_metadata_version_subvol_dirent ||
1620 fsck_err(c, "dirent has wrong d_parent_subvol field: got %u, should be %u",
1621 le32_to_cpu(d.v->d_parent_subvol),
1622 target->bi_parent_subvol))) {
285b181a
KO
1623 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
1624 ret = PTR_ERR_OR_ZERO(n);
1625 if (ret)
fa8e94fa 1626 goto err;
4db65027
KO
1627
1628 bkey_reassemble(&n->k_i, d.s_c);
1629 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
1630
285b181a
KO
1631 ret = bch2_trans_update(trans, iter, &n->k_i, 0);
1632 if (ret)
fa8e94fa 1633 goto err;
4db65027 1634
285b181a 1635 d = dirent_i_to_s_c(n);
1c6fdbd8 1636 }
fa8e94fa 1637out:
ef1669ff 1638err:
1c6fdbd8 1639fsck_err:
fa8e94fa 1640 printbuf_exit(&buf);
e492e7b6 1641
549d173c 1642 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
1bb3c2a9 1643 bch_err_fn(c, ret);
ef1669ff 1644 return ret;
1c6fdbd8
KO
1645}
1646
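/*
 * Check one dirent key: verify its snapshot and hash, that it lives in an
 * existing directory, and that its target (an inode, or a subvolume root for
 * DT_SUBVOL dirents) exists and points back at this dirent; also accumulate
 * the subdirectory counts used by check_subdir_count().
 */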
914f2786 1647static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
a1783320 1648 struct bkey_s_c k,
914f2786 1649 struct bch_hash_info *hash_info,
ef1669ff
KO
1650 struct inode_walker *dir,
1651 struct inode_walker *target,
1652 struct snapshots_seen *s)
1c6fdbd8 1653{
914f2786 1654 struct bch_fs *c = trans->c;
914f2786 1655 struct bkey_s_c_dirent d;
ef1669ff 1656 struct inode_walker_entry *i;
fa8e94fa 1657 struct printbuf buf = PRINTBUF;
49124d8a 1658 struct bpos equiv;
fa8e94fa 1659 int ret = 0;
d69f41d6 1660
ef1669ff 1661 ret = check_key_has_snapshot(trans, iter, k);
fa8e94fa
KO
1662 if (ret) {
1663 ret = ret < 0 ? ret : 0;
1664 goto out;
1665 }
1c6fdbd8 1666
49124d8a
KO
1667 equiv = k.k->p;
1668 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
1669
1670 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
914f2786 1671 if (ret)
fa8e94fa 1672 goto err;
8a85b20c 1673
ef1669ff 1674 if (k.k->type == KEY_TYPE_whiteout)
fa8e94fa 1675 goto out;
ef1669ff 1676
f9f52bc4 1677 if (dir->last_pos.inode != k.k->p.inode) {
ef1669ff
KO
1678 ret = check_subdir_count(trans, dir);
1679 if (ret)
fa8e94fa 1680 goto err;
ef1669ff
KO
1681 }
1682
0763c552 1683 BUG_ON(!iter->path->should_be_locked);
292dea86 1684
a57f4d61 1685 i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout);
06dcca51 1686 ret = PTR_ERR_OR_ZERO(i);
ef1669ff 1687 if (ret < 0)
fa8e94fa 1688 goto err;
1c6fdbd8 1689
c8d5b714
KO
1690 if (dir->first_this_inode)
1691 *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
1692 dir->first_this_inode = false;
1693
06dcca51 1694 if (fsck_err_on(!i, c,
914f2786 1695 "dirent in nonexisting directory:\n%s",
fa8e94fa
KO
1696 (printbuf_reset(&buf),
1697 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1698 ret = bch2_btree_delete_at(trans, iter,
285b181a 1699 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
fa8e94fa
KO
1700 goto out;
1701 }
ef1669ff 1702
06dcca51 1703 if (!i)
fa8e94fa 1704 goto out;
ef1669ff
KO
1705
1706 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c,
285b181a
KO
1707 "dirent in non directory inode type %s:\n%s",
1708 bch2_d_type_str(inode_d_type(&i->inode)),
fa8e94fa
KO
1709 (printbuf_reset(&buf),
1710 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1711 ret = bch2_btree_delete_at(trans, iter, 0);
1712 goto out;
1713 }
8a85b20c 1714
c8d5b714 1715 ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k);
914f2786 1716 if (ret < 0)
fa8e94fa
KO
1717 goto err;
1718 if (ret) {
1719 /* dirent has been deleted */
1720 ret = 0;
1721 goto out;
1722 }
7ac2c55e 1723
914f2786 1724 if (k.k->type != KEY_TYPE_dirent)
fa8e94fa 1725 goto out;
1c6fdbd8 1726
914f2786 1727 d = bkey_s_c_to_dirent(k);
1c6fdbd8 1728
4db65027
KO
1729 if (d.v->d_type == DT_SUBVOL) {
1730 struct bch_inode_unpacked subvol_root;
1731 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
1732 u32 target_snapshot;
1733 u64 target_inum;
b9e1adf5 1734
4db65027
KO
1735 ret = __subvol_lookup(trans, target_subvol,
1736 &target_snapshot, &target_inum);
e47a390a 1737 if (ret && !bch2_err_matches(ret, ENOENT))
fa8e94fa 1738 goto err;
1c6fdbd8 1739
4db65027 1740 if (fsck_err_on(ret, c,
73bd774d
KO
1741 "dirent points to missing subvolume %u",
1742 le32_to_cpu(d.v->d_child_subvol))) {
fa8e94fa
KO
1743 ret = __remove_dirent(trans, d.k->p);
1744 goto err;
1745 }
914f2786 1746
ef1669ff
KO
1747 ret = __lookup_inode(trans, target_inum,
1748 &subvol_root, &target_snapshot);
e47a390a 1749 if (ret && !bch2_err_matches(ret, ENOENT))
fa8e94fa 1750 goto err;
914f2786 1751
ef1669ff
KO
1752 if (fsck_err_on(ret, c,
1753 "subvolume %u points to missing subvolume root %llu",
1754 target_subvol,
1755 target_inum)) {
1756 bch_err(c, "repair not implemented yet");
fa8e94fa
KO
1757 ret = -EINVAL;
1758 goto err;
ef1669ff 1759 }
914f2786 1760
ef1669ff
KO
1761 if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c,
1762 "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
1763 target_inum,
1764 subvol_root.bi_subvol, target_subvol)) {
1765 subvol_root.bi_subvol = target_subvol;
285b181a 1766 ret = __write_inode(trans, &subvol_root, target_snapshot);
ef1669ff 1767 if (ret)
fa8e94fa 1768 goto err;
ef1669ff 1769 }
914f2786 1770
ef1669ff
KO
1771 ret = check_dirent_target(trans, iter, d, &subvol_root,
1772 target_snapshot);
914f2786 1773 if (ret)
fa8e94fa 1774 goto err;
ef1669ff 1775 } else {
4db65027 1776 ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
ef1669ff 1777 if (ret)
fa8e94fa 1778 goto err;
1c6fdbd8 1779
91d961ba 1780 if (fsck_err_on(!target->inodes.nr, c,
49124d8a
KO
1781 "dirent points to missing inode: (equiv %u)\n%s",
1782 equiv.snapshot,
fa8e94fa
KO
1783 (printbuf_reset(&buf),
1784 bch2_bkey_val_to_text(&buf, c, k),
1785 buf.buf))) {
285b181a 1786 ret = __remove_dirent(trans, d.k->p);
ef1669ff 1787 if (ret)
fa8e94fa 1788 goto err;
1c6fdbd8
KO
1789 }
1790
91d961ba 1791 darray_for_each(target->inodes, i) {
ef1669ff
KO
1792 ret = check_dirent_target(trans, iter, d,
1793 &i->inode, i->snapshot);
1794 if (ret)
fa8e94fa 1795 goto err;
d3ff7fec 1796 }
914f2786 1797 }
d3ff7fec 1798
ef1669ff 1799 if (d.v->d_type == DT_DIR)
49124d8a 1800 for_each_visible_inode(c, s, dir, equiv.snapshot, i)
ef1669ff 1801 i->count++;
1c6fdbd8 1802
fa8e94fa
KO
1803out:
1804err:
914f2786 1805fsck_err:
fa8e94fa 1806 printbuf_exit(&buf);
e492e7b6 1807
549d173c 1808 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
1bb3c2a9 1809 bch_err_fn(c, ret);
914f2786
KO
1810 return ret;
1811}
1c6fdbd8 1812
914f2786
KO
1813/*
1814 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
1815 * validate d_type
1816 */
067d228b 1817int bch2_check_dirents(struct bch_fs *c)
914f2786 1818{
ef1669ff
KO
1819 struct inode_walker dir = inode_walker_init();
1820 struct inode_walker target = inode_walker_init();
1821 struct snapshots_seen s;
914f2786
KO
1822 struct bch_hash_info hash_info;
1823 struct btree_trans trans;
67e0dd8f 1824 struct btree_iter iter;
a1783320 1825 struct bkey_s_c k;
914f2786 1826 int ret = 0;
1c6fdbd8 1827
ef1669ff 1828 snapshots_seen_init(&s);
914f2786 1829 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1c6fdbd8 1830
a1783320
KO
1831 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_dirents,
1832 POS(BCACHEFS_ROOT_INO, 0),
1833 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
1834 k,
1835 NULL, NULL,
1836 BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
1837 check_dirent(&trans, &iter, k, &hash_info, &dir, &target, &s));
914f2786 1838
ef1669ff
KO
1839 bch2_trans_exit(&trans);
1840 snapshots_seen_exit(&s);
1841 inode_walker_exit(&dir);
1842 inode_walker_exit(&target);
e492e7b6
KO
1843
1844 if (ret)
1bb3c2a9 1845 bch_err_fn(c, ret);
ef1669ff 1846 return ret;
1c6fdbd8
KO
1847}
1848
285b181a 1849static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
a1783320 1850 struct bkey_s_c k,
285b181a
KO
1851 struct bch_hash_info *hash_info,
1852 struct inode_walker *inode)
1853{
1854 struct bch_fs *c = trans->c;
06dcca51 1855 struct inode_walker_entry *i;
285b181a
KO
1856 int ret;
1857
285b181a
KO
1858 ret = check_key_has_snapshot(trans, iter, k);
1859 if (ret)
1860 return ret;
1861
a57f4d61 1862 i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout);
06dcca51
KO
1863 ret = PTR_ERR_OR_ZERO(i);
1864 if (ret)
285b181a
KO
1865 return ret;
1866
c8d5b714
KO
1867 if (inode->first_this_inode)
1868 *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode);
1869 inode->first_this_inode = false;
1870
06dcca51 1871 if (fsck_err_on(!i, c,
285b181a
KO
1872 "xattr for missing inode %llu",
1873 k.k->p.inode))
1874 return bch2_btree_delete_at(trans, iter, 0);
1875
06dcca51 1876 if (!i)
285b181a
KO
1877 return 0;
1878
285b181a
KO
1879 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
1880fsck_err:
549d173c 1881 if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
1bb3c2a9 1882 bch_err_fn(c, ret);
285b181a
KO
1883 return ret;
1884}
1885
1c6fdbd8
KO
1886/*
1887 * Walk xattrs: verify that they all have a corresponding inode
1888 */
067d228b 1889int bch2_check_xattrs(struct bch_fs *c)
1c6fdbd8 1890{
285b181a 1891 struct inode_walker inode = inode_walker_init();
7ac2c55e 1892 struct bch_hash_info hash_info;
d69f41d6 1893 struct btree_trans trans;
67e0dd8f 1894 struct btree_iter iter;
a1783320 1895 struct bkey_s_c k;
1c6fdbd8
KO
1896 int ret = 0;
1897
20bceecb 1898 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
d69f41d6 1899
a1783320
KO
1900 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
1901 POS(BCACHEFS_ROOT_INO, 0),
1902 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
1903 k,
1904 NULL, NULL,
1905 BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
1906 check_xattr(&trans, &iter, k, &hash_info, &inode));
285b181a 1907
9a796fdb 1908 bch2_trans_exit(&trans);
e492e7b6
KO
1909
1910 if (ret)
1bb3c2a9 1911 bch_err_fn(c, ret);
9a796fdb 1912 return ret;
1c6fdbd8
KO
1913}
1914
285b181a 1915static int check_root_trans(struct btree_trans *trans)
1c6fdbd8 1916{
285b181a 1917 struct bch_fs *c = trans->c;
ef1669ff 1918 struct bch_inode_unpacked root_inode;
d3ff7fec 1919 u32 snapshot;
ef1669ff 1920 u64 inum;
1c6fdbd8
KO
1921 int ret;
1922
285b181a 1923 ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
e47a390a 1924 if (ret && !bch2_err_matches(ret, ENOENT))
1c6fdbd8
KO
1925 return ret;
1926
ef1669ff
KO
1927 if (mustfix_fsck_err_on(ret, c, "root subvol missing")) {
1928 struct bkey_i_subvolume root_subvol;
1c6fdbd8 1929
ef1669ff
KO
1930 snapshot = U32_MAX;
1931 inum = BCACHEFS_ROOT_INO;
1c6fdbd8 1932
ef1669ff
KO
1933 bkey_subvolume_init(&root_subvol.k_i);
1934 root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
1935 root_subvol.v.flags = 0;
1936 root_subvol.v.snapshot = cpu_to_le32(snapshot);
1937 root_subvol.v.inode = cpu_to_le64(inum);
e68914ca 1938 ret = commit_do(trans, NULL, NULL,
ef1669ff
KO
1939 BTREE_INSERT_NOFAIL|
1940 BTREE_INSERT_LAZY_RW,
8ffa11a2
DH
1941 __bch2_btree_insert(trans, BTREE_ID_subvolumes,
1942 &root_subvol.k_i, 0));
ef1669ff 1943 if (ret) {
d4bf5eec 1944 bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
ef1669ff
KO
1945 goto err;
1946 }
1947
1948 }
1949
285b181a 1950 ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
e47a390a 1951 if (ret && !bch2_err_matches(ret, ENOENT))
ef1669ff 1952 return ret;
1c6fdbd8 1953
ef1669ff
KO
1954 if (mustfix_fsck_err_on(ret, c, "root directory missing") ||
1955 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c,
1956 "root inode not a directory")) {
1957 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
1958 0, NULL);
1959 root_inode.bi_inum = inum;
1c6fdbd8 1960
285b181a 1961 ret = __write_inode(trans, &root_inode, snapshot);
ef1669ff 1962 if (ret)
d4bf5eec 1963 bch_err(c, "error writing root inode: %s", bch2_err_str(ret));
ef1669ff
KO
1964 }
1965err:
1966fsck_err:
ef1669ff 1967 return ret;
1c6fdbd8
KO
1968}
1969
285b181a 1970/* Get root directory, create if it doesn't exist: */
067d228b 1971int bch2_check_root(struct bch_fs *c)
285b181a 1972{
1bb3c2a9
KO
1973 int ret;
1974
1bb3c2a9 1975 ret = bch2_trans_do(c, NULL, NULL,
285b181a
KO
1976 BTREE_INSERT_NOFAIL|
1977 BTREE_INSERT_LAZY_RW,
1978 check_root_trans(&trans));
1bb3c2a9
KO
1979
1980 if (ret)
1981 bch_err_fn(c, ret);
1982 return ret;
285b181a
KO
1983}
1984
91d961ba
KO
1985struct pathbuf_entry {
1986 u64 inum;
1987 u32 snapshot;
1c6fdbd8
KO
1988};
1989
91d961ba
KO
1990typedef DARRAY(struct pathbuf_entry) pathbuf;
1991
1992static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
6e0c886d
KO
1993{
1994 struct pathbuf_entry *i;
1995
91d961ba 1996 darray_for_each(*p, i)
6e0c886d
KO
1997 if (i->inum == inum &&
1998 i->snapshot == snapshot)
1999 return true;
2000
2001 return false;
2002}
2003
91d961ba 2004static int path_down(struct bch_fs *c, pathbuf *p,
f0f41a6d 2005 u64 inum, u32 snapshot)
1c6fdbd8 2006{
91d961ba 2007 int ret = darray_push(p, ((struct pathbuf_entry) {
6e0c886d
KO
2008 .inum = inum,
2009 .snapshot = snapshot,
91d961ba
KO
2010 }));
2011
2012 if (ret)
2013 bch_err(c, "fsck: error allocating memory for pathbuf, size %zu",
2014 p->size);
2015 return ret;
1c6fdbd8
KO
2016}
2017
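/*
 * Minimal sketch of how the pathbuf helpers fit together; is_root() and
 * parent_of() are hypothetical stand-ins for the root check and the bi_dir
 * backpointer walk that check_path() below actually performs:
 *
 *	pathbuf path = { 0, };
 *
 *	while (!is_root(inum, snapshot)) {
 *		if (path_is_dup(&path, inum, snapshot))
 *			break;			(directory structure loop)
 *		if (path_down(c, &path, inum, snapshot))
 *			break;			(allocation failure)
 *		parent_of(&inum, &snapshot);
 *	}
 *	darray_exit(&path);
 */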
6e0c886d
KO
2018/*
2019 * Check that a given inode is reachable from the root:
2020 *
2021 * XXX: we should also be verifying that inodes are in the right subvolumes
2022 */
d3ff7fec 2023static int check_path(struct btree_trans *trans,
91d961ba 2024 pathbuf *p,
81ed9ce3
KO
2025 struct bch_inode_unpacked *inode,
2026 u32 snapshot)
1c6fdbd8 2027{
d3ff7fec 2028 struct bch_fs *c = trans->c;
1c6fdbd8
KO
2029 int ret = 0;
2030
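	/* fsck works in terms of snapshot equivalence classes: */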
49124d8a 2031 snapshot = bch2_snapshot_equiv(c, snapshot);
d3ff7fec 2032 p->nr = 0;
424eb881 2033
488f9776
KO
2034 while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
2035 inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
4db65027
KO
2036 struct btree_iter dirent_iter;
2037 struct bkey_s_c_dirent d;
6e0c886d
KO
2038 u32 parent_snapshot = snapshot;
2039
4db65027 2040 if (inode->bi_subvol) {
488f9776
KO
2041 u64 inum;
2042
2043 ret = subvol_lookup(trans, inode->bi_parent_subvol,
6e0c886d 2044 &parent_snapshot, &inum);
488f9776
KO
2045 if (ret)
2046 break;
2047 }
2048
d3ff7fec 2049 ret = lockrestart_do(trans,
4db65027
KO
2050 PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter,
2051 SPOS(inode->bi_dir, inode->bi_dir_offset,
2052 parent_snapshot))).k));
e47a390a 2053 if (ret && !bch2_err_matches(ret, ENOENT))
d3ff7fec 2054 break;
1c6fdbd8 2055
4db65027
KO
2056 if (!ret && !dirent_points_to_inode(d, inode)) {
2057 bch2_trans_iter_exit(trans, &dirent_iter);
e47a390a 2058 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
4db65027
KO
2059 }
2060
e47a390a 2061 if (bch2_err_matches(ret, ENOENT)) {
285b181a 2062 if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
ef1669ff 2063 inode->bi_inum, snapshot,
285b181a 2064 bch2_d_type_str(inode_d_type(inode)),
d3ff7fec
KO
2065 inode->bi_nlink,
2066 inode->bi_dir,
2067 inode->bi_dir_offset))
81ed9ce3 2068 ret = reattach_inode(trans, inode, snapshot);
d3ff7fec
KO
2069 break;
2070 }
4db65027
KO
2071
2072 bch2_trans_iter_exit(trans, &dirent_iter);
1c6fdbd8 2073
d3ff7fec
KO
2074 if (!S_ISDIR(inode->bi_mode))
2075 break;
1c6fdbd8 2076
f0f41a6d 2077 ret = path_down(c, p, inode->bi_inum, snapshot);
d3ff7fec
KO
2078 if (ret) {
2079 bch_err(c, "memory allocation failure");
2080 return ret;
2081 }
1c6fdbd8 2082
6e0c886d
KO
2083 snapshot = parent_snapshot;
2084
2085 ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
2086 if (ret) {
2087 /* Should have been caught in dirents pass */
2088 bch_err(c, "error looking up parent directory: %i", ret);
2089 break;
2090 }
2091
2092 if (path_is_dup(p, inode->bi_inum, snapshot)) {
2093 struct pathbuf_entry *i;
1c6fdbd8 2094
d3ff7fec 2095 /* XXX print path */
6e0c886d
KO
2096 bch_err(c, "directory structure loop");
2097
91d961ba 2098 darray_for_each(*p, i)
6e0c886d
KO
2099 pr_err("%llu:%u", i->inum, i->snapshot);
2100 pr_err("%llu:%u", inode->bi_inum, snapshot);
2101
d3ff7fec
KO
2102 if (!fsck_err(c, "directory structure loop"))
2103 return 0;
1c6fdbd8 2104
e68914ca 2105 ret = commit_do(trans, NULL, NULL,
285b181a
KO
2106 BTREE_INSERT_NOFAIL|
2107 BTREE_INSERT_LAZY_RW,
81ed9ce3 2108 remove_backpointer(trans, inode));
1c6fdbd8 2109 if (ret) {
d3ff7fec
KO
2110 bch_err(c, "error removing dirent: %i", ret);
2111 break;
1c6fdbd8
KO
2112 }
2113
81ed9ce3 2114 ret = reattach_inode(trans, inode, snapshot);
1c6fdbd8 2115 }
1c6fdbd8 2116 }
d3ff7fec
KO
2117fsck_err:
2118 if (ret)
1bb3c2a9 2119 bch_err_fn(c, ret);
d3ff7fec
KO
2120 return ret;
2121}
1c6fdbd8 2122
d3ff7fec
KO
2123/*
2124 * Check for unreachable inodes, as well as loops in the directory structure:
067d228b 2125 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's
d3ff7fec
KO
2126 * unreachable:
2127 */
067d228b 2128int bch2_check_directory_structure(struct bch_fs *c)
d3ff7fec
KO
2129{
2130 struct btree_trans trans;
67e0dd8f 2131 struct btree_iter iter;
d3ff7fec
KO
2132 struct bkey_s_c k;
2133 struct bch_inode_unpacked u;
91d961ba 2134 pathbuf path = { 0, };
d3ff7fec 2135 int ret;
1c6fdbd8 2136
d3ff7fec 2137 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
1c6fdbd8 2138
909004d2
KO
2139 for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN,
2140 BTREE_ITER_INTENT|
ef1669ff
KO
2141 BTREE_ITER_PREFETCH|
2142 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
3e52c222 2143 if (!bkey_is_inode(k.k))
1c6fdbd8
KO
2144 continue;
2145
3e52c222 2146 ret = bch2_inode_unpack(k, &u);
d3ff7fec
KO
2147 if (ret) {
2148 /* Should have been caught earlier in fsck: */
2149 bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret);
2150 break;
1c6fdbd8 2151 }
1c6fdbd8 2152
ef1669ff
KO
2153 if (u.bi_flags & BCH_INODE_UNLINKED)
2154 continue;
2155
81ed9ce3 2156 ret = check_path(&trans, &path, &u, iter.pos.snapshot);
d3ff7fec
KO
2157 if (ret)
2158 break;
1c6fdbd8 2159 }
67e0dd8f 2160 bch2_trans_iter_exit(&trans, &iter);
1bb3c2a9 2161 bch2_trans_exit(&trans);
91d961ba 2162 darray_exit(&path);
f24fab9c 2163
1bb3c2a9
KO
2164 if (ret)
2165 bch_err_fn(c, ret);
9a796fdb 2166 return ret;
1c6fdbd8
KO
2167}
2168
fc51b041
KO
2169struct nlink_table {
2170 size_t nr;
2171 size_t size;
1c6fdbd8 2172
fc51b041
KO
2173 struct nlink {
2174 u64 inum;
2175 u32 snapshot;
2176 u32 count;
2177 } *d;
2178};
1c6fdbd8 2179
f0f41a6d
KO
2180static int add_nlink(struct bch_fs *c, struct nlink_table *t,
2181 u64 inum, u32 snapshot)
1c6fdbd8 2182{
fc51b041
KO
2183 if (t->nr == t->size) {
2184 size_t new_size = max_t(size_t, 128UL, t->size * 2);
3e3e02e6
KO
2185 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
2186
fc51b041 2187 if (!d) {
f0f41a6d
KO
2188 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
2189 new_size);
65d48e35 2190 return -BCH_ERR_ENOMEM_fsck_add_nlink;
fc51b041 2191 }
1c6fdbd8 2192
82355e28
KO
2193 if (t->d)
2194 memcpy(d, t->d, t->size * sizeof(t->d[0]));
fc51b041 2195 kvfree(t->d);
1c6fdbd8 2196
fc51b041
KO
2197 t->d = d;
2198 t->size = new_size;
2bb748a6
KO
2199 }
2200
fc51b041
KO
2201
2202 t->d[t->nr++] = (struct nlink) {
2203 .inum = inum,
2204 .snapshot = snapshot,
2205 };
2206
2207 return 0;
2208}
2209
2210static int nlink_cmp(const void *_l, const void *_r)
2211{
2212 const struct nlink *l = _l;
2213 const struct nlink *r = _r;
2214
2215 return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
2216}
2217
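/*
 * A dirent in @snapshot references inode @inum: if @inum falls in the range
 * covered by the current table, find its entries (the table is sorted by
 * inum, then snapshot), rewind to the first one for this inum, and bump the
 * count of every entry the reference is visible in:
 */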
ef1669ff
KO
2218static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
2219 struct nlink_table *links,
2220 u64 range_start, u64 range_end, u64 inum, u32 snapshot)
fc51b041
KO
2221{
2222 struct nlink *link, key = {
2223 .inum = inum, .snapshot = U32_MAX,
2224 };
2225
2226 if (inum < range_start || inum >= range_end)
1c6fdbd8 2227 return;
fc51b041
KO
2228
2229 link = __inline_bsearch(&key, links->d, links->nr,
2230 sizeof(links->d[0]), nlink_cmp);
ef1669ff
KO
2231 if (!link)
2232 return;
2233
2234 while (link > links->d && link[0].inum == link[-1].inum)
2235 --link;
2236
2237 for (; link < links->d + links->nr && link->inum == inum; link++)
2238 if (ref_visible(c, s, snapshot, link->snapshot)) {
2239 link->count++;
2240 if (link->snapshot >= snapshot)
2241 break;
2242 }
fc51b041
KO
2243}
2244
2245noinline_for_stack
2246static int check_nlinks_find_hardlinks(struct bch_fs *c,
2247 struct nlink_table *t,
2248 u64 start, u64 *end)
2249{
2250 struct btree_trans trans;
67e0dd8f 2251 struct btree_iter iter;
fc51b041 2252 struct bkey_s_c k;
fc51b041
KO
2253 struct bch_inode_unpacked u;
2254 int ret = 0;
2255
2256 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
2257
2258 for_each_btree_key(&trans, iter, BTREE_ID_inodes,
909004d2
KO
2259 POS(0, start),
2260 BTREE_ITER_INTENT|
ef1669ff
KO
2261 BTREE_ITER_PREFETCH|
2262 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
3e52c222 2263 if (!bkey_is_inode(k.k))
fc51b041
KO
2264 continue;
2265
3e52c222
KO
2266 /* Should never fail, checked by bch2_inode_invalid: */
2267 BUG_ON(bch2_inode_unpack(k, &u));
fc51b041
KO
2268
2269 /*
2270 * Backpointer and directory structure checks are sufficient for
2271 * directories, since they can't have hardlinks:
2272 */
73bd774d 2273 if (S_ISDIR(u.bi_mode))
fc51b041
KO
2274 continue;
2275
fc51b041
KO
2276 if (!u.bi_nlink)
2277 continue;
2278
f0f41a6d 2279 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
fc51b041
KO
2280 if (ret) {
2281 *end = k.k->p.offset;
2282 ret = 0;
2283 break;
2284 }
2285
1c6fdbd8 2286 }
67e0dd8f 2287 bch2_trans_iter_exit(&trans, &iter);
fc51b041
KO
2288 bch2_trans_exit(&trans);
2289
2290 if (ret)
2291 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
1c6fdbd8 2292
fc51b041 2293 return ret;
1c6fdbd8
KO
2294}
2295
2296noinline_for_stack
fc51b041
KO
2297static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
2298 u64 range_start, u64 range_end)
1c6fdbd8 2299{
424eb881 2300 struct btree_trans trans;
ef1669ff 2301 struct snapshots_seen s;
67e0dd8f 2302 struct btree_iter iter;
1c6fdbd8
KO
2303 struct bkey_s_c k;
2304 struct bkey_s_c_dirent d;
1c6fdbd8
KO
2305 int ret;
2306
ef1669ff
KO
2307 snapshots_seen_init(&s);
2308
20bceecb 2309 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
424eb881 2310
909004d2
KO
2311 for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN,
2312 BTREE_ITER_INTENT|
ef1669ff
KO
2313 BTREE_ITER_PREFETCH|
2314 BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
49124d8a 2315 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
ef1669ff
KO
2316 if (ret)
2317 break;
2318
1c6fdbd8 2319 switch (k.k->type) {
26609b61 2320 case KEY_TYPE_dirent:
1c6fdbd8 2321 d = bkey_s_c_to_dirent(k);
1c6fdbd8 2322
ef1669ff
KO
2323 if (d.v->d_type != DT_DIR &&
2324 d.v->d_type != DT_SUBVOL)
2325 inc_link(c, &s, links, range_start, range_end,
2326 le64_to_cpu(d.v->d_inum),
49124d8a 2327 bch2_snapshot_equiv(c, d.k->p.snapshot));
1c6fdbd8
KO
2328 break;
2329 }
1c6fdbd8 2330 }
67e0dd8f 2331 bch2_trans_iter_exit(&trans, &iter);
abcecb49 2332
1c6fdbd8 2333 if (ret)
619f5bee 2334 bch_err(c, "error in fsck: btree error %i while walking dirents", ret);
1c6fdbd8 2335
ef1669ff
KO
2336 bch2_trans_exit(&trans);
2337 snapshots_seen_exit(&s);
1c6fdbd8
KO
2338 return ret;
2339}
2340
eace11a7
KO
2341static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
2342 struct bkey_s_c k,
2343 struct nlink_table *links,
2344 size_t *idx, u64 range_end)
2345{
2346 struct bch_fs *c = trans->c;
2347 struct bch_inode_unpacked u;
2348 struct nlink *link = &links->d[*idx];
2349 int ret = 0;
2350
2351 if (k.k->p.offset >= range_end)
2352 return 1;
2353
2354 if (!bkey_is_inode(k.k))
2355 return 0;
2356
2357 BUG_ON(bch2_inode_unpack(k, &u));
2358
73bd774d 2359 if (S_ISDIR(u.bi_mode))
eace11a7
KO
2360 return 0;
2361
2362 if (!u.bi_nlink)
2363 return 0;
2364
2365 while ((cmp_int(link->inum, k.k->p.offset) ?:
2366 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
2367 BUG_ON(*idx == links->nr);
2368 link = &links->d[++*idx];
2369 }
2370
2371 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
2372 "inode %llu type %s has wrong i_nlink (%u, should be %u)",
2373 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
2374 bch2_inode_nlink_get(&u), link->count)) {
2375 bch2_inode_nlink_set(&u, link->count);
2376 ret = __write_inode(trans, &u, k.k->p.snapshot);
2377 }
2378fsck_err:
2379 return ret;
2380}
2381
1c6fdbd8 2382noinline_for_stack
fc51b041
KO
2383static int check_nlinks_update_hardlinks(struct bch_fs *c,
2384 struct nlink_table *links,
1c6fdbd8
KO
2385 u64 range_start, u64 range_end)
2386{
0564b167 2387 struct btree_trans trans;
67e0dd8f 2388 struct btree_iter iter;
1c6fdbd8 2389 struct bkey_s_c k;
eace11a7 2390 size_t idx = 0;
b906aadd 2391 int ret = 0;
1c6fdbd8 2392
20bceecb 2393 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
0564b167 2394
eace11a7
KO
2395 ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
2396 POS(0, range_start),
2397 BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
2398 NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
2399 check_nlinks_update_inode(&trans, &iter, k, links, &idx, range_end));
fc51b041 2400
0564b167
KO
2401 bch2_trans_exit(&trans);
2402
eace11a7 2403 if (ret < 0) {
b906aadd 2404 bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
eace11a7
KO
2405 return ret;
2406 }
1c6fdbd8 2407
eace11a7 2408 return 0;
1c6fdbd8
KO
2409}
2410
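/*
 * Hardlink counts are checked in passes over a range of inode numbers:
 * collect the hardlinked (non-directory, nonzero nlink) inodes in the range
 * into an in-memory table, count the dirents referencing them, then rewrite
 * any inode whose i_nlink disagrees with the count; if the table couldn't
 * hold every inode, repeat from where the previous pass stopped:
 */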
067d228b 2411int bch2_check_nlinks(struct bch_fs *c)
1c6fdbd8 2412{
fc51b041 2413 struct nlink_table links = { 0 };
1c6fdbd8
KO
2414 u64 this_iter_range_start, next_iter_range_start = 0;
2415 int ret = 0;
2416
1c6fdbd8
KO
2417 do {
2418 this_iter_range_start = next_iter_range_start;
2419 next_iter_range_start = U64_MAX;
2420
fc51b041
KO
2421 ret = check_nlinks_find_hardlinks(c, &links,
2422 this_iter_range_start,
2423 &next_iter_range_start);
2424
2425 ret = check_nlinks_walk_dirents(c, &links,
1c6fdbd8 2426 this_iter_range_start,
fc51b041 2427 next_iter_range_start);
1c6fdbd8
KO
2428 if (ret)
2429 break;
2430
fc51b041 2431 ret = check_nlinks_update_hardlinks(c, &links,
1c6fdbd8
KO
2432 this_iter_range_start,
2433 next_iter_range_start);
2434 if (ret)
2435 break;
2436
fc51b041 2437 links.nr = 0;
1c6fdbd8
KO
2438 } while (next_iter_range_start != U64_MAX);
2439
fc51b041 2440 kvfree(links.d);
1c6fdbd8 2441
1bb3c2a9
KO
2442 if (ret)
2443 bch_err_fn(c, ret);
1c6fdbd8
KO
2444 return ret;
2445}
2446
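/*
 * On filesystems from before the reflink_p_fix version (see the version
 * check in bch2_fix_reflink_p() below), reflink pointers may carry
 * front_pad/back_pad values that need to be cleared; rewrite those keys with
 * the padding zeroed. The update is flagged BTREE_TRIGGER_NORUN so triggers
 * (and thus indirect extent refcounts) aren't rerun for this repair.
 */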
eace11a7
KO
2447static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter,
2448 struct bkey_s_c k)
bfe88863 2449{
bfe88863
KO
2450 struct bkey_s_c_reflink_p p;
2451 struct bkey_i_reflink_p *u;
2452 int ret;
2453
bfe88863
KO
2454 if (k.k->type != KEY_TYPE_reflink_p)
2455 return 0;
2456
2457 p = bkey_s_c_to_reflink_p(k);
2458
6d76aefe 2459 if (!p.v->front_pad && !p.v->back_pad)
bfe88863
KO
2460 return 0;
2461
2462 u = bch2_trans_kmalloc(trans, sizeof(*u));
2463 ret = PTR_ERR_OR_ZERO(u);
2464 if (ret)
2465 return ret;
2466
2467 bkey_reassemble(&u->k_i, k);
6d76aefe
KO
2468 u->v.front_pad = 0;
2469 u->v.back_pad = 0;
bfe88863 2470
6b3d8b89 2471 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN);
bfe88863
KO
2472}
2473
067d228b 2474int bch2_fix_reflink_p(struct bch_fs *c)
bfe88863 2475{
bfe88863
KO
2476 struct btree_iter iter;
2477 struct bkey_s_c k;
2478 int ret;
2479
2480 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
2481 return 0;
2482
1bb3c2a9
KO
2483 ret = bch2_trans_run(c,
2484 for_each_btree_key_commit(&trans, iter,
2485 BTREE_ID_extents, POS_MIN,
2486 BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|
2487 BTREE_ITER_ALL_SNAPSHOTS, k,
2488 NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
2489 fix_reflink_p_key(&trans, &iter, k)));
bfe88863 2490
1bb3c2a9
KO
2491 if (ret)
2492 bch_err_fn(c, ret);
bfe88863
KO
2493 return ret;
2494}