Commit | Line | Data |
---|---|---|
1c6fdbd8 KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "bcachefs.h" | |
07a1006a | 4 | #include "bkey_buf.h" |
1c6fdbd8 KO |
5 | #include "btree_update.h" |
6 | #include "dirent.h" | |
7 | #include "error.h" | |
96385742 | 8 | #include "fs-common.h" |
1c6fdbd8 KO |
9 | #include "fsck.h" |
10 | #include "inode.h" | |
11 | #include "keylist.h" | |
14b393ee | 12 | #include "subvolume.h" |
1c6fdbd8 KO |
13 | #include "super.h" |
14 | #include "xattr.h" | |
15 | ||
fc51b041 | 16 | #include <linux/bsearch.h> |
1c6fdbd8 | 17 | #include <linux/dcache.h> /* struct qstr */ |
1c6fdbd8 KO |
18 | |
19 | #define QSTR(n) { { { .len = strlen(n) } }, .name = n } | |
20 | ||
ef1669ff KO |
21 | static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, |
22 | u32 snapshot) | |
424eb881 | 23 | { |
67e0dd8f | 24 | struct btree_iter iter; |
424eb881 KO |
25 | struct bkey_s_c k; |
26 | u64 sectors = 0; | |
94f651e2 | 27 | int ret; |
424eb881 | 28 | |
41f8b09e | 29 | for_each_btree_key(trans, iter, BTREE_ID_extents, |
ef1669ff | 30 | SPOS(inum, 0, snapshot), 0, k, ret) { |
424eb881 KO |
31 | if (k.k->p.inode != inum) |
32 | break; | |
33 | ||
34 | if (bkey_extent_is_allocation(k.k)) | |
35 | sectors += k.k->size; | |
36 | } | |
37 | ||
67e0dd8f | 38 | bch2_trans_iter_exit(trans, &iter); |
94f651e2 KO |
39 | |
40 | return ret ?: sectors; | |
424eb881 KO |
41 | } |
42 | ||
ef1669ff KO |
43 | static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, |
44 | u32 snapshot) | |
45 | { | |
46 | struct btree_iter iter; | |
47 | struct bkey_s_c k; | |
48 | struct bkey_s_c_dirent d; | |
49 | u64 subdirs = 0; | |
50 | int ret; | |
51 | ||
52 | for_each_btree_key(trans, iter, BTREE_ID_dirents, | |
53 | SPOS(inum, 0, snapshot), 0, k, ret) { | |
54 | if (k.k->p.inode != inum) | |
55 | break; | |
56 | ||
57 | if (k.k->type != KEY_TYPE_dirent) | |
58 | continue; | |
59 | ||
60 | d = bkey_s_c_to_dirent(k); | |
61 | if (d.v->d_type == DT_DIR) | |
62 | subdirs++; | |
63 | } | |
64 | ||
65 | bch2_trans_iter_exit(trans, &iter); | |
66 | ||
67 | return ret ?: subdirs; | |
68 | } | |
69 | ||
81ed9ce3 KO |
70 | static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, |
71 | u32 *subvol) | |
72 | { | |
73 | struct btree_iter iter; | |
74 | struct bkey_s_c k; | |
75 | int ret; | |
76 | ||
77 | bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, | |
78 | POS(0, snapshot), 0); | |
79 | k = bch2_btree_iter_peek_slot(&iter); | |
80 | ret = bkey_err(k); | |
81 | if (ret) | |
82 | goto err; | |
83 | ||
84 | if (k.k->type != KEY_TYPE_snapshot) { | |
85 | bch_err(trans->c, "snapshot %u not fonud", snapshot); | |
86 | ret = -ENOENT; | |
87 | goto err; | |
88 | } | |
89 | ||
90 | *subvol = le32_to_cpu(bkey_s_c_to_snapshot(k).v->subvol); | |
91 | err: | |
92 | bch2_trans_iter_exit(trans, &iter); | |
93 | return ret; | |
94 | ||
95 | } | |
96 | ||
97 | static int snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, | |
98 | u32 *subvol) | |
99 | { | |
100 | return lockrestart_do(trans, __snapshot_lookup_subvol(trans, snapshot, subvol)); | |
101 | } | |
102 | ||
ef1669ff KO |
103 | static int __subvol_lookup(struct btree_trans *trans, u32 subvol, |
104 | u32 *snapshot, u64 *inum) | |
81ed9ce3 | 105 | { |
97996ddf | 106 | struct bch_subvolume s; |
81ed9ce3 KO |
107 | int ret; |
108 | ||
97996ddf | 109 | ret = bch2_subvolume_get(trans, subvol, false, 0, &s); |
81ed9ce3 | 110 | |
97996ddf KO |
111 | *snapshot = le32_to_cpu(s.snapshot); |
112 | *inum = le64_to_cpu(s.inode); | |
81ed9ce3 | 113 | return ret; |
81ed9ce3 KO |
114 | } |
115 | ||
ef1669ff KO |
116 | static int subvol_lookup(struct btree_trans *trans, u32 subvol, |
117 | u32 *snapshot, u64 *inum) | |
81ed9ce3 | 118 | { |
ef1669ff | 119 | return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum)); |
81ed9ce3 KO |
120 | } |
121 | ||
58686a25 KO |
122 | static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, |
123 | struct bch_inode_unpacked *inode, | |
124 | u32 *snapshot) | |
8a85b20c | 125 | { |
67e0dd8f | 126 | struct btree_iter iter; |
8a85b20c KO |
127 | struct bkey_s_c k; |
128 | int ret; | |
129 | ||
67e0dd8f | 130 | bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, |
ef1669ff | 131 | SPOS(0, inode_nr, *snapshot), 0); |
67e0dd8f | 132 | k = bch2_btree_iter_peek_slot(&iter); |
8a85b20c KO |
133 | ret = bkey_err(k); |
134 | if (ret) | |
135 | goto err; | |
136 | ||
ef1669ff | 137 | *snapshot = iter.pos.snapshot; |
8a85b20c KO |
138 | ret = k.k->type == KEY_TYPE_inode |
139 | ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode) | |
140 | : -ENOENT; | |
141 | err: | |
67e0dd8f | 142 | bch2_trans_iter_exit(trans, &iter); |
8a85b20c KO |
143 | return ret; |
144 | } | |
145 | ||
58686a25 KO |
146 | static int lookup_inode(struct btree_trans *trans, u64 inode_nr, |
147 | struct bch_inode_unpacked *inode, | |
148 | u32 *snapshot) | |
149 | { | |
150 | return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot)); | |
151 | } | |
152 | ||
ef1669ff KO |
153 | static int __lookup_dirent(struct btree_trans *trans, |
154 | struct bch_hash_info hash_info, | |
155 | subvol_inum dir, struct qstr *name, | |
156 | u64 *target, unsigned *type) | |
157 | { | |
158 | struct btree_iter iter; | |
159 | struct bkey_s_c_dirent d; | |
160 | int ret; | |
161 | ||
162 | ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc, | |
163 | &hash_info, dir, name, 0); | |
164 | if (ret) | |
165 | return ret; | |
166 | ||
167 | d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); | |
168 | *target = le64_to_cpu(d.v->d_inum); | |
169 | *type = d.v->d_type; | |
170 | bch2_trans_iter_exit(trans, &iter); | |
171 | return 0; | |
172 | } | |
173 | ||
174 | static int lookup_dirent(struct btree_trans *trans, | |
175 | struct bch_hash_info hash_info, | |
176 | subvol_inum dir, struct qstr *name, | |
177 | u64 *target, unsigned *type) | |
178 | { | |
179 | return lockrestart_do(trans, | |
180 | __lookup_dirent(trans, hash_info, dir, name, target, type)); | |
181 | } | |
182 | ||
58686a25 KO |
183 | static int __write_inode(struct btree_trans *trans, |
184 | struct bch_inode_unpacked *inode, | |
185 | u32 snapshot) | |
8a85b20c | 186 | { |
67e0dd8f KO |
187 | struct btree_iter iter; |
188 | int ret; | |
189 | ||
190 | bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, | |
191 | SPOS(0, inode->bi_inum, snapshot), | |
192 | BTREE_ITER_INTENT); | |
193 | ||
194 | ret = bch2_btree_iter_traverse(&iter) ?: | |
195 | bch2_inode_write(trans, &iter, inode); | |
196 | bch2_trans_iter_exit(trans, &iter); | |
58686a25 KO |
197 | return ret; |
198 | } | |
199 | ||
200 | static int write_inode(struct btree_trans *trans, | |
201 | struct bch_inode_unpacked *inode, | |
202 | u32 snapshot) | |
203 | { | |
8a85b20c KO |
204 | int ret = __bch2_trans_do(trans, NULL, NULL, |
205 | BTREE_INSERT_NOFAIL| | |
206 | BTREE_INSERT_LAZY_RW, | |
58686a25 | 207 | __write_inode(trans, inode, snapshot)); |
8a85b20c KO |
208 | if (ret) |
209 | bch_err(trans->c, "error in fsck: error %i updating inode", ret); | |
210 | return ret; | |
211 | } | |
212 | ||
ef1669ff KO |
213 | static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot) |
214 | { | |
215 | struct btree_iter iter = { NULL }; | |
216 | struct bkey_i_inode_generation delete; | |
217 | struct bch_inode_unpacked inode_u; | |
218 | struct bkey_s_c k; | |
219 | int ret; | |
220 | ||
221 | ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, | |
222 | SPOS(inum, 0, snapshot), | |
223 | SPOS(inum, U64_MAX, snapshot), | |
224 | 0, NULL) ?: | |
225 | bch2_btree_delete_range_trans(trans, BTREE_ID_dirents, | |
226 | SPOS(inum, 0, snapshot), | |
227 | SPOS(inum, U64_MAX, snapshot), | |
228 | 0, NULL) ?: | |
229 | bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs, | |
230 | SPOS(inum, 0, snapshot), | |
231 | SPOS(inum, U64_MAX, snapshot), | |
232 | 0, NULL); | |
233 | if (ret) | |
234 | goto err; | |
235 | retry: | |
236 | bch2_trans_begin(trans); | |
237 | ||
238 | bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, | |
239 | SPOS(0, inum, snapshot), BTREE_ITER_INTENT); | |
240 | k = bch2_btree_iter_peek_slot(&iter); | |
241 | ||
242 | ret = bkey_err(k); | |
243 | if (ret) | |
244 | goto err; | |
245 | ||
246 | if (k.k->type != KEY_TYPE_inode) { | |
247 | bch2_fs_inconsistent(trans->c, | |
248 | "inode %llu:%u not found when deleting", | |
249 | inum, snapshot); | |
250 | ret = -EIO; | |
251 | goto err; | |
252 | } | |
253 | ||
254 | bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); | |
255 | ||
256 | /* Subvolume root? */ | |
257 | if (inode_u.bi_subvol) { | |
258 | ret = bch2_subvolume_delete(trans, inode_u.bi_subvol, -1); | |
259 | if (ret) | |
260 | goto err; | |
261 | } | |
262 | ||
263 | bkey_inode_generation_init(&delete.k_i); | |
264 | delete.k.p = iter.pos; | |
265 | delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); | |
266 | ||
267 | ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: | |
268 | bch2_trans_commit(trans, NULL, NULL, | |
269 | BTREE_INSERT_NOFAIL); | |
270 | err: | |
271 | bch2_trans_iter_exit(trans, &iter); | |
272 | if (ret == -EINTR) | |
273 | goto retry; | |
274 | ||
275 | return ret; | |
276 | } | |
277 | ||
ae8bbb9f | 278 | static int __remove_dirent(struct btree_trans *trans, struct bpos pos) |
1c6fdbd8 | 279 | { |
0f238367 | 280 | struct bch_fs *c = trans->c; |
67e0dd8f | 281 | struct btree_iter iter; |
1c6fdbd8 KO |
282 | struct bch_inode_unpacked dir_inode; |
283 | struct bch_hash_info dir_hash_info; | |
1c6fdbd8 | 284 | int ret; |
1c6fdbd8 | 285 | |
ae8bbb9f | 286 | ret = lookup_inode(trans, pos.inode, &dir_inode, NULL); |
b1fd23df KO |
287 | if (ret) |
288 | return ret; | |
1c6fdbd8 KO |
289 | |
290 | dir_hash_info = bch2_hash_info_init(c, &dir_inode); | |
291 | ||
67e0dd8f | 292 | bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); |
b1fd23df | 293 | |
ae8bbb9f | 294 | ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, |
42d23732 | 295 | &dir_hash_info, &iter, 0); |
67e0dd8f | 296 | bch2_trans_iter_exit(trans, &iter); |
ae8bbb9f | 297 | return ret; |
b1fd23df KO |
298 | } |
299 | ||
ae8bbb9f | 300 | static int remove_dirent(struct btree_trans *trans, struct bpos pos) |
b1fd23df | 301 | { |
ae8bbb9f KO |
302 | int ret = __bch2_trans_do(trans, NULL, NULL, |
303 | BTREE_INSERT_NOFAIL| | |
304 | BTREE_INSERT_LAZY_RW, | |
305 | __remove_dirent(trans, pos)); | |
306 | if (ret) | |
307 | bch_err(trans->c, "remove_dirent: err %i deleting dirent", ret); | |
308 | return ret; | |
1c6fdbd8 KO |
309 | } |
310 | ||
58686a25 | 311 | /* Get lost+found, create if it doesn't exist: */ |
ef1669ff | 312 | static int lookup_lostfound(struct btree_trans *trans, u32 subvol, |
58686a25 | 313 | struct bch_inode_unpacked *lostfound) |
1c6fdbd8 | 314 | { |
58686a25 KO |
315 | struct bch_fs *c = trans->c; |
316 | struct bch_inode_unpacked root; | |
317 | struct bch_hash_info root_hash_info; | |
318 | struct qstr lostfound_str = QSTR("lost+found"); | |
ef1669ff KO |
319 | subvol_inum root_inum = { .subvol = subvol }; |
320 | u64 inum = 0; | |
321 | unsigned d_type = 0; | |
58686a25 KO |
322 | u32 snapshot; |
323 | int ret; | |
324 | ||
ef1669ff KO |
325 | ret = subvol_lookup(trans, subvol, &snapshot, &root_inum.inum); |
326 | if (ret) | |
327 | return ret; | |
81ed9ce3 | 328 | |
ef1669ff KO |
329 | ret = lookup_inode(trans, root_inum.inum, &root, &snapshot); |
330 | if (ret) { | |
331 | bch_err(c, "error fetching subvol root: %i", ret); | |
58686a25 | 332 | return ret; |
ef1669ff | 333 | } |
58686a25 KO |
334 | |
335 | root_hash_info = bch2_hash_info_init(c, &root); | |
ef1669ff KO |
336 | |
337 | ret = lookup_dirent(trans, root_hash_info, root_inum, | |
338 | &lostfound_str, &inum, &d_type); | |
339 | if (ret == -ENOENT) { | |
58686a25 KO |
340 | bch_notice(c, "creating lost+found"); |
341 | goto create_lostfound; | |
342 | } | |
343 | ||
ef1669ff KO |
344 | if (ret) { |
345 | bch_err(c, "error looking up lost+found: %i", ret); | |
346 | return ret; | |
347 | } | |
348 | ||
349 | if (d_type != DT_DIR) { | |
350 | bch_err(c, "error looking up lost+found: not a directory"); | |
351 | return ret; | |
352 | ||
353 | } | |
354 | ||
58686a25 KO |
355 | ret = lookup_inode(trans, inum, lostfound, &snapshot); |
356 | if (ret && ret != -ENOENT) { | |
357 | /* | |
358 | * The check_dirents pass has already run, dangling dirents | |
359 | * shouldn't exist here: | |
360 | */ | |
361 | bch_err(c, "error looking up lost+found: %i", ret); | |
362 | return ret; | |
363 | } | |
364 | ||
365 | if (ret == -ENOENT) { | |
366 | create_lostfound: | |
367 | bch2_inode_init_early(c, lostfound); | |
368 | ||
369 | ret = __bch2_trans_do(trans, NULL, NULL, | |
370 | BTREE_INSERT_NOFAIL| | |
371 | BTREE_INSERT_LAZY_RW, | |
ef1669ff KO |
372 | bch2_create_trans(trans, root_inum, &root, |
373 | lostfound, &lostfound_str, | |
42d23732 KO |
374 | 0, 0, S_IFDIR|0700, 0, NULL, NULL, |
375 | (subvol_inum) { }, 0)); | |
58686a25 KO |
376 | if (ret) |
377 | bch_err(c, "error creating lost+found: %i", ret); | |
378 | } | |
379 | ||
380 | return 0; | |
381 | } | |
382 | ||
383 | static int reattach_inode(struct btree_trans *trans, | |
81ed9ce3 | 384 | struct bch_inode_unpacked *inode, |
ef1669ff | 385 | u32 inode_snapshot) |
58686a25 KO |
386 | { |
387 | struct bch_hash_info dir_hash; | |
388 | struct bch_inode_unpacked lostfound; | |
1c6fdbd8 KO |
389 | char name_buf[20]; |
390 | struct qstr name; | |
176cf4bf | 391 | u64 dir_offset = 0; |
81ed9ce3 | 392 | u32 subvol; |
1c6fdbd8 KO |
393 | int ret; |
394 | ||
ef1669ff | 395 | ret = snapshot_lookup_subvol(trans, inode_snapshot, &subvol); |
81ed9ce3 KO |
396 | if (ret) |
397 | return ret; | |
398 | ||
399 | ret = lookup_lostfound(trans, subvol, &lostfound); | |
176cf4bf | 400 | if (ret) |
d3ff7fec | 401 | return ret; |
176cf4bf | 402 | |
58686a25 KO |
403 | if (S_ISDIR(inode->bi_mode)) { |
404 | lostfound.bi_nlink++; | |
176cf4bf | 405 | |
58686a25 | 406 | ret = write_inode(trans, &lostfound, U32_MAX); |
176cf4bf | 407 | if (ret) |
d3ff7fec | 408 | return ret; |
176cf4bf KO |
409 | } |
410 | ||
58686a25 | 411 | dir_hash = bch2_hash_info_init(trans->c, &lostfound); |
176cf4bf | 412 | |
58686a25 KO |
413 | snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); |
414 | name = (struct qstr) QSTR(name_buf); | |
176cf4bf | 415 | |
58686a25 | 416 | ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, |
ef1669ff KO |
417 | bch2_dirent_create(trans, |
418 | (subvol_inum) { | |
419 | .subvol = subvol, | |
420 | .inum = lostfound.bi_inum, | |
421 | }, | |
422 | &dir_hash, | |
423 | mode_to_type(inode->bi_mode), | |
424 | &name, inode->bi_inum, &dir_offset, | |
425 | BCH_HASH_SET_MUST_CREATE)); | |
58686a25 KO |
426 | if (ret) { |
427 | bch_err(trans->c, "error %i reattaching inode %llu", | |
428 | ret, inode->bi_inum); | |
429 | return ret; | |
430 | } | |
176cf4bf | 431 | |
58686a25 KO |
432 | inode->bi_dir = lostfound.bi_inum; |
433 | inode->bi_dir_offset = dir_offset; | |
1c6fdbd8 | 434 | |
ef1669ff | 435 | return write_inode(trans, inode, inode_snapshot); |
1c6fdbd8 KO |
436 | } |
437 | ||
d3ff7fec KO |
438 | static int remove_backpointer(struct btree_trans *trans, |
439 | struct bch_inode_unpacked *inode) | |
440 | { | |
67e0dd8f | 441 | struct btree_iter iter; |
d3ff7fec KO |
442 | struct bkey_s_c k; |
443 | int ret; | |
444 | ||
67e0dd8f KO |
445 | bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, |
446 | POS(inode->bi_dir, inode->bi_dir_offset), 0); | |
447 | k = bch2_btree_iter_peek_slot(&iter); | |
d3ff7fec KO |
448 | ret = bkey_err(k); |
449 | if (ret) | |
450 | goto out; | |
451 | if (k.k->type != KEY_TYPE_dirent) { | |
452 | ret = -ENOENT; | |
453 | goto out; | |
454 | } | |
455 | ||
ae8bbb9f | 456 | ret = remove_dirent(trans, k.k->p); |
d3ff7fec | 457 | out: |
67e0dd8f | 458 | bch2_trans_iter_exit(trans, &iter); |
d3ff7fec KO |
459 | return ret; |
460 | } | |
461 | ||
ef1669ff KO |
462 | static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos) |
463 | { | |
464 | pos.snapshot = snapshot_t(c, pos.snapshot)->equiv; | |
465 | ||
466 | if (bkey_cmp(s->pos, pos)) | |
467 | s->nr = 0; | |
468 | s->pos = pos; | |
469 | ||
ef1669ff KO |
470 | /* Might get called multiple times due to lock restarts */ |
471 | if (s->nr && s->d[s->nr - 1] == pos.snapshot) | |
472 | return 0; | |
473 | ||
18443cb9 | 474 | return snapshots_seen_add(c, s, pos.snapshot); |
ef1669ff KO |
475 | } |
476 | ||
477 | /** | |
478 | * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, | |
479 | * and @ancestor hasn't been overwritten in @seen | |
480 | * | |
481 | * That is, returns whether key in @ancestor snapshot is visible in @id snapshot | |
482 | */ | |
483 | static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, | |
484 | u32 id, u32 ancestor) | |
485 | { | |
486 | ssize_t i; | |
487 | ||
488 | BUG_ON(id > ancestor); | |
489 | ||
490 | id = snapshot_t(c, id)->equiv; | |
491 | ancestor = snapshot_t(c, ancestor)->equiv; | |
492 | ||
493 | /* @ancestor should be the snapshot most recently added to @seen */ | |
494 | BUG_ON(!seen->nr || seen->d[seen->nr - 1] != ancestor); | |
495 | BUG_ON(seen->pos.snapshot != ancestor); | |
496 | ||
497 | if (id == ancestor) | |
498 | return true; | |
499 | ||
500 | if (!bch2_snapshot_is_ancestor(c, id, ancestor)) | |
501 | return false; | |
502 | ||
503 | for (i = seen->nr - 2; | |
504 | i >= 0 && seen->d[i] >= id; | |
505 | --i) | |
506 | if (bch2_snapshot_is_ancestor(c, id, seen->d[i]) && | |
507 | bch2_snapshot_is_ancestor(c, seen->d[i], ancestor)) | |
508 | return false; | |
509 | ||
510 | return true; | |
511 | } | |
512 | ||
513 | /** | |
514 | * ref_visible - given a key with snapshot id @src that points to a key with | |
515 | * snapshot id @dst, test whether there is some snapshot in which @dst is | |
516 | * visible. | |
517 | * | |
518 | * This assumes we're visiting @src keys in natural key order. | |
519 | * | |
520 | * @s - list of snapshot IDs already seen at @src | |
521 | * @src - snapshot ID of src key | |
522 | * @dst - snapshot ID of dst key | |
523 | */ | |
524 | static int ref_visible(struct bch_fs *c, struct snapshots_seen *s, | |
525 | u32 src, u32 dst) | |
526 | { | |
527 | return dst <= src | |
528 | ? key_visible_in_snapshot(c, s, dst, src) | |
529 | : bch2_snapshot_is_ancestor(c, src, dst); | |
530 | } | |
531 | ||
532 | #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ | |
533 | for (_i = (_w)->d; _i < (_w)->d + (_w)->nr && (_i)->snapshot <= (_snapshot); _i++)\ | |
534 | if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) | |
535 | ||
1c6fdbd8 | 536 | struct inode_walker { |
ef1669ff KO |
537 | bool first_this_inode; |
538 | u64 cur_inum; | |
539 | ||
540 | size_t nr; | |
541 | size_t size; | |
542 | struct inode_walker_entry { | |
543 | struct bch_inode_unpacked inode; | |
544 | u32 snapshot; | |
545 | u64 count; | |
546 | } *d; | |
1c6fdbd8 KO |
547 | }; |
548 | ||
ef1669ff KO |
549 | static void inode_walker_exit(struct inode_walker *w) |
550 | { | |
551 | kfree(w->d); | |
552 | w->d = NULL; | |
553 | } | |
554 | ||
1c6fdbd8 KO |
555 | static struct inode_walker inode_walker_init(void) |
556 | { | |
ef1669ff KO |
557 | return (struct inode_walker) { 0, }; |
558 | } | |
559 | ||
560 | static int inode_walker_realloc(struct inode_walker *w) | |
561 | { | |
562 | if (w->nr == w->size) { | |
563 | size_t new_size = max_t(size_t, 8UL, w->size * 2); | |
564 | void *d = krealloc(w->d, new_size * sizeof(w->d[0]), | |
565 | GFP_KERNEL); | |
566 | if (!d) | |
567 | return -ENOMEM; | |
568 | ||
569 | w->d = d; | |
570 | w->size = new_size; | |
571 | } | |
572 | ||
573 | return 0; | |
574 | } | |
575 | ||
576 | static int add_inode(struct bch_fs *c, struct inode_walker *w, | |
577 | struct bkey_s_c_inode inode) | |
578 | { | |
579 | struct bch_inode_unpacked u; | |
580 | int ret; | |
581 | ||
582 | ret = inode_walker_realloc(w); | |
583 | if (ret) | |
584 | return ret; | |
585 | ||
586 | BUG_ON(bch2_inode_unpack(inode, &u)); | |
587 | ||
588 | w->d[w->nr++] = (struct inode_walker_entry) { | |
589 | .inode = u, | |
590 | .snapshot = snapshot_t(c, inode.k->p.snapshot)->equiv, | |
1c6fdbd8 | 591 | }; |
ef1669ff KO |
592 | |
593 | return 0; | |
1c6fdbd8 KO |
594 | } |
595 | ||
914f2786 | 596 | static int __walk_inode(struct btree_trans *trans, |
ef1669ff | 597 | struct inode_walker *w, struct bpos pos) |
1c6fdbd8 | 598 | { |
ef1669ff KO |
599 | struct bch_fs *c = trans->c; |
600 | struct btree_iter iter; | |
601 | struct bkey_s_c k; | |
602 | unsigned i, ancestor_pos; | |
603 | int ret; | |
1c6fdbd8 | 604 | |
ef1669ff | 605 | pos.snapshot = snapshot_t(c, pos.snapshot)->equiv; |
1c6fdbd8 | 606 | |
ef1669ff | 607 | if (pos.inode == w->cur_inum) { |
6bd13057 | 608 | w->first_this_inode = false; |
ef1669ff | 609 | goto lookup_snapshot; |
1c6fdbd8 KO |
610 | } |
611 | ||
ef1669ff KO |
612 | w->nr = 0; |
613 | ||
614 | for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, pos.inode), | |
615 | BTREE_ITER_ALL_SNAPSHOTS, k, ret) { | |
616 | if (k.k->p.offset != pos.inode) | |
617 | break; | |
618 | ||
619 | if (k.k->type == KEY_TYPE_inode) | |
620 | add_inode(c, w, bkey_s_c_to_inode(k)); | |
621 | } | |
622 | bch2_trans_iter_exit(trans, &iter); | |
623 | ||
624 | if (ret) | |
625 | return ret; | |
626 | ||
627 | w->cur_inum = pos.inode; | |
628 | w->first_this_inode = true; | |
629 | lookup_snapshot: | |
630 | for (i = 0; i < w->nr; i++) | |
631 | if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->d[i].snapshot)) | |
632 | goto found; | |
633 | return INT_MAX; | |
634 | found: | |
635 | BUG_ON(pos.snapshot > w->d[i].snapshot); | |
636 | ||
637 | if (pos.snapshot != w->d[i].snapshot) { | |
638 | ancestor_pos = i; | |
639 | ||
640 | while (i && w->d[i - 1].snapshot > pos.snapshot) | |
641 | --i; | |
642 | ||
643 | ret = inode_walker_realloc(w); | |
644 | if (ret) | |
645 | return ret; | |
646 | ||
647 | array_insert_item(w->d, w->nr, i, w->d[ancestor_pos]); | |
648 | w->d[i].snapshot = pos.snapshot; | |
649 | w->d[i].count = 0; | |
650 | } | |
651 | ||
652 | return i; | |
1c6fdbd8 KO |
653 | } |
654 | ||
914f2786 | 655 | static int walk_inode(struct btree_trans *trans, |
ef1669ff | 656 | struct inode_walker *w, struct bpos pos) |
914f2786 | 657 | { |
ef1669ff KO |
658 | return lockrestart_do(trans, __walk_inode(trans, w, pos)); |
659 | } | |
660 | ||
661 | static int __get_visible_inodes(struct btree_trans *trans, | |
662 | struct inode_walker *w, | |
663 | struct snapshots_seen *s, | |
664 | u64 inum) | |
665 | { | |
666 | struct bch_fs *c = trans->c; | |
667 | struct btree_iter iter; | |
668 | struct bkey_s_c k; | |
669 | int ret; | |
670 | ||
671 | w->nr = 0; | |
672 | ||
673 | for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), | |
674 | BTREE_ITER_ALL_SNAPSHOTS, k, ret) { | |
675 | if (k.k->p.offset != inum) | |
676 | break; | |
677 | ||
678 | if (k.k->type != KEY_TYPE_inode) | |
679 | continue; | |
680 | ||
681 | if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) { | |
682 | add_inode(c, w, bkey_s_c_to_inode(k)); | |
683 | if (k.k->p.snapshot >= s->pos.snapshot) | |
684 | break; | |
685 | } | |
686 | } | |
687 | bch2_trans_iter_exit(trans, &iter); | |
688 | ||
689 | return ret; | |
690 | } | |
691 | ||
692 | static int check_key_has_snapshot(struct btree_trans *trans, | |
693 | struct btree_iter *iter, | |
694 | struct bkey_s_c k) | |
695 | { | |
696 | struct bch_fs *c = trans->c; | |
697 | char buf[200]; | |
698 | int ret = 0; | |
699 | ||
700 | if (fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c, | |
701 | "key in missing snapshot: %s", | |
702 | (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { | |
703 | ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, | |
704 | bch2_btree_delete_at(trans, iter, | |
705 | BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)); | |
706 | return ret ?: -EINTR; | |
707 | } | |
708 | fsck_err: | |
709 | return ret; | |
914f2786 KO |
710 | } |
711 | ||
7ac2c55e KO |
712 | static int hash_redo_key(struct btree_trans *trans, |
713 | const struct bch_hash_desc desc, | |
714 | struct bch_hash_info *hash_info, | |
715 | struct btree_iter *k_iter, struct bkey_s_c k) | |
1c6fdbd8 | 716 | { |
ef1669ff KO |
717 | bch_err(trans->c, "hash_redo_key() not implemented yet"); |
718 | return -EINVAL; | |
719 | #if 0 | |
e3b4b48c | 720 | struct bkey_i *delete; |
1c6fdbd8 | 721 | struct bkey_i *tmp; |
1c6fdbd8 | 722 | |
e3b4b48c KO |
723 | delete = bch2_trans_kmalloc(trans, sizeof(*delete)); |
724 | if (IS_ERR(delete)) | |
725 | return PTR_ERR(delete); | |
726 | ||
b1fd23df KO |
727 | tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); |
728 | if (IS_ERR(tmp)) | |
729 | return PTR_ERR(tmp); | |
1c6fdbd8 KO |
730 | |
731 | bkey_reassemble(tmp, k); | |
732 | ||
e3b4b48c KO |
733 | bkey_init(&delete->k); |
734 | delete->k.p = k_iter->pos; | |
8c3f6da9 KO |
735 | return bch2_btree_iter_traverse(k_iter) ?: |
736 | bch2_trans_update(trans, k_iter, delete, 0) ?: | |
bc3f8b25 | 737 | bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0); |
ef1669ff | 738 | #endif |
1c6fdbd8 KO |
739 | } |
740 | ||
424eb881 KO |
741 | static int fsck_hash_delete_at(struct btree_trans *trans, |
742 | const struct bch_hash_desc desc, | |
1c6fdbd8 | 743 | struct bch_hash_info *info, |
424eb881 | 744 | struct btree_iter *iter) |
1c6fdbd8 | 745 | { |
1c6fdbd8 | 746 | int ret; |
1c6fdbd8 | 747 | retry: |
42d23732 | 748 | ret = bch2_hash_delete_at(trans, desc, info, iter, 0) ?: |
424eb881 | 749 | bch2_trans_commit(trans, NULL, NULL, |
134915f3 KO |
750 | BTREE_INSERT_NOFAIL| |
751 | BTREE_INSERT_LAZY_RW); | |
424eb881 KO |
752 | if (ret == -EINTR) { |
753 | ret = bch2_btree_iter_traverse(iter); | |
754 | if (!ret) | |
755 | goto retry; | |
756 | } | |
1c6fdbd8 | 757 | |
1c6fdbd8 KO |
758 | return ret; |
759 | } | |
760 | ||
7ac2c55e KO |
761 | static int hash_check_key(struct btree_trans *trans, |
762 | const struct bch_hash_desc desc, | |
763 | struct bch_hash_info *hash_info, | |
764 | struct btree_iter *k_iter, struct bkey_s_c hash_k) | |
d69f41d6 | 765 | { |
424eb881 | 766 | struct bch_fs *c = trans->c; |
67e0dd8f | 767 | struct btree_iter iter = { NULL }; |
d69f41d6 | 768 | char buf[200]; |
7ac2c55e KO |
769 | struct bkey_s_c k; |
770 | u64 hash; | |
d69f41d6 KO |
771 | int ret = 0; |
772 | ||
7ac2c55e KO |
773 | if (hash_k.k->type != desc.key_type) |
774 | return 0; | |
775 | ||
776 | hash = desc.hash_bkey(hash_info, hash_k); | |
777 | ||
778 | if (likely(hash == hash_k.k->p.offset)) | |
d69f41d6 KO |
779 | return 0; |
780 | ||
7ac2c55e KO |
781 | if (hash_k.k->p.offset < hash) |
782 | goto bad_hash; | |
d69f41d6 | 783 | |
7ac2c55e KO |
784 | for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash), |
785 | BTREE_ITER_SLOTS, k, ret) { | |
786 | if (!bkey_cmp(k.k->p, hash_k.k->p)) | |
d69f41d6 KO |
787 | break; |
788 | ||
7ac2c55e KO |
789 | if (fsck_err_on(k.k->type == desc.key_type && |
790 | !desc.cmp_bkey(k, hash_k), c, | |
d69f41d6 | 791 | "duplicate hash table keys:\n%s", |
319f9ac3 | 792 | (bch2_bkey_val_to_text(&PBUF(buf), c, |
7ac2c55e KO |
793 | hash_k), buf))) { |
794 | ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter); | |
d69f41d6 KO |
795 | if (ret) |
796 | return ret; | |
797 | ret = 1; | |
798 | break; | |
799 | } | |
d69f41d6 | 800 | |
7ac2c55e | 801 | if (bkey_deleted(k.k)) { |
67e0dd8f | 802 | bch2_trans_iter_exit(trans, &iter); |
7ac2c55e | 803 | goto bad_hash; |
1c6fdbd8 | 804 | } |
1c6fdbd8 | 805 | |
7ac2c55e | 806 | } |
67e0dd8f | 807 | bch2_trans_iter_exit(trans, &iter); |
1c6fdbd8 | 808 | return ret; |
7ac2c55e KO |
809 | bad_hash: |
810 | if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, " | |
e3b4b48c KO |
811 | "hashed to %llu\n%s", |
812 | desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash, | |
7ac2c55e | 813 | (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE) |
741daa5b KO |
814 | return 0; |
815 | ||
7ac2c55e KO |
816 | ret = __bch2_trans_do(trans, NULL, NULL, |
817 | BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, | |
818 | hash_redo_key(trans, desc, hash_info, k_iter, hash_k)); | |
819 | if (ret) { | |
820 | bch_err(c, "hash_redo_key err %i", ret); | |
821 | return ret; | |
741daa5b | 822 | } |
7ac2c55e | 823 | return -EINTR; |
741daa5b | 824 | fsck_err: |
741daa5b | 825 | return ret; |
741daa5b KO |
826 | } |
827 | ||
5c16add5 KO |
828 | static int check_inode(struct btree_trans *trans, |
829 | struct btree_iter *iter, | |
ef1669ff KO |
830 | struct bch_inode_unpacked *prev, |
831 | struct bch_inode_unpacked u) | |
5c16add5 KO |
832 | { |
833 | struct bch_fs *c = trans->c; | |
5c16add5 KO |
834 | bool do_update = false; |
835 | int ret = 0; | |
836 | ||
ef1669ff KO |
837 | if (fsck_err_on(prev && |
838 | (prev->bi_hash_seed != u.bi_hash_seed || | |
839 | mode_to_type(prev->bi_mode) != mode_to_type(u.bi_mode)), c, | |
840 | "inodes in different snapshots don't match")) { | |
841 | bch_err(c, "repair not implemented yet"); | |
842 | return -EINVAL; | |
843 | } | |
5c16add5 KO |
844 | |
845 | if (u.bi_flags & BCH_INODE_UNLINKED && | |
846 | (!c->sb.clean || | |
847 | fsck_err(c, "filesystem marked clean, but inode %llu unlinked", | |
848 | u.bi_inum))) { | |
5c16add5 KO |
849 | bch2_trans_unlock(trans); |
850 | bch2_fs_lazy_rw(c); | |
851 | ||
ef1669ff | 852 | ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot); |
5c16add5 KO |
853 | if (ret) |
854 | bch_err(c, "error in fsck: error %i while deleting inode", ret); | |
855 | return ret; | |
856 | } | |
857 | ||
858 | if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && | |
859 | (!c->sb.clean || | |
860 | fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", | |
861 | u.bi_inum))) { | |
862 | bch_verbose(c, "truncating inode %llu", u.bi_inum); | |
863 | ||
864 | bch2_trans_unlock(trans); | |
865 | bch2_fs_lazy_rw(c); | |
866 | ||
867 | /* | |
868 | * XXX: need to truncate partial blocks too here - or ideally | |
869 | * just switch units to bytes and that issue goes away | |
870 | */ | |
871 | ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, | |
ef1669ff KO |
872 | SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9, |
873 | iter->pos.snapshot), | |
5c16add5 | 874 | POS(u.bi_inum, U64_MAX), |
ef1669ff | 875 | 0, NULL); |
5c16add5 KO |
876 | if (ret) { |
877 | bch_err(c, "error in fsck: error %i truncating inode", ret); | |
878 | return ret; | |
879 | } | |
880 | ||
881 | /* | |
882 | * We truncated without our normal sector accounting hook, just | |
883 | * make sure we recalculate it: | |
884 | */ | |
885 | u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; | |
886 | ||
887 | u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; | |
888 | do_update = true; | |
889 | } | |
890 | ||
891 | if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY && | |
892 | (!c->sb.clean || | |
893 | fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", | |
894 | u.bi_inum))) { | |
895 | s64 sectors; | |
896 | ||
897 | bch_verbose(c, "recounting sectors for inode %llu", | |
898 | u.bi_inum); | |
899 | ||
ef1669ff | 900 | sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); |
5c16add5 KO |
901 | if (sectors < 0) { |
902 | bch_err(c, "error in fsck: error %i recounting inode sectors", | |
903 | (int) sectors); | |
904 | return sectors; | |
905 | } | |
906 | ||
907 | u.bi_sectors = sectors; | |
908 | u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; | |
909 | do_update = true; | |
910 | } | |
911 | ||
d3ff7fec KO |
912 | if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) { |
913 | u.bi_dir = 0; | |
914 | u.bi_dir_offset = 0; | |
915 | u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED; | |
5c16add5 KO |
916 | do_update = true; |
917 | } | |
918 | ||
919 | if (do_update) { | |
ef1669ff | 920 | ret = write_inode(trans, &u, iter->pos.snapshot); |
5c16add5 KO |
921 | if (ret) |
922 | bch_err(c, "error in fsck: error %i " | |
923 | "updating inode", ret); | |
924 | } | |
925 | fsck_err: | |
926 | return ret; | |
927 | } | |
928 | ||
929 | noinline_for_stack | |
930 | static int check_inodes(struct bch_fs *c, bool full) | |
931 | { | |
932 | struct btree_trans trans; | |
67e0dd8f | 933 | struct btree_iter iter; |
5c16add5 KO |
934 | struct bkey_s_c k; |
935 | struct bkey_s_c_inode inode; | |
ef1669ff | 936 | struct bch_inode_unpacked prev, u; |
5c16add5 KO |
937 | int ret; |
938 | ||
ef1669ff KO |
939 | memset(&prev, 0, sizeof(prev)); |
940 | ||
5c16add5 KO |
941 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
942 | ||
909004d2 KO |
943 | for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, |
944 | BTREE_ITER_INTENT| | |
ef1669ff KO |
945 | BTREE_ITER_PREFETCH| |
946 | BTREE_ITER_ALL_SNAPSHOTS, k, ret) { | |
947 | ret = check_key_has_snapshot(&trans, &iter, k); | |
948 | if (ret) | |
949 | break; | |
950 | ||
951 | /* | |
952 | * if snapshot id isn't a leaf node, skip it - deletion in | |
953 | * particular is not atomic, so on the internal snapshot nodes | |
954 | * we can see inodes marked for deletion after a clean shutdown | |
955 | */ | |
956 | if (bch2_snapshot_internal_node(c, k.k->p.snapshot)) | |
957 | continue; | |
958 | ||
5c16add5 KO |
959 | if (k.k->type != KEY_TYPE_inode) |
960 | continue; | |
961 | ||
962 | inode = bkey_s_c_to_inode(k); | |
963 | ||
ef1669ff KO |
964 | if (!full && |
965 | !(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY| | |
966 | BCH_INODE_I_SECTORS_DIRTY| | |
967 | BCH_INODE_UNLINKED))) | |
968 | continue; | |
969 | ||
970 | BUG_ON(bch2_inode_unpack(inode, &u)); | |
971 | ||
972 | ret = check_inode(&trans, &iter, | |
973 | full && prev.bi_inum == u.bi_inum | |
974 | ? &prev : NULL, u); | |
975 | if (ret) | |
976 | break; | |
977 | ||
978 | prev = u; | |
5c16add5 | 979 | } |
67e0dd8f | 980 | bch2_trans_iter_exit(&trans, &iter); |
5c16add5 KO |
981 | |
982 | BUG_ON(ret == -EINTR); | |
983 | ||
9a796fdb KO |
984 | bch2_trans_exit(&trans); |
985 | return ret; | |
5c16add5 KO |
986 | } |
987 | ||
ef1669ff KO |
988 | noinline_for_stack |
989 | static int check_subvols(struct bch_fs *c) | |
990 | { | |
991 | struct btree_trans trans; | |
992 | struct btree_iter iter; | |
993 | struct bkey_s_c k; | |
994 | int ret; | |
995 | ||
996 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); | |
997 | ||
998 | for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN, | |
999 | 0, k, ret) { | |
1000 | } | |
1001 | bch2_trans_iter_exit(&trans, &iter); | |
1002 | ||
1003 | bch2_trans_exit(&trans); | |
1004 | return ret; | |
1005 | } | |
1006 | ||
1007 | /* | |
1008 | * Checking for overlapping extents needs to be reimplemented | |
1009 | */ | |
1010 | #if 0 | |
abcecb49 | 1011 | static int fix_overlapping_extent(struct btree_trans *trans, |
e3e464ac KO |
1012 | struct bkey_s_c k, struct bpos cut_at) |
1013 | { | |
67e0dd8f | 1014 | struct btree_iter iter; |
e3e464ac KO |
1015 | struct bkey_i *u; |
1016 | int ret; | |
1017 | ||
1018 | u = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); | |
1019 | ret = PTR_ERR_OR_ZERO(u); | |
1020 | if (ret) | |
1021 | return ret; | |
1022 | ||
1023 | bkey_reassemble(u, k); | |
1024 | bch2_cut_front(cut_at, u); | |
1025 | ||
e3e464ac | 1026 | |
ef1669ff KO |
1027 | /* |
1028 | * We don't want to go through the extent_handle_overwrites path: | |
1029 | * | |
1030 | * XXX: this is going to screw up disk accounting, extent triggers | |
1031 | * assume things about extent overwrites - we should be running the | |
1032 | * triggers manually here | |
1033 | */ | |
1034 | bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p, | |
1035 | BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); | |
1036 | ||
1037 | BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS); | |
1038 | ret = bch2_btree_iter_traverse(&iter) ?: | |
1039 | bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?: | |
1040 | bch2_trans_commit(trans, NULL, NULL, | |
1041 | BTREE_INSERT_NOFAIL| | |
1042 | BTREE_INSERT_LAZY_RW); | |
1043 | bch2_trans_iter_exit(trans, &iter); | |
1044 | return ret; | |
1045 | } | |
1046 | #endif | |
1047 | ||
1048 | static int inode_backpointer_exists(struct btree_trans *trans, | |
1049 | struct bch_inode_unpacked *inode, | |
1050 | u32 snapshot) | |
1051 | { | |
1052 | struct btree_iter iter; | |
1053 | struct bkey_s_c k; | |
488f9776 KO |
1054 | u32 target_subvol, target_snapshot; |
1055 | u64 target_inum; | |
ef1669ff KO |
1056 | int ret; |
1057 | ||
1058 | bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, | |
1059 | SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot), 0); | |
1060 | k = bch2_btree_iter_peek_slot(&iter); | |
1061 | ret = bkey_err(k); | |
1062 | if (ret) | |
1063 | goto out; | |
1064 | if (k.k->type != KEY_TYPE_dirent) | |
1065 | goto out; | |
1066 | ||
488f9776 KO |
1067 | ret = __bch2_dirent_read_target(trans, bkey_s_c_to_dirent(k), |
1068 | &target_subvol, | |
1069 | &target_snapshot, | |
1070 | &target_inum, | |
1071 | true); | |
1072 | if (ret) | |
1073 | goto out; | |
1074 | ||
1075 | ret = target_inum == inode->bi_inum; | |
ef1669ff KO |
1076 | out: |
1077 | bch2_trans_iter_exit(trans, &iter); | |
1078 | return ret; | |
1079 | } | |
1080 | ||
1081 | static bool inode_backpointer_matches(struct bkey_s_c_dirent d, | |
1082 | struct bch_inode_unpacked *inode) | |
1083 | { | |
1084 | return d.k->p.inode == inode->bi_dir && | |
1085 | d.k->p.offset == inode->bi_dir_offset; | |
1086 | } | |
1087 | ||
1088 | static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) | |
1089 | { | |
1090 | struct bch_fs *c = trans->c; | |
1091 | struct inode_walker_entry *i; | |
1092 | int ret = 0, ret2 = 0; | |
1093 | s64 count2; | |
1094 | ||
1095 | for (i = w->d; i < w->d + w->nr; i++) { | |
1096 | if (i->inode.bi_sectors == i->count) | |
1097 | continue; | |
1098 | ||
1099 | count2 = lockrestart_do(trans, | |
1100 | bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot)); | |
1101 | ||
1102 | if (i->count != count2) { | |
1103 | bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu", | |
1104 | i->count, count2); | |
1105 | i->count = count2; | |
1106 | if (i->inode.bi_sectors == i->count) | |
1107 | continue; | |
1108 | } | |
1109 | ||
1110 | if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c, | |
1111 | "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", | |
1112 | w->cur_inum, i->snapshot, | |
1113 | i->inode.bi_sectors, i->count) == FSCK_ERR_IGNORE) | |
1114 | continue; | |
1115 | ||
1116 | i->inode.bi_sectors = i->count; | |
1117 | ret = write_inode(trans, &i->inode, i->snapshot); | |
1118 | if (ret) | |
1119 | break; | |
1120 | ret2 = -EINTR; | |
1121 | } | |
1122 | fsck_err: | |
1123 | return ret ?: ret2; | |
1124 | } | |
1125 | ||
1126 | static int check_extent(struct btree_trans *trans, struct btree_iter *iter, | |
1127 | struct inode_walker *inode, | |
1128 | struct snapshots_seen *s) | |
1129 | { | |
1130 | struct bch_fs *c = trans->c; | |
1131 | struct bkey_s_c k; | |
1132 | struct inode_walker_entry *i; | |
1133 | char buf[200]; | |
1134 | int ret = 0; | |
1135 | ||
1136 | k = bch2_btree_iter_peek(iter); | |
1137 | if (!k.k) | |
1138 | return 0; | |
1139 | ||
1140 | ret = bkey_err(k); | |
1141 | if (ret) | |
1142 | return ret; | |
1143 | ||
1144 | ret = check_key_has_snapshot(trans, iter, k); | |
1145 | if (ret) | |
1146 | return ret; | |
1147 | ||
1148 | ret = snapshots_seen_update(c, s, k.k->p); | |
1149 | if (ret) | |
1150 | return ret; | |
1151 | ||
1152 | if (k.k->type == KEY_TYPE_whiteout) | |
1153 | return 0; | |
1154 | ||
1155 | if (inode->cur_inum != k.k->p.inode) { | |
1156 | ret = check_i_sectors(trans, inode); | |
1157 | if (ret) | |
1158 | return ret; | |
1159 | } | |
1160 | #if 0 | |
1161 | if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { | |
1162 | char buf1[200]; | |
1163 | char buf2[200]; | |
1164 | ||
1165 | bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); | |
1166 | bch2_bkey_val_to_text(&PBUF(buf2), c, k); | |
1167 | ||
1168 | if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) | |
1169 | return fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR; | |
1170 | } | |
1171 | #endif | |
1172 | ret = __walk_inode(trans, inode, k.k->p); | |
1173 | if (ret < 0) | |
1174 | return ret; | |
1175 | ||
1176 | if (fsck_err_on(ret == INT_MAX, c, | |
1177 | "extent in missing inode:\n %s", | |
1178 | (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) | |
1179 | return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, | |
1180 | bch2_btree_delete_at(trans, iter, | |
1181 | BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)); | |
1182 | ||
1183 | if (ret == INT_MAX) | |
1184 | return 0; | |
e3e464ac | 1185 | |
ef1669ff KO |
1186 | i = inode->d + ret; |
1187 | ret = 0; | |
e3e464ac | 1188 | |
ef1669ff KO |
1189 | if (fsck_err_on(!S_ISREG(i->inode.bi_mode) && |
1190 | !S_ISLNK(i->inode.bi_mode), c, | |
1191 | "extent in non regular inode mode %o:\n %s", | |
1192 | i->inode.bi_mode, | |
1193 | (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) | |
1194 | return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, | |
1195 | bch2_btree_delete_at(trans, iter, | |
1196 | BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)); | |
1197 | ||
1198 | if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) { | |
1199 | for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) { | |
1200 | if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && | |
1201 | k.k->type != KEY_TYPE_reservation && | |
1202 | k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c, | |
1203 | "extent type %u offset %llu past end of inode %llu, i_size %llu", | |
1204 | k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) { | |
1205 | bch2_fs_lazy_rw(c); | |
1206 | return bch2_btree_delete_range_trans(trans, BTREE_ID_extents, | |
1207 | SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9, | |
1208 | k.k->p.snapshot), | |
1209 | POS(k.k->p.inode, U64_MAX), | |
1210 | 0, NULL) ?: -EINTR; | |
1211 | } | |
1212 | } | |
1213 | } | |
8a85b20c | 1214 | |
ef1669ff KO |
1215 | if (bkey_extent_is_allocation(k.k)) |
1216 | for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) | |
1217 | i->count += k.k->size; | |
1218 | #if 0 | |
1219 | bch2_bkey_buf_reassemble(&prev, c, k); | |
1220 | #endif | |
8a85b20c | 1221 | |
ef1669ff | 1222 | fsck_err: |
8a85b20c KO |
1223 | return ret; |
1224 | } | |
1225 | ||
1c6fdbd8 KO |
1226 | /* |
1227 | * Walk extents: verify that extents have a corresponding S_ISREG inode, and | |
1228 | * that i_size an i_sectors are consistent | |
1229 | */ | |
1230 | noinline_for_stack | |
1231 | static int check_extents(struct bch_fs *c) | |
1232 | { | |
1233 | struct inode_walker w = inode_walker_init(); | |
ef1669ff | 1234 | struct snapshots_seen s; |
424eb881 | 1235 | struct btree_trans trans; |
67e0dd8f | 1236 | struct btree_iter iter; |
1c6fdbd8 KO |
1237 | int ret = 0; |
1238 | ||
ef1669ff KO |
1239 | #if 0 |
1240 | struct bkey_buf prev; | |
07a1006a | 1241 | bch2_bkey_buf_init(&prev); |
f7005e01 | 1242 | prev.k->k = KEY(0, 0, 0); |
ef1669ff KO |
1243 | #endif |
1244 | snapshots_seen_init(&s); | |
20bceecb | 1245 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
424eb881 | 1246 | |
1c6fdbd8 KO |
1247 | bch_verbose(c, "checking extents"); |
1248 | ||
67e0dd8f KO |
1249 | bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, |
1250 | POS(BCACHEFS_ROOT_INO, 0), | |
1251 | BTREE_ITER_INTENT| | |
ef1669ff KO |
1252 | BTREE_ITER_PREFETCH| |
1253 | BTREE_ITER_ALL_SNAPSHOTS); | |
1254 | ||
1255 | do { | |
1256 | ret = lockrestart_do(&trans, | |
1257 | check_extent(&trans, &iter, &w, &s)); | |
1258 | if (ret) | |
1259 | break; | |
1260 | } while (bch2_btree_iter_advance(&iter)); | |
1261 | bch2_trans_iter_exit(&trans, &iter); | |
1262 | #if 0 | |
1263 | bch2_bkey_buf_exit(&prev, c); | |
1264 | #endif | |
1265 | inode_walker_exit(&w); | |
1266 | bch2_trans_exit(&trans); | |
1267 | snapshots_seen_exit(&s); | |
1268 | ||
1269 | return ret; | |
1270 | } | |
1271 | ||
1272 | static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) | |
1273 | { | |
1274 | struct bch_fs *c = trans->c; | |
1275 | struct inode_walker_entry *i; | |
1276 | int ret = 0, ret2 = 0; | |
1277 | s64 count2; | |
1278 | ||
1279 | for (i = w->d; i < w->d + w->nr; i++) { | |
1280 | if (i->inode.bi_nlink == i->count) | |
1281 | continue; | |
1282 | ||
1283 | count2 = lockrestart_do(trans, | |
1284 | bch2_count_subdirs(trans, w->cur_inum, i->snapshot)); | |
1285 | ||
1286 | if (i->count != count2) { | |
1287 | bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu", | |
1288 | i->count, count2); | |
1289 | i->count = count2; | |
1290 | if (i->inode.bi_nlink == i->count) | |
1291 | continue; | |
1292 | } | |
1293 | ||
1294 | if (fsck_err_on(i->inode.bi_nlink != i->count, c, | |
1295 | "directory %llu:%u with wrong i_nlink: got %u, should be %llu", | |
1296 | w->cur_inum, i->snapshot, i->inode.bi_nlink, i->count)) { | |
1297 | i->inode.bi_nlink = i->count; | |
1298 | ret = write_inode(trans, &i->inode, i->snapshot); | |
abcecb49 KO |
1299 | if (ret) |
1300 | break; | |
ef1669ff | 1301 | ret2 = -EINTR; |
abcecb49 | 1302 | } |
ef1669ff KO |
1303 | } |
1304 | fsck_err: | |
1305 | return ret ?: ret2; | |
1306 | } | |
abcecb49 | 1307 | |
ef1669ff KO |
1308 | static int check_dirent_target(struct btree_trans *trans, |
1309 | struct btree_iter *iter, | |
1310 | struct bkey_s_c_dirent d, | |
1311 | struct bch_inode_unpacked *target, | |
1312 | u32 target_snapshot) | |
1313 | { | |
1314 | struct bch_fs *c = trans->c; | |
1315 | bool backpointer_exists = true; | |
1316 | char buf[200]; | |
1317 | int ret = 0; | |
1318 | ||
1319 | if (!target->bi_dir && | |
1320 | !target->bi_dir_offset) { | |
1321 | target->bi_dir = d.k->p.inode; | |
1322 | target->bi_dir_offset = d.k->p.offset; | |
1323 | ||
1324 | ret = write_inode(trans, target, target_snapshot); | |
1325 | if (ret) | |
1326 | goto err; | |
1327 | } | |
1328 | ||
1329 | if (!inode_backpointer_matches(d, target)) { | |
1330 | ret = inode_backpointer_exists(trans, target, d.k->p.snapshot); | |
1331 | if (ret < 0) | |
1332 | goto err; | |
e3e464ac | 1333 | |
ef1669ff KO |
1334 | backpointer_exists = ret; |
1335 | ret = 0; | |
e3e464ac | 1336 | |
ef1669ff KO |
1337 | if (fsck_err_on(S_ISDIR(target->bi_mode) && |
1338 | backpointer_exists, c, | |
1339 | "directory %llu with multiple links", | |
1340 | target->bi_inum)) { | |
1341 | ret = remove_dirent(trans, d.k->p); | |
1342 | if (ret) | |
1343 | goto err; | |
1344 | return 0; | |
e3e464ac | 1345 | } |
e3e464ac | 1346 | |
ef1669ff KO |
1347 | if (fsck_err_on(backpointer_exists && |
1348 | !target->bi_nlink, c, | |
1349 | "inode %llu has multiple links but i_nlink 0", | |
1350 | target->bi_inum)) { | |
1351 | target->bi_nlink++; | |
1352 | target->bi_flags &= ~BCH_INODE_UNLINKED; | |
1c6fdbd8 | 1353 | |
ef1669ff KO |
1354 | ret = write_inode(trans, target, target_snapshot); |
1355 | if (ret) | |
1356 | goto err; | |
1c6fdbd8 KO |
1357 | } |
1358 | ||
ef1669ff | 1359 | if (fsck_err_on(!backpointer_exists, c, |
6e0c886d | 1360 | "inode %llu:%u has wrong backpointer:\n" |
ef1669ff KO |
1361 | "got %llu:%llu\n" |
1362 | "should be %llu:%llu", | |
6e0c886d | 1363 | target->bi_inum, target_snapshot, |
ef1669ff KO |
1364 | target->bi_dir, |
1365 | target->bi_dir_offset, | |
1366 | d.k->p.inode, | |
1367 | d.k->p.offset)) { | |
1368 | target->bi_dir = d.k->p.inode; | |
1369 | target->bi_dir_offset = d.k->p.offset; | |
1370 | ||
1371 | ret = write_inode(trans, target, target_snapshot); | |
1372 | if (ret) | |
1373 | goto err; | |
1c6fdbd8 | 1374 | } |
ef1669ff | 1375 | } |
1c6fdbd8 | 1376 | |
ef1669ff KO |
1377 | if (fsck_err_on(vfs_d_type(d.v->d_type) != mode_to_type(target->bi_mode), c, |
1378 | "incorrect d_type: should be %u:\n%s", | |
1379 | mode_to_type(target->bi_mode), | |
1380 | (bch2_bkey_val_to_text(&PBUF(buf), c, d.s_c), buf))) { | |
1381 | struct bkey_i_dirent *n; | |
1c6fdbd8 | 1382 | |
ef1669ff KO |
1383 | n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); |
1384 | if (!n) { | |
1385 | ret = -ENOMEM; | |
1386 | goto err; | |
1387 | } | |
1388 | ||
1389 | bkey_reassemble(&n->k_i, d.s_c); | |
1390 | n->v.d_type = mode_to_type(target->bi_mode); | |
1391 | ||
1392 | ret = __bch2_trans_do(trans, NULL, NULL, | |
1393 | BTREE_INSERT_NOFAIL| | |
1394 | BTREE_INSERT_LAZY_RW, | |
1395 | bch2_trans_update(trans, iter, &n->k_i, 0)); | |
1396 | kfree(n); | |
1397 | if (ret) | |
1398 | goto err; | |
1c6fdbd8 | 1399 | } |
ef1669ff | 1400 | err: |
1c6fdbd8 | 1401 | fsck_err: |
ef1669ff | 1402 | return ret; |
1c6fdbd8 KO |
1403 | } |
1404 | ||
914f2786 KO |
1405 | static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, |
1406 | struct bch_hash_info *hash_info, | |
ef1669ff KO |
1407 | struct inode_walker *dir, |
1408 | struct inode_walker *target, | |
1409 | struct snapshots_seen *s) | |
1c6fdbd8 | 1410 | { |
914f2786 | 1411 | struct bch_fs *c = trans->c; |
1c6fdbd8 | 1412 | struct bkey_s_c k; |
914f2786 | 1413 | struct bkey_s_c_dirent d; |
ef1669ff | 1414 | struct inode_walker_entry *i; |
914f2786 | 1415 | u32 target_snapshot; |
b9e1adf5 | 1416 | u32 target_subvol; |
ef1669ff | 1417 | u64 target_inum; |
1c6fdbd8 | 1418 | char buf[200]; |
914f2786 | 1419 | int ret; |
1c6fdbd8 | 1420 | |
914f2786 KO |
1421 | k = bch2_btree_iter_peek(iter); |
1422 | if (!k.k) | |
38200544 | 1423 | return 0; |
1c6fdbd8 | 1424 | |
914f2786 KO |
1425 | ret = bkey_err(k); |
1426 | if (ret) | |
1427 | return ret; | |
d69f41d6 | 1428 | |
ef1669ff KO |
1429 | ret = check_key_has_snapshot(trans, iter, k); |
1430 | if (ret) | |
1431 | return ret; | |
1c6fdbd8 | 1432 | |
ef1669ff | 1433 | ret = snapshots_seen_update(c, s, k.k->p); |
914f2786 KO |
1434 | if (ret) |
1435 | return ret; | |
8a85b20c | 1436 | |
ef1669ff KO |
1437 | if (k.k->type == KEY_TYPE_whiteout) |
1438 | return 0; | |
1439 | ||
1440 | if (dir->cur_inum != k.k->p.inode) { | |
1441 | ret = check_subdir_count(trans, dir); | |
1442 | if (ret) | |
1443 | return ret; | |
1444 | } | |
1445 | ||
1446 | ret = __walk_inode(trans, dir, k.k->p); | |
1447 | if (ret < 0) | |
1448 | return ret; | |
1c6fdbd8 | 1449 | |
ef1669ff | 1450 | if (fsck_err_on(ret == INT_MAX, c, |
914f2786 | 1451 | "dirent in nonexisting directory:\n%s", |
ef1669ff KO |
1452 | (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) |
1453 | return __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW, | |
1454 | bch2_btree_delete_at(trans, iter, | |
1455 | BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)); | |
1456 | ||
1457 | if (ret == INT_MAX) | |
1458 | return 0; | |
1459 | ||
1460 | i = dir->d + ret; | |
1461 | ret = 0; | |
1462 | ||
1463 | if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c, | |
914f2786 | 1464 | "dirent in non directory inode type %u:\n%s", |
ef1669ff | 1465 | mode_to_type(i->inode.bi_mode), |
914f2786 KO |
1466 | (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) |
1467 | return __bch2_trans_do(trans, NULL, NULL, 0, | |
1468 | bch2_btree_delete_at(trans, iter, 0)); | |
8a85b20c | 1469 | |
ef1669ff KO |
1470 | if (dir->first_this_inode) |
1471 | *hash_info = bch2_hash_info_init(c, &dir->d[0].inode); | |
1c6fdbd8 | 1472 | |
914f2786 KO |
1473 | ret = hash_check_key(trans, bch2_dirent_hash_desc, |
1474 | hash_info, iter, k); | |
1475 | if (ret < 0) | |
1476 | return ret; | |
1477 | if (ret) /* dirent has been deleted */ | |
1478 | return 0; | |
7ac2c55e | 1479 | |
914f2786 KO |
1480 | if (k.k->type != KEY_TYPE_dirent) |
1481 | return 0; | |
1c6fdbd8 | 1482 | |
914f2786 | 1483 | d = bkey_s_c_to_dirent(k); |
1c6fdbd8 | 1484 | |
ef1669ff | 1485 | ret = __bch2_dirent_read_target(trans, d, |
6fed42bb KO |
1486 | &target_subvol, |
1487 | &target_snapshot, | |
ef1669ff KO |
1488 | &target_inum, |
1489 | true); | |
b9e1adf5 KO |
1490 | if (ret && ret != -ENOENT) |
1491 | return ret; | |
1492 | ||
ef1669ff KO |
1493 | if (fsck_err_on(ret, c, |
1494 | "dirent points to missing subvolume %llu", | |
1495 | le64_to_cpu(d.v->d_inum))) | |
1496 | return remove_dirent(trans, d.k->p); | |
1c6fdbd8 | 1497 | |
ef1669ff KO |
1498 | if (target_subvol) { |
1499 | struct bch_inode_unpacked subvol_root; | |
914f2786 | 1500 | |
ef1669ff KO |
1501 | ret = __lookup_inode(trans, target_inum, |
1502 | &subvol_root, &target_snapshot); | |
1503 | if (ret && ret != -ENOENT) | |
1504 | return ret; | |
914f2786 | 1505 | |
ef1669ff KO |
1506 | if (fsck_err_on(ret, c, |
1507 | "subvolume %u points to missing subvolume root %llu", | |
1508 | target_subvol, | |
1509 | target_inum)) { | |
1510 | bch_err(c, "repair not implemented yet"); | |
1511 | return -EINVAL; | |
1512 | } | |
914f2786 | 1513 | |
ef1669ff KO |
1514 | if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c, |
1515 | "subvol root %llu has wrong bi_subvol field: got %u, should be %u", | |
1516 | target_inum, | |
1517 | subvol_root.bi_subvol, target_subvol)) { | |
1518 | subvol_root.bi_subvol = target_subvol; | |
1519 | ret = write_inode(trans, &subvol_root, target_snapshot); | |
1520 | if (ret) | |
1521 | return ret; | |
1522 | } | |
914f2786 | 1523 | |
ef1669ff KO |
1524 | ret = check_dirent_target(trans, iter, d, &subvol_root, |
1525 | target_snapshot); | |
914f2786 KO |
1526 | if (ret) |
1527 | return ret; | |
ef1669ff KO |
1528 | } else { |
1529 | ret = __get_visible_inodes(trans, target, s, target_inum); | |
1530 | if (ret) | |
914f2786 | 1531 | return ret; |
1c6fdbd8 | 1532 | |
ef1669ff KO |
1533 | if (fsck_err_on(!target->nr, c, |
1534 | "dirent points to missing inode:\n%s", | |
1535 | (bch2_bkey_val_to_text(&PBUF(buf), c, | |
1536 | k), buf))) { | |
1537 | ret = remove_dirent(trans, d.k->p); | |
1538 | if (ret) | |
1539 | return ret; | |
1c6fdbd8 KO |
1540 | } |
1541 | ||
ef1669ff KO |
1542 | for (i = target->d; i < target->d + target->nr; i++) { |
1543 | ret = check_dirent_target(trans, iter, d, | |
1544 | &i->inode, i->snapshot); | |
1545 | if (ret) | |
1546 | return ret; | |
d3ff7fec | 1547 | } |
914f2786 | 1548 | } |
d3ff7fec | 1549 | |
ef1669ff KO |
1550 | if (d.v->d_type == DT_DIR) |
1551 | for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) | |
1552 | i->count++; | |
1c6fdbd8 | 1553 | |
914f2786 KO |
1554 | fsck_err: |
1555 | return ret; | |
1556 | } | |
1c6fdbd8 | 1557 | |
914f2786 KO |
1558 | /* |
1559 | * Walk dirents: verify that they all have a corresponding S_ISDIR inode, | |
1560 | * validate d_type | |
1561 | */ | |
1562 | noinline_for_stack | |
1563 | static int check_dirents(struct bch_fs *c) | |
1564 | { | |
ef1669ff KO |
1565 | struct inode_walker dir = inode_walker_init(); |
1566 | struct inode_walker target = inode_walker_init(); | |
1567 | struct snapshots_seen s; | |
914f2786 KO |
1568 | struct bch_hash_info hash_info; |
1569 | struct btree_trans trans; | |
67e0dd8f | 1570 | struct btree_iter iter; |
914f2786 | 1571 | int ret = 0; |
1c6fdbd8 | 1572 | |
914f2786 KO |
1573 | bch_verbose(c, "checking dirents"); |
1574 | ||
ef1669ff | 1575 | snapshots_seen_init(&s); |
914f2786 | 1576 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
1c6fdbd8 | 1577 | |
67e0dd8f KO |
1578 | bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents, |
1579 | POS(BCACHEFS_ROOT_INO, 0), | |
1580 | BTREE_ITER_INTENT| | |
ef1669ff KO |
1581 | BTREE_ITER_PREFETCH| |
1582 | BTREE_ITER_ALL_SNAPSHOTS); | |
914f2786 | 1583 | |
38200544 | 1584 | do { |
914f2786 | 1585 | ret = lockrestart_do(&trans, |
ef1669ff KO |
1586 | check_dirent(&trans, &iter, &hash_info, |
1587 | &dir, &target, &s)); | |
914f2786 KO |
1588 | if (ret) |
1589 | break; | |
67e0dd8f KO |
1590 | } while (bch2_btree_iter_advance(&iter)); |
1591 | bch2_trans_iter_exit(&trans, &iter); | |
914f2786 | 1592 | |
ef1669ff KO |
1593 | bch2_trans_exit(&trans); |
1594 | snapshots_seen_exit(&s); | |
1595 | inode_walker_exit(&dir); | |
1596 | inode_walker_exit(&target); | |
1597 | return ret; | |
1c6fdbd8 KO |
1598 | } |
1599 | ||
1600 | /* | |
1601 | * Walk xattrs: verify that they all have a corresponding inode | |
1602 | */ | |
1603 | noinline_for_stack | |
1604 | static int check_xattrs(struct bch_fs *c) | |
1605 | { | |
1606 | struct inode_walker w = inode_walker_init(); | |
7ac2c55e | 1607 | struct bch_hash_info hash_info; |
d69f41d6 | 1608 | struct btree_trans trans; |
67e0dd8f | 1609 | struct btree_iter iter; |
1c6fdbd8 KO |
1610 | struct bkey_s_c k; |
1611 | int ret = 0; | |
1612 | ||
1613 | bch_verbose(c, "checking xattrs"); | |
1614 | ||
20bceecb | 1615 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
d69f41d6 | 1616 | |
67e0dd8f KO |
1617 | bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, |
1618 | POS(BCACHEFS_ROOT_INO, 0), | |
1619 | BTREE_ITER_INTENT| | |
ef1669ff KO |
1620 | BTREE_ITER_PREFETCH| |
1621 | BTREE_ITER_ALL_SNAPSHOTS); | |
6bd13057 | 1622 | retry: |
ef1669ff KO |
1623 | bch2_trans_begin(&trans); |
1624 | ||
67e0dd8f | 1625 | while ((k = bch2_btree_iter_peek(&iter)).k && |
abcecb49 | 1626 | !(ret = bkey_err(k))) { |
ef1669ff | 1627 | ret = check_key_has_snapshot(&trans, &iter, k); |
1c6fdbd8 KO |
1628 | if (ret) |
1629 | break; | |
1630 | ||
ef1669ff KO |
1631 | ret = walk_inode(&trans, &w, k.k->p); |
1632 | if (ret < 0) | |
1633 | break; | |
1634 | ||
1635 | if (fsck_err_on(ret == INT_MAX, c, | |
1c6fdbd8 KO |
1636 | "xattr for missing inode %llu", |
1637 | k.k->p.inode)) { | |
67e0dd8f | 1638 | ret = bch2_btree_delete_at(&trans, &iter, 0); |
1c6fdbd8 | 1639 | if (ret) |
abcecb49 | 1640 | break; |
1c6fdbd8 KO |
1641 | continue; |
1642 | } | |
1643 | ||
ef1669ff KO |
1644 | if (ret == INT_MAX) |
1645 | goto next; | |
1646 | ret = 0; | |
1647 | ||
1648 | if (w.first_this_inode) | |
1649 | hash_info = bch2_hash_info_init(c, &w.d[0].inode); | |
1c6fdbd8 | 1650 | |
424eb881 | 1651 | ret = hash_check_key(&trans, bch2_xattr_hash_desc, |
67e0dd8f | 1652 | &hash_info, &iter, k); |
1c6fdbd8 | 1653 | if (ret) |
abcecb49 | 1654 | break; |
ef1669ff | 1655 | next: |
67e0dd8f | 1656 | bch2_btree_iter_advance(&iter); |
1c6fdbd8 | 1657 | } |
1c6fdbd8 | 1658 | fsck_err: |
6bd13057 KO |
1659 | if (ret == -EINTR) |
1660 | goto retry; | |
abcecb49 | 1661 | |
67e0dd8f | 1662 | bch2_trans_iter_exit(&trans, &iter); |
9a796fdb KO |
1663 | bch2_trans_exit(&trans); |
1664 | return ret; | |
1c6fdbd8 KO |
1665 | } |
1666 | ||
1667 | /* Get root directory, create if it doesn't exist: */ | |
ef1669ff | 1668 | static int check_root(struct bch_fs *c) |
1c6fdbd8 | 1669 | { |
ef1669ff KO |
1670 | struct btree_trans trans; |
1671 | struct bch_inode_unpacked root_inode; | |
d3ff7fec | 1672 | u32 snapshot; |
ef1669ff | 1673 | u64 inum; |
1c6fdbd8 KO |
1674 | int ret; |
1675 | ||
ef1669ff KO |
1676 | bch2_trans_init(&trans, c, 0, 0); |
1677 | ||
1c6fdbd8 KO |
1678 | bch_verbose(c, "checking root directory"); |
1679 | ||
ef1669ff | 1680 | ret = subvol_lookup(&trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); |
1c6fdbd8 KO |
1681 | if (ret && ret != -ENOENT) |
1682 | return ret; | |
1683 | ||
ef1669ff KO |
1684 | if (mustfix_fsck_err_on(ret, c, "root subvol missing")) { |
1685 | struct bkey_i_subvolume root_subvol; | |
1c6fdbd8 | 1686 | |
ef1669ff KO |
1687 | snapshot = U32_MAX; |
1688 | inum = BCACHEFS_ROOT_INO; | |
1c6fdbd8 | 1689 | |
ef1669ff KO |
1690 | bkey_subvolume_init(&root_subvol.k_i); |
1691 | root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL; | |
1692 | root_subvol.v.flags = 0; | |
1693 | root_subvol.v.snapshot = cpu_to_le32(snapshot); | |
1694 | root_subvol.v.inode = cpu_to_le64(inum); | |
1695 | ret = __bch2_trans_do(&trans, NULL, NULL, | |
1696 | BTREE_INSERT_NOFAIL| | |
1697 | BTREE_INSERT_LAZY_RW, | |
1698 | __bch2_btree_insert(&trans, BTREE_ID_subvolumes, &root_subvol.k_i)); | |
1699 | if (ret) { | |
1700 | bch_err(c, "error writing root subvol: %i", ret); | |
1701 | goto err; | |
1702 | } | |
1703 | ||
1704 | } | |
1705 | ||
1706 | ret = lookup_inode(&trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); | |
1707 | if (ret && ret != -ENOENT) | |
1708 | return ret; | |
1c6fdbd8 | 1709 | |
ef1669ff KO |
1710 | if (mustfix_fsck_err_on(ret, c, "root directory missing") || |
1711 | mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c, | |
1712 | "root inode not a directory")) { | |
1713 | bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, | |
1714 | 0, NULL); | |
1715 | root_inode.bi_inum = inum; | |
1c6fdbd8 | 1716 | |
ef1669ff KO |
1717 | ret = write_inode(&trans, &root_inode, snapshot); |
1718 | if (ret) | |
1719 | bch_err(c, "error writing root inode: %i", ret); | |
1720 | } | |
1721 | err: | |
1722 | fsck_err: | |
1723 | bch2_trans_exit(&trans); | |
1724 | return ret; | |
1c6fdbd8 KO |
1725 | } |
1726 | ||
1c6fdbd8 KO |
1727 | struct pathbuf { |
1728 | size_t nr; | |
1729 | size_t size; | |
1730 | ||
1731 | struct pathbuf_entry { | |
1732 | u64 inum; | |
6e0c886d | 1733 | u32 snapshot; |
1c6fdbd8 KO |
1734 | } *entries; |
1735 | }; | |
1736 | ||
6e0c886d KO |
1737 | static bool path_is_dup(struct pathbuf *p, u64 inum, u32 snapshot) |
1738 | { | |
1739 | struct pathbuf_entry *i; | |
1740 | ||
1741 | for (i = p->entries; i < p->entries + p->nr; i++) | |
1742 | if (i->inum == inum && | |
1743 | i->snapshot == snapshot) | |
1744 | return true; | |
1745 | ||
1746 | return false; | |
1747 | } | |
1748 | ||
1749 | static int path_down(struct pathbuf *p, u64 inum, u32 snapshot) | |
1c6fdbd8 KO |
1750 | { |
1751 | if (p->nr == p->size) { | |
1752 | size_t new_size = max_t(size_t, 256UL, p->size * 2); | |
1753 | void *n = krealloc(p->entries, | |
1754 | new_size * sizeof(p->entries[0]), | |
1755 | GFP_KERNEL); | |
d3ff7fec | 1756 | if (!n) { |
1c6fdbd8 | 1757 | return -ENOMEM; |
d3ff7fec | 1758 | } |
1c6fdbd8 KO |
1759 | |
1760 | p->entries = n; | |
1761 | p->size = new_size; | |
1762 | }; | |
1763 | ||
1764 | p->entries[p->nr++] = (struct pathbuf_entry) { | |
6e0c886d KO |
1765 | .inum = inum, |
1766 | .snapshot = snapshot, | |
1c6fdbd8 KO |
1767 | }; |
1768 | return 0; | |
1769 | } | |
1770 | ||
6e0c886d KO |
1771 | /* |
1772 | * Check that a given inode is reachable from the root: | |
1773 | * | |
1774 | * XXX: we should also be verifying that inodes are in the right subvolumes | |
1775 | */ | |
d3ff7fec | 1776 | static int check_path(struct btree_trans *trans, |
d3ff7fec | 1777 | struct pathbuf *p, |
81ed9ce3 KO |
1778 | struct bch_inode_unpacked *inode, |
1779 | u32 snapshot) | |
1c6fdbd8 | 1780 | { |
d3ff7fec | 1781 | struct bch_fs *c = trans->c; |
1c6fdbd8 KO |
1782 | int ret = 0; |
1783 | ||
ef1669ff | 1784 | snapshot = snapshot_t(c, snapshot)->equiv; |
d3ff7fec | 1785 | p->nr = 0; |
424eb881 | 1786 | |
488f9776 KO |
1787 | while (!(inode->bi_inum == BCACHEFS_ROOT_INO && |
1788 | inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) { | |
6e0c886d KO |
1789 | u32 parent_snapshot = snapshot; |
1790 | ||
488f9776 KO |
1791 | if (inode->bi_parent_subvol) { |
1792 | u64 inum; | |
1793 | ||
1794 | ret = subvol_lookup(trans, inode->bi_parent_subvol, | |
6e0c886d | 1795 | &parent_snapshot, &inum); |
488f9776 KO |
1796 | if (ret) |
1797 | break; | |
1798 | } | |
1799 | ||
d3ff7fec | 1800 | ret = lockrestart_do(trans, |
6e0c886d | 1801 | inode_backpointer_exists(trans, inode, parent_snapshot)); |
d3ff7fec KO |
1802 | if (ret < 0) |
1803 | break; | |
1c6fdbd8 | 1804 | |
d3ff7fec | 1805 | if (!ret) { |
ef1669ff KO |
1806 | if (fsck_err(c, "unreachable inode %llu:%u, type %u nlink %u backptr %llu:%llu", |
1807 | inode->bi_inum, snapshot, | |
d3ff7fec KO |
1808 | mode_to_type(inode->bi_mode), |
1809 | inode->bi_nlink, | |
1810 | inode->bi_dir, | |
1811 | inode->bi_dir_offset)) | |
81ed9ce3 | 1812 | ret = reattach_inode(trans, inode, snapshot); |
d3ff7fec KO |
1813 | break; |
1814 | } | |
1815 | ret = 0; | |
1c6fdbd8 | 1816 | |
d3ff7fec KO |
1817 | if (!S_ISDIR(inode->bi_mode)) |
1818 | break; | |
1c6fdbd8 | 1819 | |
6e0c886d | 1820 | ret = path_down(p, inode->bi_inum, snapshot); |
d3ff7fec KO |
1821 | if (ret) { |
1822 | bch_err(c, "memory allocation failure"); | |
1823 | return ret; | |
1824 | } | |
1c6fdbd8 | 1825 | |
6e0c886d KO |
1826 | snapshot = parent_snapshot; |
1827 | ||
1828 | ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot); | |
1829 | if (ret) { | |
1830 | /* Should have been caught in dirents pass */ | |
1831 | bch_err(c, "error looking up parent directory: %i", ret); | |
1832 | break; | |
1833 | } | |
1834 | ||
1835 | if (path_is_dup(p, inode->bi_inum, snapshot)) { | |
1836 | struct pathbuf_entry *i; | |
1c6fdbd8 | 1837 | |
d3ff7fec | 1838 | /* XXX print path */ |
6e0c886d KO |
1839 | bch_err(c, "directory structure loop"); |
1840 | ||
1841 | for (i = p->entries; i < p->entries + p->nr; i++) | |
1842 | pr_err("%llu:%u", i->inum, i->snapshot); | |
1843 | pr_err("%llu:%u", inode->bi_inum, snapshot); | |
1844 | ||
d3ff7fec KO |
1845 | if (!fsck_err(c, "directory structure loop")) |
1846 | return 0; | |
1c6fdbd8 | 1847 | |
d3ff7fec | 1848 | ret = lockrestart_do(trans, |
81ed9ce3 | 1849 | remove_backpointer(trans, inode)); |
1c6fdbd8 | 1850 | if (ret) { |
d3ff7fec KO |
1851 | bch_err(c, "error removing dirent: %i", ret); |
1852 | break; | |
1c6fdbd8 KO |
1853 | } |
1854 | ||
81ed9ce3 | 1855 | ret = reattach_inode(trans, inode, snapshot); |
1c6fdbd8 | 1856 | } |
1c6fdbd8 | 1857 | } |
d3ff7fec KO |
1858 | fsck_err: |
1859 | if (ret) | |
1860 | bch_err(c, "%s: err %i", __func__, ret); | |
1861 | return ret; | |
1862 | } | |
1c6fdbd8 | 1863 | |
d3ff7fec KO |
1864 | /* |
1865 | * Check for unreachable inodes, as well as loops in the directory structure: | |
1866 | * After check_dirents(), if an inode backpointer doesn't exist that means it's | |
1867 | * unreachable: | |
1868 | */ | |
58686a25 | 1869 | static int check_directory_structure(struct bch_fs *c) |
d3ff7fec KO |
1870 | { |
1871 | struct btree_trans trans; | |
67e0dd8f | 1872 | struct btree_iter iter; |
d3ff7fec KO |
1873 | struct bkey_s_c k; |
1874 | struct bch_inode_unpacked u; | |
1875 | struct pathbuf path = { 0, 0, NULL }; | |
1876 | int ret; | |
1c6fdbd8 | 1877 | |
d3ff7fec | 1878 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
1c6fdbd8 | 1879 | |
909004d2 KO |
1880 | for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, |
1881 | BTREE_ITER_INTENT| | |
ef1669ff KO |
1882 | BTREE_ITER_PREFETCH| |
1883 | BTREE_ITER_ALL_SNAPSHOTS, k, ret) { | |
d3ff7fec | 1884 | if (k.k->type != KEY_TYPE_inode) |
1c6fdbd8 KO |
1885 | continue; |
1886 | ||
d3ff7fec KO |
1887 | ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); |
1888 | if (ret) { | |
1889 | /* Should have been caught earlier in fsck: */ | |
1890 | bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret); | |
1891 | break; | |
1c6fdbd8 | 1892 | } |
1c6fdbd8 | 1893 | |
ef1669ff KO |
1894 | if (u.bi_flags & BCH_INODE_UNLINKED) |
1895 | continue; | |
1896 | ||
81ed9ce3 | 1897 | ret = check_path(&trans, &path, &u, iter.pos.snapshot); |
d3ff7fec KO |
1898 | if (ret) |
1899 | break; | |
1c6fdbd8 | 1900 | } |
67e0dd8f | 1901 | bch2_trans_iter_exit(&trans, &iter); |
d3ff7fec KO |
1902 | |
1903 | BUG_ON(ret == -EINTR); | |
1904 | ||
f24fab9c KO |
1905 | kfree(path.entries); |
1906 | ||
9a796fdb KO |
1907 | bch2_trans_exit(&trans); |
1908 | return ret; | |
1c6fdbd8 KO |
1909 | } |
1910 | ||
fc51b041 KO |
1911 | struct nlink_table { |
1912 | size_t nr; | |
1913 | size_t size; | |
1c6fdbd8 | 1914 | |
fc51b041 KO |
1915 | struct nlink { |
1916 | u64 inum; | |
1917 | u32 snapshot; | |
1918 | u32 count; | |
1919 | } *d; | |
1920 | }; | |
1c6fdbd8 | 1921 | |
fc51b041 | 1922 | static int add_nlink(struct nlink_table *t, u64 inum, u32 snapshot) |
1c6fdbd8 | 1923 | { |
fc51b041 KO |
1924 | if (t->nr == t->size) { |
1925 | size_t new_size = max_t(size_t, 128UL, t->size * 2); | |
1926 | void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL); | |
1927 | if (!d) { | |
1928 | return -ENOMEM; | |
1929 | } | |
1c6fdbd8 | 1930 | |
82355e28 KO |
1931 | if (t->d) |
1932 | memcpy(d, t->d, t->size * sizeof(t->d[0])); | |
fc51b041 | 1933 | kvfree(t->d); |
1c6fdbd8 | 1934 | |
fc51b041 KO |
1935 | t->d = d; |
1936 | t->size = new_size; | |
2bb748a6 KO |
1937 | } |
1938 | ||
fc51b041 KO |
1939 | |
1940 | t->d[t->nr++] = (struct nlink) { | |
1941 | .inum = inum, | |
1942 | .snapshot = snapshot, | |
1943 | }; | |
1944 | ||
1945 | return 0; | |
1946 | } | |
1947 | ||
1948 | static int nlink_cmp(const void *_l, const void *_r) | |
1949 | { | |
1950 | const struct nlink *l = _l; | |
1951 | const struct nlink *r = _r; | |
1952 | ||
1953 | return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot); | |
1954 | } | |
1955 | ||
ef1669ff KO |
1956 | static void inc_link(struct bch_fs *c, struct snapshots_seen *s, |
1957 | struct nlink_table *links, | |
1958 | u64 range_start, u64 range_end, u64 inum, u32 snapshot) | |
fc51b041 KO |
1959 | { |
1960 | struct nlink *link, key = { | |
1961 | .inum = inum, .snapshot = U32_MAX, | |
1962 | }; | |
1963 | ||
1964 | if (inum < range_start || inum >= range_end) | |
1c6fdbd8 | 1965 | return; |
fc51b041 KO |
1966 | |
1967 | link = __inline_bsearch(&key, links->d, links->nr, | |
1968 | sizeof(links->d[0]), nlink_cmp); | |
ef1669ff KO |
1969 | if (!link) |
1970 | return; | |
1971 | ||
1972 | while (link > links->d && link[0].inum == link[-1].inum) | |
1973 | --link; | |
1974 | ||
1975 | for (; link < links->d + links->nr && link->inum == inum; link++) | |
1976 | if (ref_visible(c, s, snapshot, link->snapshot)) { | |
1977 | link->count++; | |
1978 | if (link->snapshot >= snapshot) | |
1979 | break; | |
1980 | } | |
fc51b041 KO |
1981 | } |
1982 | ||
1983 | noinline_for_stack | |
1984 | static int check_nlinks_find_hardlinks(struct bch_fs *c, | |
1985 | struct nlink_table *t, | |
1986 | u64 start, u64 *end) | |
1987 | { | |
1988 | struct btree_trans trans; | |
67e0dd8f | 1989 | struct btree_iter iter; |
fc51b041 KO |
1990 | struct bkey_s_c k; |
1991 | struct bkey_s_c_inode inode; | |
1992 | struct bch_inode_unpacked u; | |
1993 | int ret = 0; | |
1994 | ||
1995 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); | |
1996 | ||
1997 | for_each_btree_key(&trans, iter, BTREE_ID_inodes, | |
909004d2 KO |
1998 | POS(0, start), |
1999 | BTREE_ITER_INTENT| | |
ef1669ff KO |
2000 | BTREE_ITER_PREFETCH| |
2001 | BTREE_ITER_ALL_SNAPSHOTS, k, ret) { | |
fc51b041 KO |
2002 | if (k.k->type != KEY_TYPE_inode) |
2003 | continue; | |
2004 | ||
2005 | inode = bkey_s_c_to_inode(k); | |
2006 | ||
2007 | /* | |
2008 | * Backpointer and directory structure checks are sufficient for | |
2009 | * directories, since they can't have hardlinks: | |
2010 | */ | |
2011 | if (S_ISDIR(le16_to_cpu(inode.v->bi_mode))) | |
2012 | continue; | |
2013 | ||
2014 | /* Should never fail, checked by bch2_inode_invalid: */ | |
2015 | BUG_ON(bch2_inode_unpack(inode, &u)); | |
2016 | ||
2017 | if (!u.bi_nlink) | |
2018 | continue; | |
2019 | ||
2020 | ret = add_nlink(t, k.k->p.offset, k.k->p.snapshot); | |
2021 | if (ret) { | |
2022 | *end = k.k->p.offset; | |
2023 | ret = 0; | |
2024 | break; | |
2025 | } | |
2026 | ||
1c6fdbd8 | 2027 | } |
67e0dd8f | 2028 | bch2_trans_iter_exit(&trans, &iter); |
fc51b041 KO |
2029 | bch2_trans_exit(&trans); |
2030 | ||
2031 | if (ret) | |
2032 | bch_err(c, "error in fsck: btree error %i while walking inodes", ret); | |
1c6fdbd8 | 2033 | |
fc51b041 | 2034 | return ret; |
1c6fdbd8 KO |
2035 | } |
2036 | ||
2037 | noinline_for_stack | |
fc51b041 KO |
2038 | static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, |
2039 | u64 range_start, u64 range_end) | |
1c6fdbd8 | 2040 | { |
424eb881 | 2041 | struct btree_trans trans; |
ef1669ff | 2042 | struct snapshots_seen s; |
67e0dd8f | 2043 | struct btree_iter iter; |
1c6fdbd8 KO |
2044 | struct bkey_s_c k; |
2045 | struct bkey_s_c_dirent d; | |
1c6fdbd8 KO |
2046 | int ret; |
2047 | ||
ef1669ff KO |
2048 | snapshots_seen_init(&s); |
2049 | ||
20bceecb | 2050 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
424eb881 | 2051 | |
909004d2 KO |
2052 | for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN, |
2053 | BTREE_ITER_INTENT| | |
ef1669ff KO |
2054 | BTREE_ITER_PREFETCH| |
2055 | BTREE_ITER_ALL_SNAPSHOTS, k, ret) { | |
2056 | ret = snapshots_seen_update(c, &s, k.k->p); | |
2057 | if (ret) | |
2058 | break; | |
2059 | ||
1c6fdbd8 | 2060 | switch (k.k->type) { |
26609b61 | 2061 | case KEY_TYPE_dirent: |
1c6fdbd8 | 2062 | d = bkey_s_c_to_dirent(k); |
1c6fdbd8 | 2063 | |
ef1669ff KO |
2064 | if (d.v->d_type != DT_DIR && |
2065 | d.v->d_type != DT_SUBVOL) | |
2066 | inc_link(c, &s, links, range_start, range_end, | |
2067 | le64_to_cpu(d.v->d_inum), | |
2068 | d.k->p.snapshot); | |
1c6fdbd8 KO |
2069 | break; |
2070 | } | |
2071 | ||
424eb881 | 2072 | bch2_trans_cond_resched(&trans); |
1c6fdbd8 | 2073 | } |
67e0dd8f | 2074 | bch2_trans_iter_exit(&trans, &iter); |
abcecb49 | 2075 | |
1c6fdbd8 | 2076 | if (ret) |
619f5bee | 2077 | bch_err(c, "error in fsck: btree error %i while walking dirents", ret); |
1c6fdbd8 | 2078 | |
ef1669ff KO |
2079 | bch2_trans_exit(&trans); |
2080 | snapshots_seen_exit(&s); | |
1c6fdbd8 KO |
2081 | return ret; |
2082 | } | |
2083 | ||
1c6fdbd8 | 2084 | noinline_for_stack |
fc51b041 KO |
2085 | static int check_nlinks_update_hardlinks(struct bch_fs *c, |
2086 | struct nlink_table *links, | |
1c6fdbd8 KO |
2087 | u64 range_start, u64 range_end) |
2088 | { | |
0564b167 | 2089 | struct btree_trans trans; |
67e0dd8f | 2090 | struct btree_iter iter; |
1c6fdbd8 | 2091 | struct bkey_s_c k; |
fc51b041 KO |
2092 | struct bkey_s_c_inode inode; |
2093 | struct bch_inode_unpacked u; | |
2094 | struct nlink *link = links->d; | |
b906aadd | 2095 | int ret = 0; |
1c6fdbd8 | 2096 | |
20bceecb | 2097 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); |
0564b167 | 2098 | |
b906aadd | 2099 | for_each_btree_key(&trans, iter, BTREE_ID_inodes, |
909004d2 KO |
2100 | POS(0, range_start), |
2101 | BTREE_ITER_INTENT| | |
ef1669ff KO |
2102 | BTREE_ITER_PREFETCH| |
2103 | BTREE_ITER_ALL_SNAPSHOTS, k, ret) { | |
fc51b041 | 2104 | if (k.k->p.offset >= range_end) |
1c6fdbd8 KO |
2105 | break; |
2106 | ||
b906aadd KO |
2107 | if (k.k->type != KEY_TYPE_inode) |
2108 | continue; | |
1c6fdbd8 | 2109 | |
fc51b041 KO |
2110 | inode = bkey_s_c_to_inode(k); |
2111 | if (S_ISDIR(le16_to_cpu(inode.v->bi_mode))) | |
2112 | continue; | |
2113 | ||
2114 | BUG_ON(bch2_inode_unpack(inode, &u)); | |
1c6fdbd8 | 2115 | |
fc51b041 KO |
2116 | if (!u.bi_nlink) |
2117 | continue; | |
2118 | ||
ef1669ff KO |
2119 | while ((cmp_int(link->inum, k.k->p.offset) ?: |
2120 | cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { | |
fc51b041 KO |
2121 | link++; |
2122 | BUG_ON(link >= links->d + links->nr); | |
2123 | } | |
2124 | ||
2125 | if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c, | |
2126 | "inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)", | |
2127 | u.bi_inum, mode_to_type(u.bi_mode), | |
2128 | bch2_inode_nlink_get(&u), link->count)) { | |
2129 | bch2_inode_nlink_set(&u, link->count); | |
2130 | ||
ea0531f8 | 2131 | ret = write_inode(&trans, &u, k.k->p.snapshot); |
fc51b041 KO |
2132 | if (ret) |
2133 | bch_err(c, "error in fsck: error %i updating inode", ret); | |
2134 | } | |
1c6fdbd8 | 2135 | } |
fc51b041 | 2136 | fsck_err: |
67e0dd8f | 2137 | bch2_trans_iter_exit(&trans, &iter); |
0564b167 KO |
2138 | bch2_trans_exit(&trans); |
2139 | ||
b906aadd KO |
2140 | if (ret) |
2141 | bch_err(c, "error in fsck: btree error %i while walking inodes", ret); | |
1c6fdbd8 | 2142 | |
b906aadd | 2143 | return ret; |
1c6fdbd8 KO |
2144 | } |
2145 | ||
2146 | noinline_for_stack | |
58686a25 | 2147 | static int check_nlinks(struct bch_fs *c) |
1c6fdbd8 | 2148 | { |
fc51b041 | 2149 | struct nlink_table links = { 0 }; |
1c6fdbd8 KO |
2150 | u64 this_iter_range_start, next_iter_range_start = 0; |
2151 | int ret = 0; | |
2152 | ||
2153 | bch_verbose(c, "checking inode nlinks"); | |
2154 | ||
1c6fdbd8 KO |
2155 | do { |
2156 | this_iter_range_start = next_iter_range_start; | |
2157 | next_iter_range_start = U64_MAX; | |
2158 | ||
fc51b041 KO |
2159 | ret = check_nlinks_find_hardlinks(c, &links, |
2160 | this_iter_range_start, | |
2161 | &next_iter_range_start); | |
2162 | ||
2163 | ret = check_nlinks_walk_dirents(c, &links, | |
1c6fdbd8 | 2164 | this_iter_range_start, |
fc51b041 | 2165 | next_iter_range_start); |
1c6fdbd8 KO |
2166 | if (ret) |
2167 | break; | |
2168 | ||
fc51b041 | 2169 | ret = check_nlinks_update_hardlinks(c, &links, |
1c6fdbd8 KO |
2170 | this_iter_range_start, |
2171 | next_iter_range_start); | |
2172 | if (ret) | |
2173 | break; | |
2174 | ||
fc51b041 | 2175 | links.nr = 0; |
1c6fdbd8 KO |
2176 | } while (next_iter_range_start != U64_MAX); |
2177 | ||
fc51b041 | 2178 | kvfree(links.d); |
1c6fdbd8 KO |
2179 | |
2180 | return ret; | |
2181 | } | |
2182 | ||
bfe88863 KO |
2183 | static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter) |
2184 | { | |
2185 | struct bkey_s_c k; | |
2186 | struct bkey_s_c_reflink_p p; | |
2187 | struct bkey_i_reflink_p *u; | |
2188 | int ret; | |
2189 | ||
2190 | k = bch2_btree_iter_peek(iter); | |
2191 | if (!k.k) | |
2192 | return 0; | |
2193 | ||
2194 | ret = bkey_err(k); | |
2195 | if (ret) | |
2196 | return ret; | |
2197 | ||
2198 | if (k.k->type != KEY_TYPE_reflink_p) | |
2199 | return 0; | |
2200 | ||
2201 | p = bkey_s_c_to_reflink_p(k); | |
2202 | ||
6d76aefe | 2203 | if (!p.v->front_pad && !p.v->back_pad) |
bfe88863 KO |
2204 | return 0; |
2205 | ||
2206 | u = bch2_trans_kmalloc(trans, sizeof(*u)); | |
2207 | ret = PTR_ERR_OR_ZERO(u); | |
2208 | if (ret) | |
2209 | return ret; | |
2210 | ||
2211 | bkey_reassemble(&u->k_i, k); | |
6d76aefe KO |
2212 | u->v.front_pad = 0; |
2213 | u->v.back_pad = 0; | |
bfe88863 KO |
2214 | |
2215 | return bch2_trans_update(trans, iter, &u->k_i, 0); | |
2216 | } | |
2217 | ||
2218 | static int fix_reflink_p(struct bch_fs *c) | |
2219 | { | |
2220 | struct btree_trans trans; | |
2221 | struct btree_iter iter; | |
2222 | struct bkey_s_c k; | |
2223 | int ret; | |
2224 | ||
2225 | if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) | |
2226 | return 0; | |
2227 | ||
2228 | bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); | |
2229 | ||
2230 | for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN, | |
2231 | BTREE_ITER_INTENT| | |
2232 | BTREE_ITER_PREFETCH| | |
2233 | BTREE_ITER_ALL_SNAPSHOTS, k, ret) { | |
2234 | if (k.k->type == KEY_TYPE_reflink_p) { | |
2235 | ret = __bch2_trans_do(&trans, NULL, NULL, | |
2236 | BTREE_INSERT_NOFAIL| | |
2237 | BTREE_INSERT_LAZY_RW, | |
2238 | fix_reflink_p_key(&trans, &iter)); | |
2239 | if (ret) | |
2240 | break; | |
2241 | } | |
2242 | } | |
2243 | bch2_trans_iter_exit(&trans, &iter); | |
2244 | ||
2245 | bch2_trans_exit(&trans); | |
2246 | return ret; | |
2247 | } | |
2248 | ||
1c6fdbd8 KO |
2249 | /* |
2250 | * Checks for inconsistencies that shouldn't happen, unless we have a bug. | |
2251 | * Doesn't fix them yet, mainly because they haven't yet been observed: | |
2252 | */ | |
619f5bee | 2253 | int bch2_fsck_full(struct bch_fs *c) |
1c6fdbd8 | 2254 | { |
14b393ee KO |
2255 | return bch2_fs_snapshots_check(c) ?: |
2256 | check_inodes(c, true) ?: | |
ef1669ff | 2257 | check_subvols(c) ?: |
5c16add5 | 2258 | check_extents(c) ?: |
1c6fdbd8 KO |
2259 | check_dirents(c) ?: |
2260 | check_xattrs(c) ?: | |
ef1669ff | 2261 | check_root(c) ?: |
58686a25 | 2262 | check_directory_structure(c) ?: |
bfe88863 KO |
2263 | check_nlinks(c) ?: |
2264 | fix_reflink_p(c); | |
1c6fdbd8 KO |
2265 | } |
2266 | ||
619f5bee | 2267 | int bch2_fsck_walk_inodes_only(struct bch_fs *c) |
1c6fdbd8 | 2268 | { |
5c16add5 | 2269 | return check_inodes(c, false); |
1c6fdbd8 | 2270 | } |