/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"

/*
 * Set us up to scrub reference count btrees.
 */
int
xfs_scrub_setup_ag_refcountbt(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	return xfs_scrub_setup_ag_btree(sc, ip, false);
}

/* Reference count btree scrubber. */

/*
 * Confirming Reference Counts via Reverse Mappings
 *
 * We want to count the reverse mappings overlapping a refcount record
 * (bno, len, refcount), allowing for the possibility that some of the
 * overlap may come from smaller adjoining reverse mappings, while some
 * comes from single extents which overlap the range entirely.  The
 * outer loop is as follows:
 *
 * 1. For all reverse mappings overlapping the refcount extent,
 *    a. If a given rmap completely overlaps, mark it as seen.
 *    b. Otherwise, record the fragment (in agbno order) for later
 *       processing.
 *
 * Once we've seen all the rmaps, we know that for all blocks in the
 * refcount record we want to find $refcount owners and we've already
 * visited $seen extents that overlap all the blocks.  Therefore, we
 * need to find ($refcount - $seen) owners for every block in the
 * extent; call that quantity $target_nr.  Proceed as follows:
 *
 * 2. Pull the first $target_nr fragments from the list; all of them
 *    should start at or before the start of the extent.
 *    Call this subset of fragments the working set.
 * 3. Until there are no more unprocessed fragments,
 *    a. Find the shortest fragments in the set and remove them.
 *    b. Note the block number of the end of these fragments.
 *    c. Pull the same number of fragments from the list.  All of these
 *       fragments should start at the block number recorded in the
 *       previous step.
 *    d. Put those fragments in the set.
 * 4. Check that there are $target_nr fragments remaining in the list,
 *    and that they all end at or beyond the end of the refcount extent.
 *
 * If the refcount is correct, all the check conditions in the algorithm
 * should always hold true.  If not, the refcount is incorrect.
 */
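/*
 * A worked example with made-up numbers: suppose the refcount record is
 * (bno 100, len 4, refcount 3), and the rmapbt holds one rmap spanning
 * blocks 98-105 plus fragments covering blocks 99-101, 100-101, 102-103,
 * and 102-103.  Step 1 marks the 98-105 rmap as seen, so $seen = 1 and
 * $target_nr = 2.  Step 2 pulls the fragments starting at blocks 99 and
 * 100 into the working set; both end at block 102, so step 3 removes them
 * and pulls in the two fragments starting at block 102.  Those remaining
 * fragments end at block 104, which is at or beyond the end of the
 * refcount extent, so the record checks out.
 */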
struct xfs_scrub_refcnt_frag {
	struct list_head		list;
	struct xfs_rmap_irec		rm;
};

struct xfs_scrub_refcnt_check {
	struct xfs_scrub_context	*sc;
	struct list_head		fragments;

	/* refcount extent we're examining */
	xfs_agblock_t			bno;
	xfs_extlen_t			len;
	xfs_nlink_t			refcount;

	/* number of owners seen */
	xfs_nlink_t			seen;
};

/*
 * Decide if the given rmap is large enough that we can redeem it
 * towards refcount verification now, or if it's a fragment, in
 * which case we'll hang onto it in the hopes that we'll later
 * discover that we've collected exactly the correct number of
 * fragments as the refcountbt says we should have.
 */
STATIC int
xfs_scrub_refcountbt_rmap_check(
	struct xfs_btree_cur		*cur,
	struct xfs_rmap_irec		*rec,
	void				*priv)
{
	struct xfs_scrub_refcnt_check	*refchk = priv;
	struct xfs_scrub_refcnt_frag	*frag;
	xfs_agblock_t			rm_last;
	xfs_agblock_t			rc_last;
	int				error = 0;

	if (xfs_scrub_should_terminate(refchk->sc, &error))
		return error;

	rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
	rc_last = refchk->bno + refchk->len - 1;

	/* Confirm that a single-owner refc extent is a CoW stage. */
	if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
		xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0);
		return 0;
	}

	if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
		/*
		 * The rmap overlaps the refcount record, so we can confirm
		 * one refcount owner seen.
		 */
		refchk->seen++;
	} else {
		/*
		 * This rmap covers only part of the refcount record, so
		 * save the fragment for later processing.  If the rmapbt
		 * is healthy each rmap_irec we see will be in agbno order
		 * so we don't need insertion sort here.
		 */
		frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
				KM_MAYFAIL | KM_NOFS);
		if (!frag)
			return -ENOMEM;
		memcpy(&frag->rm, rec, sizeof(frag->rm));
		list_add_tail(&frag->list, &refchk->fragments);
	}

	return 0;
}

/*
 * Given a bunch of rmap fragments, iterate through them, keeping
 * a running tally of the refcount.  If this ever deviates from
 * what we expect (which is the refcountbt's refcount minus the
 * number of extents that totally covered the refcountbt extent),
 * we have a refcountbt error.
 */
STATIC void
xfs_scrub_refcountbt_process_rmap_fragments(
	struct xfs_scrub_refcnt_check	*refchk)
{
	struct list_head		worklist;
	struct xfs_scrub_refcnt_frag	*frag;
	struct xfs_scrub_refcnt_frag	*n;
	xfs_agblock_t			bno;
	xfs_agblock_t			rbno;
	xfs_agblock_t			next_rbno;
	xfs_nlink_t			nr;
	xfs_nlink_t			target_nr;

	target_nr = refchk->refcount - refchk->seen;
	if (target_nr == 0)
		return;

	/*
	 * There are (refchk->refcount - refchk->seen) references we
	 * haven't found yet.  Pull that many off the fragment list and
	 * figure out where the smallest rmap ends (and therefore the
	 * next rmap should start).  All the rmaps we pull off should
	 * start at or before the beginning of the refcount record's
	 * range.
	 */
	INIT_LIST_HEAD(&worklist);
	rbno = NULLAGBLOCK;
	nr = 1;

	/* Make sure the fragments actually /are/ in agbno order. */
	bno = 0;
	list_for_each_entry(frag, &refchk->fragments, list) {
		if (frag->rm.rm_startblock < bno)
			goto done;
		bno = frag->rm.rm_startblock;
	}

	/*
	 * Find all the rmaps that start at or before the refc extent,
	 * and put them on the worklist.
	 */
	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
		if (frag->rm.rm_startblock > refchk->bno)
			goto done;
		bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
		if (bno < rbno)
			rbno = bno;
		list_move_tail(&frag->list, &worklist);
		if (nr == target_nr)
			break;
		nr++;
	}

	/*
	 * We should have found exactly $target_nr rmap fragments starting
	 * at or before the refcount extent.
	 */
	if (nr != target_nr)
		goto done;

	while (!list_empty(&refchk->fragments)) {
		/* Discard any fragments ending at rbno from the worklist. */
		nr = 0;
		next_rbno = NULLAGBLOCK;
		list_for_each_entry_safe(frag, n, &worklist, list) {
			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
			if (bno != rbno) {
				if (bno < next_rbno)
					next_rbno = bno;
				continue;
			}
			list_del(&frag->list);
			kmem_free(frag);
			nr++;
		}

		/* Try to add nr rmaps starting at rbno to the worklist. */
		list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
			if (frag->rm.rm_startblock != rbno)
				goto done;
			list_move_tail(&frag->list, &worklist);
			if (next_rbno > bno)
				next_rbno = bno;
			nr--;
			if (nr == 0)
				break;
		}

		/*
		 * If we get here and nr > 0, this means that we added fewer
		 * items to the worklist than we discarded because the fragment
		 * list ran out of items.  Therefore, we cannot maintain the
		 * required refcount.  Something is wrong, so we're done.
		 */
		if (nr)
			goto done;

		rbno = next_rbno;
	}

	/*
	 * Make sure the last extent we processed ends at or beyond
	 * the end of the refcount extent.
	 */
	if (rbno < refchk->bno + refchk->len)
		goto done;

	/* Actually record us having seen the remaining refcount. */
	refchk->seen = refchk->refcount;
done:
	/* Delete fragments and work list. */
	list_for_each_entry_safe(frag, n, &worklist, list) {
		list_del(&frag->list);
		kmem_free(frag);
	}
	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
		list_del(&frag->list);
		kmem_free(frag);
	}
}

/* Use the rmap entries covering this extent to verify the refcount. */
STATIC void
xfs_scrub_refcountbt_xref_rmap(
	struct xfs_scrub_context	*sc,
	xfs_agblock_t			bno,
	xfs_extlen_t			len,
	xfs_nlink_t			refcount)
{
	struct xfs_scrub_refcnt_check	refchk = {
		.sc = sc,
		.bno = bno,
		.len = len,
		.refcount = refcount,
		.seen = 0,
	};
	struct xfs_rmap_irec		low;
	struct xfs_rmap_irec		high;
	struct xfs_scrub_refcnt_frag	*frag;
	struct xfs_scrub_refcnt_frag	*n;
	int				error;

	if (!sc->sa.rmap_cur)
		return;

	/* Cross-reference with the rmapbt to confirm the refcount. */
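	/*
	 * The low key is zeroed and the high key is all ones apart from the
	 * start blocks, so the range query below visits every rmap record
	 * touching agbnos bno through bno + len - 1, whatever the owner or
	 * offset.
	 */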
	memset(&low, 0, sizeof(low));
	low.rm_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rm_startblock = bno + len - 1;

	INIT_LIST_HEAD(&refchk.fragments);
	error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
			&xfs_scrub_refcountbt_rmap_check, &refchk);
	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		goto out_free;

	xfs_scrub_refcountbt_process_rmap_fragments(&refchk);
	if (refcount != refchk.seen)
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

out_free:
	list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
		list_del(&frag->list);
		kmem_free(frag);
	}
}

/* Cross-reference with the other btrees. */
STATIC void
xfs_scrub_refcountbt_xref(
	struct xfs_scrub_context	*sc,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len,
	xfs_nlink_t			refcount)
{
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	xfs_scrub_xref_is_used_space(sc, agbno, len);
	xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
	xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount);
}

/* Scrub a refcountbt record. */
STATIC int
xfs_scrub_refcountbt_rec(
	struct xfs_scrub_btree		*bs,
	union xfs_btree_rec		*rec)
{
	struct xfs_mount		*mp = bs->cur->bc_mp;
	xfs_agblock_t			*cow_blocks = bs->private;
	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
	xfs_agblock_t			bno;
	xfs_extlen_t			len;
	xfs_nlink_t			refcount;
	bool				has_cowflag;
	int				error = 0;

	bno = be32_to_cpu(rec->refc.rc_startblock);
	len = be32_to_cpu(rec->refc.rc_blockcount);
	refcount = be32_to_cpu(rec->refc.rc_refcount);

	/* Only CoW records can have refcount == 1. */
	has_cowflag = (bno & XFS_REFC_COW_START);
	if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
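	/*
	 * Tally CoW staging blocks so that xfs_scrub_refcount_xref_rmap can
	 * compare the total against the rmapbt's XFS_RMAP_OWN_COW records.
	 */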
	if (has_cowflag)
		(*cow_blocks) += len;

	/* Check the extent. */
	bno &= ~XFS_REFC_COW_START;
	if (bno + len <= bno ||
	    !xfs_verify_agbno(mp, agno, bno) ||
	    !xfs_verify_agbno(mp, agno, bno + len - 1))
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	if (refcount == 0)
		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);

	xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount);

	return error;
}

/* Make sure we have as many refc blocks as the rmap says. */
STATIC void
xfs_scrub_refcount_xref_rmap(
	struct xfs_scrub_context	*sc,
	struct xfs_owner_info		*oinfo,
	xfs_filblks_t			cow_blocks)
{
	xfs_extlen_t			refcbt_blocks = 0;
	xfs_filblks_t			blocks;
	int				error;

	if (!sc->sa.rmap_cur)
		return;

	/* Check that we saw as many refcbt blocks as the rmap knows about. */
	error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
	if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
		return;
	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
			&blocks);
	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != refcbt_blocks)
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

	/* Check that we saw as many cow blocks as the rmap knows about. */
	xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
			&blocks);
	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != cow_blocks)
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}

/* Scrub the refcount btree for some AG. */
int
xfs_scrub_refcountbt(
	struct xfs_scrub_context	*sc)
{
	struct xfs_owner_info		oinfo;
	xfs_agblock_t			cow_blocks = 0;
	int				error;

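	/* Refcount btree blocks are owned by XFS_RMAP_OWN_REFC in the rmapbt. */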
	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
	error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
			&oinfo, &cow_blocks);
	if (error)
		return error;

	xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);

	return 0;
}

/* xref check that a cow staging extent is marked in the refcountbt. */
void
xfs_scrub_xref_is_cow_staging(
	struct xfs_scrub_context	*sc,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	struct xfs_refcount_irec	rc;
	bool				has_cowflag;
	int				has_refcount;
	int				error;

	if (!sc->sa.refc_cur)
		return;

	/* Find the CoW staging extent. */
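	/*
	 * CoW staging records are stored with the XFS_REFC_COW_START bit set
	 * in the start block, so include that bit in the lookup key.
	 */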
	error = xfs_refcount_lookup_le(sc->sa.refc_cur,
			agbno + XFS_REFC_COW_START, &has_refcount);
	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (!has_refcount) {
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
		return;
	}

	error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (!has_refcount) {
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
		return;
	}

	/* CoW flag must be set, refcount must be 1. */
	has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
	if (!has_cowflag || rc.rc_refcount != 1)
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);

	/* Must be at least as long as what was passed in */
	if (rc.rc_blockcount < len)
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}

/*
 * xref check that the extent is not shared.  Only file data blocks
 * can have multiple owners.
 */
void
xfs_scrub_xref_is_not_shared(
	struct xfs_scrub_context	*sc,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	bool				shared;
	int				error;

	if (!sc->sa.refc_cur)
		return;

	error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (shared)
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}