Commit | Line | Data |
---|---|---|
edc09b52 DW |
1 | /* |
2 | * Copyright (C) 2017 Oracle. All Rights Reserved. | |
3 | * | |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it would be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write the Free Software Foundation, | |
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | */ | |
20 | #include "xfs.h" | |
21 | #include "xfs_fs.h" | |
22 | #include "xfs_shared.h" | |
23 | #include "xfs_format.h" | |
24 | #include "xfs_trans_resv.h" | |
25 | #include "xfs_mount.h" | |
26 | #include "xfs_defer.h" | |
27 | #include "xfs_btree.h" | |
28 | #include "xfs_bit.h" | |
29 | #include "xfs_log_format.h" | |
30 | #include "xfs_trans.h" | |
31 | #include "xfs_sb.h" | |
32 | #include "xfs_alloc.h" | |
33 | #include "xfs_rmap.h" | |
34 | #include "scrub/xfs_scrub.h" | |
35 | #include "scrub/scrub.h" | |
36 | #include "scrub/common.h" | |
37 | #include "scrub/btree.h" | |
38 | #include "scrub/trace.h" | |
39 | ||
40 | /* | |
41 | * Set us up to scrub reference count btrees. | |
42 | */ | |
43 | int | |
44 | xfs_scrub_setup_ag_refcountbt( | |
45 | struct xfs_scrub_context *sc, | |
46 | struct xfs_inode *ip) | |
47 | { | |
48 | return xfs_scrub_setup_ag_btree(sc, ip, false); | |
49 | } | |
50 | ||
51 | /* Reference count btree scrubber. */ | |
52 | ||
dbde19da DW |
53 | /* |
54 | * Confirming Reference Counts via Reverse Mappings | |
55 | * | |
56 | * We want to count the reverse mappings overlapping a refcount record | |
57 | * (bno, len, refcount), allowing for the possibility that some of the | |
58 | * overlap may come from smaller adjoining reverse mappings, while some | |
59 | * comes from single extents which overlap the range entirely. The | |
60 | * outer loop is as follows: | |
61 | * | |
62 | * 1. For all reverse mappings overlapping the refcount extent, | |
63 | * a. If a given rmap completely overlaps, mark it as seen. | |
64 | * b. Otherwise, record the fragment (in agbno order) for later | |
65 | * processing. | |
66 | * | |
67 | * Once we've seen all the rmaps, we know that for all blocks in the | |
68 | * refcount record we want to find $refcount owners and we've already | |
69 | * visited $seen extents that overlap all the blocks. Therefore, we | |
70 | * need to find ($refcount - $seen) owners for every block in the | |
71 | * extent; call that quantity $target_nr. Proceed as follows: | |
72 | * | |
73 | * 2. Pull the first $target_nr fragments from the list; all of them | |
74 | * should start at or before the start of the extent. | |
75 | * Call this subset of fragments the working set. | |
76 | * 3. Until there are no more unprocessed fragments, | |
77 | * a. Find the shortest fragments in the set and remove them. | |
78 | * b. Note the block number of the end of these fragments. | |
79 | * c. Pull the same number of fragments from the list. All of these | |
80 | * fragments should start at the block number recorded in the | |
81 | * previous step. | |
82 | * d. Put those fragments in the set. | |
83 | * 4. Check that there are $target_nr fragments remaining in the list, | |
84 | * and that they all end at or beyond the end of the refcount extent. | |
85 | * | |
86 | * If the refcount is correct, all the check conditions in the algorithm | |
87 | * should always hold true. If not, the refcount is incorrect. | |
88 | */ | |
/*
 * One saved rmap record that only partially overlaps the refcount
 * extent being checked; collected (in agbno order) for the fragment
 * reassembly pass in xfs_scrub_refcountbt_process_rmap_fragments().
 */
struct xfs_scrub_refcnt_frag {
	struct list_head		list;	/* link in fragments/worklist */
	struct xfs_rmap_irec		rm;	/* copy of the rmap record */
};
93 | ||
/*
 * State for cross-referencing one refcount record against the rmapbt.
 * Passed as the private pointer through xfs_rmap_query_range().
 */
struct xfs_scrub_refcnt_check {
	struct xfs_scrub_context	*sc;
	struct list_head		fragments;	/* partial-overlap rmaps */

	/* refcount extent we're examining */
	xfs_agblock_t			bno;
	xfs_extlen_t			len;
	xfs_nlink_t			refcount;

	/* number of owners seen */
	xfs_nlink_t			seen;
};
106 | ||
107 | /* | |
108 | * Decide if the given rmap is large enough that we can redeem it | |
109 | * towards refcount verification now, or if it's a fragment, in | |
110 | * which case we'll hang onto it in the hopes that we'll later | |
111 | * discover that we've collected exactly the correct number of | |
112 | * fragments as the refcountbt says we should have. | |
113 | */ | |
114 | STATIC int | |
115 | xfs_scrub_refcountbt_rmap_check( | |
116 | struct xfs_btree_cur *cur, | |
117 | struct xfs_rmap_irec *rec, | |
118 | void *priv) | |
119 | { | |
120 | struct xfs_scrub_refcnt_check *refchk = priv; | |
121 | struct xfs_scrub_refcnt_frag *frag; | |
122 | xfs_agblock_t rm_last; | |
123 | xfs_agblock_t rc_last; | |
124 | int error = 0; | |
125 | ||
126 | if (xfs_scrub_should_terminate(refchk->sc, &error)) | |
127 | return error; | |
128 | ||
129 | rm_last = rec->rm_startblock + rec->rm_blockcount - 1; | |
130 | rc_last = refchk->bno + refchk->len - 1; | |
131 | ||
132 | /* Confirm that a single-owner refc extent is a CoW stage. */ | |
133 | if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) { | |
134 | xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0); | |
135 | return 0; | |
136 | } | |
137 | ||
138 | if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) { | |
139 | /* | |
140 | * The rmap overlaps the refcount record, so we can confirm | |
141 | * one refcount owner seen. | |
142 | */ | |
143 | refchk->seen++; | |
144 | } else { | |
145 | /* | |
146 | * This rmap covers only part of the refcount record, so | |
147 | * save the fragment for later processing. If the rmapbt | |
148 | * is healthy each rmap_irec we see will be in agbno order | |
149 | * so we don't need insertion sort here. | |
150 | */ | |
151 | frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag), | |
152 | KM_MAYFAIL | KM_NOFS); | |
153 | if (!frag) | |
154 | return -ENOMEM; | |
155 | memcpy(&frag->rm, rec, sizeof(frag->rm)); | |
156 | list_add_tail(&frag->list, &refchk->fragments); | |
157 | } | |
158 | ||
159 | return 0; | |
160 | } | |
161 | ||
/*
 * Given a bunch of rmap fragments, iterate through them, keeping
 * a running tally of the refcount.  If this ever deviates from
 * what we expect (which is the refcountbt's refcount minus the
 * number of extents that totally covered the refcountbt extent),
 * we have a refcountbt error.
 *
 * On success, refchk->seen is bumped up to refchk->refcount; any
 * mismatch leaves seen unchanged so the caller flags corruption.
 * Both the fragment list and the local worklist are always emptied
 * and freed before returning.
 */
STATIC void
xfs_scrub_refcountbt_process_rmap_fragments(
	struct xfs_scrub_refcnt_check	*refchk)
{
	struct list_head		worklist;
	struct xfs_scrub_refcnt_frag	*frag;
	struct xfs_scrub_refcnt_frag	*n;
	xfs_agblock_t			bno;
	xfs_agblock_t			rbno;	/* smallest end-agbno in the worklist */
	xfs_agblock_t			next_rbno;
	xfs_nlink_t			nr;
	xfs_nlink_t			target_nr;

	/* Owners still unaccounted for after the full-overlap pass. */
	target_nr = refchk->refcount - refchk->seen;
	if (target_nr == 0)
		return;

	/*
	 * There are (refchk->rc.rc_refcount - refchk->nr refcount)
	 * references we haven't found yet.  Pull that many off the
	 * fragment list and figure out where the smallest rmap ends
	 * (and therefore the next rmap should start).  All the rmaps
	 * we pull off should start at or before the beginning of the
	 * refcount record's range.
	 */
	INIT_LIST_HEAD(&worklist);
	rbno = NULLAGBLOCK;	/* sentinel: no end block found yet */
	nr = 1;

	/* Make sure the fragments actually /are/ in agbno order. */
	bno = 0;
	list_for_each_entry(frag, &refchk->fragments, list) {
		if (frag->rm.rm_startblock < bno)
			goto done;
		bno = frag->rm.rm_startblock;
	}

	/*
	 * Find all the rmaps that start at or before the refc extent,
	 * and put them on the worklist.
	 */
	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
		if (frag->rm.rm_startblock > refchk->bno)
			goto done;
		bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
		if (bno < rbno)
			rbno = bno;
		list_move_tail(&frag->list, &worklist);
		if (nr == target_nr)
			break;
		nr++;
	}

	/*
	 * We should have found exactly $target_nr rmap fragments starting
	 * at or before the refcount extent.
	 */
	if (nr != target_nr)
		goto done;

	while (!list_empty(&refchk->fragments)) {
		/* Discard any fragments ending at rbno from the worklist. */
		nr = 0;
		next_rbno = NULLAGBLOCK;
		list_for_each_entry_safe(frag, n, &worklist, list) {
			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
			if (bno != rbno) {
				/* Still covered; remember the nearest end. */
				if (bno < next_rbno)
					next_rbno = bno;
				continue;
			}
			list_del(&frag->list);
			kmem_free(frag);
			nr++;
		}

		/* Try to add nr rmaps starting at rbno to the worklist. */
		list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
			/* Replacement must start exactly where the old ended. */
			if (frag->rm.rm_startblock != rbno)
				goto done;
			list_move_tail(&frag->list, &worklist);
			if (next_rbno > bno)
				next_rbno = bno;
			nr--;
			if (nr == 0)
				break;
		}

		/*
		 * If we get here and nr > 0, this means that we added fewer
		 * items to the worklist than we discarded because the fragment
		 * list ran out of items.  Therefore, we cannot maintain the
		 * required refcount.  Something is wrong, so we're done.
		 */
		if (nr)
			goto done;

		rbno = next_rbno;
	}

	/*
	 * Make sure the last extent we processed ends at or beyond
	 * the end of the refcount extent.
	 */
	if (rbno < refchk->bno + refchk->len)
		goto done;

	/* Actually record us having seen the remaining refcount. */
	refchk->seen = refchk->refcount;
done:
	/* Delete fragments and work list. */
	list_for_each_entry_safe(frag, n, &worklist, list) {
		list_del(&frag->list);
		kmem_free(frag);
	}
	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
		list_del(&frag->list);
		kmem_free(frag);
	}
}
290 | ||
291 | /* Use the rmap entries covering this extent to verify the refcount. */ | |
292 | STATIC void | |
293 | xfs_scrub_refcountbt_xref_rmap( | |
294 | struct xfs_scrub_context *sc, | |
295 | xfs_agblock_t bno, | |
296 | xfs_extlen_t len, | |
297 | xfs_nlink_t refcount) | |
298 | { | |
299 | struct xfs_scrub_refcnt_check refchk = { | |
300 | .sc = sc, | |
301 | .bno = bno, | |
302 | .len = len, | |
303 | .refcount = refcount, | |
304 | .seen = 0, | |
305 | }; | |
306 | struct xfs_rmap_irec low; | |
307 | struct xfs_rmap_irec high; | |
308 | struct xfs_scrub_refcnt_frag *frag; | |
309 | struct xfs_scrub_refcnt_frag *n; | |
310 | int error; | |
311 | ||
312 | if (!sc->sa.rmap_cur) | |
313 | return; | |
314 | ||
315 | /* Cross-reference with the rmapbt to confirm the refcount. */ | |
316 | memset(&low, 0, sizeof(low)); | |
317 | low.rm_startblock = bno; | |
318 | memset(&high, 0xFF, sizeof(high)); | |
319 | high.rm_startblock = bno + len - 1; | |
320 | ||
321 | INIT_LIST_HEAD(&refchk.fragments); | |
322 | error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, | |
323 | &xfs_scrub_refcountbt_rmap_check, &refchk); | |
324 | if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) | |
325 | goto out_free; | |
326 | ||
327 | xfs_scrub_refcountbt_process_rmap_fragments(&refchk); | |
328 | if (refcount != refchk.seen) | |
329 | xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); | |
330 | ||
331 | out_free: | |
332 | list_for_each_entry_safe(frag, n, &refchk.fragments, list) { | |
333 | list_del(&frag->list); | |
334 | kmem_free(frag); | |
335 | } | |
336 | } | |
337 | ||
166d7641 DW |
338 | /* Cross-reference with the other btrees. */ |
339 | STATIC void | |
340 | xfs_scrub_refcountbt_xref( | |
341 | struct xfs_scrub_context *sc, | |
342 | xfs_agblock_t agbno, | |
343 | xfs_extlen_t len, | |
344 | xfs_nlink_t refcount) | |
345 | { | |
346 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) | |
347 | return; | |
52dc4b44 DW |
348 | |
349 | xfs_scrub_xref_is_used_space(sc, agbno, len); | |
2e6f2756 | 350 | xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len); |
dbde19da | 351 | xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount); |
166d7641 DW |
352 | } |
353 | ||
edc09b52 DW |
354 | /* Scrub a refcountbt record. */ |
355 | STATIC int | |
356 | xfs_scrub_refcountbt_rec( | |
357 | struct xfs_scrub_btree *bs, | |
358 | union xfs_btree_rec *rec) | |
359 | { | |
360 | struct xfs_mount *mp = bs->cur->bc_mp; | |
dbde19da | 361 | xfs_agblock_t *cow_blocks = bs->private; |
edc09b52 DW |
362 | xfs_agnumber_t agno = bs->cur->bc_private.a.agno; |
363 | xfs_agblock_t bno; | |
364 | xfs_extlen_t len; | |
365 | xfs_nlink_t refcount; | |
366 | bool has_cowflag; | |
367 | int error = 0; | |
368 | ||
369 | bno = be32_to_cpu(rec->refc.rc_startblock); | |
370 | len = be32_to_cpu(rec->refc.rc_blockcount); | |
371 | refcount = be32_to_cpu(rec->refc.rc_refcount); | |
372 | ||
373 | /* Only CoW records can have refcount == 1. */ | |
374 | has_cowflag = (bno & XFS_REFC_COW_START); | |
375 | if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) | |
376 | xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); | |
dbde19da DW |
377 | if (has_cowflag) |
378 | (*cow_blocks) += len; | |
edc09b52 DW |
379 | |
380 | /* Check the extent. */ | |
381 | bno &= ~XFS_REFC_COW_START; | |
382 | if (bno + len <= bno || | |
383 | !xfs_verify_agbno(mp, agno, bno) || | |
384 | !xfs_verify_agbno(mp, agno, bno + len - 1)) | |
385 | xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); | |
386 | ||
387 | if (refcount == 0) | |
388 | xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); | |
389 | ||
166d7641 DW |
390 | xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount); |
391 | ||
edc09b52 DW |
392 | return error; |
393 | } | |
394 | ||
dbde19da DW |
395 | /* Make sure we have as many refc blocks as the rmap says. */ |
396 | STATIC void | |
397 | xfs_scrub_refcount_xref_rmap( | |
398 | struct xfs_scrub_context *sc, | |
399 | struct xfs_owner_info *oinfo, | |
400 | xfs_filblks_t cow_blocks) | |
401 | { | |
402 | xfs_extlen_t refcbt_blocks = 0; | |
403 | xfs_filblks_t blocks; | |
404 | int error; | |
405 | ||
406 | if (!sc->sa.rmap_cur) | |
407 | return; | |
408 | ||
409 | /* Check that we saw as many refcbt blocks as the rmap knows about. */ | |
410 | error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks); | |
411 | if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) | |
412 | return; | |
413 | error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, | |
414 | &blocks); | |
415 | if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) | |
416 | return; | |
417 | if (blocks != refcbt_blocks) | |
418 | xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); | |
419 | ||
420 | /* Check that we saw as many cow blocks as the rmap knows about. */ | |
421 | xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW); | |
422 | error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo, | |
423 | &blocks); | |
424 | if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur)) | |
425 | return; | |
426 | if (blocks != cow_blocks) | |
427 | xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); | |
428 | } | |
429 | ||
edc09b52 DW |
430 | /* Scrub the refcount btree for some AG. */ |
431 | int | |
432 | xfs_scrub_refcountbt( | |
433 | struct xfs_scrub_context *sc) | |
434 | { | |
435 | struct xfs_owner_info oinfo; | |
dbde19da DW |
436 | xfs_agblock_t cow_blocks = 0; |
437 | int error; | |
edc09b52 DW |
438 | |
439 | xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC); | |
dbde19da DW |
440 | error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec, |
441 | &oinfo, &cow_blocks); | |
442 | if (error) | |
443 | return error; | |
444 | ||
445 | xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks); | |
446 | ||
447 | return 0; | |
edc09b52 | 448 | } |