/*
 * Copyright (C) 2017 Oracle. All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_btree.h"
29 #include "xfs_log_format.h"
30 #include "xfs_trans.h"
32 #include "xfs_inode.h"
33 #include "xfs_alloc.h"
34 #include "scrub/scrub.h"
35 #include "scrub/common.h"
36 #include "scrub/btree.h"
37 #include "scrub/trace.h"
/*
 * NOTE(review): this file is a partial extraction — each line carries its
 * original line number and the numbering shows gaps where code is missing.
 * All commentary below is provisional; confirm against the original source.
 */
42 * Check for btree operation errors. See the section about handling
43 * operational errors in common.c.
/*
 * Common handler for an error returned by a btree operation during scrub.
 * Visible behavior: traces a deadlock retry, records badness by OR-ing
 * errflag into sc->sm->sm_flags, and emits an op-error trace event (the
 * ifork variant when the btree is rooted in an inode).  The dispatch
 * between these branches (orig. lines 49-58, presumably a switch on
 * *error) is absent from this listing.
 */
46 __xfs_scrub_btree_process_error(
47 struct xfs_scrub_context *sc,
48 struct xfs_btree_cur *cur,
59 /* Used to restart an op with deadlock avoidance. */
60 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
64 /* Note the badness but don't abort. */
65 sc->sm->sm_flags |= errflag;
/* Inode-rooted btrees get the ifork trace variant. */
69 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
70 trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
73 trace_xfs_scrub_btree_op_error(sc, cur, level,
/*
 * Process an operational error in the btree being scrubbed, marking any
 * badness as outright corruption (XFS_SCRUB_OFLAG_CORRUPT).  Return type
 * and the level/error parameter lines are missing from this listing.
 */
81 xfs_scrub_btree_process_error(
82 struct xfs_scrub_context *sc,
83 struct xfs_btree_cur *cur,
87 return __xfs_scrub_btree_process_error(sc, cur, level, error,
88 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
/*
 * Process an operational error hit while cross-referencing; records it as
 * a cross-reference failure (XFS_SCRUB_OFLAG_XFAIL) rather than outright
 * corruption.  Return type and trailing parameter lines are missing here.
 */
92 xfs_scrub_btree_xref_process_error(
93 struct xfs_scrub_context *sc,
94 struct xfs_btree_cur *cur,
98 return __xfs_scrub_btree_process_error(sc, cur, level, error,
99 XFS_SCRUB_OFLAG_XFAIL, __return_address);
102 /* Record btree block corruption. */
/*
 * Common corruption recorder: OR errflag into the scrub status flags and
 * emit a trace event pinpointing where the corruption was seen (ifork
 * variant for inode-rooted btrees).  The trace-call argument lines and
 * closing brace are missing from this listing.
 */
104 __xfs_scrub_btree_set_corrupt(
105 struct xfs_scrub_context *sc,
106 struct xfs_btree_cur *cur,
111 sc->sm->sm_flags |= errflag;
113 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
114 trace_xfs_scrub_ifork_btree_error(sc, cur, level,
117 trace_xfs_scrub_btree_error(sc, cur, level,
/*
 * Record corruption found directly in the btree being scrubbed
 * (XFS_SCRUB_OFLAG_CORRUPT).  The level parameter line and the final
 * argument (presumably __return_address) are missing from this listing.
 */
122 xfs_scrub_btree_set_corrupt(
123 struct xfs_scrub_context *sc,
124 struct xfs_btree_cur *cur,
127 __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
/*
 * Record corruption noticed while cross-referencing with this btree
 * (XFS_SCRUB_OFLAG_XCORRUPT).  As above, trailing argument lines are
 * missing from this listing.
 */
132 xfs_scrub_btree_xref_set_corrupt(
133 struct xfs_scrub_context *sc,
134 struct xfs_btree_cur *cur,
137 __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
142 * Make sure this record is in order and doesn't stray outside of the parent
/*
 * Record checker for level 0: verifies ordering against the previous
 * record and bounds the record against the parent block's low (and, for
 * overlapping btrees, high) key.  The function name line is missing;
 * presumably xfs_scrub_btree_rec, per the trace call below — confirm.
 */
147 struct xfs_scrub_btree *bs)
149 struct xfs_btree_cur *cur = bs->cur;
150 union xfs_btree_rec *rec;
151 union xfs_btree_key key;
152 union xfs_btree_key hkey;
153 union xfs_btree_key *keyp;
154 struct xfs_btree_block *block;
155 struct xfs_btree_block *keyblock;
/* Current record in the leaf block (bc_ptrs[0] is the leaf cursor slot). */
158 block = xfs_btree_get_block(cur, 0, &bp);
159 rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
161 trace_xfs_scrub_btree_rec(bs->sc, cur, 0);
163 /* If this isn't the first record, are they in order? */
164 if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
165 xfs_scrub_btree_set_corrupt(bs->sc, cur, 0);
166 bs->firstrec = false;
/* Remember this record so the next call can check ordering. */
167 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
/* Single-level tree has no parent keys to check; body of this if
 * (presumably an early return) is missing from this listing. */
169 if (cur->bc_nlevels == 1)
172 /* Is this at least as large as the parent low key? */
173 cur->bc_ops->init_key_from_rec(&key, rec);
174 keyblock = xfs_btree_get_block(cur, 1, &bp);
175 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
176 if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
177 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
/* High-key check only applies to overlapping btrees; the body of this
 * guard (presumably an early return) is missing from this listing. */
179 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
182 /* Is this no larger than the parent high key? */
183 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
184 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
185 if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
186 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
190 * Make sure this key is in order and doesn't stray outside of the parent
/*
 * Key checker for node levels: verifies ordering against the previous key
 * at this level and bounds the key against the parent level's low (and,
 * for overlapping btrees, high) key.  The function name line is missing;
 * presumably xfs_scrub_btree_key, per the trace call below — confirm.
 */
195 struct xfs_scrub_btree *bs,
198 struct xfs_btree_cur *cur = bs->cur;
199 union xfs_btree_key *key;
200 union xfs_btree_key *keyp;
201 struct xfs_btree_block *block;
202 struct xfs_btree_block *keyblock;
205 block = xfs_btree_get_block(cur, level, &bp);
206 key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
208 trace_xfs_scrub_btree_key(bs->sc, cur, level);
210 /* If this isn't the first key, are they in order? */
211 if (!bs->firstkey[level] &&
212 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
213 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
214 bs->firstkey[level] = false;
/* Remember this key for the next ordering check at this level. */
215 memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
/* No parent above the root; body of this guard (presumably an early
 * return) is missing from this listing. */
217 if (level + 1 >= cur->bc_nlevels)
220 /* Is this at least as large as the parent low key? */
221 keyblock = xfs_btree_get_block(cur, level + 1, &bp);
222 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
223 if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
224 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
/* High-key check only for overlapping btrees; guard body missing here. */
226 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
229 /* Is this no larger than the parent high key? */
230 key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
231 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
232 if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
233 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
237 * Check a btree pointer.  Returns true if it's ok to use this pointer.
238 * Callers do not need to set the corrupt flag.
/*
 * Validates a child-block pointer with the long/short pointer checker as
 * appropriate; on failure the corrupt flag is set here on the caller's
 * behalf.  The return statements and closing brace are missing from this
 * listing.
 */
241 xfs_scrub_btree_ptr_ok(
242 struct xfs_scrub_btree *bs,
244 union xfs_btree_ptr *ptr)
248 /* A btree rooted in an inode has no block pointer to the root. */
249 if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
250 level == bs->cur->bc_nlevels
253 /* Otherwise, check the pointers. */
254 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
255 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
257 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
259 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
264 /* Check that a btree block's sibling matches what we expect it. */
/*
 * Cross-checks one sibling pointer of a block against the parent level:
 * duplicates the cursor, moves the copy one slot left or right at
 * level + 1, and compares the pointer found there with the block's own
 * sibling pointer.  The direction parameter line and several control-flow
 * lines (gotos/labels, error returns) are missing from this listing.
 */
266 xfs_scrub_btree_block_check_sibling(
267 struct xfs_scrub_btree *bs,
270 union xfs_btree_ptr *sibling)
272 struct xfs_btree_cur *cur = bs->cur;
273 struct xfs_btree_block *pblock;
275 struct xfs_btree_cur *ncur = NULL;
276 union xfs_btree_ptr *pp;
/* Work on a duplicate so the caller's cursor position is untouched. */
280 error = xfs_btree_dup_cursor(cur, &ncur);
281 if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) ||
286 * If the pointer is null, we shouldn't be able to move the upper
287 * level pointer anywhere.
289 if (xfs_btree_ptr_is_null(cur, sibling)) {
291 error = xfs_btree_increment(ncur, level + 1, &success);
293 error = xfs_btree_decrement(ncur, level + 1, &success);
/* Moving past a null sibling succeeded -> parent disagrees: corrupt. */
294 if (error == 0 && success)
295 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
300 /* Increment upper level pointer. */
302 error = xfs_btree_increment(ncur, level + 1, &success);
304 error = xfs_btree_decrement(ncur, level + 1, &success);
305 if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error))
308 xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1);
312 /* Compare upper level pointer to sibling pointer. */
313 pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
314 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
315 if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
318 xfs_scrub_buffer_recheck(bs->sc, pbp);
320 if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
321 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
/* Always tear down the duplicate cursor before returning. */
323 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
327 /* Check the siblings of a btree block. */
/*
 * Verifies both sibling pointers of a block: the root must have null
 * siblings; non-root blocks have each sibling checked against the parent
 * level via xfs_scrub_btree_block_check_sibling().  Several control-flow
 * lines (early return for the root case, error returns) are missing
 * from this listing.
 */
329 xfs_scrub_btree_block_check_siblings(
330 struct xfs_scrub_btree *bs,
331 struct xfs_btree_block *block)
333 struct xfs_btree_cur *cur = bs->cur;
334 union xfs_btree_ptr leftsib;
335 union xfs_btree_ptr rightsib;
339 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
340 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
341 level = xfs_btree_get_level(block);
343 /* Root block should never have siblings. */
344 if (level == cur->bc_nlevels - 1) {
345 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
346 !xfs_btree_ptr_is_null(cur, &rightsib))
347 xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
352 * Does the left & right sibling pointers match the adjacent
353 * parent level pointers?
354 * (These functions absorb error codes for us.)
356 error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib);
359 error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib);
/* Fragment of struct check_owner: list linkage for deferred owner checks
 * (the surrounding struct definition is missing from this listing). */
367 struct list_head list;
373 * Make sure this btree block isn't in the free list and that there's
374 * an rmap record for it.
/*
 * Cross-references one btree block (by daddr) against per-AG metadata:
 * loads the AG headers, checks the block is marked used and owned by
 * bs->oinfo.  If this scrub *is* the bnobt/rmapbt scrub, the aliased
 * cursor is nullified on self-xref trouble (see comment below).  The
 * declarations, conditional bodies, and return are missing from this
 * listing.
 */
377 xfs_scrub_btree_check_block_owner(
378 struct xfs_scrub_btree *bs,
391 btnum = bs->cur->bc_btnum;
392 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
393 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
/* Long-pointer (fs-wide) btrees need the per-AG context set up here. */
395 init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
397 error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa);
398 if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur,
403 xfs_scrub_xref_is_used_space(bs->sc, agbno, 1);
405 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
406 * have to nullify it (to shut down further block owner checks) if
407 * self-xref encounters problems.
409 if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
412 xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
413 if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
417 xfs_scrub_ag_free(bs->sc, &bs->sc->sa);
422 /* Check the owner of a btree block. */
/*
 * Owner check entry point for a just-read block.  Inode-rooted root
 * blocks (bp == NULL) are skipped; bnobt/rmapbt scrubs defer the check
 * by queueing the daddr on bs->to_check (to avoid touching a cursor we
 * are scanning with); everything else checks immediately.  Allocation-
 * failure handling and some field assignments are missing from this
 * listing.
 */
424 xfs_scrub_btree_check_owner(
425 struct xfs_scrub_btree *bs,
429 struct xfs_btree_cur *cur = bs->cur;
430 struct check_owner *co;
/* Inode-rooted btree root lives in the inode fork, not a buffer. */
432 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
436 * We want to cross-reference each btree block with the bnobt
437 * and the rmapbt.  We cannot cross-reference the bnobt or
438 * rmapbt while scanning the bnobt or rmapbt, respectively,
439 * because we cannot alter the cursor and we'd prefer not to
440 * duplicate cursors.  Therefore, save the buffer daddr for
443 if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
444 co = kmem_alloc(sizeof(struct check_owner),
445 KM_MAYFAIL | KM_NOFS);
449 co->daddr = XFS_BUF_ADDR(bp);
450 list_add_tail(&co->list, &bs->to_check);
454 return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
458 * Grab and scrub a btree block given a btree pointer.  Returns block
459 * and buffer pointers (if applicable) if they're ok to use.
/*
 * Reads the child block at *pp, runs the long/short block verifier on it,
 * rechecks the buffer, then performs owner and sibling checks (both of
 * which absorb error codes).  Declarations (error, failed_at), the level
 * parameter line, and several verifier arguments are missing from this
 * listing.
 */
462 xfs_scrub_btree_get_block(
463 struct xfs_scrub_btree *bs,
465 union xfs_btree_ptr *pp,
466 struct xfs_btree_block **pblock,
467 struct xfs_buf **pbp)
475 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
476 if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) ||
480 xfs_btree_get_block(bs->cur, level, pbp);
/* Structural verification of the block header/contents. */
481 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
482 failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
485 failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
488 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
492 xfs_scrub_buffer_recheck(bs->sc, *pbp);
495 * Check the block's owner; this function absorbs error codes
498 error = xfs_scrub_btree_check_owner(bs, level, *pbp);
503 * Check the block's siblings; this function absorbs error codes
506 return xfs_scrub_btree_block_check_siblings(bs, *pblock);
510 * Check that the low and high keys of this block match the keys stored
511 * in the parent block.
/*
 * Computes this block's low/high keys and compares them against the
 * parent's copy at cur->bc_ptrs[level + 1]; a mismatch is corruption.
 * The high-key check applies only to overlapping btrees.  Guard bodies
 * (early returns) and some argument continuation lines are missing from
 * this listing.
 */
514 xfs_scrub_btree_block_keys(
515 struct xfs_scrub_btree *bs,
517 struct xfs_btree_block *block)
519 union xfs_btree_key block_keys;
520 struct xfs_btree_cur *cur = bs->cur;
521 union xfs_btree_key *high_bk;
522 union xfs_btree_key *parent_keys;
523 union xfs_btree_key *high_pk;
524 struct xfs_btree_block *parent_block;
/* The root has no parent to compare with; guard body missing here. */
527 if (level >= cur->bc_nlevels - 1)
530 /* Calculate the keys for this block. */
531 xfs_btree_get_keys(cur, block, &block_keys);
533 /* Obtain the parent's copy of the keys for this block. */
534 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
535 parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
538 if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
539 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
541 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
/* Overlapping btrees: the high keys must match too. */
545 high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
546 high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
549 if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
550 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
554 * Visit all nodes and leaves of a btree. Check that all pointers and
555 * records are in order, that the keys reflect the records, and use a callback
556 * so that the caller can verify individual records.
560 struct xfs_scrub_context *sc,
561 struct xfs_btree_cur *cur,
562 xfs_scrub_btree_rec_fn scrub_fn,
563 struct xfs_owner_info *oinfo,
566 struct xfs_scrub_btree bs = { NULL };
567 union xfs_btree_ptr ptr;
568 union xfs_btree_ptr *pp;
569 union xfs_btree_rec *recp;
570 struct xfs_btree_block *block;
573 struct check_owner *co;
574 struct check_owner *n;
578 /* Initialize scrub state */
580 bs.scrub_rec = scrub_fn;
583 bs.private = private;
585 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
586 bs.firstkey[i] = true;
587 INIT_LIST_HEAD(&bs.to_check);
589 /* Don't try to check a tree with a height we can't handle. */
590 if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
591 xfs_scrub_btree_set_corrupt(sc, cur, 0);
596 * Load the root of the btree. The helper function absorbs
597 * error codes for us.
599 level = cur->bc_nlevels - 1;
600 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
601 if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
603 error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp);
607 cur->bc_ptrs[level] = 1;
609 while (level < cur->bc_nlevels) {
610 block = xfs_btree_get_block(cur, level, &bp);
613 /* End of leaf, pop back towards the root. */
614 if (cur->bc_ptrs[level] >
615 be16_to_cpu(block->bb_numrecs)) {
616 xfs_scrub_btree_block_keys(&bs, level, block);
617 if (level < cur->bc_nlevels - 1)
618 cur->bc_ptrs[level + 1]++;
623 /* Records in order for scrub? */
624 xfs_scrub_btree_rec(&bs);
626 /* Call out to the record checker. */
627 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
628 error = bs.scrub_rec(&bs, recp);
631 if (xfs_scrub_should_terminate(sc, &error) ||
632 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
635 cur->bc_ptrs[level]++;
639 /* End of node, pop back towards the root. */
640 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
641 xfs_scrub_btree_block_keys(&bs, level, block);
642 if (level < cur->bc_nlevels - 1)
643 cur->bc_ptrs[level + 1]++;
648 /* Keys in order for scrub? */
649 xfs_scrub_btree_key(&bs, level);
651 /* Drill another level deeper. */
652 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
653 if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) {
654 cur->bc_ptrs[level]++;
658 error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp);
662 cur->bc_ptrs[level] = 1;
666 /* Process deferred owner checks on btree blocks. */
667 list_for_each_entry_safe(co, n, &bs.to_check, list) {
668 if (!error && bs.cur)
669 error = xfs_scrub_btree_check_block_owner(&bs,
670 co->level, co->daddr);