1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
18 #include "xfs_inode.h"
19 #include "xfs_alloc.h"
20 #include "scrub/scrub.h"
21 #include "scrub/common.h"
22 #include "scrub/btree.h"
23 #include "scrub/trace.h"
/*
 * Shared handler for error codes returned by btree operations during scrub.
 *
 * From the lines visible here: one path emits trace_xchk_deadlock_retry()
 * with *error; another ORs the caller-supplied errflag into
 * sc->sm->sm_flags so the problem is noted without aborting; and an
 * op-error tracepoint is emitted, with the ifork variant guarded by the
 * XFS_BTREE_ROOT_IN_INODE cursor flag.
 *
 * NOTE(review): the embedded line numbers skip, so the conditions that
 * select each path and the return statements are not part of this
 * listing -- confirm against the full source.
 */
28 * Check for btree operation errors. See the section about handling
29 * operational errors in common.c.
32 __xchk_btree_process_error(
34 struct xfs_btree_cur *cur,
45 /* Used to restart an op with deadlock avoidance. */
46 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
50 /* Note the badness but don't abort. */
51 sc->sm->sm_flags |= errflag;
/* The ifork flavour of the tracepoint is guarded by the root-in-inode flag. */
55 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
56 trace_xchk_ifork_btree_op_error(sc, cur, level,
59 trace_xchk_btree_op_error(sc, cur, level,
/*
 * Thin wrapper: forwards sc, cur, level and error to
 * __xchk_btree_process_error() with XFS_SCRUB_OFLAG_CORRUPT as the flag
 * and __return_address as the last argument, and returns its result.
 */
67 xchk_btree_process_error(
69 struct xfs_btree_cur *cur,
73 return __xchk_btree_process_error(sc, cur, level, error,
74 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
/*
 * Cross-reference variant of the error handler: identical forwarding to
 * __xchk_btree_process_error(), but the flag passed is
 * XFS_SCRUB_OFLAG_XFAIL instead of the corruption flag.
 */
78 xchk_btree_xref_process_error(
80 struct xfs_btree_cur *cur,
84 return __xchk_btree_process_error(sc, cur, level, error,
85 XFS_SCRUB_OFLAG_XFAIL, __return_address);
/*
 * Common helper for flagging a btree problem: ORs errflag into
 * sc->sm->sm_flags and emits a btree-error tracepoint for the given
 * cursor and level.  The ifork tracepoint is guarded by the
 * XFS_BTREE_ROOT_IN_INODE cursor flag.
 *
 * NOTE(review): the remaining parameters and the trailing tracepoint
 * arguments fall on lines that are missing from this listing.
 */
88 /* Record btree block corruption. */
90 __xchk_btree_set_corrupt(
92 struct xfs_btree_cur *cur,
97 sc->sm->sm_flags |= errflag;
99 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
100 trace_xchk_ifork_btree_error(sc, cur, level,
103 trace_xchk_btree_error(sc, cur, level,
/*
 * Flag corruption found in the btree being scrubbed: calls
 * __xchk_btree_set_corrupt() with XFS_SCRUB_OFLAG_CORRUPT.
 */
108 xchk_btree_set_corrupt(
109 struct xfs_scrub *sc,
110 struct xfs_btree_cur *cur,
113 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
/*
 * Flag a problem found while cross-referencing: calls
 * __xchk_btree_set_corrupt() with XFS_SCRUB_OFLAG_XCORRUPT.
 */
118 xchk_btree_xref_set_corrupt(
119 struct xfs_scrub *sc,
120 struct xfs_btree_cur *cur,
123 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
/*
 * Record-level check for the leaf (level 0) of the btree being scrubbed.
 *
 * Visible steps: locate the record at cur->bc_ptrs[0] in the level-0
 * block and emit trace_xchk_btree_rec(); if this is not the first record
 * seen, ask the btree ops recs_inorder() to compare bs->lastrec with it
 * and flag corruption at level 0 when that fails; then clear
 * bs->firstrec and copy rec_len bytes of the record into bs->lastrec.
 * The record is then converted to a key and compared with the parent
 * key at cur->bc_ptrs[1] in the level-1 block; a negative
 * diff_two_keys() result is flagged as corruption at level 1.  The
 * high-key comparison repeats this with init_high_key_from_rec() and
 * the parent high key.
 *
 * NOTE(review): this listing omits lines (the embedded numbers skip),
 * including the function name line.  The statements controlled by the
 * bc_nlevels == 1 test and the XFS_BTREE_OVERLAPPING test are not
 * shown; presumably both are early returns -- confirm against the full
 * source.
 */
128 * Make sure this record is in order and doesn't stray outside of the parent
133 struct xchk_btree *bs)
135 struct xfs_btree_cur *cur = bs->cur;
136 union xfs_btree_rec *rec;
137 union xfs_btree_key key;
138 union xfs_btree_key hkey;
139 union xfs_btree_key *keyp;
140 struct xfs_btree_block *block;
141 struct xfs_btree_block *keyblock;
144 block = xfs_btree_get_block(cur, 0, &bp);
145 rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
147 trace_xchk_btree_rec(bs->sc, cur, 0);
149 /* If this isn't the first record, are they in order? */
150 if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
151 xchk_btree_set_corrupt(bs->sc, cur, 0);
152 bs->firstrec = false;
153 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
155 if (cur->bc_nlevels == 1)
158 /* Is this at least as large as the parent low key? */
159 cur->bc_ops->init_key_from_rec(&key, rec);
160 keyblock = xfs_btree_get_block(cur, 1, &bp);
161 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
162 if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
163 xchk_btree_set_corrupt(bs->sc, cur, 1);
165 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
168 /* Is this no larger than the parent high key? */
169 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
170 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
171 if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
172 xchk_btree_set_corrupt(bs->sc, cur, 1);
/*
 * Key-level check for a node block at the given level.
 *
 * Visible steps: locate the key at cur->bc_ptrs[level] and emit
 * trace_xchk_btree_key(); if it is not the first key seen at this level,
 * ask the btree ops keys_inorder() to compare bs->lastkey[level] with
 * it and flag corruption at this level when that fails; then clear
 * bs->firstkey[level] and copy key_len bytes into bs->lastkey[level].
 * The key is then compared with the parent key at
 * cur->bc_ptrs[level + 1]; a negative diff_two_keys() result is flagged
 * as corruption.  The high-key comparison repeats this with the high
 * keys of both this slot and the parent slot.
 *
 * NOTE(review): this listing omits lines (the embedded numbers skip),
 * including the function name line.  The statements controlled by the
 * "level + 1 >= cur->bc_nlevels" test and the XFS_BTREE_OVERLAPPING
 * test are not shown; presumably both are early returns -- confirm
 * against the full source.
 */
176 * Make sure this key is in order and doesn't stray outside of the parent
181 struct xchk_btree *bs,
184 struct xfs_btree_cur *cur = bs->cur;
185 union xfs_btree_key *key;
186 union xfs_btree_key *keyp;
187 struct xfs_btree_block *block;
188 struct xfs_btree_block *keyblock;
191 block = xfs_btree_get_block(cur, level, &bp);
192 key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
194 trace_xchk_btree_key(bs->sc, cur, level);
196 /* If this isn't the first key, are they in order? */
197 if (!bs->firstkey[level] &&
198 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
199 xchk_btree_set_corrupt(bs->sc, cur, level);
200 bs->firstkey[level] = false;
201 memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
203 if (level + 1 >= cur->bc_nlevels)
206 /* Is this at least as large as the parent low key? */
207 keyblock = xfs_btree_get_block(cur, level + 1, &bp);
208 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
209 if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
210 xchk_btree_set_corrupt(bs->sc, cur, level);
212 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
215 /* Is this no larger than the parent high key? */
216 key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
217 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
218 if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
219 xchk_btree_set_corrupt(bs->sc, cur, level);
/*
 * Validate a btree pointer before it is followed.
 *
 * Visible logic: a special case is tested for cursors with
 * XFS_BTREE_ROOT_IN_INODE when level equals bc_nlevels (the statement
 * it controls is not shown).  Otherwise the pointer is converted from
 * big-endian and handed to xfs_btree_check_lptr() for
 * XFS_BTREE_LONG_PTRS cursors or to xfs_btree_check_sptr() for the
 * rest, with the result stored in res.  xchk_btree_set_corrupt() is
 * called afterwards, presumably only when res is false -- the guarding
 * condition and the return statement are on lines missing from this
 * listing, as is the function name line.
 */
223 * Check a btree pointer. Returns true if it's ok to use this pointer.
224 * Callers do not need to set the corrupt flag.
228 struct xchk_btree *bs,
230 union xfs_btree_ptr *ptr)
234 /* A btree rooted in an inode has no block pointer to the root. */
235 if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
236 level == bs->cur->bc_nlevels)
239 /* Otherwise, check the pointers. */
240 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
241 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
243 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
245 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
/*
 * Compare one sibling pointer of a block with the pointer stored in the
 * adjacent slot of the parent level.
 *
 * Visible steps: the cursor is duplicated into ncur so that the walk
 * cursor is not moved.  If the sibling pointer is null, ncur is stepped
 * at level + 1 (increment or decrement) and corruption is flagged at
 * this level when the step reports success with no error.  Otherwise
 * ncur is stepped at level + 1, any error goes through
 * xchk_btree_process_error(), the parent pointer at
 * ncur->bc_ptrs[level + 1] is validated with xchk_btree_ptr_ok(), the
 * parent buffer is passed to xchk_buffer_recheck(), and a non-zero
 * xfs_btree_diff_two_ptrs() between that pointer and the sibling is
 * flagged as corruption.  ncur is released with xfs_btree_del_cursor()
 * using XFS_BTREE_ERROR.
 *
 * NOTE(review): the direction parameter, the tests choosing increment
 * versus decrement, the condition guarding the level + 1 corruption
 * call, and the goto/return lines are missing from this listing.
 */
250 /* Check that a btree block's sibling matches what we expect it. */
252 xchk_btree_block_check_sibling(
253 struct xchk_btree *bs,
256 union xfs_btree_ptr *sibling)
258 struct xfs_btree_cur *cur = bs->cur;
259 struct xfs_btree_block *pblock;
261 struct xfs_btree_cur *ncur = NULL;
262 union xfs_btree_ptr *pp;
266 error = xfs_btree_dup_cursor(cur, &ncur);
267 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
272 * If the pointer is null, we shouldn't be able to move the upper
273 * level pointer anywhere.
275 if (xfs_btree_ptr_is_null(cur, sibling)) {
277 error = xfs_btree_increment(ncur, level + 1, &success);
279 error = xfs_btree_decrement(ncur, level + 1, &success);
280 if (error == 0 && success)
281 xchk_btree_set_corrupt(bs->sc, cur, level);
286 /* Increment upper level pointer. */
288 error = xfs_btree_increment(ncur, level + 1, &success);
290 error = xfs_btree_decrement(ncur, level + 1, &success);
291 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
294 xchk_btree_set_corrupt(bs->sc, cur, level + 1);
298 /* Compare upper level pointer to sibling pointer. */
299 pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
300 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
301 if (!xchk_btree_ptr_ok(bs, level + 1, pp))
304 xchk_buffer_recheck(bs->sc, pbp);
306 if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
307 xchk_btree_set_corrupt(bs->sc, cur, level);
309 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
/*
 * Check both sibling pointers of a btree block.
 *
 * Visible steps: read the left and right sibling pointers and the level
 * from the block header.  When the level is bc_nlevels - 1 (the root),
 * corruption is flagged if either sibling pointer is non-null.
 * Otherwise xchk_btree_block_check_sibling() is called once with -1 and
 * the left sibling and once with 1 and the right sibling.
 *
 * NOTE(review): the handling of the two error results and the return
 * statement are on lines missing from this listing.
 */
313 /* Check the siblings of a btree block. */
315 xchk_btree_block_check_siblings(
316 struct xchk_btree *bs,
317 struct xfs_btree_block *block)
319 struct xfs_btree_cur *cur = bs->cur;
320 union xfs_btree_ptr leftsib;
321 union xfs_btree_ptr rightsib;
325 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
326 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
327 level = xfs_btree_get_level(block);
329 /* Root block should never have siblings. */
330 if (level == cur->bc_nlevels - 1) {
331 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
332 !xfs_btree_ptr_is_null(cur, &rightsib))
333 xchk_btree_set_corrupt(bs->sc, cur, level);
338 * Does the left & right sibling pointers match the adjacent
339 * parent level pointers?
340 * (These function absorbs error codes for us.)
342 error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
345 error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
/*
 * List linkage member.  NOTE(review): presumably a field of struct
 * check_owner, whose other lines are missing from this listing; entries
 * are appended to bs->to_check through a member named list elsewhere in
 * this file.
 */
353 struct list_head list;
/*
 * Cross-reference one btree block, identified by its disk address.
 *
 * Visible steps: derive the AG number and AG block number from daddr;
 * record in init_sa whether the cursor uses XFS_BTREE_LONG_PTRS; call
 * xchk_ag_init() on bs->sc->sa and route its error through
 * xchk_btree_xref_process_error(); call xchk_xref_is_used_space() and
 * xchk_xref_is_owned_by() (with bs->oinfo) for the single block; and
 * call xchk_ag_free() on bs->sc->sa.
 *
 * NOTE(review): the conditions around xchk_ag_init() and xchk_ag_free()
 * (presumably init_sa) and the statements run when bno_cur or rmap_cur
 * has become NULL (presumably clearing bs->cur, per the comment below)
 * are on lines missing from this listing.
 */
359 * Make sure this btree block isn't in the free list and that there's
360 * an rmap record for it.
363 xchk_btree_check_block_owner(
364 struct xchk_btree *bs,
377 btnum = bs->cur->bc_btnum;
378 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
379 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
381 init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
383 error = xchk_ag_init(bs->sc, agno, &bs->sc->sa);
384 if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
389 xchk_xref_is_used_space(bs->sc, agbno, 1);
391 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
392 * have to nullify it (to shut down further block owner checks) if
393 * self-xref encounters problems.
395 if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
398 xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
399 if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
403 xchk_ag_free(bs->sc, &bs->sc->sa);
/*
 * Check, or queue for later checking, the owner of a btree block.
 *
 * Visible steps: a special case is tested for root-in-inode cursors
 * with a NULL bp (the statement it controls is not shown).  For bnobt
 * and rmapbt cursors a struct check_owner is allocated, the buffer disk
 * address is stored in it, and it is appended to bs->to_check.
 * In the remaining case xchk_btree_check_block_owner() is called
 * directly with the buffer disk address and its result is returned.
 *
 * NOTE(review): the allocation flags, the allocation-failure handling,
 * the assignment of co->level and the return inside the deferred branch
 * are on lines missing from this listing.
 */
408 /* Check the owner of a btree block. */
410 xchk_btree_check_owner(
411 struct xchk_btree *bs,
415 struct xfs_btree_cur *cur = bs->cur;
416 struct check_owner *co;
418 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
422 * We want to cross-reference each btree block with the bnobt
423 * and the rmapbt. We cannot cross-reference the bnobt or
424 * rmapbt while scanning the bnobt or rmapbt, respectively,
425 * because we cannot alter the cursor and we'd prefer not to
426 * duplicate cursors. Therefore, save the buffer daddr for
429 if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
430 co = kmem_alloc(sizeof(struct check_owner),
435 co->daddr = XFS_BUF_ADDR(bp);
436 list_add_tail(&co->list, &bs->to_check);
440 return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
/*
 * Flag a block that holds fewer records than the btree type allows.
 *
 * Visible steps: read bb_numrecs from the block header and compare it
 * with the get_minrecs() result for this level.  ok_level starts as
 * bc_nlevels - 1 and is adjusted for root-in-inode cursors; a test of
 * level against ok_level follows, and xchk_btree_set_corrupt() is the
 * final statement.
 *
 * NOTE(review): the statements controlled by the three if tests
 * (presumably early returns and an ok_level decrement) are on lines
 * missing from this listing -- confirm against the full source.
 */
444 * Check that this btree block has at least minrecs records or is one of the
445 * special blocks that don't require that.
448 xchk_btree_check_minrecs(
449 struct xchk_btree *bs,
451 struct xfs_btree_block *block)
453 unsigned int numrecs;
456 numrecs = be16_to_cpu(block->bb_numrecs);
458 /* More records than minrecs means the block is ok. */
459 if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
463 * Certain btree blocks /can/ have fewer than minrecs records. Any
464 * level greater than or equal to the level of the highest dedicated
465 * btree block are allowed to violate this constraint.
467 * For a btree rooted in a block, the btree root can have fewer than
468 * minrecs records. If the btree is rooted in an inode and does not
469 * store records in the root, the direct children of the root and the
470 * root itself can have fewer than minrecs records.
472 ok_level = bs->cur->bc_nlevels - 1;
473 if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
475 if (level >= ok_level)
478 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
/*
 * Load the block a btree pointer refers to and run the per-block checks.
 *
 * Visible steps, in order: xfs_btree_lookup_get_block() reads the block
 * into the cursor at this level, with errors routed through
 * xchk_btree_process_error(); xfs_btree_get_block() fetches the buffer
 * into *pbp; the block header is verified with
 * __xfs_btree_check_lblock() or __xfs_btree_check_sblock() depending on
 * XFS_BTREE_LONG_PTRS; xchk_buffer_recheck() is called on the buffer;
 * then the minrecs check, the owner check and finally the sibling
 * check, whose result is returned.
 *
 * NOTE(review): the conditions guarding the corruption call and the
 * buffer recheck, and the early-return lines, are missing from this
 * listing.
 */
482 * Grab and scrub a btree block given a btree pointer. Returns block
483 * and buffer pointers (if applicable) if they're ok to use.
486 xchk_btree_get_block(
487 struct xchk_btree *bs,
489 union xfs_btree_ptr *pp,
490 struct xfs_btree_block **pblock,
491 struct xfs_buf **pbp)
493 xfs_failaddr_t failed_at;
499 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
500 if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
504 xfs_btree_get_block(bs->cur, level, pbp);
505 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
506 failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
509 failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
512 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
516 xchk_buffer_recheck(bs->sc, *pbp);
518 xchk_btree_check_minrecs(bs, level, *pblock);
521 * Check the block's owner; this function absorbs error codes
524 error = xchk_btree_check_owner(bs, level, *pbp);
529 * Check the block's siblings; this function absorbs error codes
532 return xchk_btree_block_check_siblings(bs, *pblock);
/*
 * Compare the keys computed from a block with the copy kept in its
 * parent.
 *
 * Visible steps: a test of level against bc_nlevels - 1 comes first
 * (the statement it controls is not shown).  xfs_btree_get_keys()
 * computes the keys of the block, the parent slot at
 * cur->bc_ptrs[level + 1] supplies the stored low key, and any non-zero
 * diff_two_keys() result is flagged as corruption.  The high-key
 * comparison does the same with the high key derived from block_keys
 * and the parent high key.
 *
 * NOTE(review): both corruption calls pass the literal level 1 rather
 * than a value derived from the level parameter; this looks like it
 * only affects what the tracepoint reports, but confirm it is intended.
 * NOTE(review): the statements controlled by the first test and by the
 * XFS_BTREE_OVERLAPPING test are on lines missing from this listing.
 */
536 * Check that the low and high keys of this block match the keys stored
537 * in the parent block.
540 xchk_btree_block_keys(
541 struct xchk_btree *bs,
543 struct xfs_btree_block *block)
545 union xfs_btree_key block_keys;
546 struct xfs_btree_cur *cur = bs->cur;
547 union xfs_btree_key *high_bk;
548 union xfs_btree_key *parent_keys;
549 union xfs_btree_key *high_pk;
550 struct xfs_btree_block *parent_block;
553 if (level >= cur->bc_nlevels - 1)
556 /* Calculate the keys for this block. */
557 xfs_btree_get_keys(cur, block, &block_keys);
559 /* Obtain the parent's copy of the keys for this block. */
560 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
561 parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
564 if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
565 xchk_btree_set_corrupt(bs->sc, cur, 1);
567 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
571 high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
572 high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
575 if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
576 xchk_btree_set_corrupt(bs->sc, cur, 1);
/*
 * Top-level btree walker for scrub.
 *
 * Visible steps: set up the struct xchk_btree state (record callback,
 * private pointer, every firstkey[] entry true, empty to_check list);
 * flag corruption at level 0 when bc_nlevels exceeds
 * XFS_BTREE_MAXLEVELS; load the root block through
 * xchk_btree_ptr_ok() and xchk_btree_get_block() and start at slot 1.
 * The main loop runs while level < bc_nlevels.  When the slot index
 * passes bb_numrecs the block keys are checked and the parent slot is
 * advanced.  In the leaf branch the caller callback bs.scrub_rec() runs
 * on each record, followed by a termination test, and the slot index is
 * advanced.  In the node branch the key is checked, the child pointer
 * is validated (a bad pointer just advances the slot), and the child
 * block is loaded with its slot index reset to 1.  After the loop, the
 * entries queued on bs.to_check are processed with
 * xchk_btree_check_block_owner() as long as no error has occurred and
 * bs.cur is still set.
 *
 * NOTE(review): this listing omits lines (the embedded numbers skip),
 * including the function name line, the final parameter, the level
 * adjustments, the leaf/node test, the break/continue/goto statements
 * and the end of the deferred-check loop -- confirm against the full
 * source.
 */
580 * Visit all nodes and leaves of a btree. Check that all pointers and
581 * records are in order, that the keys reflect the records, and use a callback
582 * so that the caller can verify individual records.
586 struct xfs_scrub *sc,
587 struct xfs_btree_cur *cur,
588 xchk_btree_rec_fn scrub_fn,
589 struct xfs_owner_info *oinfo,
592 struct xchk_btree bs = { NULL };
593 union xfs_btree_ptr ptr;
594 union xfs_btree_ptr *pp;
595 union xfs_btree_rec *recp;
596 struct xfs_btree_block *block;
599 struct check_owner *co;
600 struct check_owner *n;
604 /* Initialize scrub state */
606 bs.scrub_rec = scrub_fn;
609 bs.private = private;
/* Every level starts out with no previous key to compare against. */
611 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
612 bs.firstkey[i] = true;
613 INIT_LIST_HEAD(&bs.to_check);
615 /* Don't try to check a tree with a height we can't handle. */
616 if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
617 xchk_btree_set_corrupt(sc, cur, 0);
622 * Load the root of the btree. The helper function absorbs
623 * error codes for us.
625 level = cur->bc_nlevels - 1;
626 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
/* The root pointer is checked at level bc_nlevels, one above the root block. */
627 if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
629 error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
633 cur->bc_ptrs[level] = 1;
635 while (level < cur->bc_nlevels) {
636 block = xfs_btree_get_block(cur, level, &bp);
639 /* End of leaf, pop back towards the root. */
640 if (cur->bc_ptrs[level] >
641 be16_to_cpu(block->bb_numrecs)) {
642 xchk_btree_block_keys(&bs, level, block);
643 if (level < cur->bc_nlevels - 1)
644 cur->bc_ptrs[level + 1]++;
649 /* Records in order for scrub? */
652 /* Call out to the record checker. */
653 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
654 error = bs.scrub_rec(&bs, recp);
657 if (xchk_should_terminate(sc, &error) ||
658 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
661 cur->bc_ptrs[level]++;
665 /* End of node, pop back towards the root. */
666 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
667 xchk_btree_block_keys(&bs, level, block);
668 if (level < cur->bc_nlevels - 1)
669 cur->bc_ptrs[level + 1]++;
674 /* Keys in order for scrub? */
675 xchk_btree_key(&bs, level);
677 /* Drill another level deeper. */
678 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
679 if (!xchk_btree_ptr_ok(&bs, level, pp)) {
680 cur->bc_ptrs[level]++;
684 error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
688 cur->bc_ptrs[level] = 1;
692 /* Process deferred owner checks on btree blocks. */
693 list_for_each_entry_safe(co, n, &bs.to_check, list) {
/* Stop issuing checks after the first error or once bs.cur has been cleared. */
694 if (!error && bs.cur)
695 error = xchk_btree_check_block_owner(&bs,
696 co->level, co->daddr);