xfs: support file data forks containing metadata btrees
author Darrick J. Wong <djwong@kernel.org>
Thu, 21 Nov 2024 00:20:27 +0000 (16:20 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Mon, 23 Dec 2024 21:06:05 +0000 (13:06 -0800)
Create a new fork format type for metadata btrees.  This fork type
requires that the inode is in the metadata directory tree, and only
applies to the data fork.  The actual type of the metadata btree itself
is determined by the di_metatype field.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
12 files changed:
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_inode_fork.c
fs/xfs/scrub/bmap.c
fs/xfs/scrub/bmap_repair.c
fs/xfs/scrub/inode.c
fs/xfs/scrub/inode_repair.c
fs/xfs/scrub/rmap_repair.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_inode_item_recover.c
fs/xfs/xfs_trace.h

index 469fc7afa591b4c3bb036b1a09906902cf7c518e..41ea4283c43cb4ddefb0da45fc7712a61805573a 100644 (file)
@@ -997,7 +997,8 @@ enum xfs_dinode_fmt {
        XFS_DINODE_FMT_LOCAL,           /* bulk data */
        XFS_DINODE_FMT_EXTENTS,         /* struct xfs_bmbt_rec */
        XFS_DINODE_FMT_BTREE,           /* struct xfs_bmdr_block */
-       XFS_DINODE_FMT_UUID             /* added long ago, but never used */
+       XFS_DINODE_FMT_UUID,            /* added long ago, but never used */
+       XFS_DINODE_FMT_META_BTREE,      /* metadata btree */
 };
 
 #define XFS_INODE_FORMAT_STR \
@@ -1005,7 +1006,8 @@ enum xfs_dinode_fmt {
        { XFS_DINODE_FMT_LOCAL,         "local" }, \
        { XFS_DINODE_FMT_EXTENTS,       "extent" }, \
        { XFS_DINODE_FMT_BTREE,         "btree" }, \
-       { XFS_DINODE_FMT_UUID,          "uuid" }
+       { XFS_DINODE_FMT_UUID,          "uuid" }, \
+       { XFS_DINODE_FMT_META_BTREE,    "meta_btree" }
 
 /*
  * Max values for extnum and aextnum.
index 424861fbf1bd49f710e264bef63e22b5df86b9f7..1648d72d6ed95a620316884a5f3e576bca4c56e9 100644 (file)
@@ -441,6 +441,16 @@ xfs_dinode_verify_fork(
                if (di_nextents > max_extents)
                        return __this_address;
                break;
+       case XFS_DINODE_FMT_META_BTREE:
+               if (!xfs_has_metadir(mp))
+                       return __this_address;
+               if (!(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA)))
+                       return __this_address;
+               switch (be16_to_cpu(dip->di_metatype)) {
+               default:
+                       return __this_address;
+               }
+               break;
        default:
                return __this_address;
        }
@@ -460,6 +470,10 @@ xfs_dinode_verify_forkoff(
                if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
                        return __this_address;
                break;
+       case XFS_DINODE_FMT_META_BTREE:
+               if (!xfs_has_metadir(mp) || !xfs_has_parent(mp))
+                       return __this_address;
+               fallthrough;
        case XFS_DINODE_FMT_LOCAL:      /* fall through ... */
        case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
        case XFS_DINODE_FMT_BTREE:
@@ -637,9 +651,6 @@ xfs_dinode_verify(
        if (mode && nextents + naextents > nblocks)
                return __this_address;
 
-       if (nextents + naextents == 0 && nblocks != 0)
-               return __this_address;
-
        if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
                return __this_address;
 
@@ -743,6 +754,12 @@ xfs_dinode_verify(
                        return fa;
        }
 
+       /* metadata inodes containing btrees always have zero extent count */
+       if (XFS_DFORK_FORMAT(dip, XFS_DATA_FORK) != XFS_DINODE_FMT_META_BTREE) {
+               if (nextents + naextents == 0 && nblocks != 0)
+                       return __this_address;
+       }
+
        return NULL;
 }
 
index 60853bac289a39f796b2012e7945fe3de5dcf2a6..1a782339396dc33204261f5b0c557184a0a3d253 100644 (file)
@@ -267,6 +267,12 @@ xfs_iformat_data_fork(
                        return xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
                case XFS_DINODE_FMT_BTREE:
                        return xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
+               case XFS_DINODE_FMT_META_BTREE:
+                       switch (ip->i_metatype) {
+                       default:
+                               break;
+                       }
+                       fallthrough;
                default:
                        xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
                                        dip, sizeof(*dip), __this_address);
@@ -601,6 +607,19 @@ xfs_iflush_fork(
                }
                break;
 
+       case XFS_DINODE_FMT_META_BTREE:
+               ASSERT(whichfork == XFS_DATA_FORK);
+
+               if (!(iip->ili_fields & brootflag[whichfork]))
+                       break;
+
+               switch (ip->i_metatype) {
+               default:
+                       ASSERT(0);
+                       break;
+               }
+               break;
+
        default:
                ASSERT(0);
                break;
index 7e00312225ed10d1f7e2cd76905c7db885d244b2..0d7ad692822d48cdbee051bfa802ace74e719de9 100644 (file)
@@ -983,6 +983,7 @@ xchk_bmap(
        case XFS_DINODE_FMT_UUID:
        case XFS_DINODE_FMT_DEV:
        case XFS_DINODE_FMT_LOCAL:
+       case XFS_DINODE_FMT_META_BTREE:
                /* No mappings to check. */
                if (whichfork == XFS_COW_FORK)
                        xchk_fblock_set_corrupt(sc, whichfork, 0);
index 7c4955482641f796c0459b3b1cfcbbf61a451f31..141d36f1da9a7157e3e2309c983bd191e4d912cd 100644 (file)
@@ -731,6 +731,7 @@ xrep_bmap_check_inputs(
        case XFS_DINODE_FMT_DEV:
        case XFS_DINODE_FMT_LOCAL:
        case XFS_DINODE_FMT_UUID:
+       case XFS_DINODE_FMT_META_BTREE:
                return -ECANCELED;
        case XFS_DINODE_FMT_EXTENTS:
        case XFS_DINODE_FMT_BTREE:
index 25ee66e7649d400a37466a6a40e1fcdfd6b92f10..2e911f38deaebeecabb1159b83351bd486d1569b 100644 (file)
@@ -502,6 +502,10 @@ xchk_dinode(
                if (!S_ISREG(mode) && !S_ISDIR(mode))
                        xchk_ino_set_corrupt(sc, ino);
                break;
+       case XFS_DINODE_FMT_META_BTREE:
+               if (!S_ISREG(mode))
+                       xchk_ino_set_corrupt(sc, ino);
+               break;
        case XFS_DINODE_FMT_UUID:
        default:
                xchk_ino_set_corrupt(sc, ino);
index 5a58ddd27bd2f5fc7c625342ccd7b153965b554a..7faa27472b9129789714d561ad3ffda335c9f81c 100644 (file)
@@ -888,6 +888,25 @@ xrep_dinode_bad_bmbt_fork(
        return false;
 }
 
+/*
+ * Check a metadata-btree fork.  Returns true if the fork is bad and false if
+ * it looks acceptable.  Any fork other than the data fork is bad.  No
+ * di_metatype value has its own case in the switch below, so as written every
+ * metadata-btree fork is reported bad; sc and dfork_size are not used yet.
+ */
+STATIC bool
+xrep_dinode_bad_metabt_fork(
+       struct xfs_scrub        *sc,
+       struct xfs_dinode       *dip,
+       unsigned int            dfork_size,
+       int                     whichfork)
+{
+       if (whichfork != XFS_DATA_FORK)
+               return true;    /* only the data fork may use this format */
+
+       switch (be16_to_cpu(dip->di_metatype)) {        /* field is stored big-endian */
+       default:
+               return true;    /* metadata btree type with no handler here */
+       }
+
+       return false;   /* not reachable until the switch gains non-returning cases */
+}
+
 /*
  * Check the data fork for things that will fail the ifork verifiers or the
  * ifork formatters.
@@ -968,6 +987,11 @@ xrep_dinode_check_dfork(
                                XFS_DATA_FORK))
                        return true;
                break;
+       case XFS_DINODE_FMT_META_BTREE:
+               if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size,
+                               XFS_DATA_FORK))
+                       return true;
+               break;
        default:
                return true;
        }
@@ -1088,6 +1112,11 @@ xrep_dinode_check_afork(
                                        XFS_ATTR_FORK))
                        return true;
                break;
+       case XFS_DINODE_FMT_META_BTREE:
+               if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size,
+                                       XFS_ATTR_FORK))
+                       return true;
+               break;
        default:
                return true;
        }
@@ -1241,6 +1270,13 @@ xrep_dinode_ensure_forkoff(
                bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
                dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
                break;
+       case XFS_DINODE_FMT_META_BTREE:
+               switch (be16_to_cpu(dip->di_metatype)) {
+               default:
+                       dfork_min = 0;
+                       break;
+               }
+               break;
        default:
                dfork_min = 0;
                break;
index a0a227d183d28d1118db4dd732b71467e4c139ad..2a0b9e3d0fbaeeb7016e33776543223791192a3b 100644 (file)
@@ -499,6 +499,14 @@ xrep_rmap_scan_iext(
        return xrep_rmap_stash_accumulated(rf);
 }
 
+static int
+xrep_rmap_scan_meta_btree(
+       struct xrep_rmap_ifork  *rf,
+       struct xfs_inode        *ip)
+{
+       return -EFSCORRUPTED; /*
+                              * XXX placeholder: nothing is scanned yet.  rf and
+                              * ip are ignored, so the XFS_DINODE_FMT_META_BTREE
+                              * case of xrep_rmap_scan_ifork() gets -EFSCORRUPTED
+                              * back for every such fork.
+                              */
+}
+
 /* Find all the extents from a given AG in an inode fork. */
 STATIC int
 xrep_rmap_scan_ifork(
@@ -512,14 +520,14 @@ xrep_rmap_scan_ifork(
                .whichfork      = whichfork,
        };
        struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
+       bool                    mappings_done;
        int                     error = 0;
 
        if (!ifp)
                return 0;
 
-       if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
-               bool            mappings_done;
-
+       switch (ifp->if_format) {
+       case XFS_DINODE_FMT_BTREE:
                /*
                 * Scan the bmap btree for data device mappings.  This includes
                 * the btree blocks themselves, even if this is a realtime
@@ -528,15 +536,18 @@ xrep_rmap_scan_ifork(
                error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done);
                if (error || mappings_done)
                        return error;
-       } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
-               return 0;
+               fallthrough;
+       case XFS_DINODE_FMT_EXTENTS:
+               /* Scan incore extent cache if this isn't a realtime file. */
+               if (xfs_ifork_is_realtime(ip, whichfork))
+                       return 0;
+
+               return xrep_rmap_scan_iext(&rf, ifp);
+       case XFS_DINODE_FMT_META_BTREE:
+               return xrep_rmap_scan_meta_btree(&rf, ip);
        }
 
-       /* Scan incore extent cache if this isn't a realtime file. */
-       if (xfs_ifork_is_realtime(ip, whichfork))
-               return 0;
-
-       return xrep_rmap_scan_iext(&rf, ifp);
+       return 0;
 }
 
 /*
index c8ad2606f928b27f6b28705976ed88791b9d41e0..c95fe1b1de4e6f3dc2651cd68804e1c14ff49447 100644 (file)
@@ -2382,7 +2382,16 @@ xfs_iflush(
                        __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
                goto flush_out;
        }
-       if (S_ISREG(VFS_I(ip)->i_mode)) {
+       if (ip->i_df.if_format == XFS_DINODE_FMT_META_BTREE) {
+               if (!S_ISREG(VFS_I(ip)->i_mode) ||
+                   !(ip->i_diflags2 & XFS_DIFLAG2_METADATA)) {
+                       xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+                               "%s: Bad %s meta btree inode %Lu, ptr "PTR_FMT,
+                               __func__, xfs_metafile_type_str(ip->i_metatype),
+                               ip->i_ino, ip);
+                       goto flush_out;
+               }
+       } else if (S_ISREG(VFS_I(ip)->i_mode)) {
                if (XFS_TEST_ERROR(
                    ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
                    ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
@@ -2422,6 +2431,14 @@ xfs_iflush(
                goto flush_out;
        }
 
+       if (xfs_inode_has_attr_fork(ip) &&
+           ip->i_af.if_format == XFS_DINODE_FMT_META_BTREE) {
+               xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+                       "%s: meta btree in inode %Lu attr fork, ptr "PTR_FMT,
+                       __func__, ip->i_ino, ip);
+               goto flush_out;
+       }
+
        /*
         * Inode item log recovery for v2 inodes are dependent on the flushiter
         * count for correct sequencing.  We bump the flush iteration count so
index 912f0b1bc3cb70f3f53a802ad732337e2db819a9..a174f64b8bb250372018a9b31c30985ab8f5907d 100644 (file)
@@ -242,6 +242,7 @@ xfs_inode_item_data_fork_size(
                }
                break;
        case XFS_DINODE_FMT_BTREE:
+       case XFS_DINODE_FMT_META_BTREE:
                if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
                    ip->i_df.if_broot_bytes > 0) {
                        *nbytes += ip->i_df.if_broot_bytes;
@@ -362,6 +363,7 @@ xfs_inode_item_format_data_fork(
                }
                break;
        case XFS_DINODE_FMT_BTREE:
+       case XFS_DINODE_FMT_META_BTREE:
                iip->ili_fields &=
                        ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);
 
index e70d2611456bc98524bf8f01b241add60f438efd..6e9b3bfc718c0b23e696fe68ac5ed7dbd98837c4 100644 (file)
@@ -266,6 +266,35 @@ xlog_dinode_verify_extent_counts(
        return 0;
 }
 
+static inline int      /* 0 on success, -EFSCORRUPTED for a fork format with no handler */
+xlog_recover_inode_dbroot(
+       struct xfs_mount        *mp,
+       void                    *src,   /* logged root passed in by the XFS_ILOG_DBROOT case */
+       unsigned int            len,    /* presumably the size of src in bytes -- TODO confirm */
+       struct xfs_dinode       *dip)   /* inode whose data fork area is handed to the converter */
+{
+       void                    *dfork = XFS_DFORK_DPTR(dip);
+       unsigned int            dsize = XFS_DFORK_DSIZE(dip, mp);
+
+       switch (dip->di_format) {       /* choose the root conversion by data fork format */
+       case XFS_DINODE_FMT_BTREE:      /* bmap btree root; presumably written into dfork */
+               xfs_bmbt_to_bmdr(mp, src, len, dfork, dsize);
+               break;
+       case XFS_DINODE_FMT_META_BTREE: /* dispatch on the big-endian di_metatype */
+               switch (be16_to_cpu(dip->di_metatype)) {
+               default:                /* no metatype has a handler here yet */
+                       ASSERT(0);
+                       return -EFSCORRUPTED;
+               }
+               break;                  /* not reachable while default is the only case */
+       default:                        /* any other format is rejected */
+               ASSERT(0);
+               return -EFSCORRUPTED;
+       }
+
+       return 0;
+}
+
 STATIC int
 xlog_recover_inode_commit_pass2(
        struct xlog                     *log,
@@ -393,8 +422,9 @@ xlog_recover_inode_commit_pass2(
 
 
        if (unlikely(S_ISREG(ldip->di_mode))) {
-               if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
-                   (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
+               if (ldip->di_format != XFS_DINODE_FMT_EXTENTS &&
+                   ldip->di_format != XFS_DINODE_FMT_BTREE &&
+                   ldip->di_format != XFS_DINODE_FMT_META_BTREE) {
                        XFS_CORRUPTION_ERROR(
                                "Bad log dinode data fork format for regular file",
                                XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip));
@@ -475,9 +505,9 @@ xlog_recover_inode_commit_pass2(
                break;
 
        case XFS_ILOG_DBROOT:
-               xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
-                                (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip),
-                                XFS_DFORK_DSIZE(dip, mp));
+               error = xlog_recover_inode_dbroot(mp, src, len, dip);
+               if (error)
+                       goto out_release;
                break;
 
        default:
index 8b7bb1f5ae3c6f3159a021c88fa99e67a54fa198..a098935163b7c2d6f272ccb0161fbf3f7d289032 100644 (file)
@@ -2299,6 +2299,7 @@ TRACE_DEFINE_ENUM(XFS_DINODE_FMT_LOCAL);
 TRACE_DEFINE_ENUM(XFS_DINODE_FMT_EXTENTS);
 TRACE_DEFINE_ENUM(XFS_DINODE_FMT_BTREE);
 TRACE_DEFINE_ENUM(XFS_DINODE_FMT_UUID);
+TRACE_DEFINE_ENUM(XFS_DINODE_FMT_META_BTREE);
 
 DECLARE_EVENT_CLASS(xfs_swap_extent_class,
        TP_PROTO(struct xfs_inode *ip, int which),