xfs: use deferred frees for btree block freeing
author Dave Chinner <dchinner@redhat.com>
Wed, 28 Jun 2023 18:04:32 +0000 (11:04 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Thu, 29 Jun 2023 16:28:23 +0000 (09:28 -0700)
Btrees that aren't freespace management trees use the normal extent
allocation and freeing routines for their blocks. Hence when a btree
block is freed, a direct call to xfs_free_extent() is made and the
extent is immediately freed. This puts all of the free space
management btrees under this path, so we are stacking btrees on
btrees in the call stack. The inobt, finobt and refcount btrees
all do this.

However, the bmap btree does not do this - it calls
xfs_free_extent_later() to defer the extent free operation via an
XEFI and hence it gets processed in deferred operation processing
during the commit of the primary transaction (i.e. via intent
chaining).

We need to change xfs_free_extent() to behave in a non-blocking
manner so that we can avoid deadlocks with busy extents near ENOSPC
in transactions that free multiple extents. Inserting or removing a
record from a btree can cause a multi-level tree merge operation and
that will free multiple blocks from the btree in a single
transaction. i.e. we can call xfs_free_extent() multiple times, and
hence the btree manipulation transaction is vulnerable to this busy
extent deadlock vector.

To fix this, convert all the remaining callers of xfs_free_extent()
to use xfs_free_extent_later() to queue XEFIs and hence defer
processing of the extent frees to a context that can be safely
restarted if a deadlock condition is detected.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Chandan Babu R <chandan.babu@oracle.com>
fs/xfs/libxfs/xfs_ag.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_alloc.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap_btree.c
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc_btree.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_refcount_btree.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_reflink.c

index ee84835ebc66d0c66c9786e3a2118d0439548b62..e9cc481b4ddff17961e3a4fbb7db761a945d5079 100644 (file)
@@ -985,7 +985,7 @@ xfs_ag_shrink_space(
                        goto resv_err;
 
                err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
-                               true);
+                               XFS_AG_RESV_NONE, true);
                if (err2)
                        goto resv_err;
 
index c20fe99405d87cadfbf0763020085921adb115d8..713b4712704f3faf980d97ca8e6f934cee6f4812 100644 (file)
@@ -2449,6 +2449,7 @@ xfs_defer_agfl_block(
        xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
        xefi->xefi_blockcount = 1;
        xefi->xefi_owner = oinfo->oi_owner;
+       xefi->xefi_agresv = XFS_AG_RESV_AGFL;
 
        if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
                return -EFSCORRUPTED;
@@ -2470,6 +2471,7 @@ __xfs_free_extent_later(
        xfs_fsblock_t                   bno,
        xfs_filblks_t                   len,
        const struct xfs_owner_info     *oinfo,
+       enum xfs_ag_resv_type           type,
        bool                            skip_discard)
 {
        struct xfs_extent_free_item     *xefi;
@@ -2490,6 +2492,7 @@ __xfs_free_extent_later(
        ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
 #endif
        ASSERT(xfs_extfree_item_cache != NULL);
+       ASSERT(type != XFS_AG_RESV_AGFL);
 
        if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
                return -EFSCORRUPTED;
@@ -2498,6 +2501,7 @@ __xfs_free_extent_later(
                               GFP_KERNEL | __GFP_NOFAIL);
        xefi->xefi_startblock = bno;
        xefi->xefi_blockcount = (xfs_extlen_t)len;
+       xefi->xefi_agresv = type;
        if (skip_discard)
                xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
        if (oinfo) {
index 85ac470be0da55076cdab765877f2ca556e1d406..a3e519577e09758cdad0173a78c1ce0d3478db34 100644 (file)
@@ -232,7 +232,7 @@ xfs_buf_to_agfl_bno(
 
 int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
                xfs_filblks_t len, const struct xfs_owner_info *oinfo,
-               bool skip_discard);
+               enum xfs_ag_resv_type type, bool skip_discard);
 
 /*
  * List of extents to be free "later".
@@ -245,6 +245,7 @@ struct xfs_extent_free_item {
        xfs_extlen_t            xefi_blockcount;/* number of blocks in extent */
        struct xfs_perag        *xefi_pag;
        unsigned int            xefi_flags;
+       enum xfs_ag_resv_type   xefi_agresv;
 };
 
 void xfs_extent_free_get_group(struct xfs_mount *mp,
@@ -259,9 +260,10 @@ xfs_free_extent_later(
        struct xfs_trans                *tp,
        xfs_fsblock_t                   bno,
        xfs_filblks_t                   len,
-       const struct xfs_owner_info     *oinfo)
+       const struct xfs_owner_info     *oinfo,
+       enum xfs_ag_resv_type           type)
 {
-       return __xfs_free_extent_later(tp, bno, len, oinfo, false);
+       return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
 }
 
 
index fef35696adb72d457443be10dece888b97a2c8cf..30c931b38853c9a50d44465e78b41b7bdd993a41 100644 (file)
@@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(
                return error;
 
        xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
-       error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+       error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
+                       XFS_AG_RESV_NONE);
        if (error)
                return error;
 
@@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real(
                } else {
                        error = __xfs_free_extent_later(tp, del->br_startblock,
                                        del->br_blockcount, NULL,
-                                       (bflags & XFS_BMAPI_NODISCARD) ||
-                                       del->br_state == XFS_EXT_UNWRITTEN);
+                                       XFS_AG_RESV_NONE,
+                                       ((bflags & XFS_BMAPI_NODISCARD) ||
+                                       del->br_state == XFS_EXT_UNWRITTEN));
                        if (error)
                                goto done;
                }
index 36564ae3084fed4e48d4766928ee7d9e4a30b42b..bf3f1b36fdd23b8c132101c49d92ea13ffc464b0 100644 (file)
@@ -271,7 +271,8 @@ xfs_bmbt_free_block(
        int                     error;
 
        xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
-       error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
+       error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
+                       XFS_AG_RESV_NONE);
        if (error)
                return error;
 
index 34600f94c2f4873e207e1f8847dc06d044756ab1..1e5fafbc0cdb07b82bc6b895205da367b5eda4b9 100644 (file)
@@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk(
                /* not sparse, calculate extent info directly */
                return xfs_free_extent_later(tp,
                                XFS_AGB_TO_FSB(mp, agno, sagbno),
-                               M_IGEO(mp)->ialloc_blks,
-                               &XFS_RMAP_OINFO_INODES);
+                               M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
+                               XFS_AG_RESV_NONE);
        }
 
        /* holemask is only 16-bits (fits in an unsigned long) */
@@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk(
                ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
                ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
                error = xfs_free_extent_later(tp,
-                               XFS_AGB_TO_FSB(mp, agno, agbno),
-                               contigblk, &XFS_RMAP_OINFO_INODES);
+                               XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
+                               &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
                if (error)
                        return error;
 
index 5a945ae21b5dbb10d509ee6ea0d2476861a9f5e8..9258f01c0015eab902b12dc3271111ddeb9dec94 100644 (file)
@@ -160,8 +160,7 @@ __xfs_inobt_free_block(
 
        xfs_inobt_mod_blockcount(cur, -1);
        fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
-       return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
-                       XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+       return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
                        &XFS_RMAP_OINFO_INOBT, resv);
 }
 
index b6e21433925ca102031b2881b3303f460cfd21e0..70ab113c9cea93842ad8566e209cfe73a3ad7045 100644 (file)
@@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents(
                                                cur->bc_ag.pag->pag_agno,
                                                tmp.rc_startblock);
                                error = xfs_free_extent_later(cur->bc_tp, fsbno,
-                                                 tmp.rc_blockcount, NULL);
+                                                 tmp.rc_blockcount, NULL,
+                                                 XFS_AG_RESV_NONE);
                                if (error)
                                        goto out_error;
                        }
@@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents(
                                        cur->bc_ag.pag->pag_agno,
                                        ext.rc_startblock);
                        error = xfs_free_extent_later(cur->bc_tp, fsbno,
-                                       ext.rc_blockcount, NULL);
+                                       ext.rc_blockcount, NULL,
+                                       XFS_AG_RESV_NONE);
                        if (error)
                                goto out_error;
                }
@@ -1981,7 +1983,8 @@ xfs_refcount_recover_cow_leftovers(
 
                /* Free the block. */
                error = xfs_free_extent_later(tp, fsb,
-                               rr->rr_rrec.rc_blockcount, NULL);
+                               rr->rr_rrec.rc_blockcount, NULL,
+                               XFS_AG_RESV_NONE);
                if (error)
                        goto out_trans;
 
index d4afc5f4e6a5f680e929ebaca5265fc2607452cc..5c3987d8dc242e4fa7ae2650b65dea9cf90fa513 100644 (file)
@@ -106,19 +106,13 @@ xfs_refcountbt_free_block(
        struct xfs_buf          *agbp = cur->bc_ag.agbp;
        struct xfs_agf          *agf = agbp->b_addr;
        xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
-       int                     error;
 
        trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
                        XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
        be32_add_cpu(&agf->agf_refcount_blocks, -1);
        xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
-       error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
-                       XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+       return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
                        &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
-       if (error)
-               return error;
-
-       return error;
 }
 
 STATIC int
index f9e36b8106636ac9067078cf761bc55bead05ed6..873653b825e4ad61855746ac7df53ecb88636b42 100644 (file)
@@ -365,7 +365,7 @@ xfs_trans_free_extent(
                        agbno, xefi->xefi_blockcount);
 
        error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
-                       xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
+                       xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
                        xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
 
        /*
@@ -644,6 +644,7 @@ xfs_efi_item_recover(
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                struct xfs_extent_free_item     fake = {
                        .xefi_owner             = XFS_RMAP_OWN_UNKNOWN,
+                       .xefi_agresv            = XFS_AG_RESV_NONE,
                };
                struct xfs_extent               *extp;
 
index abcc559f3c6404916f507b90ed2dc0c1511f7d09..eb9102453affbf34c9378a7b354de22e16f6d7a3 100644 (file)
@@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks(
                                        del.br_blockcount);
 
                        error = xfs_free_extent_later(*tpp, del.br_startblock,
-                                         del.br_blockcount, NULL);
+                                       del.br_blockcount, NULL,
+                                       XFS_AG_RESV_NONE);
                        if (error)
                                break;