xfs: support reserved blocks for the rt extent counter
author: Christoph Hellwig <hch@lst.de>
Sun, 9 Feb 2025 05:19:06 +0000 (06:19 +0100)
committer: Christoph Hellwig <hch@lst.de>
Mon, 3 Mar 2025 15:16:43 +0000 (08:16 -0700)
The zoned space allocator will need reserved RT extents for garbage
collection and zeroing of partial blocks.  Move the resblks related
fields into the freecounter array so that they can be used for all
counters.

Co-developed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
fs/xfs/scrub/fscounters.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_fsops.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_super.c

index 207a238de4293784ef555f8211f3c9cb3683386b..9dd893ece1883b1736bee68bd9130b19728bbee3 100644 (file)
@@ -350,7 +350,7 @@ retry:
         * The global incore space reservation is taken from the incore
         * counters, so leave that out of the computation.
         */
-       fsc->fdblocks -= mp->m_resblks_avail;
+       fsc->fdblocks -= mp->m_free[XC_FREE_BLOCKS].res_avail;
 
        /*
         * Delayed allocation reservations are taken out of the incore counters
index 58249f37a7ad66e3f9b6115a1114171d854eab77..f055aebe4c7a13be1c4d1eff4a527868c0df8a75 100644 (file)
@@ -366,6 +366,7 @@ xfs_growfs_log(
 int
 xfs_reserve_blocks(
        struct xfs_mount        *mp,
+       enum xfs_free_counter   ctr,
        uint64_t                request)
 {
        int64_t                 lcounter, delta;
@@ -373,6 +374,8 @@ xfs_reserve_blocks(
        int64_t                 free;
        int                     error = 0;
 
+       ASSERT(ctr < XC_FREE_NR);
+
        /*
         * With per-cpu counters, this becomes an interesting problem. we need
         * to work out if we are freeing or allocation blocks first, then we can
@@ -391,16 +394,16 @@ xfs_reserve_blocks(
         * counters directly since we shouldn't have any problems unreserving
         * space.
         */
-       if (mp->m_resblks > request) {
-               lcounter = mp->m_resblks_avail - request;
+       if (mp->m_free[ctr].res_total > request) {
+               lcounter = mp->m_free[ctr].res_avail - request;
                if (lcounter > 0) {             /* release unused blocks */
                        fdblks_delta = lcounter;
-                       mp->m_resblks_avail -= lcounter;
+                       mp->m_free[ctr].res_avail -= lcounter;
                }
-               mp->m_resblks = request;
+               mp->m_free[ctr].res_total = request;
                if (fdblks_delta) {
                        spin_unlock(&mp->m_sb_lock);
-                       xfs_add_fdblocks(mp, fdblks_delta);
+                       xfs_add_freecounter(mp, ctr, fdblks_delta);
                        spin_lock(&mp->m_sb_lock);
                }
 
@@ -419,10 +422,10 @@ xfs_reserve_blocks(
         * space to fill it because mod_fdblocks will refill an undersized
         * reserve when it can.
         */
-       free = xfs_sum_freecounter_raw(mp, XC_FREE_BLOCKS) -
-               xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS);
-       delta = request - mp->m_resblks;
-       mp->m_resblks = request;
+       free = xfs_sum_freecounter_raw(mp, ctr) -
+               xfs_freecounter_unavailable(mp, ctr);
+       delta = request - mp->m_free[ctr].res_total;
+       mp->m_free[ctr].res_total = request;
        if (delta > 0 && free > 0) {
                /*
                 * We'll either succeed in getting space from the free block
@@ -436,9 +439,9 @@ xfs_reserve_blocks(
                 */
                fdblks_delta = min(free, delta);
                spin_unlock(&mp->m_sb_lock);
-               error = xfs_dec_fdblocks(mp, fdblks_delta, 0);
+               error = xfs_dec_freecounter(mp, ctr, fdblks_delta, 0);
                if (!error)
-                       xfs_add_fdblocks(mp, fdblks_delta);
+                       xfs_add_freecounter(mp, ctr, fdblks_delta);
                spin_lock(&mp->m_sb_lock);
        }
 out:
index 3e2f73bcf8314b87520cbb7ae0bf6542f756b5d5..9d23c361ef56e4ebc5b00787c90f21567e3c145b 100644 (file)
@@ -8,7 +8,8 @@
 
 int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in);
 int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in);
-int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request);
+int xfs_reserve_blocks(struct xfs_mount *mp, enum xfs_free_counter cnt,
+               uint64_t request);
 int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags);
 
 int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
index 0418aad2db916f4e06d095a989c85983d515188f..d250f7f74e3b63078fdc8aaaceb9a74e664d4ac1 100644 (file)
@@ -1131,15 +1131,15 @@ xfs_ioctl_getset_resblocks(
                error = mnt_want_write_file(filp);
                if (error)
                        return error;
-               error = xfs_reserve_blocks(mp, fsop.resblks);
+               error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, fsop.resblks);
                mnt_drop_write_file(filp);
                if (error)
                        return error;
        }
 
        spin_lock(&mp->m_sb_lock);
-       fsop.resblks = mp->m_resblks;
-       fsop.resblks_avail = mp->m_resblks_avail;
+       fsop.resblks = mp->m_free[XC_FREE_BLOCKS].res_total;
+       fsop.resblks_avail = mp->m_free[XC_FREE_BLOCKS].res_avail;
        spin_unlock(&mp->m_sb_lock);
 
        if (copy_to_user(arg, &fsop, sizeof(fsop)))
index f444b41d458775313b39327f2b35e3fe9f63396d..01f3877840395f920c9f0638945eb04394b43ffb 100644 (file)
@@ -461,11 +461,21 @@ xfs_mount_reset_sbqflags(
        return xfs_sync_sb(mp, false);
 }
 
+static const char *const xfs_free_pool_name[] = {
+       [XC_FREE_BLOCKS]        = "free blocks",
+       [XC_FREE_RTEXTENTS]     = "free rt extents",
+};
+
 uint64_t
-xfs_default_resblks(xfs_mount_t *mp)
+xfs_default_resblks(
+       struct xfs_mount        *mp,
+       enum xfs_free_counter   ctr)
 {
        uint64_t resblks;
 
+       if (ctr == XC_FREE_RTEXTENTS)
+               return 0;
+
        /*
         * We default to 5% or 8192 fsbs of space reserved, whichever is
         * smaller.  This is intended to cover concurrent allocation
@@ -678,6 +688,7 @@ xfs_mountfs(
        uint                    quotamount = 0;
        uint                    quotaflags = 0;
        int                     error = 0;
+       int                     i;
 
        xfs_sb_mount_common(mp, sbp);
 
@@ -1046,17 +1057,21 @@ xfs_mountfs(
         * privileged transactions. This is needed so that transaction
         * space required for critical operations can dip into this pool
         * when at ENOSPC. This is needed for operations like create with
-        * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
-        * are not allowed to use this reserved space.
+        * attr, unwritten extent conversion at ENOSPC, garbage collection
+        * etc. Data allocations are not allowed to use this reserved space.
         *
         * This may drive us straight to ENOSPC on mount, but that implies
         * we were already there on the last unmount. Warn if this occurs.
         */
        if (!xfs_is_readonly(mp)) {
-               error = xfs_reserve_blocks(mp, xfs_default_resblks(mp));
-               if (error)
-                       xfs_warn(mp,
-       "Unable to allocate reserve blocks. Continuing without reserve pool.");
+               for (i = 0; i < XC_FREE_NR; i++) {
+                       error = xfs_reserve_blocks(mp, i,
+                                       xfs_default_resblks(mp, i));
+                       if (error)
+                               xfs_warn(mp,
+"Unable to allocate reserve blocks. Continuing without reserve pool for %s.",
+                                       xfs_free_pool_name[i]);
+               }
 
                /* Reserve AG blocks for future btree expansion. */
                error = xfs_fs_reserve_ag_blocks(mp);
@@ -1173,7 +1188,7 @@ xfs_unmountfs(
         * we only every apply deltas to the superblock and hence the incore
         * value does not matter....
         */
-       error = xfs_reserve_blocks(mp, 0);
+       error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, 0);
        if (error)
                xfs_warn(mp, "Unable to free reserved block pool. "
                                "Freespace may not be correct on next mount.");
@@ -1244,26 +1259,26 @@ xfs_add_freecounter(
        enum xfs_free_counter   ctr,
        uint64_t                delta)
 {
-       bool                    has_resv_pool = (ctr == XC_FREE_BLOCKS);
+       struct xfs_freecounter  *counter = &mp->m_free[ctr];
        uint64_t                res_used;
 
        /*
         * If the reserve pool is depleted, put blocks back into it first.
         * Most of the time the pool is full.
         */
-       if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) {
-               percpu_counter_add(&mp->m_free[ctr].count, delta);
+       if (likely(counter->res_avail == counter->res_total)) {
+               percpu_counter_add(&counter->count, delta);
                return;
        }
 
        spin_lock(&mp->m_sb_lock);
-       res_used = mp->m_resblks - mp->m_resblks_avail;
+       res_used = counter->res_total - counter->res_avail;
        if (res_used > delta) {
-               mp->m_resblks_avail += delta;
+               counter->res_avail += delta;
        } else {
                delta -= res_used;
-               mp->m_resblks_avail = mp->m_resblks;
-               percpu_counter_add(&mp->m_free[ctr].count, delta);
+               counter->res_avail = counter->res_total;
+               percpu_counter_add(&counter->count, delta);
        }
        spin_unlock(&mp->m_sb_lock);
 }
@@ -1277,15 +1292,10 @@ xfs_dec_freecounter(
        uint64_t                delta,
        bool                    rsvd)
 {
-       struct percpu_counter   *counter = &mp->m_free[ctr].count;
-       uint64_t                set_aside = 0;
+       struct xfs_freecounter  *counter = &mp->m_free[ctr];
        s32                     batch;
-       bool                    has_resv_pool;
 
        ASSERT(ctr < XC_FREE_NR);
-       has_resv_pool = (ctr == XC_FREE_BLOCKS);
-       if (rsvd)
-               ASSERT(has_resv_pool);
 
        /*
         * Taking blocks away, need to be more accurate the closer we
@@ -1295,7 +1305,7 @@ xfs_dec_freecounter(
         * then make everything serialise as we are real close to
         * ENOSPC.
         */
-       if (__percpu_counter_compare(counter, 2 * XFS_FDBLOCKS_BATCH,
+       if (__percpu_counter_compare(&counter->count, 2 * XFS_FDBLOCKS_BATCH,
                                     XFS_FDBLOCKS_BATCH) < 0)
                batch = 1;
        else
@@ -1312,25 +1322,25 @@ xfs_dec_freecounter(
         * problems (i.e. transaction abort, pagecache discards, etc.) than
         * slightly premature -ENOSPC.
         */
-       if (has_resv_pool)
-               set_aside = xfs_freecounter_unavailable(mp, ctr);
-       percpu_counter_add_batch(counter, -((int64_t)delta), batch);
-       if (__percpu_counter_compare(counter, set_aside,
+       percpu_counter_add_batch(&counter->count, -((int64_t)delta), batch);
+       if (__percpu_counter_compare(&counter->count,
+                       xfs_freecounter_unavailable(mp, ctr),
                        XFS_FDBLOCKS_BATCH) < 0) {
                /*
                 * Lock up the sb for dipping into reserves before releasing the
                 * space that took us to ENOSPC.
                 */
                spin_lock(&mp->m_sb_lock);
-               percpu_counter_add(counter, delta);
+               percpu_counter_add(&counter->count, delta);
                if (!rsvd)
                        goto fdblocks_enospc;
-               if (delta > mp->m_resblks_avail) {
-                       xfs_warn_once(mp,
+               if (delta > counter->res_avail) {
+                       if (ctr == XC_FREE_BLOCKS)
+                               xfs_warn_once(mp,
 "Reserve blocks depleted! Consider increasing reserve pool size.");
                        goto fdblocks_enospc;
                }
-               mp->m_resblks_avail -= delta;
+               counter->res_avail -= delta;
                spin_unlock(&mp->m_sb_lock);
        }
 
index 7f3265d669bc6e91b7aa1c76f34813a65a9c8926..579eaf09157dccf1a005bcc966d0ba11475daa22 100644 (file)
@@ -108,6 +108,15 @@ struct xfs_groups {
 struct xfs_freecounter {
        /* free blocks for general use: */
        struct percpu_counter   count;
+
+       /* total reserved blocks: */
+       uint64_t                res_total;
+
+       /* available reserved blocks: */
+       uint64_t                res_avail;
+
+       /* reserved blks @ remount,ro: */
+       uint64_t                res_saved;
 };
 
 /*
@@ -250,9 +259,6 @@ typedef struct xfs_mount {
        atomic64_t              m_allocbt_blks;
 
        struct xfs_groups       m_groups[XG_TYPE_MAX];
-       uint64_t                m_resblks;      /* total reserved blocks */
-       uint64_t                m_resblks_avail;/* available reserved blocks */
-       uint64_t                m_resblks_save; /* reserved blks @ remount,ro */
        struct delayed_work     m_reclaim_work; /* background inode reclaim */
        struct dentry           *m_debugfs;     /* debugfs parent */
        struct xfs_kobj         m_kobj;
@@ -638,7 +644,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 }
 
 extern void    xfs_uuid_table_free(void);
-extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
+uint64_t       xfs_default_resblks(struct xfs_mount *mp,
+                       enum xfs_free_counter ctr);
 extern int     xfs_mountfs(xfs_mount_t *mp);
 extern void    xfs_unmountfs(xfs_mount_t *);
 
index b08d28a895cb1d9535a6f061b2743ec23eb61100..366837e71eebdbf4b72b6c76440a68a0ea97dd93 100644 (file)
@@ -924,24 +924,32 @@ xfs_fs_statfs(
 }
 
 STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+xfs_save_resvblks(
+       struct xfs_mount        *mp)
 {
-       mp->m_resblks_save = mp->m_resblks;
-       xfs_reserve_blocks(mp, 0);
+       enum xfs_free_counter   i;
+
+       for (i = 0; i < XC_FREE_NR; i++) {
+               mp->m_free[i].res_saved = mp->m_free[i].res_total;
+               xfs_reserve_blocks(mp, i, 0);
+       }
 }
 
 STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+xfs_restore_resvblks(
+       struct xfs_mount        *mp)
 {
-       uint64_t resblks;
-
-       if (mp->m_resblks_save) {
-               resblks = mp->m_resblks_save;
-               mp->m_resblks_save = 0;
-       } else
-               resblks = xfs_default_resblks(mp);
+       uint64_t                resblks;
+       enum xfs_free_counter   i;
 
-       xfs_reserve_blocks(mp, resblks);
+       for (i = 0; i < XC_FREE_NR; i++) {
+               if (mp->m_free[i].res_saved) {
+                       resblks = mp->m_free[i].res_saved;
+                       mp->m_free[i].res_saved = 0;
+               } else
+                       resblks = xfs_default_resblks(mp, i);
+               xfs_reserve_blocks(mp, i, resblks);
+       }
 }
 
 /*