xfs: reduce the rate of cond_resched calls inside scrub
author	Darrick J. Wong <djwong@kernel.org>
Mon, 22 Apr 2024 16:48:23 +0000 (09:48 -0700)
committer	Darrick J. Wong <djwong@kernel.org>
Tue, 23 Apr 2024 23:55:17 +0000 (16:55 -0700)
We really don't want to call cond_resched every single time we go
through a loop in scrub -- there may be billions of records, and probing
into the scheduler itself has overhead.  Reduce this overhead by calling
cond_resched at most 10x per second, and add a counter so that we only
check jiffies once every 100 records or so.
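
For reference, callers keep the same idiom as before; only the cost profile
changes.  A rough sketch follows (the record walker and its two helpers are
hypothetical stand-ins, not functions in this patch; only the
xchk_should_terminate() call is the real pattern used throughout
fs/xfs/scrub):

	/* Hypothetical record walker showing the termination check. */
	static int
	xchk_example_walk_records(
		struct xfs_scrub	*sc)
	{
		int			error = 0;

		while (xchk_example_more_records(sc)) {
			/*
			 * Usually just a counter bump; every 100th call
			 * checks jiffies and fatal signals, and yields to
			 * the scheduler at most 10x per second.
			 */
			if (xchk_should_terminate(sc, &error))
				break;

			error = xchk_example_check_record(sc);
			if (error)
				break;
		}
		return error;
	}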

Surprisingly, this reduces scrub-only fstests runtime by about 2%.  I
used the bmapinflate xfs_db command to produce a billion-extent file, and
this stupid gadget reduced the scrub runtime by about 4%.

From a stupid microbenchmark of calling these things 1 billion times, I
estimate that cond_resched costs about 5.5ns per call; jiffies costs
about 0.3ns per read; and fatal_signal_pending costs about 0.4ns per
call.
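
As a back-of-the-envelope check (illustrative arithmetic based on the rough
estimates above): the old behavior cost about 5.5 + 0.4 = 5.9ns of check
overhead per record, or roughly 6 seconds across a billion records.  With
the counter, 99 of every 100 records pay only an increment and a branch,
the hundredth pays the ~0.7ns jiffies-plus-signal check (about 7ms total
for a billion records), and cond_resched itself runs at most 10 times per
second for the duration of the scrub.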

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/scrub/common.h
fs/xfs/scrub/scrub.c
fs/xfs/scrub/scrub.h
fs/xfs/scrub/xfarray.c
fs/xfs/scrub/xfarray.h
fs/xfs/scrub/xfile.c

diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 39465e39dc5fd994df34a7f7bc55c2aaf17b9237..3d5f1f6b4b7bf50f22be6840aa5f8d6038ec4dc7 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -6,31 +6,6 @@
 #ifndef __XFS_SCRUB_COMMON_H__
 #define __XFS_SCRUB_COMMON_H__
 
-/*
- * We /could/ terminate a scrub/repair operation early.  If we're not
- * in a good place to continue (fatal signal, etc.) then bail out.
- * Note that we're careful not to make any judgements about *error.
- */
-static inline bool
-xchk_should_terminate(
-       struct xfs_scrub        *sc,
-       int                     *error)
-{
-       /*
-        * If preemption is disabled, we need to yield to the scheduler every
-        * few seconds so that we don't run afoul of the soft lockup watchdog
-        * or RCU stall detector.
-        */
-       cond_resched();
-
-       if (fatal_signal_pending(current)) {
-               if (*error == 0)
-                       *error = -EINTR;
-               return true;
-       }
-       return false;
-}
-
 int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
 int xchk_trans_alloc_empty(struct xfs_scrub *sc);
 void xchk_trans_cancel(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index e813b66b603a105b6778fd0852e10d5e777c4ee1..4a81f828f9f135cfdf8621a0e9d05c96afa9e869 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -620,6 +620,7 @@ xfs_scrub_metadata(
        sc->sm = sm;
        sc->ops = &meta_scrub_ops[sm->sm_type];
        sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
+       sc->relax = INIT_XCHK_RELAX;
 retry_op:
        /*
         * When repairs are allowed, prevent freezing or readonly remount while
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 39102704714625effa8e8efb865ac39bf2b346da..4e7e3edb6350c19e895f0249dec170240d5cc499 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -8,6 +8,49 @@
 
 struct xfs_scrub;
 
+struct xchk_relax {
+       unsigned long   next_resched;
+       unsigned int    resched_nr;
+       bool            interruptible;
+};
+
+/* Yield to the scheduler at most 10x per second. */
+#define XCHK_RELAX_NEXT                (jiffies + (HZ / 10))
+
+#define INIT_XCHK_RELAX        \
+       (struct xchk_relax){ \
+               .next_resched   = XCHK_RELAX_NEXT, \
+               .resched_nr     = 0, \
+               .interruptible  = true, \
+       }
+
+/*
+ * Relax during a scrub operation and exit if there's a fatal signal pending.
+ *
+ * If preemption is disabled, we need to yield to the scheduler every now and
+ * then so that we don't run afoul of the soft lockup watchdog or RCU stall
+ * detector.  cond_resched calls are somewhat expensive (~5ns) so we want to
+ * ratelimit this to 10x per second.  Amortize the cost of the other checks by
+ * only doing it once every 100 calls.
+ */
+static inline int xchk_maybe_relax(struct xchk_relax *widget)
+{
+       /* Amortize the cost of scheduling and checking signals. */
+       if (likely(++widget->resched_nr < 100))
+               return 0;
+       widget->resched_nr = 0;
+
+       if (unlikely(widget->next_resched <= jiffies)) {
+               cond_resched();
+               widget->next_resched = XCHK_RELAX_NEXT;
+       }
+
+       if (widget->interruptible && fatal_signal_pending(current))
+               return -EINTR;
+
+       return 0;
+}
+
 /*
  * Standard flags for allocating memory within scrub.  NOFS context is
  * configured by the process allocation scope.  Scrub and repair must be able
@@ -123,6 +166,9 @@ struct xfs_scrub {
         */
        unsigned int                    sick_mask;
 
+       /* next time we want to cond_resched() */
+       struct xchk_relax               relax;
+
        /* State tracking for single-AG operations. */
        struct xchk_ag                  sa;
 };
@@ -167,6 +213,24 @@ struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
                unsigned int subtype);
 void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
 
+/*
+ * We /could/ terminate a scrub/repair operation early.  If we're not
+ * in a good place to continue (fatal signal, etc.) then bail out.
+ * Note that we're careful not to make any judgements about *error.
+ */
+static inline bool
+xchk_should_terminate(
+       struct xfs_scrub        *sc,
+       int                     *error)
+{
+       if (xchk_maybe_relax(&sc->relax)) {
+               if (*error == 0)
+                       *error = -EINTR;
+               return true;
+       }
+       return false;
+}
+
 /* Metadata scrubbers */
 int xchk_tester(struct xfs_scrub *sc);
 int xchk_superblock(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c
index b65cd3fc5ac9b1fa5174091f26705cbd004cbe0b..9185ae7088d49a9dd5b1f8c133e92205f45fe1a3 100644
--- a/fs/xfs/scrub/xfarray.c
+++ b/fs/xfs/scrub/xfarray.c
@@ -7,9 +7,9 @@
 #include "xfs_fs.h"
 #include "xfs_shared.h"
 #include "xfs_format.h"
+#include "scrub/scrub.h"
 #include "scrub/xfile.h"
 #include "scrub/xfarray.h"
-#include "scrub/scrub.h"
 #include "scrub/trace.h"
 
 /*
@@ -486,6 +486,9 @@ xfarray_sortinfo_alloc(
 
        xfarray_sortinfo_lo(si)[0] = 0;
        xfarray_sortinfo_hi(si)[0] = array->nr - 1;
+       si->relax = INIT_XCHK_RELAX;
+       if (flags & XFARRAY_SORT_KILLABLE)
+               si->relax.interruptible = false;
 
        trace_xfarray_sort(si, nr_bytes);
        *infop = si;
@@ -503,10 +506,7 @@ xfarray_sort_terminated(
         * few seconds so that we don't run afoul of the soft lockup watchdog
         * or RCU stall detector.
         */
-       cond_resched();
-
-       if ((si->flags & XFARRAY_SORT_KILLABLE) &&
-           fatal_signal_pending(current)) {
+       if (xchk_maybe_relax(&si->relax)) {
                if (*error == 0)
                        *error = -EINTR;
                return true;
diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h
index 8f54c8fc888faf4ed2e91d4d58a0dc15c827b3cf..5eeeeed13ae24afc5954ba8a0d27260d069da421 100644
--- a/fs/xfs/scrub/xfarray.h
+++ b/fs/xfs/scrub/xfarray.h
@@ -127,6 +127,9 @@ struct xfarray_sortinfo {
        /* XFARRAY_SORT_* flags; see below. */
        unsigned int            flags;
 
+       /* next time we want to cond_resched() */
+       struct xchk_relax       relax;
+
        /* Cache a folio here for faster scanning for pivots */
        struct folio            *folio;
 
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
index 4e254a0ba0036c10a7d4ce6bebddbb129d47e39a..d848222f802baa020a00757bce9fc2cbf26ba625 100644
--- a/fs/xfs/scrub/xfile.c
+++ b/fs/xfs/scrub/xfile.c
@@ -10,9 +10,9 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
+#include "scrub/scrub.h"
 #include "scrub/xfile.h"
 #include "scrub/xfarray.h"
-#include "scrub/scrub.h"
 #include "scrub/trace.h"
 #include <linux/shmem_fs.h>