mm: move FOLL_PIN debug accounting under CONFIG_DEBUG_VM
author Jens Axboe <axboe@kernel.dk>
Tue, 31 Jan 2023 15:28:53 +0000 (08:28 -0700)
committer Jens Axboe <axboe@kernel.dk>
Tue, 14 Feb 2023 23:03:47 +0000 (16:03 -0700)
Using FOLL_PIN for mapping user pages caused a performance regression of
about 2.7%. Looking at profiles, we see:

+2.71%  [kernel.vmlinux]  [k] mod_node_page_state

which wasn't there before. The node page state counters are percpu, but
with a very low threshold. On my setup, every 108th update ends up
needing to punt to two atomic_long_add()'s, which is what causes the
regression seen above.
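
For reference, the update path here is the usual percpu-with-threshold
counter scheme, roughly like the sketch below (illustrative only: the
struct and function names are simplified stand-ins, not the actual
__mod_node_page_state() internals):

    struct node_counter {
            s8 __percpu     *diff;          /* small per-CPU delta */
            s8              threshold;      /* fold point, a small value */
            atomic_long_t   node_stat;      /* per-node total */
            atomic_long_t   *global_stat;   /* system-wide total */
    };

    static void counter_mod(struct node_counter *c, long delta)
    {
            long x = delta + __this_cpu_read(*c->diff);

            if (abs(x) > c->threshold) {
                    /*
                     * Threshold exceeded: fold the accumulated delta
                     * into the atomics.  This is the path that shows
                     * up as mod_node_page_state() in the profile, with
                     * one atomic_long_add() for the node total and one
                     * for the global total.
                     */
                    atomic_long_add(x, &c->node_stat);
                    atomic_long_add(x, c->global_stat);
                    x = 0;
            }
            __this_cpu_write(*c->diff, x);
    }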

As these counters are purely for debug purposes, move them under
CONFIG_DEBUG_VM rather than do them unconditionally. Note that this
commit does not fix a real bug in the commits identified as being
fixed; rather, it ensures that we don't regress on performance due to
those commits switching from FOLL_GET to FOLL_PIN.

Fixes: 33f432039135 ("block: convert bio_map_user_iov to use iov_iter_extract_pages")
Fixes: b699de6806c1 ("block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages")
Link: https://lore.kernel.org/linux-block/f57ee72f-38e9-6afa-182f-2794638eadcb@kernel.dk/
Link: https://lore.kernel.org/all/54b0b07a-c178-9ffe-b5af-088f3c21696c@kernel.dk/
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/linux/mmzone.h
mm/gup.c
mm/vmstat.c

index cd28a100d9e4f7fd8cfe16068aab68a914d80729..0153ec8a54ae08d5aa4d8072963b4a306731e72a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -195,8 +195,10 @@ enum node_stat_item {
        NR_WRITTEN,             /* page writings since bootup */
        NR_THROTTLED_WRITTEN,   /* NR_WRITTEN while reclaim throttled */
        NR_KERNEL_MISC_RECLAIMABLE,     /* reclaimable non-slab kernel pages */
+#ifdef CONFIG_DEBUG_VM
        NR_FOLL_PIN_ACQUIRED,   /* via: pin_user_page(), gup flag: FOLL_PIN */
        NR_FOLL_PIN_RELEASED,   /* pages returned via unpin_user_page() */
+#endif
        NR_KERNEL_STACK_KB,     /* measured in KiB */
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
        NR_KERNEL_SCS_KB,       /* measured in KiB */
index f45a3a5be53a48423daed4d60fe01ddef088cb3b..41abb16286ec0b96cc3ef42818ecfa2169de048b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -168,7 +168,9 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
                 */
                smp_mb__after_atomic();
 
+#ifdef CONFIG_DEBUG_VM
                node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
+#endif
 
                return folio;
        }
@@ -180,7 +182,9 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
 static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
 {
        if (flags & FOLL_PIN) {
+#ifdef CONFIG_DEBUG_VM
                node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
+#endif
                if (folio_test_large(folio))
                        atomic_sub(refs, folio_pincount_ptr(folio));
                else
@@ -236,8 +240,9 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
                } else {
                        folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
                }
-
+#ifdef CONFIG_DEBUG_VM
                node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+#endif
        }
 
        return 0;
index 1ea6a5ce1c4161b5f41387a82e64c2446ad93a50..5cbd9a1924bfff77b3f3030db4a12e4614db1d9b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1227,8 +1227,10 @@ const char * const vmstat_text[] = {
        "nr_written",
        "nr_throttled_written",
        "nr_kernel_misc_reclaimable",
+#ifdef CONFIG_DEBUG_VM
        "nr_foll_pin_acquired",
        "nr_foll_pin_released",
+#endif
        "nr_kernel_stack",
 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
        "nr_shadow_call_stack",