From a8fcff618f9e78836780176d48ae75df34b84ef4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 31 Jan 2023 08:28:53 -0700 Subject: [PATCH] mm: move FOLL_PIN debug accounting under CONFIG_DEBUG_VM Using FOLL_PIN for mapping user pages caused a performance regression of about 2.7%. Looking at profiles, we see: +2.71% [kernel.vmlinux] [k] mod_node_page_state which wasn't there before. The node page state counters are percpu, but with a very low threshold. On my setup, every 108th update ends up needing to punt to two atomic_long_add()'s, which is causing the above regression. As these counters are purely for debug purposes, move them under CONFIG_DEBUG_VM rather than do them unconditionally. Note that this commit does not fix a real bug with the commits identified as being fixed, rather it ensures that we don't regress on performance due to those commits moving to using FOLL_PIN rather than FOLL_GET. Fixes: 33f432039135 ("block: convert bio_map_user_iov to use iov_iter_extract_pages") Fixes: b699de6806c1 ("block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages") Link: https://lore.kernel.org/linux-block/f57ee72f-38e9-6afa-182f-2794638eadcb@kernel.dk/ Link: https://lore.kernel.org/all/54b0b07a-c178-9ffe-b5af-088f3c21696c@kernel.dk/ Acked-by: David Hildenbrand Reviewed-by: John Hubbard Signed-off-by: Jens Axboe --- include/linux/mmzone.h | 2 ++ mm/gup.c | 7 ++++++- mm/vmstat.c | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cd28a100d9e4..0153ec8a54ae 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -195,8 +195,10 @@ enum node_stat_item { NR_WRITTEN, /* page writings since bootup */ NR_THROTTLED_WRITTEN, /* NR_WRITTEN while reclaim throttled */ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ +#ifdef CONFIG_DEBUG_VM NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via 
unpin_user_page() */ +#endif NR_KERNEL_STACK_KB, /* measured in KiB */ #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) NR_KERNEL_SCS_KB, /* measured in KiB */ diff --git a/mm/gup.c b/mm/gup.c index f45a3a5be53a..41abb16286ec 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -168,7 +168,9 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) */ smp_mb__after_atomic(); +#ifdef CONFIG_DEBUG_VM node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); +#endif return folio; } @@ -180,7 +182,9 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { +#ifdef CONFIG_DEBUG_VM node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); +#endif if (folio_test_large(folio)) atomic_sub(refs, folio_pincount_ptr(folio)); else @@ -236,8 +240,9 @@ int __must_check try_grab_page(struct page *page, unsigned int flags) } else { folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); } - +#ifdef CONFIG_DEBUG_VM node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); +#endif } return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 1ea6a5ce1c41..5cbd9a1924bf 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1227,8 +1227,10 @@ const char * const vmstat_text[] = { "nr_written", "nr_throttled_written", "nr_kernel_misc_reclaimable", +#ifdef CONFIG_DEBUG_VM "nr_foll_pin_acquired", "nr_foll_pin_released", +#endif "nr_kernel_stack", #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) "nr_shadow_call_stack", -- 2.25.1