mm: vmscan: restore high-cpu watermark safety in kswapd
author Johannes Weiner <hannes@cmpxchg.org>
Wed, 16 Apr 2025 13:45:39 +0000 (09:45 -0400)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 18 Apr 2025 03:10:09 +0000 (20:10 -0700)
Vlastimil points out that commit a211c6550efc ("mm: page_alloc:
defrag_mode kswapd/kcompactd watermarks") switched kswapd from
zone_watermark_ok_safe() to the standard, percpu-cached version of reading
free pages, thus dropping the watermark safety precautions for systems
with high CPU counts (e.g.  >212 CPUs on 64G of memory).  Restore them.

Since zone_watermark_ok_safe() is no longer the right interface, and this
was the last caller of the function anyway, open-code the
zone_page_state_snapshot() conditional and delete the function.
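
For illustration, the open-coded check follows the pattern below.  This
is a minimal sketch only: watermark_ok_drift_safe() is a hypothetical
helper name used for exposition; the patch inlines this logic directly
in pgdat_balanced().

	/*
	 * Trust the cheap per-cpu cached counter unless it has dropped
	 * below the zone's percpu_drift_mark; past that point the
	 * cumulative per-cpu error could mask a watermark breach, so
	 * take an exact snapshot that folds in pending vmstat deltas.
	 */
	static bool watermark_ok_drift_safe(struct zone *z, unsigned int order,
					    unsigned long mark, int highest_zoneidx,
					    enum zone_stat_item item)
	{
		long free_pages = zone_page_state(z, item);

		if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
			free_pages = zone_page_state_snapshot(z, item);

		return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0,
					   free_pages);
	}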

Link: https://lkml.kernel.org/r/20250416135142.778933-2-hannes@cmpxchg.org
Fixes: a211c6550efc ("mm: page_alloc: defrag_mode kswapd/kcompactd watermarks")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mmzone.h
mm/page_alloc.c
mm/vmscan.c

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4c95fcc9e9df0cafafa3970ff6231b3f20a82bb9..6ccec1bf2896ff74fc75d484e51c64072da0f3c6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1502,8 +1502,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 bool zone_watermark_ok(struct zone *z, unsigned int order,
                unsigned long mark, int highest_zoneidx,
                unsigned int alloc_flags);
-bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
-               unsigned long mark, int highest_zoneidx);
 /*
  * Memory initialization context, use to differentiate memory added by
  * the platform statically or via memory hotplug interface.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e506e365d6f18b865915e5c809eae100374580ad..5669baf2a6fea75c17b2be426443a6cf29051f52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3470,18 +3470,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
        return false;
 }
 
-bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
-                       unsigned long mark, int highest_zoneidx)
-{
-       long free_pages = zone_page_state(z, NR_FREE_PAGES);
-
-       if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
-               free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
-
-       return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0,
-                                                               free_pages);
-}
-
 #ifdef CONFIG_NUMA
 int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b620d74b0f66e33ce821260366ec1afde2abcaac..cc422ad830d6357bcaffab2e349690c1367cb158 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6736,6 +6736,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
         * meet watermarks.
         */
        for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) {
+               enum zone_stat_item item;
                unsigned long free_pages;
 
                if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
@@ -6748,9 +6749,25 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
                 * blocks to avoid polluting allocator fallbacks.
                 */
                if (defrag_mode)
-                       free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS);
+                       item = NR_FREE_PAGES_BLOCKS;
                else
-                       free_pages = zone_page_state(zone, NR_FREE_PAGES);
+                       item = NR_FREE_PAGES;
+
+               /*
+                * When there is a high number of CPUs in the system,
+                * the cumulative error from the vmstat per-cpu cache
+                * can blur the line between the watermarks. In that
+                * case, be safe and get an accurate snapshot.
+                *
+                * TODO: NR_FREE_PAGES_BLOCKS moves in steps of
+                * pageblock_nr_pages, while the vmstat pcp threshold
+                * is limited to 125. On many configurations that
+                * counter won't actually be per-cpu cached. But keep
+                * things simple for now; revisit when somebody cares.
+                */
+               free_pages = zone_page_state(zone, item);
+               if (zone->percpu_drift_mark && free_pages < zone->percpu_drift_mark)
+                       free_pages = zone_page_state_snapshot(zone, item);
 
                if (__zone_watermark_ok(zone, order, mark, highest_zoneidx,
                                        0, free_pages))