mm/hugetlb: check bootmem pages for zone intersections
author		Frank van der Linden <fvdl@google.com>
		Fri, 28 Feb 2025 18:29:14 +0000 (18:29 +0000)
committer	Andrew Morton <akpm@linux-foundation.org>
		Mon, 17 Mar 2025 05:06:28 +0000 (22:06 -0700)
Bootmem hugetlb pages are allocated using memblock, which isn't (and
mostly can't be) aware of zones.
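
For reference, this is roughly how such a page is carved out at boot
(a simplified sketch of __alloc_bootmem_huge_page(), error handling
omitted); memblock takes a size, an alignment and a nid, nothing more:

	/* physically contiguous on @nid, but possibly spanning zones */
	m = memblock_alloc_try_nid_raw(huge_page_size(h),	/* size */
				       huge_page_size(h),	/* alignment */
				       0, MEMBLOCK_ALLOC_ACCESSIBLE,
				       nid);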

So, they may end up crossing zone boundaries.  This would create
confusion: a hugetlb page that is part of multiple zones is bad.  Worse,
HVO might then end up stealthily re-assigning pages to a different zone
when a hugetlb page is freed, since the tail page structures beyond the
first vmemmap page would inherit the zone of the first page structures.
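
(An illustrative sketch of why that is: a page's zone is not looked up
from zone PFN ranges at runtime, it is encoded in page->flags when the
struct page is initialized, and page_zone() merely decodes it.  With
HVO, tail struct pages past the first vmemmap page are backed by a
shared vmemmap page, so they report whatever zone bits that shared
page carries.)

	/* roughly what page_zone() does */
	static inline struct zone *sketch_page_zone(const struct page *page)
	{
		return &NODE_DATA(page_to_nid(page))->
				node_zones[page_zonenum(page)];
	}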

While the chance of this happening is low, you can definitely create a
configuration where this happens (especially when using ZONE_MOVABLE,
e.g. as set up via the movablecore= command line option).

To avoid this issue, check whether bootmem hugetlb pages intersect with
multiple zones during the gather phase, and if they do, discard them by
handing them to the page allocator, as sketched below.  Record the
number of invalid bootmem pages per node and subtract them from the
number of available pages at the end, making it easier to do these
checks in multiple places later on.
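
Expressed with the helpers this patch adds (see the diff below), the
gather-phase check boils down to:

	/* keep the bootmem page only if all its PFNs are in one zone */
	start_pfn = virt_to_phys(m) >> PAGE_SHIFT;
	if (pfn_range_intersects_zones(nid, start_pfn,
				       pages_per_huge_page(m->hstate)))
		/* invalid: release it to the page allocator, page by page */
		hugetlb_bootmem_free_invalid_page(nid, virt_to_page(m),
						  m->hstate);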

Link: https://lkml.kernel.org/r/20250228182928.2645936-14-fvdl@google.com
Signed-off-by: Frank van der Linden <fvdl@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/hugetlb.c
mm/internal.h
mm/mm_init.c

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 00facd2123e59287cec2e4ddf629f5f2694d21c7..e4bf06f13178f5036005416525d0ead957bb8b8d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -62,6 +62,7 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
 static unsigned long hugetlb_cma_size __initdata;
 
 __initdata struct list_head huge_boot_pages[MAX_NUMNODES];
+static unsigned long hstate_boot_nrinvalid[HUGE_MAX_HSTATE] __initdata;
 
 /*
  * Due to ordering constraints across the init code for various
@@ -3316,6 +3317,44 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
        }
 }
 
+static bool __init hugetlb_bootmem_page_zones_valid(int nid,
+                                                   struct huge_bootmem_page *m)
+{
+       unsigned long start_pfn;
+       bool valid;
+
+       start_pfn = virt_to_phys(m) >> PAGE_SHIFT;
+
+       valid = !pfn_range_intersects_zones(nid, start_pfn,
+                       pages_per_huge_page(m->hstate));
+       if (!valid)
+               hstate_boot_nrinvalid[hstate_index(m->hstate)]++;
+
+       return valid;
+}
+
+/*
+ * Free a bootmem page that was found to be invalid (intersecting with
+ * multiple zones).
+ *
+ * Since it intersects with multiple zones, we can't just do a free
+ * operation on all pages at once, but instead have to walk all
+ * pages, freeing them one by one.
+ */
+static void __init hugetlb_bootmem_free_invalid_page(int nid, struct page *page,
+                                            struct hstate *h)
+{
+       unsigned long npages = pages_per_huge_page(h);
+       unsigned long pfn;
+
+       while (npages--) {
+               pfn = page_to_pfn(page);
+               __init_reserved_page_zone(pfn, nid);
+               free_reserved_page(page);
+               page++;
+       }
+}
+
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages.
@@ -3323,14 +3362,25 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
 static void __init gather_bootmem_prealloc_node(unsigned long nid)
 {
        LIST_HEAD(folio_list);
-       struct huge_bootmem_page *m;
+       struct huge_bootmem_page *m, *tm;
        struct hstate *h = NULL, *prev_h = NULL;
 
-       list_for_each_entry(m, &huge_boot_pages[nid], list) {
+       list_for_each_entry_safe(m, tm, &huge_boot_pages[nid], list) {
                struct page *page = virt_to_page(m);
                struct folio *folio = (void *)page;
 
                h = m->hstate;
+               if (!hugetlb_bootmem_page_zones_valid(nid, m)) {
+                       /*
+                        * Can't use this page. Initialize the
+                        * page structures if that hasn't already
+                        * been done, and give them to the page
+                        * allocator.
+                        */
+                       hugetlb_bootmem_free_invalid_page(nid, page, h);
+                       continue;
+               }
+
                /*
                 * It is possible to have multiple huge page sizes (hstates)
                 * in this list.  If so, process each size separately.
@@ -3602,13 +3652,20 @@ static void __init hugetlb_init_hstates(void)
 static void __init report_hugepages(void)
 {
        struct hstate *h;
+       unsigned long nrinvalid;
 
        for_each_hstate(h) {
                char buf[32];
 
+               nrinvalid = hstate_boot_nrinvalid[hstate_index(h)];
+               h->max_huge_pages -= nrinvalid;
+
                string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
                pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
                        buf, h->free_huge_pages);
+               if (nrinvalid)
+                       pr_info("HugeTLB: %s page size: %lu invalid page%s discarded\n",
+                                       buf, nrinvalid, nrinvalid > 1 ? "s" : "");
                pr_info("HugeTLB: %d KiB vmemmap can be freed for a %s page\n",
                        hugetlb_vmemmap_optimizable_size(h) / SZ_1K, buf);
        }
diff --git a/mm/internal.h b/mm/internal.h
index 780c17b4003a5a40099c635d85d7c6155a2ae820..8233c207d3f3ad5872a7745a95765e4872a95080 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -658,6 +658,8 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
 }
 
 void set_zone_contiguous(struct zone *zone);
+bool pfn_range_intersects_zones(int nid, unsigned long start_pfn,
+                          unsigned long nr_pages);
 
 static inline void clear_zone_contiguous(struct zone *zone)
 {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 419b7db220d2f03d5d2538d98338e8dee4a7ffc0..3eec528afe43bb2e5dfb034447269cdbaf7ef3e1 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2287,6 +2287,31 @@ void set_zone_contiguous(struct zone *zone)
        zone->contiguous = true;
 }
 
+/*
+ * Check if a PFN range intersects multiple zones on one or more
+ * NUMA nodes. Specify the @nid argument if it is known that this
+ * PFN range is on one node, NUMA_NO_NODE otherwise.
+ */
+bool pfn_range_intersects_zones(int nid, unsigned long start_pfn,
+                          unsigned long nr_pages)
+{
+       struct zone *zone, *izone = NULL;
+
+       for_each_zone(zone) {
+               if (nid != NUMA_NO_NODE && zone_to_nid(zone) != nid)
+                       continue;
+
+               if (zone_intersects(zone, start_pfn, nr_pages)) {
+                       if (izone != NULL)
+                               return true;
+                       izone = zone;
+               }
+
+       }
+
+       return false;
+}
+
 static void __init mem_init_print_info(void);
 void __init page_alloc_init_late(void)
 {
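
For reference, zone_intersects(), which the new helper builds on, is an
existing inline in include/linux/mmzone.h; it is roughly:

	static inline bool zone_intersects(struct zone *zone,
			unsigned long start_pfn, unsigned long nr_pages)
	{
		if (zone_is_empty(zone))
			return false;
		if (start_pfn >= zone_end_pfn(zone) ||
		    start_pfn + nr_pages <= zone->zone_start_pfn)
			return false;

		return true;
	}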