hugetlb: batch freeing of vmemmap pages
author Mike Kravetz <mike.kravetz@oracle.com>
Thu, 19 Oct 2023 02:31:07 +0000 (19:31 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Wed, 25 Oct 2023 23:47:07 +0000 (16:47 -0700)
Now that batching of hugetlb vmemmap optimization processing is possible,
batch the freeing of vmemmap pages.  When freeing vmemmap pages for a
hugetlb page, we add them to a list that is freed after the entire batch
has been processed.

This enhances the ability to return contiguous ranges of memory to the
low-level allocators.
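
For illustration only, a minimal userspace C sketch of the batching
pattern described above.  The struct, list and function names below are
stand-ins invented for this sketch, not the kernel API: each per-page
optimize step deposits the vmemmap pages it unmapped on a caller-supplied
list, and the caller frees the whole list once per batch rather than once
per hugetlb page.

#include <stdlib.h>

/* Stand-in for one vmemmap page that has been unmapped and can be freed. */
struct vpage {
	struct vpage *next;
	int id;
};

/*
 * Models __hugetlb_vmemmap_optimize(): instead of freeing the pages it
 * unmaps, deposit them on the caller-supplied list.
 */
static int optimize_one(int hpage_id, struct vpage **vmemmap_pages)
{
	/* e.g. 7 of the 8 vmemmap pages of a 2MB hugetlb page become free */
	for (int i = 0; i < 7; i++) {
		struct vpage *p = malloc(sizeof(*p));

		if (!p)
			return -1;		/* models -ENOMEM */
		p->id = hpage_id * 8 + i;
		p->next = *vmemmap_pages;	/* LIFO, like list_add() in the patch */
		*vmemmap_pages = p;
	}
	return 0;
}

/* Models free_vmemmap_page_list(): one bulk free per batch. */
static void free_page_list(struct vpage **vmemmap_pages)
{
	while (*vmemmap_pages) {
		struct vpage *p = *vmemmap_pages;

		*vmemmap_pages = p->next;
		free(p);
	}
}

int main(void)
{
	struct vpage *vmemmap_pages = NULL;

	/*
	 * Models hugetlb_vmemmap_optimize_folios(): accumulate the freed
	 * pages across the whole batch, then return them all at once.
	 */
	for (int hpage = 0; hpage < 4; hpage++) {
		if (optimize_one(hpage, &vmemmap_pages))
			break;	/* on failure, still free what was gathered */
	}

	free_page_list(&vmemmap_pages);
	return 0;
}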

Link: https://lkml.kernel.org/r/20231019023113.345257-6-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Barry Song <21cnbao@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Konrad Dybcio <konradybcio@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Usama Arif <usama.arif@bytedance.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 77f44b81ff01d8d158d383423f6610230f8a887a..4ac521e596db7d42b51e841f702a6b3a52b01898 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -251,7 +251,7 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
        }
 
        entry = mk_pte(walk->reuse_page, pgprot);
-       list_add_tail(&page->lru, walk->vmemmap_pages);
+       list_add(&page->lru, walk->vmemmap_pages);
        set_pte_at(&init_mm, addr, pte, entry);
 }
 
@@ -306,18 +306,20 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
  * @end:       end address of the vmemmap virtual address range that we want to
  *             remap.
  * @reuse:     reuse address.
+ * @vmemmap_pages: list to deposit vmemmap pages to be freed.  It is the
+ *             caller's responsibility to free the pages.
  *
  * Return: %0 on success, negative error code otherwise.
  */
 static int vmemmap_remap_free(unsigned long start, unsigned long end,
-                             unsigned long reuse)
+                             unsigned long reuse,
+                             struct list_head *vmemmap_pages)
 {
        int ret;
-       LIST_HEAD(vmemmap_pages);
        struct vmemmap_remap_walk walk = {
                .remap_pte      = vmemmap_remap_pte,
                .reuse_addr     = reuse,
-               .vmemmap_pages  = &vmemmap_pages,
+               .vmemmap_pages  = vmemmap_pages,
        };
        int nid = page_to_nid((struct page *)reuse);
        gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
@@ -334,7 +336,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
        if (walk.reuse_page) {
                copy_page(page_to_virt(walk.reuse_page),
                          (void *)walk.reuse_addr);
-               list_add(&walk.reuse_page->lru, &vmemmap_pages);
+               list_add(&walk.reuse_page->lru, vmemmap_pages);
        }
 
        /*
@@ -365,15 +367,13 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
                walk = (struct vmemmap_remap_walk) {
                        .remap_pte      = vmemmap_restore_pte,
                        .reuse_addr     = reuse,
-                       .vmemmap_pages  = &vmemmap_pages,
+                       .vmemmap_pages  = vmemmap_pages,
                };
 
                vmemmap_remap_range(reuse, end, &walk);
        }
        mmap_read_unlock(&init_mm);
 
-       free_vmemmap_page_list(&vmemmap_pages);
-
        return ret;
 }
 
@@ -389,7 +389,7 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
                page = alloc_pages_node(nid, gfp_mask, 0);
                if (!page)
                        goto out;
-               list_add_tail(&page->lru, list);
+               list_add(&page->lru, list);
        }
 
        return 0;
@@ -577,24 +577,17 @@ static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
        return true;
 }
 
-/**
- * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages.
- * @h:         struct hstate.
- * @head:      the head page whose vmemmap pages will be optimized.
- *
- * This function only tries to optimize @head's vmemmap pages and does not
- * guarantee that the optimization will succeed after it returns. The caller
- * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages
- * have been optimized.
- */
-void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
+static int __hugetlb_vmemmap_optimize(const struct hstate *h,
+                                       struct page *head,
+                                       struct list_head *vmemmap_pages)
 {
+       int ret = 0;
        unsigned long vmemmap_start = (unsigned long)head, vmemmap_end;
        unsigned long vmemmap_reuse;
 
        VM_WARN_ON_ONCE(!PageHuge(head));
        if (!vmemmap_should_optimize(h, head))
-               return;
+               return ret;
 
        static_branch_inc(&hugetlb_optimize_vmemmap_key);
 
@@ -604,21 +597,58 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
 
        /*
         * Remap the vmemmap virtual address range [@vmemmap_start, @vmemmap_end)
-        * to the page which @vmemmap_reuse is mapped to, then free the pages
-        * which the range [@vmemmap_start, @vmemmap_end] is mapped to.
+        * to the page which @vmemmap_reuse is mapped to.  Add pages previously
+        * mapping the range to vmemmap_pages list so that they can be freed by
+        * the caller.
         */
-       if (vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse))
+       ret = vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse, vmemmap_pages);
+       if (ret)
                static_branch_dec(&hugetlb_optimize_vmemmap_key);
        else
                SetHPageVmemmapOptimized(head);
+
+       return ret;
+}
+
+/**
+ * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages.
+ * @h:         struct hstate.
+ * @head:      the head page whose vmemmap pages will be optimized.
+ *
+ * This function only tries to optimize @head's vmemmap pages and does not
+ * guarantee that the optimization will succeed after it returns. The caller
+ * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages
+ * have been optimized.
+ */
+void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
+{
+       LIST_HEAD(vmemmap_pages);
+
+       __hugetlb_vmemmap_optimize(h, head, &vmemmap_pages);
+       free_vmemmap_page_list(&vmemmap_pages);
 }
 
 void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
 {
        struct folio *folio;
+       LIST_HEAD(vmemmap_pages);
+
+       list_for_each_entry(folio, folio_list, lru) {
+               int ret = __hugetlb_vmemmap_optimize(h, &folio->page,
+                                                               &vmemmap_pages);
+
+               /*
+                * Pages to be freed may have been accumulated.  If we
+                * encounter an ENOMEM, free what we have and try again.
+                */
+               if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) {
+                       free_vmemmap_page_list(&vmemmap_pages);
+                       INIT_LIST_HEAD(&vmemmap_pages);
+                       __hugetlb_vmemmap_optimize(h, &folio->page, &vmemmap_pages);
+               }
+       }
 
-       list_for_each_entry(folio, folio_list, lru)
-               hugetlb_vmemmap_optimize(h, &folio->page);
+       free_vmemmap_page_list(&vmemmap_pages);
 }
 
 static struct ctl_table hugetlb_vmemmap_sysctls[] = {