mm/hugetlb: fix memfd_pin_folios resv_huge_pages leak
author    Steve Sistare <steven.sistare@oracle.com>
          Tue, 3 Sep 2024 14:25:19 +0000 (07:25 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
          Thu, 26 Sep 2024 21:01:43 +0000 (14:01 -0700)
memfd_pin_folios followed by unpin_folios leaves resv_huge_pages elevated
if the pages were not already faulted in.  During a normal page fault,
resv_huge_pages is consumed here:

hugetlb_fault()
  alloc_hugetlb_folio()
    dequeue_hugetlb_folio_vma()
      dequeue_hugetlb_folio_nodemask()
        dequeue_hugetlb_folio_node_exact()
          free_huge_pages--
      resv_huge_pages--
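
For reference, a condensed sketch of that accounting.  dequeue_for_fault()
below is a hypothetical condensation of the chain above, not a function
that exists in mm/hugetlb.c, and the reservation check is simplified:

/*
 * Hypothetical condensation of the fault path: the dequeue drops
 * free_huge_pages, then the caller consumes the reservation.
 */
static struct folio *dequeue_for_fault(struct hstate *h, gfp_t gfp_mask,
				       int nid, nodemask_t *nmask)
{
	struct folio *folio;

	spin_lock_irq(&hugetlb_lock);
	/* dequeue_hugetlb_folio_node_exact() does h->free_huge_pages-- */
	folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, nid, nmask);
	if (folio)
		h->resv_huge_pages--;	/* reservation consumed here */
	spin_unlock_irq(&hugetlb_lock);
	return folio;
}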

During memfd_pin_folios, the page is created by calling
alloc_hugetlb_folio_nodemask instead of alloc_hugetlb_folio, and
resv_huge_pages is not modified:

memfd_alloc_folio()
  alloc_hugetlb_folio_nodemask()
    dequeue_hugetlb_folio_nodemask()
      dequeue_hugetlb_folio_node_exact()
        free_huge_pages--
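
Both paths decrement free_huge_pages via dequeue_hugetlb_folio_node_exact,
but only the fault path decrements resv_huge_pages.  The net counter
movement for one unfaulted page (illustrative arithmetic, not kernel
source):

/*
 *   fault path:  free_huge_pages--, resv_huge_pages--   (balanced)
 *   memfd path:  free_huge_pages--                      (resv leaked)
 *
 * When unpin_folios() later frees the folio, free_huge_pages is
 * restored, but nothing lowers resv_huge_pages, so it stays elevated.
 */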

alloc_hugetlb_folio_nodemask has other callers that must not modify
resv_huge_pages, so it cannot simply be changed.  To fix, define an
alternate version, alloc_hugetlb_folio_reserve, for this call site,
which does adjust resv_huge_pages.

Link: https://lkml.kernel.org/r/1725373521-451395-4-git-send-email-steven.sistare@oracle.com
Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios")
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/hugetlb.h
mm/hugetlb.c
mm/memfd.c

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 98c47c394b891d838b55197787d1cf266a1a3171..e4697539b665a2639ecd72a34f7f8b56e4c77dcf 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -692,6 +692,9 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
                                nodemask_t *nmask, gfp_t gfp_mask,
                                bool allow_alloc_fallback);
+struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+                                         nodemask_t *nmask, gfp_t gfp_mask);
+
 int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
                        pgoff_t idx);
 void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
@@ -1059,6 +1062,13 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
        return NULL;
 }
 
+static inline struct folio *
+alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+                           nodemask_t *nmask, gfp_t gfp_mask)
+{
+       return NULL;
+}
+
 static inline struct folio *
 alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
                        nodemask_t *nmask, gfp_t gfp_mask,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index def84d8bcf2dab97c8cb114f25b09effece4527a..190fa05635f4a9d4af14bfa6bda116298f2dce71 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2390,6 +2390,23 @@ struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
        return folio;
 }
 
+struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+               nodemask_t *nmask, gfp_t gfp_mask)
+{
+       struct folio *folio;
+
+       spin_lock_irq(&hugetlb_lock);
+       folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid,
+                                              nmask);
+       if (folio) {
+               VM_BUG_ON(!h->resv_huge_pages);
+               h->resv_huge_pages--;
+       }
+
+       spin_unlock_irq(&hugetlb_lock);
+       return folio;
+}
+
 /* folio migration callback function */
 struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
                nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback)
diff --git a/mm/memfd.c b/mm/memfd.c
index e7b7c5294d59635539ee75a165a3b6ae171a632e..bfe0e7189a37732d61a7346e286bede2ce49888a 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -82,11 +82,10 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
                gfp_mask = htlb_alloc_mask(hstate_file(memfd));
                gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
 
-               folio = alloc_hugetlb_folio_nodemask(hstate_file(memfd),
-                                                    numa_node_id(),
-                                                    NULL,
-                                                    gfp_mask,
-                                                    false);
+               folio = alloc_hugetlb_folio_reserve(hstate_file(memfd),
+                                                   numa_node_id(),
+                                                   NULL,
+                                                   gfp_mask);
                if (folio && folio_try_get(folio)) {
                        err = hugetlb_add_to_page_cache(folio,
                                                        memfd->f_mapping,
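
For context, a hypothetical in-kernel reproduction of the leak.
memfd_pin_folios() and unpin_folios() are the real interfaces introduced
by commit 89c1905d9c14; the helper name, the one-folio pin, and the
inclusive end offset are illustrative scaffolding, not code from this
series:

/*
 * Pin and then unpin one folio of an unfaulted hugetlb memfd.  Before
 * this fix, h->resv_huge_pages stayed elevated afterward; with the fix
 * it returns to its starting value.
 */
static void check_resv_accounting(struct file *memfd, size_t hpage_size)
{
	struct folio *folio;
	pgoff_t offset;
	long nr;

	nr = memfd_pin_folios(memfd, 0, hpage_size - 1, &folio, 1, &offset);
	if (nr > 0)
		unpin_folios(&folio, nr);
}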