Merge tag 'mm-stable-2022-08-09' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-block.git] / mm / hugetlb.c
index f044962ad9df7aee7c1bc41398c46d2e7d8d1fd2..0aee2f3ae15c8251338746659e1b9ea73b1f1807 100644
@@ -1535,7 +1535,14 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                return;
 
-       if (hugetlb_vmemmap_alloc(h, page)) {
+       /*
+        * If we don't know which subpages are hwpoisoned, we can't free
+        * the hugepage, so it's leaked intentionally.
+        */
+       if (HPageRawHwpUnreliable(page))
+               return;
+
+       if (hugetlb_vmemmap_restore(h, page)) {
                spin_lock_irq(&hugetlb_lock);
                /*
                 * If we cannot allocate vmemmap pages, just refuse to free the
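HPageRawHwpUnreliable() is new with this series. A minimal sketch of the companion include/linux/hugetlb.h declaration, assuming it follows the existing HPAGEFLAG() pattern (the other flag names are elided here):

enum hugetlb_page_flags {
        HPG_restore_reserve = 0,
        /* ... existing flags ... */
        HPG_raw_hwp_unreliable, /* the set of hwpoisoned subpages is unknown */
        __NR_HPAGEFLAGS,
};

/*
 * Generates the HPageRawHwpUnreliable(), SetHPageRawHwpUnreliable() and
 * ClearHPageRawHwpUnreliable() accessors used above.
 */
HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)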
@@ -1547,6 +1554,13 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
                return;
        }
 
+       /*
+        * Move PageHWPoison flag from head page to the raw error pages,
+        * which makes any healthy subpages reusable.
+        */
+       if (unlikely(PageHWPoison(page)))
+               hugetlb_clear_page_hwpoison(page);
+
        for (i = 0; i < pages_per_huge_page(h);
             i++, subpage = mem_map_next(subpage, page, i)) {
                subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
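The flag transfer relies on the companion mm/memory-failure.c changes, which record every raw error subpage on a per-hugepage list while the page is still a hugepage. A hedged sketch of that helper; struct raw_hwp_page and raw_hwp_list_head() are assumed names from that series, not defined in this file:

void hugetlb_clear_page_hwpoison(struct page *hpage)
{
        struct llist_node *tnode, *tmp, *raw_list;

        ClearPageHWPoison(hpage);
        /* Detach the recorded raw error pages and re-mark only those. */
        raw_list = llist_del_all(raw_hwp_list_head(hpage));
        llist_for_each_safe(tnode, tmp, raw_list) {
                struct raw_hwp_page *p =
                        container_of(tnode, struct raw_hwp_page, node);

                SetPageHWPoison(p->page);
                kfree(p);
        }
}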
@@ -1612,7 +1626,7 @@ static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
 
 static inline void flush_free_hpage_work(struct hstate *h)
 {
-       if (hugetlb_optimize_vmemmap_pages(h))
+       if (hugetlb_vmemmap_optimizable(h))
                flush_work(&free_hpage_work);
 }
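hugetlb_optimize_vmemmap_pages() is replaced by a boolean predicate. A minimal sketch, assuming it is a thin mm/hugetlb_vmemmap.h wrapper around the size helper that report_hugepages() prints below:

static inline bool hugetlb_vmemmap_optimizable(const struct hstate *h)
{
        return hugetlb_vmemmap_optimizable_size(h) != 0;
}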
 
@@ -1734,7 +1748,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
 
 static void __prep_new_huge_page(struct hstate *h, struct page *page)
 {
-       hugetlb_vmemmap_free(h, page);
+       hugetlb_vmemmap_optimize(h, page);
        INIT_LIST_HEAD(&page->lru);
        set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
        hugetlb_set_page_subpool(page, NULL);
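The old hugetlb_vmemmap_alloc()/hugetlb_vmemmap_free() pair is renamed to hugetlb_vmemmap_restore()/hugetlb_vmemmap_optimize(), naming each operation after its intent rather than the underlying allocation. A sketch of the prototypes as they would plausibly appear in mm/hugetlb_vmemmap.h:

/* Rebuild the full vmemmap for @head; may fail under memory pressure. */
int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);

/* Free the redundant tail-page vmemmap for @head when the hstate allows it. */
void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);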
@@ -2107,17 +2121,8 @@ retry:
                 * Attempt to allocate vmemmap here so that we can take
                 * appropriate action on failure.
                 */
-               rc = hugetlb_vmemmap_alloc(h, head);
+               rc = hugetlb_vmemmap_restore(h, head);
                if (!rc) {
-                       /*
-                        * Move PageHWPoison flag from head page to the raw
-                        * error page, which makes any subpages rather than
-                        * the error page reusable.
-                        */
-                       if (PageHWPoison(head) && page != head) {
-                               SetPageHWPoison(page);
-                               ClearPageHWPoison(head);
-                       }
                        update_and_free_page(h, head, false);
                } else {
                        spin_lock_irq(&hugetlb_lock);
@@ -2432,8 +2437,7 @@ static void return_unused_surplus_pages(struct hstate *h,
        /* Uncommit the reservation */
        h->resv_huge_pages -= unused_resv_pages;
 
-       /* Cannot return gigantic pages currently */
-       if (hstate_is_gigantic(h))
+       if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                goto out;
 
        /*
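Surplus gigantic pages can now be returned as long as the architecture supports freeing gigantic pages at runtime. For reference, a sketch of the predicate, assuming the usual config-gated definition in include/linux/hugetlb.h:

static inline bool gigantic_page_runtime_supported(void)
{
        return IS_ENABLED(CONFIG_ARCH_HAS_GIGANTIC_PAGE);
}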
@@ -3182,8 +3186,10 @@ static void __init report_hugepages(void)
                char buf[32];
 
                string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
-               pr_info("HugeTLB registered %s page size, pre-allocated %ld pages\n",
+               pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
                        buf, h->free_huge_pages);
+               pr_info("HugeTLB: %d KiB vmemmap can be freed for a %s page\n",
+                       hugetlb_vmemmap_optimizable_size(h) / SZ_1K, buf);
        }
 }
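Taken together, the two pr_info() calls emit boot messages along the following lines. The values are illustrative, not taken from this diff: a 2.00 MiB hugepage with a 64-byte struct page has 8 vmemmap pages, of which 7 (28 KiB) are freeable.

HugeTLB: registered 2.00 MiB page size, pre-allocated 512 pages
HugeTLB: 28 KiB vmemmap can be freed for a 2.00 MiB page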
 
@@ -3421,7 +3427,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
        remove_hugetlb_page_for_demote(h, page, false);
        spin_unlock_irq(&hugetlb_lock);
 
-       rc = hugetlb_vmemmap_alloc(h, page);
+       rc = hugetlb_vmemmap_restore(h, page);
        if (rc) {
                /* Allocation of vmemmap failed, we cannot demote the page */
                spin_lock_irq(&hugetlb_lock);
@@ -4111,7 +4117,6 @@ void __init hugetlb_add_hstate(unsigned int order)
        h->next_nid_to_free = first_memory_node;
        snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
                                        huge_page_size(h)/1024);
-       hugetlb_vmemmap_init(h);
 
        parsed_hstate = h;
 }
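hugetlb_vmemmap_init() and the per-hstate field it precomputed go away; the optimizable size is instead derived on demand from the hstate. A sketch of such a helper, assuming one vmemmap page per hugepage must stay mapped:

static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
{
        return pages_per_huge_page(h) * sizeof(struct page);
}

static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
        int size = hugetlb_vmemmap_size(h) - PAGE_SIZE; /* keep one page mapped */

        return size > 0 ? size : 0;
}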
@@ -6985,10 +6990,38 @@ struct page * __weak
 follow_huge_pud(struct mm_struct *mm, unsigned long address,
                pud_t *pud, int flags)
 {
-       if (flags & (FOLL_GET | FOLL_PIN))
+       struct page *page = NULL;
+       spinlock_t *ptl;
+       pte_t pte;
+
+       if (WARN_ON_ONCE(flags & FOLL_PIN))
                return NULL;
 
-       return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+retry:
+       ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud);
+       if (!pud_huge(*pud))
+               goto out;
+       pte = huge_ptep_get((pte_t *)pud);
+       if (pte_present(pte)) {
+               page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+               if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
+                       page = NULL;
+                       goto out;
+               }
+       } else {
+               if (is_hugetlb_entry_migration(pte)) {
+                       spin_unlock(ptl);
+                       __migration_entry_wait(mm, (pte_t *)pud, ptl);
+                       goto retry;
+               }
+               /*
+                * hwpoisoned entry is treated as no_page_table in
+                * follow_page_mask().
+                */
+       }
+out:
+       spin_unlock(ptl);
+       return page;
 }
 
 struct page * __weak
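The follow_huge_pud() rework takes the PUD-level page table lock, waits on migration entries instead of dereferencing a non-present entry, treats hwpoison entries as "no page", and takes a reference through try_grab_page() for FOLL_GET callers. A hypothetical caller sketch (not part of this diff) showing the reference that such a caller must balance:

static bool pud_hugepage_mapped(struct mm_struct *mm, unsigned long addr,
                                pud_t *pud)
{
        struct page *page = follow_huge_pud(mm, addr, pud, FOLL_GET);

        if (!page)
                return false;   /* not present, or a hwpoisoned entry */

        put_page(page);         /* balance the try_grab_page() reference */
        return true;
}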