mm: hwpoison: support recovery from ksm_might_need_to_copy()
author Kefeng Wang <wangkefeng.wang@huawei.com>
Fri, 9 Dec 2022 07:28:01 +0000 (15:28 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 9 Feb 2023 23:56:51 +0000 (15:56 -0800)
When the kernel copies a page in ksm_might_need_to_copy() and runs into
an uncorrectable memory error, it crashes because the poisoned page is
consumed by the kernel.  This is similar to the issue recently fixed by
Copy-on-write poison recovery.

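The crash comes from consuming the poison with a plain
copy_user_highpage(); a machine-check-safe copy lets the kernel back out
instead.  A minimal sketch of the pattern (the helper name
ksm_copy_page_sketch is made up for illustration; copy_mc_user_highpage(),
memory_failure_queue() and alloc_page_vma() are the existing interfaces
the patch builds on):

	/* Sketch only: not the exact mm/ksm.c code. */
	static struct page *ksm_copy_page_sketch(struct page *page,
						 struct vm_area_struct *vma,
						 unsigned long addr)
	{
		struct page *new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
						       vma, addr);

		if (!new_page)
			return NULL;	/* caller treats this as OOM */

		/* Machine-check-safe copy: fails instead of crashing. */
		if (copy_mc_user_highpage(new_page, page, addr, vma)) {
			put_page(new_page);
			/* Report the poisoned source page asynchronously. */
			memory_failure_queue(page_to_pfn(page), 0);
			return ERR_PTR(-EHWPOISON);
		}
		return new_page;
	}
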
When an error is detected during the page copy, return VM_FAULT_HWPOISON
from do_swap_page(), and install a hwpoison entry in unuse_pte() during
swapoff, which avoids crashing the system.  Note that memory failure on a
KSM page is still skipped, but memory_failure_queue() is called anyway to
stay consistent with the general memory failure handling, and KSM page
recovery could be supported in the future.

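On the swapoff path, the poisoned case is mapped to a hwpoison swap entry
rather than a present PTE.  A condensed sketch of the unuse_pte() branch
added below (the real code shares this branch with the existing
!PageUptodate() swapin-error handling):

	if (hwpoisoned) {
		/* Keep the poisoned swapcache page out of reach. */
		new_pte = swp_entry_to_pte(make_hwpoison_entry(swapcache));
		page = swapcache;
	} else {
		new_pte = swp_entry_to_pte(make_swapin_error_entry());
	}
	set_pte_at(vma->vm_mm, addr, pte, new_pte);
	swap_free(entry);
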
[wangkefeng.wang@huawei.com: enhance unuse_pte(), fix issue found by lkp]
Link: https://lkml.kernel.org/r/20221213120523.141588-1-wangkefeng.wang@huawei.com
[wangkefeng.wang@huawei.com: update changelog, alter ksm_might_need_to_copy(), restore unlikely() in unuse_pte()]
Link: https://lkml.kernel.org/r/20230201074433.96641-1-wangkefeng.wang@huawei.com
Link: https://lkml.kernel.org/r/20221209072801.193221-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/ksm.c
mm/memory.c
mm/swapfile.c

index dd02780c387f02b3176d088fdcc1bb9d6db487e8..addf490da1464e4f3efc9a158d8926e6bbf1c777 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2629,8 +2629,11 @@ struct page *ksm_might_need_to_copy(struct page *page,
                new_page = NULL;
        }
        if (new_page) {
-               copy_user_highpage(new_page, page, address, vma);
-
+               if (copy_mc_user_highpage(new_page, page, address, vma)) {
+                       put_page(new_page);
+                       memory_failure_queue(page_to_pfn(page), 0);
+                       return ERR_PTR(-EHWPOISON);
+               }
                SetPageDirty(new_page);
                __SetPageUptodate(new_page);
                __SetPageLocked(new_page);
index 3e836fecd0354c8aa433d65ce59b8b5d62ea75a1..f526b9152bef28ddd29d661301876f857e409b5c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3840,6 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                if (unlikely(!page)) {
                        ret = VM_FAULT_OOM;
                        goto out_page;
+               } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) {
+                       ret = VM_FAULT_HWPOISON;
+                       goto out_page;
                }
                folio = page_folio(page);
 
index 4fa440e87cd693af2534ad219b86f49e6f802a2e..eb9b0bf1fcddb3a98c95db0f89809bb1559cf830 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1764,12 +1764,15 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        struct page *swapcache;
        spinlock_t *ptl;
        pte_t *pte, new_pte;
+       bool hwpoisoned = false;
        int ret = 1;
 
        swapcache = page;
        page = ksm_might_need_to_copy(page, vma, addr);
        if (unlikely(!page))
                return -ENOMEM;
+       else if (unlikely(PTR_ERR(page) == -EHWPOISON))
+               hwpoisoned = true;
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
@@ -1777,15 +1780,19 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
                goto out;
        }
 
-       if (unlikely(!PageUptodate(page))) {
-               pte_t pteval;
+       if (unlikely(hwpoisoned || !PageUptodate(page))) {
+               swp_entry_t swp_entry;
 
                dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-               pteval = swp_entry_to_pte(make_swapin_error_entry());
-               set_pte_at(vma->vm_mm, addr, pte, pteval);
-               swap_free(entry);
+               if (hwpoisoned) {
+                       swp_entry = make_hwpoison_entry(swapcache);
+                       page = swapcache;
+               } else {
+                       swp_entry = make_swapin_error_entry();
+               }
+               new_pte = swp_entry_to_pte(swp_entry);
                ret = 0;
-               goto out;
+               goto setpte;
        }
 
        /* See do_swap_page() */
@@ -1817,6 +1824,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
                new_pte = pte_mksoft_dirty(new_pte);
        if (pte_swp_uffd_wp(*pte))
                new_pte = pte_mkuffd_wp(new_pte);
+setpte:
        set_pte_at(vma->vm_mm, addr, pte, new_pte);
        swap_free(entry);
 out: