mm: change page type prior to adding page table entry
author Pasha Tatashin <pasha.tatashin@soleen.com>
Fri, 14 Jan 2022 22:06:29 +0000 (14:06 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 15 Jan 2022 14:30:28 +0000 (16:30 +0200)
Patch series "page table check", v3.

Ensure that some memory corruptions are prevented by checking, at the
time entries are inserted into user page tables, that there is no
illegal sharing.

We have recently found a problem [1] that has existed in the kernel
since 4.14.  It was caused by a broken page refcount and led to memory
leaking from one process into another.  The problem was detected by
accident, by studying a dump of one process and noticing that one page
contained memory that should not belong to that process.

Some other page->_refcount related problems were recently fixed as
well, [2] and [3], and they too could potentially lead to illegal
sharing.

In addition to hardening the refcount itself [4], this work is an
attempt to prevent this class of memory corruption issues.

It uses a simple state machine, independent of the regular MM logic, to
check for illegal sharing at the time pages are inserted into and
removed from page tables.

[1] https://lore.kernel.org/all/xr9335nxwc5y.fsf@gthelen2.svl.corp.google.com
[2] https://lore.kernel.org/all/1582661774-30925-2-git-send-email-akaher@vmware.com
[3] https://lore.kernel.org/all/20210622021423.154662-3-mike.kravetz@oracle.com
[4] https://lore.kernel.org/all/20211221150140.988298-1-pasha.tatashin@soleen.com
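
The following is a rough sketch of the idea only; the names, fields and
invariants are invented for illustration and are not the implementation
added later in this series.  The check can be modelled as a small piece
of per-page state that the rmap-style helpers write and that is
consulted whenever a page table entry is installed:

/*
 * Toy userspace model, not kernel code: per-page state written by the
 * rmap-style helpers and read when a page table entry is installed.
 */
#include <assert.h>

enum page_type { PAGE_UNKNOWN, PAGE_ANON, PAGE_FILE };

struct page_state {
	enum page_type type;	/* set by the rmap call */
	int anon_maps;		/* mappings of the page as anonymous */
	int file_maps;		/* mappings of the page as file-backed */
};

/* Model of the check done when a PTE pointing at @page is installed. */
static void check_pte_insert(struct page_state *page)
{
	/* The page type must be known before the PTE becomes visible. */
	assert(page->type != PAGE_UNKNOWN);

	if (page->type == PAGE_ANON) {
		/* An anonymous page must never also be file mapped. */
		assert(page->file_maps == 0);
		page->anon_maps++;
	} else {
		/* A file page must never also be anon mapped. */
		assert(page->anon_maps == 0);
		page->file_maps++;
	}
}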

This patch (of 4):

There are a few places where we first update the entry in the user page
table and only later change the struct page to indicate that this is an
anonymous or file page.

In most places, however, we first configure the page metadata and then
insert entries into the page table.  Page table check will use the
information from struct page to verify the type of entry being
inserted.

Change the order in all places to first update struct page and only
then update the page table.

This means that we first do calls that may change the type of page (anon
or file):

page_move_anon_rmap
page_add_anon_rmap
do_page_add_anon_rmap
page_add_new_anon_rmap
page_add_file_rmap
hugepage_add_anon_rmap
hugepage_add_new_anon_rmap

And after that do calls that add entries to the page table:

set_huge_pte_at
set_pte_at
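
To illustrate why the order matters, the toy model sketched earlier in
this cover letter can be exercised as follows (again an illustration
only; the calls merely stand in for the rmap and set_*pte_at helpers
listed above):

int main(void)
{
	struct page_state page = { .type = PAGE_UNKNOWN };

	/*
	 * Old order: installing the PTE first would run the check while
	 * the type is still PAGE_UNKNOWN and trip the first assertion:
	 *
	 *	check_pte_insert(&page);
	 *	page.type = PAGE_ANON;
	 */

	/* New order: classify the page first, then install the PTE. */
	page.type = PAGE_ANON;		/* stands in for the rmap call */
	check_pte_insert(&page);	/* stands in for set_pte_at() */

	return 0;
}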

Link: https://lkml.kernel.org/r/20211221154650.1047963-1-pasha.tatashin@soleen.com
Link: https://lkml.kernel.org/r/20211221154650.1047963-2-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Paul Turner <pjt@google.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Will Deacon <will@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/hugetlb.c
mm/memory.c
mm/migrate.c
mm/swapfile.c

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a1baa198519a2da44a0406c7b02c7c48d3118a39..61895cc01d0980a7e657bf32471ac5462b334076 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4684,8 +4684,8 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
                     struct page *new_page)
 {
        __SetPageUptodate(new_page);
-       set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
        hugepage_add_new_anon_rmap(new_page, vma, addr);
+       set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
        hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
        ClearHPageRestoreReserve(new_page);
        SetHPageMigratable(new_page);
@@ -5259,10 +5259,10 @@ retry_avoidcopy:
                /* Break COW */
                huge_ptep_clear_flush(vma, haddr, ptep);
                mmu_notifier_invalidate_range(mm, range.start, range.end);
-               set_huge_pte_at(mm, haddr, ptep,
-                               make_huge_pte(vma, new_page, 1));
                page_remove_rmap(old_page, true);
                hugepage_add_new_anon_rmap(new_page, vma, haddr);
+               set_huge_pte_at(mm, haddr, ptep,
+                               make_huge_pte(vma, new_page, 1));
                SetHPageMigratable(new_page);
                /* Make the old page be freed below */
                new_page = old_page;
diff --git a/mm/memory.c b/mm/memory.c
index bc80d4effac9c0d9918ae72880b53fafc9271700..5fea331b15607d67c8af2b82a0745800947eba4c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -720,8 +720,6 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
        else if (is_writable_device_exclusive_entry(entry))
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 
-       set_pte_at(vma->vm_mm, address, ptep, pte);
-
        /*
         * No need to take a page reference as one was already
         * created when the swap entry was made.
@@ -735,6 +733,8 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
                 */
                WARN_ON_ONCE(!PageAnon(page));
 
+       set_pte_at(vma->vm_mm, address, ptep, pte);
+
        if (vma->vm_flags & VM_LOCKED)
                mlock_vma_page(page);
 
@@ -3640,8 +3640,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                pte = pte_mkuffd_wp(pte);
                pte = pte_wrprotect(pte);
        }
-       set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
-       arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
        vmf->orig_pte = pte;
 
        /* ksm created a completely new copy */
@@ -3652,6 +3650,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
        }
 
+       set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+       arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
+
        swap_free(entry);
        if (mem_cgroup_swap_full(page) ||
            (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
diff --git a/mm/migrate.c b/mm/migrate.c
index cf25b00f03c8e90a2880ab5febadf523ccbc7266..6aa4b53267848fc2a25eb196db174268540dec93 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -236,20 +236,19 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 
                        pte = pte_mkhuge(pte);
                        pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
-                       set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                        if (PageAnon(new))
                                hugepage_add_anon_rmap(new, vma, pvmw.address);
                        else
                                page_dup_rmap(new, true);
+                       set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                } else
 #endif
                {
-                       set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
-
                        if (PageAnon(new))
                                page_add_anon_rmap(new, vma, pvmw.address, false);
                        else
                                page_add_file_rmap(new, false);
+                       set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                }
                if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
                        mlock_vma_page(new);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e59e08ef46e15fb49927d80caf64effc641c5f5e..e64207e2ef1dd46f1c85239c22cd4721b5901bf0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1917,14 +1917,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
        inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
        get_page(page);
-       set_pte_at(vma->vm_mm, addr, pte,
-                  pte_mkold(mk_pte(page, vma->vm_page_prot)));
        if (page == swapcache) {
                page_add_anon_rmap(page, vma, addr, false);
        } else { /* ksm created a completely new copy */
                page_add_new_anon_rmap(page, vma, addr, false);
                lru_cache_add_inactive_or_unevictable(page, vma);
        }
+       set_pte_at(vma->vm_mm, addr, pte,
+                  pte_mkold(mk_pte(page, vma->vm_page_prot)));
        swap_free(entry);
 out:
        pte_unmap_unlock(pte, ptl);