mm/memory.c: recheck page table entry with page table lock held
author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Fri, 26 Oct 2018 22:09:01 +0000 (15:09 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Oct 2018 23:26:35 +0000 (16:26 -0700)
We clear the pte temporarily during read/modify/write update of the pte.
If we take a page fault while the pte is cleared, the application can get
SIGBUS.  One such case is with remap_pfn_range without a backing
vm_ops->fault callback.  do_fault will return SIGBUS in that case.

cpu 0                                   cpu 1
mprotect()
ptep_modify_prot_start()/pte cleared.
.
.                                       page fault.
.
.
ptep_modify_prot_commit()

Fix this by taking page table lock and rechecking for pte_none.

[aneesh.kumar@linux.ibm.com: fix crash observed with syzkaller run]
Link: http://lkml.kernel.org/r/87va6bwlfg.fsf@linux.ibm.com
Link: http://lkml.kernel.org/r/20180926031858.9692-1-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ido Schimmel <idosch@idosch.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/memory.c

index 6abc74f41bc0b6d2f9b7210fd102c927178f9616..072139579d897021e83fe6b70344ad4acd62d664 100644 (file)
@@ -3496,10 +3496,36 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        vm_fault_t ret;
 
-       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
-       if (!vma->vm_ops->fault)
-               ret = VM_FAULT_SIGBUS;
-       else if (!(vmf->flags & FAULT_FLAG_WRITE))
+       /*
+        * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND
+        */
+       if (!vma->vm_ops->fault) {
+               /*
+                * If we find a migration pmd entry or a none pmd entry, which
+                * should never happen, return SIGBUS
+                */
+               if (unlikely(!pmd_present(*vmf->pmd)))
+                       ret = VM_FAULT_SIGBUS;
+               else {
+                       vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm,
+                                                      vmf->pmd,
+                                                      vmf->address,
+                                                      &vmf->ptl);
+                       /*
+                        * Make sure this is not a temporary clearing of pte
+                        * by holding ptl and checking again. A R/M/W update
+                        * of pte involves: take ptl, clearing the pte so that
+                        * we don't have concurrent modification by hardware
+                        * followed by an update.
+                        */
+                       if (unlikely(pte_none(*vmf->pte)))
+                               ret = VM_FAULT_SIGBUS;
+                       else
+                               ret = VM_FAULT_NOPAGE;
+
+                       pte_unmap_unlock(vmf->pte, vmf->ptl);
+               }
+       } else if (!(vmf->flags & FAULT_FLAG_WRITE))
                ret = do_read_fault(vmf);
        else if (!(vma->vm_flags & VM_SHARED))
                ret = do_cow_fault(vmf);