mm/memory.c: initialise mmu_notifier_range correctly
[linux-2.6-block.git] / mm / memory.c
index 2dd2f9ab57f4656c7dd0e06411a6416b8964922d..e11ca9dd823f20c60dd0c20ff7567e34a84a1dda 100644 (file)
@@ -400,10 +400,10 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
        }
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
        spinlock_t *ptl;
-       pgtable_t new = pte_alloc_one(mm, address);
+       pgtable_t new = pte_alloc_one(mm);
        if (!new)
                return -ENOMEM;
 
@@ -434,9 +434,9 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
        return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+int __pte_alloc_kernel(pmd_t *pmd)
 {
-       pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+       pte_t *new = pte_alloc_one_kernel(&init_mm);
        if (!new)
                return -ENOMEM;
 
@@ -2896,7 +2896,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
         *
         * Here we only have down_read(mmap_sem).
         */
-       if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
+       if (pte_alloc(vma->vm_mm, vmf->pmd))
                return VM_FAULT_OOM;
 
        /* See the comment in pte_alloc_one_map() */
@@ -2994,6 +2994,28 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        vm_fault_t ret;
 
+       /*
+        * Preallocate pte before we take page_lock because this might lead to
+        * deadlocks for memcg reclaim which waits for pages under writeback:
+        *                              lock_page(A)
+        *                              SetPageWriteback(A)
+        *                              unlock_page(A)
+        * lock_page(B)
+        *                              lock_page(B)
+        * pte_alloc_one
+        *   shrink_page_list
+        *     wait_on_page_writeback(A)
+        *                              SetPageWriteback(B)
+        *                              unlock_page(B)
+        *                              # flush A, B to clear the writeback
+        */
+       if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
+               vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
+               if (!vmf->prealloc_pte)
+                       return VM_FAULT_OOM;
+               smp_wmb(); /* See comment in __pte_alloc() */
+       }
+
        ret = vma->vm_ops->fault(vmf);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
                            VM_FAULT_DONE_COW)))
@@ -3043,7 +3065,7 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
                pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
                spin_unlock(vmf->ptl);
                vmf->prealloc_pte = NULL;
-       } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) {
+       } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
                return VM_FAULT_OOM;
        }
 map_pte:
@@ -3122,7 +3144,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
         * related to pte entry. Use the preallocated table for that.
         */
        if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
-               vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address);
+               vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;
                smp_wmb(); /* See comment in __pte_alloc() */
@@ -3360,8 +3382,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
                        start_pgoff + nr_pages - 1);
 
        if (pmd_none(*vmf->pmd)) {
-               vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
-                                                 vmf->address);
+               vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
                if (!vmf->prealloc_pte)
                        goto out;
                smp_wmb(); /* See comment in __pte_alloc() */
@@ -4078,8 +4099,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
                goto out;
 
        if (range) {
-               range->start = address & PAGE_MASK;
-               range->end = range->start + PAGE_SIZE;
+               mmu_notifier_range_init(range, mm, address & PAGE_MASK,
+                                    (address & PAGE_MASK) + PAGE_SIZE);
                mmu_notifier_invalidate_range_start(range);
        }
        ptep = pte_offset_map_lock(mm, pmd, address, ptlp);