Merge branch 'akpm' (patches from Andrew)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index deed97fba97976da9e79da8b45fb7e09eee9efa5..4e4ef8fa479d53b7ee7c4c8fcb86985acb790c8a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -852,11 +852,10 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
 }
 
 struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
-               pmd_t *pmd, int flags)
+               pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
 {
        unsigned long pfn = pmd_pfn(*pmd);
        struct mm_struct *mm = vma->vm_mm;
-       struct dev_pagemap *pgmap;
        struct page *page;
 
        assert_spin_locked(pmd_lockptr(mm, pmd));
@@ -886,12 +885,11 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
                return ERR_PTR(-EEXIST);
 
        pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
-       pgmap = get_dev_pagemap(pfn, NULL);
-       if (!pgmap)
+       *pgmap = get_dev_pagemap(pfn, *pgmap);
+       if (!*pgmap)
                return ERR_PTR(-EFAULT);
        page = pfn_to_page(pfn);
        get_page(page);
-       put_dev_pagemap(pgmap);
 
        return page;
 }
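
This hunk and the matching follow_devmap_pud() hunk below make the same change: instead of taking and dropping a dev_pagemap reference for every page, the caller now owns a cached cookie, and get_dev_pagemap(pfn, *pgmap) hands the cached pgmap straight back as long as the pfn still falls inside it. Below is a minimal sketch of the caller side this enables, under stated assumptions: the real caller is the get_user_pages() path in mm/gup.c, and walk_devmap_range() here is hypothetical.

#include <linux/mm.h>
#include <linux/huge_mm.h>
#include <linux/memremap.h>

/* Hypothetical caller; the pmd lock is assumed to be held, since
 * follow_devmap_pmd() asserts it via assert_spin_locked(). */
static long walk_devmap_range(struct vm_area_struct *vma, pmd_t *pmd,
			      unsigned long addr, unsigned long end,
			      int flags)
{
	struct dev_pagemap *pgmap = NULL;	/* cookie cached across pages */
	struct page *page;
	long nr = 0;

	for (; addr < end; addr += PAGE_SIZE) {
		/* Only the first page of each pagemap pays the lookup
		 * cost; later calls just get the cached pgmap back. */
		page = follow_devmap_pmd(vma, addr, pmd, flags, &pgmap);
		if (IS_ERR_OR_NULL(page))
			break;
		put_page(page);
		nr++;
	}
	if (pgmap)
		put_dev_pagemap(pgmap);		/* one put, not one per page */
	return nr;
}

The win is put_dev_pagemap() moving out of the per-page loop: the percpu-ref get/put pair per 4K page disappears from the gup path.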
@@ -1000,11 +998,10 @@ static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
 }
 
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
-               pud_t *pud, int flags)
+               pud_t *pud, int flags, struct dev_pagemap **pgmap)
 {
        unsigned long pfn = pud_pfn(*pud);
        struct mm_struct *mm = vma->vm_mm;
-       struct dev_pagemap *pgmap;
        struct page *page;
 
        assert_spin_locked(pud_lockptr(mm, pud));
@@ -1028,12 +1025,11 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
                return ERR_PTR(-EEXIST);
 
        pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
-       pgmap = get_dev_pagemap(pfn, NULL);
-       if (!pgmap)
+       *pgmap = get_dev_pagemap(pfn, *pgmap);
+       if (!*pgmap)
                return ERR_PTR(-EFAULT);
        page = pfn_to_page(pfn);
        get_page(page);
-       put_dev_pagemap(pgmap);
 
        return page;
 }
@@ -1562,8 +1558,20 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
         * We are not sure a pending tlb flush here is for a huge page
         * mapping or not. Hence use the tlb range variant
         */
-       if (mm_tlb_flush_pending(vma->vm_mm))
+       if (mm_tlb_flush_pending(vma->vm_mm)) {
                flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+               /*
+                * change_huge_pmd() released the pmd lock before
+                * invalidating the secondary MMUs sharing the primary
+                * MMU pagetables (with ->invalidate_range()). The
+                * mmu_notifier_invalidate_range_end() (which
+                * internally calls ->invalidate_range()) in
+                * change_pmd_range() will run after us, so we can't
+                * rely on it here and we need an explicit invalidate.
+                */
+               mmu_notifier_invalidate_range(vma->vm_mm, haddr,
+                                             haddr + HPAGE_PMD_SIZE);
+       }
 
        /*
         * Migrate the THP to the requested node, returns with page unlocked
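
The comment added in this hunk is about ordering against secondary MMUs: change_huge_pmd() drops the pmd lock before its mmu_notifier_invalidate_range_end() runs, so this NUMA-fault path cannot rely on that notifier and must invalidate explicitly. For context, here is a hedged sketch of the receiving end of mmu_notifier_invalidate_range(), i.e. a driver with a secondary MMU; all mydev_* names are hypothetical.

#include <linux/mmu_notifier.h>

struct mydev {
	struct mmu_notifier notifier;
	/* device state would live here */
};

/* Hypothetical device-specific shootdown of the device TLB. */
static void mydev_flush_device_tlb(struct mydev *dev,
				   unsigned long start, unsigned long end);

static void mydev_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct mydev *dev = container_of(mn, struct mydev, notifier);

	/* Drop device TLB entries covering [start, end). This callback
	 * can run with page-table spinlocks held, so it must not sleep. */
	mydev_flush_device_tlb(dev, start, end);
}

static const struct mmu_notifier_ops mydev_mmu_ops = {
	.invalidate_range = mydev_invalidate_range,
};

A driver wires this up by setting dev->notifier.ops = &mydev_mmu_ops and calling mmu_notifier_register(&dev->notifier, mm); after that, the explicit mmu_notifier_invalidate_range() call added above reaches mydev_invalidate_range() synchronously, with the same range coverage as the CPU-side flush_tlb_range().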
@@ -2369,6 +2377,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
                         (1L << PG_mlocked) |
                         (1L << PG_uptodate) |
                         (1L << PG_active) |
+                        (1L << PG_workingset) |
                         (1L << PG_locked) |
                         (1L << PG_unevictable) |
                         (1L << PG_dirty)));
@@ -2441,13 +2450,13 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        ClearPageCompound(head);
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
-               /* Additional pin to radix tree of swap cache */
+               /* Additional pin to swap cache */
                if (PageSwapCache(head))
                        page_ref_add(head, 2);
                else
                        page_ref_inc(head);
        } else {
-               /* Additional pin to radix tree */
+               /* Additional pin to page cache */
                page_ref_add(head, 2);
                xa_unlock(&head->mapping->i_pages);
        }
@@ -2559,7 +2568,7 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
 {
        int extra_pins;
 
-       /* Additional pins from radix tree */
+       /* Additional pins from page cache */
        if (PageAnon(page))
                extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
        else
@@ -2655,17 +2664,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);
 
        if (mapping) {
-               void **pslot;
+               XA_STATE(xas, &mapping->i_pages, page_index(head));
 
-               xa_lock(&mapping->i_pages);
-               pslot = radix_tree_lookup_slot(&mapping->i_pages,
-                               page_index(head));
                /*
-                * Check if the head page is present in radix tree.
+                * Check if the head page is present in page cache.
                 * We assume all tail are present too, if head is there.
                 */
-               if (radix_tree_deref_slot_protected(pslot,
-                                       &mapping->i_pages.xa_lock) != head)
+               xa_lock(&mapping->i_pages);
+               if (xas_load(&xas) != head)
                        goto fail;
        }