Merge tag 'rpmsg-v4.20' of git://github.com/andersson/remoteproc

diff --git a/mm/migrate.c b/mm/migrate.c
index d6a2e89b086a43d77f155f6b525fc15326d9c035..f7e4bfdc13b780137d08fa522b070e7192056f24 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
                if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
                        mlock_vma_page(new);
 
+               if (PageTransHuge(page) && PageMlocked(page))
+                       clear_page_mlock(page);
+
                /* No need to invalidate - it was non-present before */
                update_mmu_cache(vma, pvmw.address, pvmw.pte);
        }
@@ -323,7 +326,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
        page = migration_entry_to_page(entry);
 
        /*
-        * Once radix-tree replacement of page migration started, page_count
+        * Once page cache replacement of page migration started, page_count
         * *must* be zero. And, we don't want to call wait_on_page_locked()
         * against a page without get_page().
         * So, we use get_page_unless_zero(), here. Even failed, page fault
@@ -438,10 +441,10 @@ int migrate_page_move_mapping(struct address_space *mapping,
                struct buffer_head *head, enum migrate_mode mode,
                int extra_count)
 {
+       XA_STATE(xas, &mapping->i_pages, page_index(page));
        struct zone *oldzone, *newzone;
        int dirty;
        int expected_count = 1 + extra_count;
-       void **pslot;
 
        /*
         * Device public or private pages have an extra refcount as they are
@@ -467,21 +470,16 @@ int migrate_page_move_mapping(struct address_space *mapping,
        oldzone = page_zone(page);
        newzone = page_zone(newpage);
 
-       xa_lock_irq(&mapping->i_pages);
-
-       pslot = radix_tree_lookup_slot(&mapping->i_pages,
-                                       page_index(page));
+       xas_lock_irq(&xas);
 
        expected_count += hpage_nr_pages(page) + page_has_private(page);
-       if (page_count(page) != expected_count ||
-               radix_tree_deref_slot_protected(pslot,
-                                       &mapping->i_pages.xa_lock) != page) {
-               xa_unlock_irq(&mapping->i_pages);
+       if (page_count(page) != expected_count || xas_load(&xas) != page) {
+               xas_unlock_irq(&xas);
                return -EAGAIN;
        }
 
        if (!page_ref_freeze(page, expected_count)) {
-               xa_unlock_irq(&mapping->i_pages);
+               xas_unlock_irq(&xas);
                return -EAGAIN;
        }
 
@@ -495,7 +493,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
        if (mode == MIGRATE_ASYNC && head &&
                        !buffer_migrate_lock_buffers(head, mode)) {
                page_ref_unfreeze(page, expected_count);
-               xa_unlock_irq(&mapping->i_pages);
+               xas_unlock_irq(&xas);
                return -EAGAIN;
        }
 
@@ -523,16 +521,13 @@ int migrate_page_move_mapping(struct address_space *mapping,
                SetPageDirty(newpage);
        }
 
-       radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
+       xas_store(&xas, newpage);
        if (PageTransHuge(page)) {
                int i;
-               int index = page_index(page);
 
                for (i = 1; i < HPAGE_PMD_NR; i++) {
-                       pslot = radix_tree_lookup_slot(&mapping->i_pages,
-                                                      index + i);
-                       radix_tree_replace_slot(&mapping->i_pages, pslot,
-                                               newpage + i);
+                       xas_next(&xas);
+                       xas_store(&xas, newpage + i);
                }
        }
 
@@ -543,7 +538,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
         */
        page_ref_unfreeze(page, expected_count - hpage_nr_pages(page));
 
-       xa_unlock(&mapping->i_pages);
+       xas_unlock(&xas);
        /* Leave irq disabled to prevent preemption while updating stats */
 
        /*
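
The hunks above are the heart of the XArray conversion in migrate_page_move_mapping(): the per-slot radix-tree lookups and replacements collapse into one XA_STATE cursor that is locked, checked and written through. Below is a minimal sketch of that cursor idiom, assuming only <linux/xarray.h> and <linux/pagemap.h>; the helper name replace_in_cache() and its stripped-down error handling are illustrative, not part of the patch, which additionally freezes the page refcount and handles buffer heads, huge pages, dirty state and NUMA statistics.

#include <linux/xarray.h>
#include <linux/pagemap.h>

/* Illustrative sketch of the xas_* cursor idiom adopted above. */
static int replace_in_cache(struct address_space *mapping,
			    struct page *old, struct page *new)
{
	/* Cursor pinned to old's slot in the mapping's page cache. */
	XA_STATE(xas, &mapping->i_pages, page_index(old));

	xas_lock_irq(&xas);		/* was xa_lock_irq(&mapping->i_pages) */
	if (xas_load(&xas) != old) {	/* was radix_tree_lookup_slot() + deref */
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}
	xas_store(&xas, new);		/* was radix_tree_replace_slot() */
	xas_unlock_irq(&xas);
	return 0;
}

For file-backed transparent huge pages the real function then walks the same cursor across the remaining HPAGE_PMD_NR - 1 slots with xas_next() and stores the corresponding subpages, instead of re-looking up each slot as the old radix-tree loop did.
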
@@ -583,22 +578,18 @@ EXPORT_SYMBOL(migrate_page_move_mapping);
 int migrate_huge_page_move_mapping(struct address_space *mapping,
                                   struct page *newpage, struct page *page)
 {
+       XA_STATE(xas, &mapping->i_pages, page_index(page));
        int expected_count;
-       void **pslot;
-
-       xa_lock_irq(&mapping->i_pages);
-
-       pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
 
+       xas_lock_irq(&xas);
        expected_count = 2 + page_has_private(page);
-       if (page_count(page) != expected_count ||
-               radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
-               xa_unlock_irq(&mapping->i_pages);
+       if (page_count(page) != expected_count || xas_load(&xas) != page) {
+               xas_unlock_irq(&xas);
                return -EAGAIN;
        }
 
        if (!page_ref_freeze(page, expected_count)) {
-               xa_unlock_irq(&mapping->i_pages);
+               xas_unlock_irq(&xas);
                return -EAGAIN;
        }
 
@@ -607,11 +598,11 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
        get_page(newpage);
 
-       radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
+       xas_store(&xas, newpage);
 
        page_ref_unfreeze(page, expected_count - 1);
 
-       xa_unlock_irq(&mapping->i_pages);
+       xas_unlock_irq(&xas);
 
        return MIGRATEPAGE_SUCCESS;
 }
@@ -682,6 +673,8 @@ void migrate_page_states(struct page *newpage, struct page *page)
                SetPageActive(newpage);
        } else if (TestClearPageUnevictable(page))
                SetPageUnevictable(newpage);
+       if (PageWorkingset(page))
+               SetPageWorkingset(newpage);
        if (PageChecked(page))
                SetPageChecked(newpage);
        if (PageMappedToDisk(page))
@@ -1411,7 +1404,7 @@ retry:
                                 * we encounter them after the rest of the list
                                 * is processed.
                                 */
-                               if (PageTransHuge(page)) {
+                               if (PageTransHuge(page) && !PageHuge(page)) {
                                        lock_page(page);
                                        rc = split_huge_page_to_list(page, from);
                                        unlock_page(page);
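
The tightened check in the hunk above matters because PageTransHuge() only tests for a compound head page, which is also true of hugetlbfs pages, while split_huge_page_to_list() can only split transparent huge pages; without the !PageHuge() test a hugetlbfs page reaching this retry path would be handed to the THP split code. A hypothetical helper spelling out the predicate (not part of the patch):

/* Illustrative only: the condition added above, written as a named helper. */
static inline bool thp_migration_can_split(struct page *page)
{
	/*
	 * PageTransHuge() is true for any compound head page, hugetlb
	 * included, so hugetlbfs pages must be excluded before splitting.
	 */
	return PageTransHuge(page) && !PageHuge(page);
}
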
@@ -1855,46 +1848,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
        return newpage;
 }
 
-/*
- * page migration rate limiting control.
- * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
- * window of time. Default here says do not migrate more than 1280M per second.
- */
-static unsigned int migrate_interval_millisecs __read_mostly = 100;
-static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
-
-/* Returns true if the node is migrate rate-limited after the update */
-static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
-                                       unsigned long nr_pages)
-{
-       /*
-        * Rate-limit the amount of data that is being migrated to a node.
-        * Optimal placement is no good if the memory bus is saturated and
-        * all the time is being spent migrating!
-        */
-       if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
-               spin_lock(&pgdat->numabalancing_migrate_lock);
-               pgdat->numabalancing_migrate_nr_pages = 0;
-               pgdat->numabalancing_migrate_next_window = jiffies +
-                       msecs_to_jiffies(migrate_interval_millisecs);
-               spin_unlock(&pgdat->numabalancing_migrate_lock);
-       }
-       if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
-               trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
-                                                               nr_pages);
-               return true;
-       }
-
-       /*
-        * This is an unlocked non-atomic update so errors are possible.
-        * The consequences are failing to migrate when we potentiall should
-        * have which is not severe enough to warrant locking. If it is ever
-        * a problem, it can be converted to a per-cpu counter.
-        */
-       pgdat->numabalancing_migrate_nr_pages += nr_pages;
-       return false;
-}
-
 static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
        int page_lru;
@@ -1967,14 +1920,6 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
        if (page_is_file_cache(page) && PageDirty(page))
                goto out;
 
-       /*
-        * Rate-limit the amount of data that is being migrated to a node.
-        * Optimal placement is no good if the memory bus is saturated and
-        * all the time is being spent migrating!
-        */
-       if (numamigrate_update_ratelimit(pgdat, 1))
-               goto out;
-
        isolated = numamigrate_isolate_page(pgdat, page);
        if (!isolated)
                goto out;
@@ -2018,16 +1963,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        int isolated = 0;
        struct page *new_page = NULL;
        int page_lru = page_is_file_cache(page);
-       unsigned long mmun_start = address & HPAGE_PMD_MASK;
-       unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
-
-       /*
-        * Rate-limit the amount of data that is being migrated to a node.
-        * Optimal placement is no good if the memory bus is saturated and
-        * all the time is being spent migrating!
-        */
-       if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
-               goto out_dropref;
+       unsigned long start = address & HPAGE_PMD_MASK;
 
        new_page = alloc_pages_node(node,
                (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
@@ -2050,15 +1986,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        /* anon mapping, we can simply copy page->mapping to the new page: */
        new_page->mapping = page->mapping;
        new_page->index = page->index;
+       /* flush the cache before copying using the kernel virtual address */
+       flush_cache_range(vma, start, start + HPAGE_PMD_SIZE);
        migrate_page_copy(new_page, page);
        WARN_ON(PageLRU(new_page));
 
        /* Recheck the target PMD */
-       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
                spin_unlock(ptl);
-               mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
                /* Reverse changes made by migrate_page_copy() */
                if (TestClearPageActive(new_page))
@@ -2082,16 +2018,26 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
        /*
-        * Clear the old entry under pagetable lock and establish the new PTE.
-        * Any parallel GUP will either observe the old page blocking on the
-        * page lock, block on the page table lock or observe the new page.
-        * The SetPageUptodate on the new page and page_add_new_anon_rmap
-        * guarantee the copy is visible before the pagetable update.
+        * Overwrite the old entry under pagetable lock and establish
+        * the new PTE. Any parallel GUP will either observe the old
+        * page blocking on the page lock, block on the page table
+        * lock or observe the new page. The SetPageUptodate on the
+        * new page and page_add_new_anon_rmap guarantee the copy is
+        * visible before the pagetable update.
+        */
+       page_add_anon_rmap(new_page, vma, start, true);
+       /*
+        * At this point the pmd is numa/protnone (i.e. non present) and the TLB
+        * has already been flushed globally.  So no TLB can be currently
+        * caching this non present pmd mapping.  There's no need to clear the
+        * pmd before doing set_pmd_at(), nor to flush the TLB after
+        * set_pmd_at().  Clearing the pmd here would introduce a race
+        * condition against MADV_DONTNEED, because MADV_DONTNEED only holds the
+        * mmap_sem for reading.  If the pmd is set to NULL at any given time,
+        * MADV_DONTNEED won't wait on the pmd lock and it'll skip clearing this
+        * pmd.
         */
-       flush_cache_range(vma, mmun_start, mmun_end);
-       page_add_anon_rmap(new_page, vma, mmun_start, true);
-       pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
-       set_pmd_at(mm, mmun_start, pmd, entry);
+       set_pmd_at(mm, start, pmd, entry);
        update_mmu_cache_pmd(vma, address, &entry);
 
        page_ref_unfreeze(page, 2);
@@ -2100,11 +2046,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
 
        spin_unlock(ptl);
-       /*
-        * No need to double call mmu_notifier->invalidate_range() callback as
-        * the above pmdp_huge_clear_flush_notify() did already call it.
-        */
-       mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end);
 
        /* Take an "isolate" reference and put new page on the LRU. */
        get_page(new_page);
@@ -2125,11 +2066,10 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 out_fail:
        count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-out_dropref:
        ptl = pmd_lock(mm, pmd);
        if (pmd_same(*pmd, entry)) {
                entry = pmd_modify(entry, vma->vm_page_prot);
-               set_pmd_at(mm, mmun_start, pmd, entry);
+               set_pmd_at(mm, start, pmd, entry);
                update_mmu_cache_pmd(vma, address, &entry);
        }
        spin_unlock(ptl);