diff --git a/mm/vmscan.c b/mm/vmscan.c
index fd9de504e516cf176a94eccb18cf5e0a4e9e0f31..7acd0afdfc2a707843c21fa59e6a91a2f22f2a43 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -346,7 +346,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
        int zid;
 
        if (!mem_cgroup_disabled())
-               lru_size = mem_cgroup_get_lru_size(lruvec, lru);
+               lru_size = lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
        else
                lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
 
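
The hunk above reads the LRU size from the generic lruvec statistics, indexing them with NR_LRU_BASE + lru, instead of going through a dedicated memcg helper. Below is a minimal userspace sketch of that indexing idea only; the demo_* enums, the flat counter array, and demo_state_local() are invented for illustration and are not the kernel's data structures or API.

#include <stdio.h>

enum demo_lru_list {
        DEMO_INACTIVE_ANON,
        DEMO_ACTIVE_ANON,
        DEMO_INACTIVE_FILE,
        DEMO_ACTIVE_FILE,
        DEMO_NR_LRU
};

/* One statistics slot per item; LRU sizes live at DEMO_NR_LRU_BASE + lru. */
enum demo_item {
        DEMO_NR_LRU_BASE,
        DEMO_NR_ITEMS = DEMO_NR_LRU_BASE + DEMO_NR_LRU
};

struct demo_lruvec {
        long state[DEMO_NR_ITEMS];
};

/* Read this lruvec's own counter, without any hierarchy aggregation. */
static long demo_state_local(const struct demo_lruvec *lruvec, int item)
{
        return lruvec->state[item];
}

int main(void)
{
        struct demo_lruvec lruvec = {
                .state = { [DEMO_NR_LRU_BASE + DEMO_ACTIVE_FILE] = 128 },
        };

        printf("active file pages: %ld\n",
               demo_state_local(&lruvec, DEMO_NR_LRU_BASE + DEMO_ACTIVE_FILE));
        return 0;
}
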
@@ -1107,6 +1107,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
        LIST_HEAD(ret_pages);
        LIST_HEAD(free_pages);
        unsigned nr_reclaimed = 0;
+       unsigned pgactivate = 0;
 
        memset(stat, 0, sizeof(*stat));
        cond_resched();
@@ -1466,8 +1467,10 @@ activate_locked:
                        try_to_free_swap(page);
                VM_BUG_ON_PAGE(PageActive(page), page);
                if (!PageMlocked(page)) {
+                       int type = page_is_file_cache(page);
                        SetPageActive(page);
-                       stat->nr_activate++;
+                       pgactivate++;
+                       stat->nr_activate[type] += hpage_nr_pages(page);
                        count_memcg_page_event(page, PGACTIVATE);
                }
 keep_locked:
@@ -1482,7 +1485,7 @@ keep:
        free_unref_page_list(&free_pages);
 
        list_splice(&ret_pages, page_list);
-       count_vm_events(PGACTIVATE, stat->nr_activate);
+       count_vm_events(PGACTIVATE, pgactivate);
 
        return nr_reclaimed;
 }
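
The hunks above split stat->nr_activate into a two-slot array indexed by page_is_file_cache() and weighted by hpage_nr_pages(), while a separate pgactivate counter keeps the plain PGACTIVATE event count. A small sketch of that split accounting follows; the demo_* types stand in for struct page and struct reclaim_stat and are purely illustrative.

#include <stdio.h>

struct demo_page {
        int is_file_cache;      /* 0 = anon, 1 = file-backed */
        int nr_pages;           /* 1, or the number of base pages in a huge page */
};

struct demo_stat {
        unsigned long nr_activate[2];   /* [0] = anon pages, [1] = file pages */
};

static void demo_activate(struct demo_stat *stat, unsigned int *pgactivate,
                          const struct demo_page *page)
{
        int type = page->is_file_cache;

        (*pgactivate)++;                                /* one PGACTIVATE-style event */
        stat->nr_activate[type] += page->nr_pages;      /* pages, split by type */
}

int main(void)
{
        struct demo_page pages[] = { { 0, 1 }, { 1, 512 }, { 1, 1 } };
        struct demo_stat stat = { { 0, 0 } };
        unsigned int pgactivate = 0;

        for (unsigned int i = 0; i < sizeof(pages) / sizeof(pages[0]); i++)
                demo_activate(&stat, &pgactivate, &pages[i]);

        printf("events=%u anon=%lu file=%lu\n",
               pgactivate, stat.nr_activate[0], stat.nr_activate[1]);
        return 0;
}
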
@@ -1804,40 +1807,54 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
        return isolated > inactive;
 }
 
-static noinline_for_stack void
-putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
+/*
+ * This moves pages from @list to the corresponding LRU list.
+ *
+ * We move them the other way if the page is referenced by one or more
+ * processes, from rmap.
+ *
+ * If the pages are mostly unmapped, the processing is fast and it is
+ * appropriate to hold pgdat->lru_lock across the whole operation.  But if
+ * the pages are mapped, the processing is slow (page_referenced()) so we
+ * should drop pgdat->lru_lock around each page.  It's impossible to balance
+ * this, so instead we remove the pages from the LRU while processing them.
+ * It is safe to rely on PG_active against the non-LRU pages in here because
+ * nobody will play with that bit on a non-LRU page.
+ *
+ * The downside is that we have to touch page->_refcount against each page.
+ * But we had to alter page->flags anyway.
+ *
+ * Returns the number of pages moved to the given lruvec.
+ */
+
+static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
+                                                    struct list_head *list)
 {
-       struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
        struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+       int nr_pages, nr_moved = 0;
        LIST_HEAD(pages_to_free);
+       struct page *page;
+       enum lru_list lru;
 
-       /*
-        * Put back any unfreeable pages.
-        */
-       while (!list_empty(page_list)) {
-               struct page *page = lru_to_page(page_list);
-               int lru;
-
+       while (!list_empty(list)) {
+               page = lru_to_page(list);
                VM_BUG_ON_PAGE(PageLRU(page), page);
-               list_del(&page->lru);
                if (unlikely(!page_evictable(page))) {
+                       list_del(&page->lru);
                        spin_unlock_irq(&pgdat->lru_lock);
                        putback_lru_page(page);
                        spin_lock_irq(&pgdat->lru_lock);
                        continue;
                }
-
                lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
                SetPageLRU(page);
                lru = page_lru(page);
-               add_page_to_lru_list(page, lruvec, lru);
 
-               if (is_active_lru(lru)) {
-                       int file = is_file_lru(lru);
-                       int numpages = hpage_nr_pages(page);
-                       reclaim_stat->recent_rotated[file] += numpages;
-               }
+               nr_pages = hpage_nr_pages(page);
+               update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
+               list_move(&page->lru, &lruvec->lists[lru]);
+
                if (put_page_testzero(page)) {
                        __ClearPageLRU(page);
                        __ClearPageActive(page);
@@ -1850,13 +1867,17 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
                                spin_lock_irq(&pgdat->lru_lock);
                        } else
                                list_add(&page->lru, &pages_to_free);
+               } else {
+                       nr_moved += nr_pages;
                }
        }
 
        /*
         * To save our caller's stack, now use input list for pages to free.
         */
-       list_splice(&pages_to_free, page_list);
+       list_splice(&pages_to_free, list);
+
+       return nr_moved;
 }
 
 /*
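
The new move_pages_to_lru() above generalizes the old putback path: it walks the isolated list, puts each page back on its LRU list, and returns how many pages actually stayed there (pages whose last reference is dropped are diverted to a free list instead). The sketch below shows only that walk-and-count skeleton with a toy doubly linked list; the demo_* names are made up and the refcount/free branch is omitted.

#include <stdio.h>

/* Toy stand-in for the kernel's struct list_head. */
struct demo_node {
        struct demo_node *prev, *next;
};

static void demo_list_init(struct demo_node *node)
{
        node->prev = node->next = node;
}

/* Unlink @node and re-link it just before @head, i.e. at the list tail. */
static void demo_list_move_tail(struct demo_node *node, struct demo_node *head)
{
        node->prev->next = node->next;
        node->next->prev = node->prev;
        node->prev = head->prev;
        node->next = head;
        head->prev->next = node;
        head->prev = node;
}

/* Put every isolated entry back on @lru and report how many were moved. */
static unsigned int demo_move_to_lru(struct demo_node *lru,
                                     struct demo_node *isolated)
{
        unsigned int nr_moved = 0;

        while (isolated->next != isolated) {
                demo_list_move_tail(isolated->next, lru);
                nr_moved++;
        }
        return nr_moved;
}

int main(void)
{
        struct demo_node lru, isolated, pages[3];

        demo_list_init(&lru);
        demo_list_init(&isolated);
        for (int i = 0; i < 3; i++) {
                demo_list_init(&pages[i]);
                demo_list_move_tail(&pages[i], &isolated);
        }

        printf("moved %u pages back to the LRU\n",
               demo_move_to_lru(&lru, &isolated));
        return 0;
}
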
@@ -1886,6 +1907,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        unsigned long nr_taken;
        struct reclaim_stat stat;
        int file = is_file_lru(lru);
+       enum vm_event_item item;
        struct pglist_data *pgdat = lruvec_pgdat(lruvec);
        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
        bool stalled = false;
@@ -1913,17 +1935,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
        reclaim_stat->recent_scanned[file] += nr_taken;
 
-       if (current_is_kswapd()) {
-               if (global_reclaim(sc))
-                       __count_vm_events(PGSCAN_KSWAPD, nr_scanned);
-               count_memcg_events(lruvec_memcg(lruvec), PGSCAN_KSWAPD,
-                                  nr_scanned);
-       } else {
-               if (global_reclaim(sc))
-                       __count_vm_events(PGSCAN_DIRECT, nr_scanned);
-               count_memcg_events(lruvec_memcg(lruvec), PGSCAN_DIRECT,
-                                  nr_scanned);
-       }
+       item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
+       if (global_reclaim(sc))
+               __count_vm_events(item, nr_scanned);
+       __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
        spin_unlock_irq(&pgdat->lru_lock);
 
        if (nr_taken == 0)
@@ -1934,19 +1949,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
        spin_lock_irq(&pgdat->lru_lock);
 
-       if (current_is_kswapd()) {
-               if (global_reclaim(sc))
-                       __count_vm_events(PGSTEAL_KSWAPD, nr_reclaimed);
-               count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_KSWAPD,
-                                  nr_reclaimed);
-       } else {
-               if (global_reclaim(sc))
-                       __count_vm_events(PGSTEAL_DIRECT, nr_reclaimed);
-               count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_DIRECT,
-                                  nr_reclaimed);
-       }
+       item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+       if (global_reclaim(sc))
+               __count_vm_events(item, nr_reclaimed);
+       __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
+       reclaim_stat->recent_rotated[0] += stat.nr_activate[0];
+       reclaim_stat->recent_rotated[1] += stat.nr_activate[1];
 
-       putback_inactive_pages(lruvec, &page_list);
+       move_pages_to_lru(lruvec, &page_list);
 
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
 
@@ -1983,73 +1993,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        return nr_reclaimed;
 }
 
-/*
- * This moves pages from the active list to the inactive list.
- *
- * We move them the other way if the page is referenced by one or more
- * processes, from rmap.
- *
- * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold pgdat->lru_lock across the whole operation.  But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop pgdat->lru_lock around each page.  It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
- * It is safe to rely on PG_active against the non-LRU pages in here because
- * nobody will play with that bit on a non-LRU page.
- *
- * The downside is that we have to touch page->_refcount against each page.
- * But we had to alter page->flags anyway.
- *
- * Returns the number of pages moved to the given lru.
- */
-
-static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
-                                    struct list_head *list,
-                                    struct list_head *pages_to_free,
-                                    enum lru_list lru)
-{
-       struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-       struct page *page;
-       int nr_pages;
-       int nr_moved = 0;
-
-       while (!list_empty(list)) {
-               page = lru_to_page(list);
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
-
-               VM_BUG_ON_PAGE(PageLRU(page), page);
-               SetPageLRU(page);
-
-               nr_pages = hpage_nr_pages(page);
-               update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
-               list_move(&page->lru, &lruvec->lists[lru]);
-
-               if (put_page_testzero(page)) {
-                       __ClearPageLRU(page);
-                       __ClearPageActive(page);
-                       del_page_from_lru_list(page, lruvec, lru);
-
-                       if (unlikely(PageCompound(page))) {
-                               spin_unlock_irq(&pgdat->lru_lock);
-                               mem_cgroup_uncharge(page);
-                               (*get_compound_page_dtor(page))(page);
-                               spin_lock_irq(&pgdat->lru_lock);
-                       } else
-                               list_add(&page->lru, pages_to_free);
-               } else {
-                       nr_moved += nr_pages;
-               }
-       }
-
-       if (!is_active_lru(lru)) {
-               __count_vm_events(PGDEACTIVATE, nr_moved);
-               count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
-                                  nr_moved);
-       }
-
-       return nr_moved;
-}
-
 static void shrink_active_list(unsigned long nr_to_scan,
                               struct lruvec *lruvec,
                               struct scan_control *sc,
@@ -2079,7 +2022,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
        reclaim_stat->recent_scanned[file] += nr_taken;
 
        __count_vm_events(PGREFILL, nr_scanned);
-       count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
+       __count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
 
        spin_unlock_irq(&pgdat->lru_lock);
 
@@ -2136,13 +2079,19 @@ static void shrink_active_list(unsigned long nr_to_scan,
         */
        reclaim_stat->recent_rotated[file] += nr_rotated;
 
-       nr_activate = move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
-       nr_deactivate = move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
+       nr_activate = move_pages_to_lru(lruvec, &l_active);
+       nr_deactivate = move_pages_to_lru(lruvec, &l_inactive);
+       /* Keep all free pages in l_active list */
+       list_splice(&l_inactive, &l_active);
+
+       __count_vm_events(PGDEACTIVATE, nr_deactivate);
+       __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
+
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
        spin_unlock_irq(&pgdat->lru_lock);
 
-       mem_cgroup_uncharge_list(&l_hold);
-       free_unref_page_list(&l_hold);
+       mem_cgroup_uncharge_list(&l_active);
+       free_unref_page_list(&l_active);
        trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
                        nr_deactivate, nr_rotated, sc->priority, file);
 }
@@ -2201,7 +2150,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
         * is being established. Disable active list protection to get
         * rid of the stale workingset quickly.
         */
-       refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
+       refaults = lruvec_page_state_local(lruvec, WORKINGSET_ACTIVATE);
        if (file && actual_reclaim && lruvec->refaults != refaults) {
                inactive_ratio = 0;
        } else {
@@ -2963,7 +2912,7 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
                struct lruvec *lruvec;
 
                lruvec = mem_cgroup_lruvec(pgdat, memcg);
-               refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
+               refaults = lruvec_page_state_local(lruvec, WORKINGSET_ACTIVATE);
                lruvec->refaults = refaults;
        } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
 }
@@ -3212,10 +3161,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
        if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask))
                return 1;
 
-       trace_mm_vmscan_direct_reclaim_begin(order,
-                               sc.may_writepage,
-                               sc.gfp_mask,
-                               sc.reclaim_idx);
+       trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask);
 
        nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
@@ -3246,9 +3192,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
        trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
-                                                     sc.may_writepage,
-                                                     sc.gfp_mask,
-                                                     sc.reclaim_idx);
+                                                     sc.gfp_mask);
 
        /*
         * NOTE: Although we can get the priority field, using it
@@ -3297,10 +3241,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 
        zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
 
-       trace_mm_vmscan_memcg_reclaim_begin(0,
-                                           sc.may_writepage,
-                                           sc.gfp_mask,
-                                           sc.reclaim_idx);
+       trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
 
        psi_memstall_enter(&pflags);
        noreclaim_flag = memalloc_noreclaim_save();
@@ -4149,6 +4090,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
                .reclaim_idx = gfp_zone(gfp_mask),
        };
 
+       trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
+                                          sc.gfp_mask);
+
        cond_resched();
        fs_reclaim_acquire(sc.gfp_mask);
        /*
@@ -4175,6 +4119,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
        current->flags &= ~PF_SWAPWRITE;
        memalloc_noreclaim_restore(noreclaim_flag);
        fs_reclaim_release(sc.gfp_mask);
+
+       trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
+
        return sc.nr_reclaimed >= nr_pages;
 }
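
The last two hunks bracket __node_reclaim() with begin/end tracepoints so a whole node-reclaim attempt is visible from tracing. The sketch below mimics that bracketing with plain printf hooks; the trace_demo_* functions and the fake reclaim body are stand-ins, not the kernel tracepoint API.

#include <stdio.h>

/* printf stand-ins for the new begin/end tracepoints. */
static void trace_demo_node_reclaim_begin(int node_id, int order,
                                          unsigned int gfp_mask)
{
        printf("node_reclaim begin: node=%d order=%d gfp=%#x\n",
               node_id, order, gfp_mask);
}

static void trace_demo_node_reclaim_end(unsigned long nr_reclaimed)
{
        printf("node_reclaim end: nr_reclaimed=%lu\n", nr_reclaimed);
}

static int demo_node_reclaim(int node_id, int order, unsigned int gfp_mask,
                             unsigned long nr_pages)
{
        unsigned long nr_reclaimed;

        trace_demo_node_reclaim_begin(node_id, order, gfp_mask);

        nr_reclaimed = nr_pages;        /* pretend the whole request was reclaimed */

        trace_demo_node_reclaim_end(nr_reclaimed);
        return nr_reclaimed >= nr_pages;
}

int main(void)
{
        printf("reclaim %s\n",
               demo_node_reclaim(0, 0, 0x0u, 32) ? "succeeded" : "failed");
        return 0;
}
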