Merge tag 'nios2-v5.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/lftan...
[linux-2.6-block.git] / mm / vmscan.c
index c77d1e3761a7f191f5e274b281a9ca132b64fc6f..e5d52d6a24aff1c7fccd292bb8a2455e1eec1d52 100644 (file)
@@ -171,11 +171,22 @@ int vm_swappiness = 60;
  */
 unsigned long vm_total_pages;
 
+static void set_task_reclaim_state(struct task_struct *task,
+                                  struct reclaim_state *rs)
+{
+       /* Check for an overwrite */
+       WARN_ON_ONCE(rs && task->reclaim_state);
+
+       /* Check for the nulling of an already-nulled member */
+       WARN_ON_ONCE(!rs && !task->reclaim_state);
+
+       task->reclaim_state = rs;
+}
+
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
-#ifdef CONFIG_MEMCG_KMEM
-
+#ifdef CONFIG_MEMCG
 /*
  * We allow subsystems to populate their shrinker-related
  * LRU lists before register_shrinker_prepared() is called
@@ -227,30 +238,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
        idr_remove(&shrinker_idr, id);
        up_write(&shrinker_rwsem);
 }
-#else /* CONFIG_MEMCG_KMEM */
-static int prealloc_memcg_shrinker(struct shrinker *shrinker)
-{
-       return 0;
-}
-
-static void unregister_memcg_shrinker(struct shrinker *shrinker)
-{
-}
-#endif /* CONFIG_MEMCG_KMEM */
 
-static void set_task_reclaim_state(struct task_struct *task,
-                                  struct reclaim_state *rs)
-{
-       /* Check for an overwrite */
-       WARN_ON_ONCE(rs && task->reclaim_state);
-
-       /* Check for the nulling of an already-nulled member */
-       WARN_ON_ONCE(!rs && !task->reclaim_state);
-
-       task->reclaim_state = rs;
-}
-
-#ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
        return !sc->target_mem_cgroup;
@@ -305,6 +293,15 @@ static bool memcg_congested(pg_data_t *pgdat,
 
 }
 #else
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+       return 0;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+}
+
 static bool global_reclaim(struct scan_control *sc)
 {
        return true;
@@ -591,7 +588,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
        return freed;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        struct mem_cgroup *memcg, int priority)
 {
@@ -599,7 +596,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
        unsigned long ret, freed = 0;
        int i;
 
-       if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
+       if (!mem_cgroup_online(memcg))
                return 0;
 
        if (!down_read_trylock(&shrinker_rwsem))
@@ -625,6 +622,11 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        continue;
                }
 
+               /* Call non-slab shrinkers even though kmem is disabled */
+               if (!memcg_kmem_enabled() &&
+                   !(shrinker->flags & SHRINKER_NONSLAB))
+                       continue;
+
                ret = do_shrink_slab(&sc, shrinker, priority);
                if (ret == SHRINK_EMPTY) {
                        clear_bit(i, map->map);
@@ -661,13 +663,13 @@ unlock:
        up_read(&shrinker_rwsem);
        return freed;
 }
-#else /* CONFIG_MEMCG_KMEM */
+#else /* CONFIG_MEMCG */
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        struct mem_cgroup *memcg, int priority)
 {
        return 0;
 }
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
 
 /**
  * shrink_slab - shrink slab caches
@@ -1121,7 +1123,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                      struct scan_control *sc,
                                      enum ttu_flags ttu_flags,
                                      struct reclaim_stat *stat,
-                                     bool force_reclaim)
+                                     bool ignore_references)
 {
        LIST_HEAD(ret_pages);
        LIST_HEAD(free_pages);
@@ -1135,7 +1137,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                struct address_space *mapping;
                struct page *page;
                int may_enter_fs;
-               enum page_references references = PAGEREF_RECLAIM_CLEAN;
+               enum page_references references = PAGEREF_RECLAIM;
                bool dirty, writeback;
                unsigned int nr_pages;
 
@@ -1149,7 +1151,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
                VM_BUG_ON_PAGE(PageActive(page), page);
 
-               nr_pages = 1 << compound_order(page);
+               nr_pages = compound_nr(page);
 
                /* Account the number of base pages even though THP */
                sc->nr_scanned += nr_pages;
@@ -1266,7 +1268,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        }
                }
 
-               if (!force_reclaim)
+               if (!ignore_references)
                        references = page_check_references(page, sc);
 
                switch (references) {
@@ -1487,10 +1489,9 @@ free_it:
                 * Is there need to periodically free_page_list? It would
                 * appear not as the counts should be low
                 */
-               if (unlikely(PageTransHuge(page))) {
-                       mem_cgroup_uncharge(page);
+               if (unlikely(PageTransHuge(page)))
                        (*get_compound_page_dtor(page))(page);
-               } else
+               else
                        list_add(&page->lru, &free_pages);
                continue;
 
@@ -1705,7 +1706,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
                VM_BUG_ON_PAGE(!PageLRU(page), page);
 
-               nr_pages = 1 << compound_order(page);
+               nr_pages = compound_nr(page);
                total_scan += nr_pages;
 
                if (page_zonenum(page) > sc->reclaim_idx) {
@@ -1911,7 +1912,6 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
 
                        if (unlikely(PageCompound(page))) {
                                spin_unlock_irq(&pgdat->lru_lock);
-                               mem_cgroup_uncharge(page);
                                (*get_compound_page_dtor(page))(page);
                                spin_lock_irq(&pgdat->lru_lock);
                        } else
@@ -2145,6 +2145,62 @@ static void shrink_active_list(unsigned long nr_to_scan,
                        nr_deactivate, nr_rotated, sc->priority, file);
 }
 
+unsigned long reclaim_pages(struct list_head *page_list)
+{
+       int nid = -1;
+       unsigned long nr_reclaimed = 0;
+       LIST_HEAD(node_page_list);
+       struct reclaim_stat dummy_stat;
+       struct page *page;
+       struct scan_control sc = {
+               .gfp_mask = GFP_KERNEL,
+               .priority = DEF_PRIORITY,
+               .may_writepage = 1,
+               .may_unmap = 1,
+               .may_swap = 1,
+       };
+
+       while (!list_empty(page_list)) {
+               page = lru_to_page(page_list);
+               if (nid == -1) {
+                       nid = page_to_nid(page);
+                       INIT_LIST_HEAD(&node_page_list);
+               }
+
+               if (nid == page_to_nid(page)) {
+                       ClearPageActive(page);
+                       list_move(&page->lru, &node_page_list);
+                       continue;
+               }
+
+               nr_reclaimed += shrink_page_list(&node_page_list,
+                                               NODE_DATA(nid),
+                                               &sc, 0,
+                                               &dummy_stat, false);
+               while (!list_empty(&node_page_list)) {
+                       page = lru_to_page(&node_page_list);
+                       list_del(&page->lru);
+                       putback_lru_page(page);
+               }
+
+               nid = -1;
+       }
+
+       if (!list_empty(&node_page_list)) {
+               nr_reclaimed += shrink_page_list(&node_page_list,
+                                               NODE_DATA(nid),
+                                               &sc, 0,
+                                               &dummy_stat, false);
+               while (!list_empty(&node_page_list)) {
+                       page = lru_to_page(&node_page_list);
+                       list_del(&page->lru);
+                       putback_lru_page(page);
+               }
+       }
+
+       return nr_reclaimed;
+}
+
 /*
  * The inactive anon list should be small enough that the VM never has
  * to do too much work.
@@ -2586,7 +2642,6 @@ static bool in_reclaim_compaction(struct scan_control *sc)
  */
 static inline bool should_continue_reclaim(struct pglist_data *pgdat,
                                        unsigned long nr_reclaimed,
-                                       unsigned long nr_scanned,
                                        struct scan_control *sc)
 {
        unsigned long pages_for_compaction;
@@ -2597,40 +2652,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
        if (!in_reclaim_compaction(sc))
                return false;
 
-       /* Consider stopping depending on scan and reclaim activity */
-       if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
-               /*
-                * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
-                * full LRU list has been scanned and we are still failing
-                * to reclaim pages. This full LRU scan is potentially
-                * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
-                */
-               if (!nr_reclaimed && !nr_scanned)
-                       return false;
-       } else {
-               /*
-                * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
-                * fail without consequence, stop if we failed to reclaim
-                * any pages from the last SWAP_CLUSTER_MAX number of
-                * pages that were scanned. This will return to the
-                * caller faster at the risk reclaim/compaction and
-                * the resulting allocation attempt fails
-                */
-               if (!nr_reclaimed)
-                       return false;
-       }
-
        /*
-        * If we have not reclaimed enough pages for compaction and the
-        * inactive lists are large enough, continue reclaiming
+        * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX
+        * number of pages that were scanned. This will return to the caller
+        * with the risk reclaim/compaction and the resulting allocation attempt
+        * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL
+        * allocations through requiring that the full LRU list has been scanned
+        * first, by assuming that zero delta of sc->nr_scanned means full LRU
+        * scan, but that approximation was wrong, and there were corner cases
+        * where always a non-zero amount of pages were scanned.
         */
-       pages_for_compaction = compact_gap(sc->order);
-       inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
-       if (get_nr_swap_pages() > 0)
-               inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
-       if (sc->nr_reclaimed < pages_for_compaction &&
-                       inactive_lru_pages > pages_for_compaction)
-               return true;
+       if (!nr_reclaimed)
+               return false;
 
        /* If compaction would go ahead or the allocation would succeed, stop */
        for (z = 0; z <= sc->reclaim_idx; z++) {
@@ -2647,7 +2680,17 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
                        ;
                }
        }
-       return true;
+
+       /*
+        * If we have not reclaimed enough pages for compaction and the
+        * inactive lists are large enough, continue reclaiming
+        */
+       pages_for_compaction = compact_gap(sc->order);
+       inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
+       if (get_nr_swap_pages() > 0)
+               inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+       return inactive_lru_pages > pages_for_compaction;
 }
 
 static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
@@ -2664,10 +2707,6 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 
        do {
                struct mem_cgroup *root = sc->target_mem_cgroup;
-               struct mem_cgroup_reclaim_cookie reclaim = {
-                       .pgdat = pgdat,
-                       .priority = sc->priority,
-               };
                unsigned long node_lru_pages = 0;
                struct mem_cgroup *memcg;
 
@@ -2676,7 +2715,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
-               memcg = mem_cgroup_iter(root, NULL, &reclaim);
+               memcg = mem_cgroup_iter(root, NULL, NULL);
                do {
                        unsigned long lru_pages;
                        unsigned long reclaimed;
@@ -2719,21 +2758,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                                   sc->nr_scanned - scanned,
                                   sc->nr_reclaimed - reclaimed);
 
-                       /*
-                        * Kswapd have to scan all memory cgroups to fulfill
-                        * the overall scan target for the node.
-                        *
-                        * Limit reclaim, on the other hand, only cares about
-                        * nr_to_reclaim pages to be reclaimed and it will
-                        * retry with decreasing priority if one round over the
-                        * whole hierarchy is not sufficient.
-                        */
-                       if (!current_is_kswapd() &&
-                                       sc->nr_reclaimed >= sc->nr_to_reclaim) {
-                               mem_cgroup_iter_break(root, memcg);
-                               break;
-                       }
-               } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+               } while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
 
                if (reclaim_state) {
                        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -2810,7 +2835,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                        wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 
        } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
-                                        sc->nr_scanned - nr_scanned, sc));
+                                        sc));
 
        /*
         * Kswapd gives up on balancing particular nodes after too
@@ -3220,6 +3245,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #ifdef CONFIG_MEMCG
 
+/* Only used by soft limit reclaim. Do not reuse for anything else. */
 unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
                                                gfp_t gfp_mask, bool noswap,
                                                pg_data_t *pgdat,
@@ -3235,7 +3261,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
        };
        unsigned long lru_pages;
 
-       set_task_reclaim_state(current, &sc.reclaim_state);
+       WARN_ON_ONCE(!current->reclaim_state);
+
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
@@ -3253,7 +3280,6 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 
        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
-       set_task_reclaim_state(current, NULL);
        *nr_scanned = sc.nr_scanned;
 
        return sc.nr_reclaimed;