mm/page_alloc.c: __perform_reclaim should return 'unsigned long'

[linux-block.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 5661fa164f1308ee876e7257e27207f1e697de95..a105c657be379a3d0f435a2915eb6487d237e04b 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,6 +69,7 @@
  #include <linux/nmi.h>
  #include <linux/psi.h>
  #include <linux/padata.h>
+#include <linux/khugepaged.h>
  
  #include <asm/sections.h>
  #include <asm/tlbflush.h>
@@ -155,16 +156,16 @@ static int __init early_init_on_alloc(char *buf)
         int ret;
         bool bool_result;
  
-       if (!buf)
-               return -EINVAL;
         ret = kstrtobool(buf, &bool_result);
+       if (ret)
+               return ret;
         if (bool_result && page_poisoning_enabled())
                 pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n");
         if (bool_result)
                 static_branch_enable(&init_on_alloc);
         else
                 static_branch_disable(&init_on_alloc);
-       return ret;
+       return 0;
  }
  early_param("init_on_alloc", early_init_on_alloc);
  
@@ -173,16 +174,16 @@ static int __init early_init_on_free(char *buf)
         int ret;
         bool bool_result;
  
-       if (!buf)
-               return -EINVAL;
         ret = kstrtobool(buf, &bool_result);
+       if (ret)
+               return ret;
         if (bool_result && page_poisoning_enabled())
                 pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n");
         if (bool_result)
                 static_branch_enable(&init_on_free);
         else
                 static_branch_disable(&init_on_free);
-       return ret;
+       return 0;
  }
  early_param("init_on_free", early_init_on_free);
  
@@ -3367,9 +3368,16 @@ struct page *rmqueue(struct zone *preferred_zone,
         struct page *page;
  
         if (likely(order == 0)) {
-               page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
+               /*
+                * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
+                * we need to skip it when CMA area isn't allowed.
+                */
+               if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
+                               migratetype != MIGRATE_MOVABLE) {
+                       page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
                                         migratetype, alloc_flags);
-               goto out;
+                       goto out;
+               }
         }
  
         /*
@@ -3381,7 +3389,13 @@ struct page *rmqueue(struct zone *preferred_zone,
  
         do {
                 page = NULL;
-               if (alloc_flags & ALLOC_HARDER) {
+               /*
+                * order-0 request can reach here when the pcplist is skipped
+                * due to non-CMA allocation context. HIGHATOMIC area is
+                * reserved for high-order atomic allocation, so order-0
+                * request should skip it.
+                */
+               if (order > 0 && alloc_flags & ALLOC_HARDER) {
                         page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
                         if (page)
                                 trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -3972,8 +3986,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
          * success so it is time to admit defeat. We will skip the OOM killer
          * because it is very likely that the caller has a more reasonable
          * fallback than shooting a random task.
+        *
+        * The OOM killer may not free memory on a specific node.
          */
-       if (gfp_mask & __GFP_RETRY_MAYFAIL)
+       if (gfp_mask & (__GFP_RETRY_MAYFAIL | __GFP_THISNODE))
                 goto out;
         /* The OOM killer does not needlessly kill tasks for lowmem */
         if (ac->highest_zoneidx < ZONE_NORMAL)
@@ -3990,10 +4006,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
          * failures more gracefully we should just bail out here.
          */
  
-       /* The OOM killer may not free memory on a specific node */
-       if (gfp_mask & __GFP_THISNODE)
-               goto out;
-
         /* Exhausted what can be done so it's blame time */
         if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
                 *did_some_progress = 1;
@@ -4241,13 +4253,12 @@ EXPORT_SYMBOL_GPL(fs_reclaim_release);
  #endif
  
  /* Perform direct synchronous page reclaim */
-static int
+static unsigned long
  __perform_reclaim(gfp_t gfp_mask, unsigned int order,
                                         const struct alloc_context *ac)
  {
-       int progress;
         unsigned int noreclaim_flag;
-       unsigned long pflags;
+       unsigned long pflags, progress;
  
         cond_resched();
  
@@ -4826,12 +4837,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
  
         *alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);
  
-       return true;
-}
-
-/* Determine whether to spread dirty pages and what the first usable zone */
-static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
-{
         /* Dirty zone balancing only done in the fast path */
         ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
  
@@ -4842,6 +4847,8 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
          */
         ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                         ac->highest_zoneidx, ac->nodemask);
+
+       return true;
  }
  
  /*
@@ -4870,8 +4877,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
         if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
                 return NULL;
  
-       finalise_ac(gfp_mask, &ac);
-
         /*
          * Forbid the first pass from falling back to types that fragment
          * memory until all local zones are considered.
@@ -5637,7 +5642,6 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
         int n, val;
         int min_val = INT_MAX;
         int best_node = NUMA_NO_NODE;
-       const struct cpumask *tmp = cpumask_of_node(0);
  
         /* Use the local node if we haven't already */
         if (!node_isset(node, *used_node_mask)) {
@@ -5658,8 +5662,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
                 val += (n < node);
  
                 /* Give preference to headless and unused nodes */
-               tmp = cpumask_of_node(n);
-               if (!cpumask_empty(tmp))
+               if (!cpumask_empty(cpumask_of_node(n)))
                         val += PENALTY_FOR_NODE_WITH_CPUS;
  
                 /* Slight preference for less loaded node */
@@ -7891,6 +7894,8 @@ int __meminit init_per_zone_wmark_min(void)
         setup_min_slab_ratio();
  #endif
  
+       khugepaged_min_free_kbytes_update();
+
         return 0;
  }
  postcore_initcall(init_per_zone_wmark_min)
@@ -8218,14 +8223,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
  {
         unsigned long iter = 0;
         unsigned long pfn = page_to_pfn(page);
-
-       /*
-        * TODO we could make this much more efficient by not checking every
-        * page in the range if we know all of them are in MOVABLE_ZONE and
-        * that the movable zone guarantees that pages are migratable but
-        * the later is not the case right now unfortunatelly. E.g. movablecore
-        * can still lead to having bootmem allocations in zone_movable.
-        */
+       unsigned long offset = pfn % pageblock_nr_pages;
  
         if (is_migrate_cma_page(page)) {
                 /*
@@ -8239,12 +8237,18 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
                 return page;
         }
  
-       for (; iter < pageblock_nr_pages; iter++) {
+       for (; iter < pageblock_nr_pages - offset; iter++) {
                 if (!pfn_valid_within(pfn + iter))
                         continue;
  
                 page = pfn_to_page(pfn + iter);
  
+               /*
+                * Both, bootmem allocations and memory holes are marked
+                * PG_reserved and are unmovable. We can even have unmovable
+                * allocations inside ZONE_MOVABLE, for example when
+                * specifying "movablecore".
+                */
                 if (PageReserved(page))
                         return page;
  
@@ -8318,14 +8322,6 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
                  * it.  But now, memory offline itself doesn't call
                  * shrink_node_slabs() and it still to be fixed.
                  */
-               /*
-                * If the page is not RAM, page_count()should be 0.
-                * we don't need more check. This is an _used_ not-movable page.
-                *
-                * The problematic thing here is PG_reserved pages. PG_reserved
-                * is set to both of a memory hole page and a _used_ kernel
-                * page at boot.
-                */
                 return page;
         }
         return NULL;