Merge tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5703f7fca83261d31606517a3abbae4ee92aec73..39a372a2a1d628a58eb5f02d3a27b3e0989b37f9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3085,7 +3085,6 @@ out:
        return page;
 }
 
-
 /*
  * Maximum number of compaction retries with progress before OOM
  * killer is considered as the only way to move forward.
@@ -3097,17 +3096,16 @@ out:
 static struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                unsigned int alloc_flags, const struct alloc_context *ac,
-               enum migrate_mode mode, enum compact_result *compact_result)
+               enum compact_priority prio, enum compact_result *compact_result)
 {
        struct page *page;
-       int contended_compaction;
 
        if (!order)
                return NULL;
 
        current->flags |= PF_MEMALLOC;
        *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-                                               mode, &contended_compaction);
+                                                                       prio);
        current->flags &= ~PF_MEMALLOC;
 
        if (*compact_result <= COMPACT_INACTIVE)
@@ -3136,24 +3134,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         */
        count_vm_event(COMPACTFAIL);
 
-       /*
-        * In all zones where compaction was attempted (and not
-        * deferred or skipped), lock contention has been detected.
-        * For THP allocation we do not want to disrupt the others
-        * so we fallback to base pages instead.
-        */
-       if (contended_compaction == COMPACT_CONTENDED_LOCK)
-               *compact_result = COMPACT_CONTENDED;
-
-       /*
-        * If compaction was aborted due to need_resched(), we do not
-        * want to further increase allocation latency, unless it is
-        * khugepaged trying to collapse.
-        */
-       if (contended_compaction == COMPACT_CONTENDED_SCHED
-               && !(current->flags & PF_KTHREAD))
-               *compact_result = COMPACT_CONTENDED;
-
        cond_resched();
 
        return NULL;
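
With contended_compaction gone, callers no longer learn why compaction backed off; they only steer effort through the new prio argument. One plausible way a priority maps onto the old migrate modes inside the compaction code is sketched below; the helper name and the COMPACT_PRIO_ASYNC enumerator are assumptions, not shown in this diff:

        static enum migrate_mode
        compact_priority_to_mode(enum compact_priority prio)
        {
                /* cheap first pass stays async; escalated retries use light sync */
                return prio == COMPACT_PRIO_ASYNC ? MIGRATE_ASYNC
                                                  : MIGRATE_SYNC_LIGHT;
        }
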
@@ -3161,7 +3141,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 
 static inline bool
 should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
-                    enum compact_result compact_result, enum migrate_mode *migrate_mode,
+                    enum compact_result compact_result,
+                    enum compact_priority *compact_priority,
                     int compaction_retries)
 {
        int max_retries = MAX_COMPACT_RETRIES;
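
For scale, the cap is a small constant; in kernels of this vintage it is presumably just the following (worth re-checking against the tree):

        #define MAX_COMPACT_RETRIES 16
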
@@ -3172,11 +3153,11 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
        /*
         * compaction considers all the zones as desperately out of memory
         * so it doesn't really make much sense to retry except when the
-        * failure could be caused by weak migration mode.
+        * failure could be caused by insufficient priority
         */
        if (compaction_failed(compact_result)) {
-               if (*migrate_mode == MIGRATE_ASYNC) {
-                       *migrate_mode = MIGRATE_SYNC_LIGHT;
+               if (*compact_priority > MIN_COMPACT_PRIORITY) {
+                       (*compact_priority)--;
                        return true;
                }
                return false;
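
The decrement only works if numerically lower priority values mean more aggressive compaction, with MIN_COMPACT_PRIORITY the smallest. An ordering consistent with this hunk is sketched below; the enumerator list is illustrative, not verbatim from this tree:

        enum compact_priority {
                COMPACT_PRIO_SYNC_LIGHT,                        /* try harder */
                MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
                COMPACT_PRIO_ASYNC,                             /* cheapest */
                INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC,
                DEF_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC,      /* assumed */
        };

With that ordering, (*compact_priority)-- after a definite failure steps from async toward light sync, which is exactly what the replaced MIGRATE_ASYNC to MIGRATE_SYNC_LIGHT escalation did.
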
@@ -3210,7 +3191,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
 static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                unsigned int alloc_flags, const struct alloc_context *ac,
-               enum migrate_mode mode, enum compact_result *compact_result)
+               enum compact_priority prio, enum compact_result *compact_result)
 {
        *compact_result = COMPACT_SKIPPED;
        return NULL;
@@ -3219,7 +3200,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
                     enum compact_result compact_result,
-                    enum migrate_mode *migrate_mode,
+                    enum compact_priority *compact_priority,
                     int compaction_retries)
 {
        struct zone *zone;
@@ -3373,11 +3354,6 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
        return false;
 }
 
-static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
-{
-       return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
-}
-
 /*
  * Maximum number of reclaim retries without any progress before OOM killer
  * is considered as the only way to move forward.
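
As on the compaction side, the reclaim cap named by this comment is a small constant, presumably the following (not shown in this excerpt; re-check against the tree), and no_progress_loops is compared against it in should_reclaim_retry():

        #define MAX_RECLAIM_RETRIES 16
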
@@ -3479,7 +3455,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
        struct page *page = NULL;
        unsigned int alloc_flags;
        unsigned long did_some_progress;
-       enum migrate_mode migration_mode = MIGRATE_ASYNC;
+       enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
        enum compact_result compact_result;
        int compaction_retries = 0;
        int no_progress_loops = 0;
@@ -3510,35 +3486,81 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         */
        alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
+       if (gfp_mask & __GFP_KSWAPD_RECLAIM)
+               wake_all_kswapds(order, ac);
+
+       /*
+        * The adjusted alloc_flags might result in immediate success, so try
+        * that first
+        */
+       page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+       if (page)
+               goto got_pg;
+
+       /*
+        * For costly allocations, try direct compaction first, as it's likely
+        * that we have enough base pages and don't need to reclaim. Don't try
+        * that for allocations that are allowed to ignore watermarks, as the
+        * ALLOC_NO_WATERMARKS attempt has not yet happened.
+        */
+       if (can_direct_reclaim && order > PAGE_ALLOC_COSTLY_ORDER &&
+               !gfp_pfmemalloc_allowed(gfp_mask)) {
+               page = __alloc_pages_direct_compact(gfp_mask, order,
+                                               alloc_flags, ac,
+                                               INIT_COMPACT_PRIORITY,
+                                               &compact_result);
+               if (page)
+                       goto got_pg;
+
+               /*
+                * Checks for costly allocations with __GFP_NORETRY, which
+                * includes THP page fault allocations
+                */
+               if (gfp_mask & __GFP_NORETRY) {
+                       /*
+                        * If compaction is deferred for high-order allocations,
+                        * it is because sync compaction recently failed. If
+                        * this is the case and the caller requested a THP
+                        * allocation, we do not want to heavily disrupt the
+                        * system, so we fail the allocation instead of entering
+                        * direct reclaim.
+                        */
+                       if (compact_result == COMPACT_DEFERRED)
+                               goto nopage;
+
+                       /*
+                        * Looks like reclaim/compaction is worth trying, but
+                        * sync compaction could be very expensive, so keep
+                        * using async compaction.
+                        */
+                       compact_priority = INIT_COMPACT_PRIORITY;
+               }
+       }
+
 retry:
+       /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
        if (gfp_mask & __GFP_KSWAPD_RECLAIM)
                wake_all_kswapds(order, ac);
 
+       if (gfp_pfmemalloc_allowed(gfp_mask))
+               alloc_flags = ALLOC_NO_WATERMARKS;
+
        /*
         * Reset the zonelist iterators if memory policies can be ignored.
         * These allocations are high priority and system rather than user
         * oriented.
         */
-       if (!(alloc_flags & ALLOC_CPUSET) || gfp_pfmemalloc_allowed(gfp_mask)) {
+       if (!(alloc_flags & ALLOC_CPUSET) || (alloc_flags & ALLOC_NO_WATERMARKS)) {
                ac->zonelist = node_zonelist(numa_node_id(), gfp_mask);
                ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                        ac->high_zoneidx, ac->nodemask);
        }
 
-       /* This is the last chance, in general, before the goto nopage. */
+       /* Attempt with potentially adjusted zonelist and alloc_flags */
        page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
        if (page)
                goto got_pg;
 
-       /* Allocate without watermarks if the context allows */
-       if (gfp_pfmemalloc_allowed(gfp_mask)) {
-
-               page = get_page_from_freelist(gfp_mask, order,
-                                               ALLOC_NO_WATERMARKS, ac);
-               if (page)
-                       goto got_pg;
-       }
-
        /* Caller is not willing to reclaim, so we can't balance anything */
        if (!can_direct_reclaim) {
                /*
@@ -3568,38 +3590,6 @@ retry:
        if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
                goto nopage;
 
-       /*
-        * Try direct compaction. The first pass is asynchronous. Subsequent
-        * attempts after direct reclaim are synchronous
-        */
-       page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
-                                       migration_mode,
-                                       &compact_result);
-       if (page)
-               goto got_pg;
-
-       /* Checks for THP-specific high-order allocations */
-       if (is_thp_gfp_mask(gfp_mask)) {
-               /*
-                * If compaction is deferred for high-order allocations, it is
-                * because sync compaction recently failed. If this is the case
-                * and the caller requested a THP allocation, we do not want
-                * to heavily disrupt the system, so we fail the allocation
-                * instead of entering direct reclaim.
-                */
-               if (compact_result == COMPACT_DEFERRED)
-                       goto nopage;
-
-               /*
-                * Compaction is contended so rather back off than cause
-                * excessive stalls.
-                */
-               if(compact_result == COMPACT_CONTENDED)
-                       goto nopage;
-       }
-
-       if (order && compaction_made_progress(compact_result))
-               compaction_retries++;
 
        /* Try direct reclaim and then allocating */
        page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
@@ -3607,16 +3597,25 @@ retry:
        if (page)
                goto got_pg;
 
+       /* Try direct compaction and then allocating */
+       page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
+                                       compact_priority, &compact_result);
+       if (page)
+               goto got_pg;
+
+       if (order && compaction_made_progress(compact_result))
+               compaction_retries++;
+
        /* Do not loop if specifically requested */
        if (gfp_mask & __GFP_NORETRY)
-               goto noretry;
+               goto nopage;
 
        /*
         * Do not retry costly high order allocations unless they are
         * __GFP_REPEAT
         */
        if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
-               goto noretry;
+               goto nopage;
 
        /*
         * Costly allocations might have made progress but this doesn't mean
@@ -3640,7 +3639,7 @@ retry:
         */
        if (did_some_progress > 0 &&
                        should_compact_retry(ac, order, alloc_flags,
-                               compact_result, &migration_mode,
+                               compact_result, &compact_priority,
                                compaction_retries))
                goto retry;
 
@@ -3655,25 +3654,6 @@ retry:
                goto retry;
        }
 
-noretry:
-       /*
-        * High-order allocations do not necessarily loop after direct reclaim
-        * and reclaim/compaction depends on compaction being called after
-        * reclaim so call directly if necessary.
-        * It can become very expensive to allocate transparent hugepages at
-        * fault, so use asynchronous memory compaction for THP unless it is
-        * khugepaged trying to collapse. All other requests should tolerate
-        * at least light sync migration.
-        */
-       if (is_thp_gfp_mask(gfp_mask) && !(current->flags & PF_KTHREAD))
-               migration_mode = MIGRATE_ASYNC;
-       else
-               migration_mode = MIGRATE_SYNC_LIGHT;
-       page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags,
-                                           ac, migration_mode,
-                                           &compact_result);
-       if (page)
-               goto got_pg;
 nopage:
        warn_alloc_failed(gfp_mask, order, NULL);
 got_pg:
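
Taken together, the slowpath hunks reorder the whole sequence. Below is a deliberately loose, self-contained model of the new control flow; every identifier is a stand-in, not the kernel's code:

        #include <stdbool.h>

        bool freelist_alloc(void);      /* stands in for get_page_from_freelist() */
        bool direct_reclaim(void);
        bool direct_compact(int prio);
        bool should_retry(void);        /* reclaim/compaction retry checks */

        #define PRIO_SYNC_LIGHT 0       /* lower value == try harder */
        #define PRIO_ASYNC      1

        static bool slowpath_model(bool costly, bool can_reclaim, bool noretry)
        {
                int prio = PRIO_ASYNC;

                if (freelist_alloc())           /* eager attempt, adjusted flags */
                        return true;
                if (costly && can_reclaim && direct_compact(PRIO_ASYNC))
                        return true;            /* early async compaction pass */
                for (;;) {
                        if (freelist_alloc())   /* possibly no-watermark flags */
                                return true;
                        if (!can_reclaim)
                                return false;
                        if (direct_reclaim())   /* reclaim now runs before... */
                                return true;
                        if (direct_compact(prio)) /* ...compaction in the loop */
                                return true;
                        if (noretry || !should_retry())
                                return false;
                        if (prio > PRIO_SYNC_LIGHT)
                                prio--;         /* escalate compaction effort */
                }
        }

Note the inversion: before this series, direct compaction ran ahead of direct reclaim inside the retry loop; now reclaim goes first, and the only compaction that precedes reclaim is the one-off async pass for costly orders above.
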
@@ -5296,7 +5276,7 @@ void __init setup_per_cpu_pageset(void)
                setup_zone_pageset(zone);
 }
 
-static noinline __init_refok
+static noinline __ref
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
        int i;
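
This annotation change, and the matching one at alloc_node_mem_map() below, is mechanical: __init_refok had become a plain alias of __ref, which tells modpost that a reference from regular code into __init/__meminit sections is intentional. An illustrative use, not from this tree:

        #include <linux/init.h>

        static void __init early_setup(void)
        {
                /* boot-time only; this text is freed after init */
        }

        /* __ref: we promise this runs only before free_initmem() */
        static void __ref call_early(void)
        {
                early_setup();
        }
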
@@ -5923,7 +5903,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
        }
 }
 
-static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
+static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 {
        unsigned long __maybe_unused start = 0;
        unsigned long __maybe_unused offset = 0;