mm: parallelize deferred_init_memmap()
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 13cc653122b73278afaeb6054539c619b164d11a..27ec5dc4db33676bb34d93ee2d97c10d100959e0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -68,6 +68,7 @@
 #include <linux/lockdep.h>
 #include <linux/nmi.h>
 #include <linux/psi.h>
+#include <linux/padata.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -302,14 +303,14 @@ const char * const migratetype_names[MIGRATE_TYPES] = {
 #endif
 };
 
-compound_page_dtor * const compound_page_dtors[] = {
-       NULL,
-       free_compound_page,
+compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {
+       [NULL_COMPOUND_DTOR] = NULL,
+       [COMPOUND_PAGE_DTOR] = free_compound_page,
 #ifdef CONFIG_HUGETLB_PAGE
-       free_huge_page,
+       [HUGETLB_PAGE_DTOR] = free_huge_page,
 #endif
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       free_transhuge_page,
+       [TRANSHUGE_PAGE_DTOR] = free_transhuge_page,
 #endif
 };
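The table above now uses designated initializers keyed by compound destructor ID, so each entry stays tied to its slot even if the enum gains members. For reference, the enum being indexed looks roughly like this around this kernel version (quoted from memory of include/linux/mm.h, so treat it as a sketch rather than the authoritative definition):

enum compound_dtor_id {
        NULL_COMPOUND_DTOR,
        COMPOUND_PAGE_DTOR,
#ifdef CONFIG_HUGETLB_PAGE
        HUGETLB_PAGE_DTOR,
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        TRANSHUGE_PAGE_DTOR,
#endif
        NR_COMPOUND_DTORS,
};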
 
@@ -335,7 +336,6 @@ static unsigned long nr_kernel_pages __initdata;
 static unsigned long nr_all_pages __initdata;
 static unsigned long dma_reserve __initdata;
 
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata;
 static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata;
 static unsigned long required_kernelcore __initdata;
@@ -348,7 +348,6 @@ static bool mirrored_kernelcore __meminitdata;
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
 EXPORT_SYMBOL(movable_zone);
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if MAX_NUMNODES > 1
 unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
@@ -609,8 +608,7 @@ static inline int __maybe_unused bad_range(struct zone *zone, struct page *page)
 }
 #endif
 
-static void bad_page(struct page *page, const char *reason,
-               unsigned long bad_flags)
+static void bad_page(struct page *page, const char *reason)
 {
        static unsigned long resume;
        static unsigned long nr_shown;
@@ -639,10 +637,6 @@ static void bad_page(struct page *page, const char *reason,
        pr_alert("BUG: Bad page state in process %s  pfn:%05lx\n",
                current->comm, page_to_pfn(page));
        __dump_page(page, reason);
-       bad_flags &= page->flags;
-       if (bad_flags)
-               pr_alert("bad because of flags: %#lx(%pGp)\n",
-                                               bad_flags, &bad_flags);
        dump_page_owner(page);
 
        print_modules();
@@ -1077,13 +1071,9 @@ static inline bool page_expected_state(struct page *page,
        return true;
 }
 
-static void free_pages_check_bad(struct page *page)
+static const char *page_bad_reason(struct page *page, unsigned long flags)
 {
-       const char *bad_reason;
-       unsigned long bad_flags;
-
-       bad_reason = NULL;
-       bad_flags = 0;
+       const char *bad_reason = NULL;
 
        if (unlikely(atomic_read(&page->_mapcount) != -1))
                bad_reason = "nonzero mapcount";
@@ -1091,24 +1081,32 @@ static void free_pages_check_bad(struct page *page)
                bad_reason = "non-NULL mapping";
        if (unlikely(page_ref_count(page) != 0))
                bad_reason = "nonzero _refcount";
-       if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
-               bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
-               bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
+       if (unlikely(page->flags & flags)) {
+               if (flags == PAGE_FLAGS_CHECK_AT_PREP)
+                       bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag(s) set";
+               else
+                       bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
        }
 #ifdef CONFIG_MEMCG
        if (unlikely(page->mem_cgroup))
                bad_reason = "page still charged to cgroup";
 #endif
-       bad_page(page, bad_reason, bad_flags);
+       return bad_reason;
+}
+
+static void check_free_page_bad(struct page *page)
+{
+       bad_page(page,
+                page_bad_reason(page, PAGE_FLAGS_CHECK_AT_FREE));
 }
 
-static inline int free_pages_check(struct page *page)
+static inline int check_free_page(struct page *page)
 {
        if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
                return 0;
 
        /* Something has gone sideways, find it */
-       free_pages_check_bad(page);
+       check_free_page_bad(page);
        return 1;
 }
 
@@ -1130,7 +1128,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
        case 1:
                /* the first tail page: ->mapping may be compound_mapcount() */
                if (unlikely(compound_mapcount(page))) {
-                       bad_page(page, "nonzero compound_mapcount", 0);
+                       bad_page(page, "nonzero compound_mapcount");
                        goto out;
                }
                break;
@@ -1142,17 +1140,17 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
                break;
        default:
                if (page->mapping != TAIL_MAPPING) {
-                       bad_page(page, "corrupted mapping in tail page", 0);
+                       bad_page(page, "corrupted mapping in tail page");
                        goto out;
                }
                break;
        }
        if (unlikely(!PageTail(page))) {
-               bad_page(page, "PageTail not set", 0);
+               bad_page(page, "PageTail not set");
                goto out;
        }
        if (unlikely(compound_head(page) != head_page)) {
-               bad_page(page, "compound_head not consistent", 0);
+               bad_page(page, "compound_head not consistent");
                goto out;
        }
        ret = 0;
@@ -1194,7 +1192,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
                for (i = 1; i < (1 << order); i++) {
                        if (compound)
                                bad += free_tail_pages_check(page, page + i);
-                       if (unlikely(free_pages_check(page + i))) {
+                       if (unlikely(check_free_page(page + i))) {
                                bad++;
                                continue;
                        }
@@ -1206,7 +1204,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
        if (memcg_kmem_enabled() && PageKmemcg(page))
                __memcg_kmem_uncharge_page(page, order);
        if (check_free)
-               bad += free_pages_check(page);
+               bad += check_free_page(page);
        if (bad)
                return false;
 
@@ -1253,7 +1251,7 @@ static bool free_pcp_prepare(struct page *page)
 static bool bulkfree_pcp_prepare(struct page *page)
 {
        if (debug_pagealloc_enabled_static())
-               return free_pages_check(page);
+               return check_free_page(page);
        else
                return false;
 }
@@ -1274,7 +1272,7 @@ static bool free_pcp_prepare(struct page *page)
 
 static bool bulkfree_pcp_prepare(struct page *page)
 {
-       return free_pages_check(page);
+       return check_free_page(page);
 }
 #endif /* CONFIG_DEBUG_VM */
 
@@ -1499,45 +1497,49 @@ void __free_pages_core(struct page *page, unsigned int order)
        __free_pages(page, order);
 }
 
-#if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
-       defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
+#ifdef CONFIG_NEED_MULTIPLE_NODES
 
 static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
 
-int __meminit early_pfn_to_nid(unsigned long pfn)
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+
+/*
+ * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
+ */
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+                                       struct mminit_pfnnid_cache *state)
 {
-       static DEFINE_SPINLOCK(early_pfn_lock);
+       unsigned long start_pfn, end_pfn;
        int nid;
 
-       spin_lock(&early_pfn_lock);
-       nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
-       if (nid < 0)
-               nid = first_online_node;
-       spin_unlock(&early_pfn_lock);
+       if (state->last_start <= pfn && pfn < state->last_end)
+               return state->last_nid;
+
+       nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
+       if (nid != NUMA_NO_NODE) {
+               state->last_start = start_pfn;
+               state->last_end = end_pfn;
+               state->last_nid = nid;
+       }
 
        return nid;
 }
-#endif
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-/* Only safe to use early in boot when initialisation is single-threaded */
-static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+int __meminit early_pfn_to_nid(unsigned long pfn)
 {
+       static DEFINE_SPINLOCK(early_pfn_lock);
        int nid;
 
+       spin_lock(&early_pfn_lock);
        nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
-       if (nid >= 0 && nid != node)
-               return false;
-       return true;
-}
+       if (nid < 0)
+               nid = first_online_node;
+       spin_unlock(&early_pfn_lock);
 
-#else
-static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
-{
-       return true;
+       return nid;
 }
-#endif
-
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
 
 void __init memblock_free_pages(struct page *page, unsigned long pfn,
                                                        unsigned int order)
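The open-coded __early_pfn_to_nid() above caches the memblock range it last resolved, so consecutive PFNs from the same range skip the memblock search. The per-caller cache it fills has this shape (field names as used in the hunk; the definition itself lives elsewhere in the headers, so this is a reference sketch rather than a quotation):

struct mminit_pfnnid_cache {
        unsigned long last_start;       /* first PFN of the cached memblock range */
        unsigned long last_end;         /* one past the last PFN of that range */
        int last_nid;                   /* node the cached range belongs to */
};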
@@ -1692,7 +1694,6 @@ static void __init deferred_free_pages(unsigned long pfn,
                } else if (!(pfn & nr_pgmask)) {
                        deferred_free_range(pfn - nr_free, nr_free);
                        nr_free = 1;
-                       touch_nmi_watchdog();
                } else {
                        nr_free++;
                }
@@ -1722,7 +1723,6 @@ static unsigned long  __init deferred_init_pages(struct zone *zone,
                        continue;
                } else if (!page || !(pfn & nr_pgmask)) {
                        page = pfn_to_page(pfn);
-                       touch_nmi_watchdog();
                } else {
                        page++;
                }
@@ -1816,16 +1816,36 @@ deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
        return nr_pages;
 }
 
+static void __init
+deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
+                          void *arg)
+{
+       unsigned long spfn, epfn;
+       struct zone *zone = arg;
+       u64 i;
+
+       deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
+
+       /*
+        * Initialize and free pages in MAX_ORDER sized increments so that we
+        * can avoid introducing any issues with the buddy allocator.
+        */
+       while (spfn < end_pfn) {
+               deferred_init_maxorder(&i, zone, &spfn, &epfn);
+               cond_resched();
+       }
+}
+
 /* Initialise remaining memory on a node */
 static int __init deferred_init_memmap(void *data)
 {
        pg_data_t *pgdat = data;
        const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
-       unsigned long spfn = 0, epfn = 0, nr_pages = 0;
+       unsigned long spfn = 0, epfn = 0;
        unsigned long first_init_pfn, flags;
        unsigned long start = jiffies;
        struct zone *zone;
-       int zid;
+       int zid, max_threads;
        u64 i;
 
        /* Bind memory initialisation thread to a local node if possible */
@@ -1845,6 +1865,13 @@ static int __init deferred_init_memmap(void *data)
        BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
        pgdat->first_deferred_pfn = ULONG_MAX;
 
+       /*
+        * Once we unlock here, the zone cannot be grown anymore, thus if an
+        * interrupt thread must allocate this early in boot, the zone must be
+        * pre-grown prior to the start of deferred page initialization.
+        */
+       pgdat_resize_unlock(pgdat, &flags);
+
        /* Only the highest zone is deferred so find it */
        for (zid = 0; zid < MAX_NR_ZONES; zid++) {
                zone = pgdat->node_zones + zid;
@@ -1858,20 +1885,33 @@ static int __init deferred_init_memmap(void *data)
                goto zone_empty;
 
        /*
-        * Initialize and free pages in MAX_ORDER sized increments so
-        * that we can avoid introducing any issues with the buddy
-        * allocator.
+        * More CPUs always led to greater speedups on tested systems, up to
+        * all the nodes' CPUs.  Use all since the system is otherwise idle now.
         */
-       while (spfn < epfn)
-               nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
-zone_empty:
-       pgdat_resize_unlock(pgdat, &flags);
+       max_threads = max(cpumask_weight(cpumask), 1u);
 
+       while (spfn < epfn) {
+               unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
+               struct padata_mt_job job = {
+                       .thread_fn   = deferred_init_memmap_chunk,
+                       .fn_arg      = zone,
+                       .start       = spfn,
+                       .size        = epfn_align - spfn,
+                       .align       = PAGES_PER_SECTION,
+                       .min_chunk   = PAGES_PER_SECTION,
+                       .max_threads = max_threads,
+               };
+
+               padata_do_multithreaded(&job);
+               deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
+                                                   epfn_align);
+       }
+zone_empty:
        /* Sanity check that the next zone really is unpopulated */
        WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
 
-       pr_info("node %d initialised, %lu pages in %ums\n",
-               pgdat->node_id, nr_pages, jiffies_to_msecs(jiffies - start));
+       pr_info("node %d deferred pages initialised in %ums\n",
+               pgdat->node_id, jiffies_to_msecs(jiffies - start));
 
        pgdat_init_report_one_done();
        return 0;
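Each pass of the loop above describes one section-aligned span of the zone as a padata_mt_job: padata splits [start, start + size) into chunks of at least min_chunk units, rounds chunk boundaries to align, and invokes thread_fn(chunk_start, chunk_end, fn_arg) on up to max_threads workers before padata_do_multithreaded() returns. A minimal sketch of the same calling convention for an unrelated job (the example_* names and the chunk payload are invented; only the fields shown in the hunk above are assumed):

static void __init example_chunk(unsigned long start, unsigned long end,
                                 void *arg)
{
        atomic_long_t *done = arg;

        /* stand-in for real per-chunk work on [start, end) */
        atomic_long_add(end - start, done);
}

static void __init example_run(unsigned long start_pfn, unsigned long nr_pages)
{
        static atomic_long_t done = ATOMIC_LONG_INIT(0);
        struct padata_mt_job job = {
                .thread_fn   = example_chunk,
                .fn_arg      = &done,                   /* handed through to thread_fn */
                .start       = start_pfn,               /* units are whatever the job uses */
                .size        = nr_pages,                /* length of the range, same units */
                .align       = PAGES_PER_SECTION,       /* chunk boundaries land on sections */
                .min_chunk   = PAGES_PER_SECTION,       /* lower bound on per-thread work */
                .max_threads = 4,
        };

        padata_do_multithreaded(&job);                  /* returns once every chunk is done */
}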
@@ -1908,17 +1948,6 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
 
        pgdat_resize_lock(pgdat, &flags);
 
-       /*
-        * If deferred pages have been initialized while we were waiting for
-        * the lock, return true, as the zone was grown.  The caller will retry
-        * this zone.  We won't return to this function since the caller also
-        * has this static branch.
-        */
-       if (!static_branch_unlikely(&deferred_pages)) {
-               pgdat_resize_unlock(pgdat, &flags);
-               return true;
-       }
-
        /*
         * If someone grew this zone while we were waiting for spinlock, return
         * true, as there might be enough pages already.
@@ -1947,6 +1976,7 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
                first_deferred_pfn = spfn;
 
                nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
+               touch_nmi_watchdog();
 
                /* We should only stop along section boundaries */
                if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
@@ -2092,31 +2122,14 @@ static inline void expand(struct zone *zone, struct page *page,
 
 static void check_new_page_bad(struct page *page)
 {
-       const char *bad_reason = NULL;
-       unsigned long bad_flags = 0;
-
-       if (unlikely(atomic_read(&page->_mapcount) != -1))
-               bad_reason = "nonzero mapcount";
-       if (unlikely(page->mapping != NULL))
-               bad_reason = "non-NULL mapping";
-       if (unlikely(page_ref_count(page) != 0))
-               bad_reason = "nonzero _refcount";
        if (unlikely(page->flags & __PG_HWPOISON)) {
-               bad_reason = "HWPoisoned (hardware-corrupted)";
-               bad_flags = __PG_HWPOISON;
                /* Don't complain about hwpoisoned pages */
                page_mapcount_reset(page); /* remove PageBuddy */
                return;
        }
-       if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
-               bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
-               bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
-       }
-#ifdef CONFIG_MEMCG
-       if (unlikely(page->mem_cgroup))
-               bad_reason = "page still charged to cgroup";
-#endif
-       bad_page(page, bad_reason, bad_flags);
+
+       bad_page(page,
+                page_bad_reason(page, PAGE_FLAGS_CHECK_AT_PREP));
 }
 
 /*
@@ -2609,7 +2622,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
        int order;
        bool ret;
 
-       for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
+       for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
                                                                ac->nodemask) {
                /*
                 * Preserve at least one pageblock unless memory pressure
@@ -2768,6 +2781,20 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 {
        struct page *page;
 
+#ifdef CONFIG_CMA
+       /*
+        * Balance movable allocations between regular and CMA areas by
+        * allocating from CMA when over half of the zone's free memory
+        * is in the CMA area.
+        */
+       if (migratetype == MIGRATE_MOVABLE &&
+           zone_page_state(zone, NR_FREE_CMA_PAGES) >
+           zone_page_state(zone, NR_FREE_PAGES) / 2) {
+               page = __rmqueue_cma_fallback(zone, order);
+               if (page)
+                       return page;
+       }
+#endif
 retry:
        page = __rmqueue_smallest(zone, order, migratetype);
        if (unlikely(!page)) {
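The CONFIG_CMA check added in the hunk above steers MIGRATE_MOVABLE requests to the CMA free lists whenever CMA holds more than half of the zone's free pages, so routine movable traffic drains CMA before the rest of the zone is exhausted: with, say, 1000 free pages of which 600 are CMA, 600 > 1000 / 2 and __rmqueue_cma_fallback() is tried first. Boiled down to a predicate (illustrative helper only, not part of the patch; the two counters are the same zone_page_state() items read above):

static bool movable_should_try_cma_first(struct zone *zone)
{
        return zone_page_state(zone, NR_FREE_CMA_PAGES) >
               zone_page_state(zone, NR_FREE_PAGES) / 2;
}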
@@ -3464,7 +3491,7 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
  * to check in the allocation paths if no pages are free.
  */
 bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
-                        int classzone_idx, unsigned int alloc_flags,
+                        int highest_zoneidx, unsigned int alloc_flags,
                         long free_pages)
 {
        long min = mark;
@@ -3509,7 +3536,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
         * are not met, then a high-order request also cannot go ahead
         * even if a suitable page happened to be free.
         */
-       if (free_pages <= min + z->lowmem_reserve[classzone_idx])
+       if (free_pages <= min + z->lowmem_reserve[highest_zoneidx])
                return false;
 
        /* If this is an order-0 request then the watermark is fine */
@@ -3542,14 +3569,15 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 }
 
 bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
-                     int classzone_idx, unsigned int alloc_flags)
+                     int highest_zoneidx, unsigned int alloc_flags)
 {
-       return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+       return __zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
                                        zone_page_state(z, NR_FREE_PAGES));
 }
 
 static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
-               unsigned long mark, int classzone_idx, unsigned int alloc_flags)
+                               unsigned long mark, int highest_zoneidx,
+                               unsigned int alloc_flags)
 {
        long free_pages = zone_page_state(z, NR_FREE_PAGES);
        long cma_pages = 0;
@@ -3567,22 +3595,23 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
         * the caller is !atomic then it'll uselessly search the free
         * list. That corner case is then slower but it is harmless.
         */
-       if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
+       if (!order && (free_pages - cma_pages) >
+                               mark + z->lowmem_reserve[highest_zoneidx])
                return true;
 
-       return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+       return __zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
                                        free_pages);
 }
 
 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
-                       unsigned long mark, int classzone_idx)
+                       unsigned long mark, int highest_zoneidx)
 {
        long free_pages = zone_page_state(z, NR_FREE_PAGES);
 
        if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
                free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
 
-       return __zone_watermark_ok(z, order, mark, classzone_idx, 0,
+       return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0,
                                                                free_pages);
 }
 
@@ -3659,8 +3688,8 @@ retry:
         */
        no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
        z = ac->preferred_zoneref;
-       for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
-                                                               ac->nodemask) {
+       for_next_zone_zonelist_nodemask(zone, z, ac->zonelist,
+                                       ac->highest_zoneidx, ac->nodemask) {
                struct page *page;
                unsigned long mark;
 
@@ -3715,7 +3744,7 @@ retry:
 
                mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
                if (!zone_watermark_fast(zone, order, mark,
-                                      ac_classzone_idx(ac), alloc_flags)) {
+                                      ac->highest_zoneidx, alloc_flags)) {
                        int ret;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@ -3748,7 +3777,7 @@ retry:
                        default:
                                /* did we reclaim enough */
                                if (zone_watermark_ok(zone, order, mark,
-                                               ac_classzone_idx(ac), alloc_flags))
+                                       ac->highest_zoneidx, alloc_flags))
                                        goto try_this_zone;
 
                                continue;
@@ -3907,7 +3936,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        if (gfp_mask & __GFP_RETRY_MAYFAIL)
                goto out;
        /* The OOM killer does not needlessly kill tasks for lowmem */
-       if (ac->high_zoneidx < ZONE_NORMAL)
+       if (ac->highest_zoneidx < ZONE_NORMAL)
                goto out;
        if (pm_suspended_storage())
                goto out;
@@ -4110,10 +4139,10 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
         * Let's give them a good hope and keep retrying while the order-0
         * watermarks are OK.
         */
-       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
-                                       ac->nodemask) {
+       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
+                               ac->highest_zoneidx, ac->nodemask) {
                if (zone_watermark_ok(zone, 0, min_wmark_pages(zone),
-                                       ac_classzone_idx(ac), alloc_flags))
+                                       ac->highest_zoneidx, alloc_flags))
                        return true;
        }
        return false;
@@ -4237,12 +4266,12 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
        struct zoneref *z;
        struct zone *zone;
        pg_data_t *last_pgdat = NULL;
-       enum zone_type high_zoneidx = ac->high_zoneidx;
+       enum zone_type highest_zoneidx = ac->highest_zoneidx;
 
-       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, high_zoneidx,
+       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx,
                                        ac->nodemask) {
                if (last_pgdat != zone->zone_pgdat)
-                       wakeup_kswapd(zone, gfp_mask, order, high_zoneidx);
+                       wakeup_kswapd(zone, gfp_mask, order, highest_zoneidx);
                last_pgdat = zone->zone_pgdat;
        }
 }
@@ -4285,7 +4314,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
                alloc_flags |= ALLOC_HARDER;
 
 #ifdef CONFIG_CMA
-       if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+       if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
                alloc_flags |= ALLOC_CMA;
 #endif
        return alloc_flags;
@@ -4377,8 +4406,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
         * request even if all reclaimable pages are considered then we are
         * screwed and have to go OOM.
         */
-       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
-                                       ac->nodemask) {
+       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
+                               ac->highest_zoneidx, ac->nodemask) {
                unsigned long available;
                unsigned long reclaimable;
                unsigned long min_wmark = min_wmark_pages(zone);
@@ -4392,7 +4421,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                 * reclaimable pages?
                 */
                wmark = __zone_watermark_ok(zone, order, min_wmark,
-                               ac_classzone_idx(ac), alloc_flags, available);
+                               ac->highest_zoneidx, alloc_flags, available);
                trace_reclaim_retry_zone(z, order, reclaimable,
                                available, min_wmark, *no_progress_loops, wmark);
                if (wmark) {
@@ -4511,7 +4540,7 @@ retry_cpuset:
         * could end up iterating over non-eligible zones endlessly.
         */
        ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
-                                       ac->high_zoneidx, ac->nodemask);
+                                       ac->highest_zoneidx, ac->nodemask);
        if (!ac->preferred_zoneref->zone)
                goto nopage;
 
@@ -4598,7 +4627,7 @@ retry:
        if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
                ac->nodemask = NULL;
                ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
-                                       ac->high_zoneidx, ac->nodemask);
+                                       ac->highest_zoneidx, ac->nodemask);
        }
 
        /* Attempt with potentially adjusted zonelist and alloc_flags */
@@ -4732,10 +4761,10 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
                struct alloc_context *ac, gfp_t *alloc_mask,
                unsigned int *alloc_flags)
 {
-       ac->high_zoneidx = gfp_zone(gfp_mask);
+       ac->highest_zoneidx = gfp_zone(gfp_mask);
        ac->zonelist = node_zonelist(preferred_nid, gfp_mask);
        ac->nodemask = nodemask;
-       ac->migratetype = gfpflags_to_migratetype(gfp_mask);
+       ac->migratetype = gfp_migratetype(gfp_mask);
 
        if (cpusets_enabled()) {
                *alloc_mask |= __GFP_HARDWALL;
@@ -4771,7 +4800,7 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
         * may get reset for allocations that ignore memory policies.
         */
        ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
-                                       ac->high_zoneidx, ac->nodemask);
+                                       ac->highest_zoneidx, ac->nodemask);
 }
 
 /*
@@ -5319,7 +5348,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 
        printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
                " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
-               " unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+               " unevictable:%lu dirty:%lu writeback:%lu\n"
                " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
                " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
                " free:%lu free_pcp:%lu free_cma:%lu\n",
@@ -5332,7 +5361,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                global_node_page_state(NR_UNEVICTABLE),
                global_node_page_state(NR_FILE_DIRTY),
                global_node_page_state(NR_WRITEBACK),
-               global_node_page_state(NR_UNSTABLE_NFS),
                global_node_page_state(NR_SLAB_RECLAIMABLE),
                global_node_page_state(NR_SLAB_UNRECLAIMABLE),
                global_node_page_state(NR_FILE_MAPPED),
@@ -5365,7 +5393,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        " anon_thp: %lukB"
 #endif
                        " writeback_tmp:%lukB"
-                       " unstable:%lukB"
                        " all_unreclaimable? %s"
                        "\n",
                        pgdat->node_id,
@@ -5387,7 +5414,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
 #endif
                        K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
-                       K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
                        pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
                                "yes" : "no");
        }
@@ -5420,6 +5446,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        " managed:%lukB"
                        " mlocked:%lukB"
                        " kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+                       " shadow_call_stack:%lukB"
+#endif
                        " pagetables:%lukB"
                        " bounce:%lukB"
                        " free_pcp:%lukB"
@@ -5442,6 +5471,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(zone_managed_pages(zone)),
                        K(zone_page_state(zone, NR_MLOCK)),
                        zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+                       zone_page_state(zone, NR_KERNEL_SCS_KB),
+#endif
                        K(zone_page_state(zone, NR_PAGETABLE)),
                        K(zone_page_state(zone, NR_BOUNCE)),
                        K(free_pcp),
@@ -5689,14 +5721,13 @@ static void build_zonelists(pg_data_t *pgdat)
 {
        static int node_order[MAX_NUMNODES];
        int node, load, nr_nodes = 0;
-       nodemask_t used_mask;
+       nodemask_t used_mask = NODE_MASK_NONE;
        int local_node, prev_node;
 
        /* NUMA-aware ordering of nodes */
        local_node = pgdat->node_id;
        load = nr_online_nodes;
        prev_node = local_node;
-       nodes_clear(used_mask);
 
        memset(node_order, 0, sizeof(node_order));
        while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
@@ -5908,7 +5939,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
 static bool __meminit
 overlap_memmap_init(unsigned long zone, unsigned long *pfn)
 {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        static struct memblock_region *r;
 
        if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
@@ -5924,27 +5954,9 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
                        return true;
                }
        }
-#endif
        return false;
 }
 
-#ifdef CONFIG_SPARSEMEM
-/* Skip PFNs that belong to non-present sections */
-static inline __meminit unsigned long next_pfn(unsigned long pfn)
-{
-       const unsigned long section_nr = pfn_to_section_nr(++pfn);
-
-       if (present_section_nr(section_nr))
-               return pfn;
-       return section_nr_to_pfn(next_present_section_nr(section_nr));
-}
-#else
-static inline __meminit unsigned long next_pfn(unsigned long pfn)
-{
-       return pfn++;
-}
-#endif
-
 /*
  * Initially all pages are reserved - free ones are freed
  * up by memblock_free_all() once the early boot process is
@@ -5984,14 +5996,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 * function.  They do not exist on hotplugged memory.
                 */
                if (context == MEMMAP_EARLY) {
-                       if (!early_pfn_valid(pfn)) {
-                               pfn = next_pfn(pfn);
-                               continue;
-                       }
-                       if (!early_pfn_in_nid(pfn, nid)) {
-                               pfn++;
-                               continue;
-                       }
                        if (overlap_memmap_init(zone, &pfn))
                                continue;
                        if (defer_init(nid, pfn, end_pfn))
@@ -6107,9 +6111,23 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 }
 
 void __meminit __weak memmap_init(unsigned long size, int nid,
-                                 unsigned long zone, unsigned long start_pfn)
+                                 unsigned long zone,
+                                 unsigned long range_start_pfn)
 {
-       memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, NULL);
+       unsigned long start_pfn, end_pfn;
+       unsigned long range_end_pfn = range_start_pfn + size;
+       int i;
+
+       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
+               start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
+               end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
+
+               if (end_pfn > start_pfn) {
+                       size = end_pfn - start_pfn;
+                       memmap_init_zone(size, nid, zone, start_pfn,
+                                        MEMMAP_EARLY, NULL);
+               }
+       }
 }
 
 static int zone_batchsize(struct zone *zone)
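memmap_init() now walks the node's memblock ranges and intersects each one with the zone being initialised, rather than assuming a single contiguous span; only the overlapping part is handed to memmap_init_zone(). A worked illustration with invented PFNs (hypothetical helper, not from the tree):

static void __init example_clamp_to_zone(void)
{
        unsigned long zone_start = 0x10000, zone_end = 0x50000;  /* zone spans these PFNs */
        unsigned long blk_start  = 0x08000, blk_end  = 0x20000;  /* one memblock range */

        blk_start = clamp(blk_start, zone_start, zone_end);      /* -> 0x10000 */
        blk_end   = clamp(blk_end,   zone_start, zone_end);      /* -> 0x20000 */

        if (blk_end > blk_start) {
                /* memmap_init_zone() would cover 0x10000 pages from PFN 0x10000 */
        }
}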
@@ -6261,10 +6279,25 @@ void __init setup_per_cpu_pageset(void)
 {
        struct pglist_data *pgdat;
        struct zone *zone;
+       int __maybe_unused cpu;
 
        for_each_populated_zone(zone)
                setup_zone_pageset(zone);
 
+#ifdef CONFIG_NUMA
+       /*
+        * Unpopulated zones continue using the boot pagesets.
+        * The numa stats for these pagesets need to be reset.
+        * Otherwise, they will end up skewing the stats of
+        * the nodes these zones are associated with.
+        */
+       for_each_possible_cpu(cpu) {
+               struct per_cpu_pageset *pcp = &per_cpu(boot_pageset, cpu);
+               memset(pcp->vm_numa_stat_diff, 0,
+                      sizeof(pcp->vm_numa_stat_diff));
+       }
+#endif
+
        for_each_online_pgdat(pgdat)
                pgdat->per_cpu_nodestats =
                        alloc_percpu(struct per_cpu_nodestat);
@@ -6307,57 +6340,6 @@ void __meminit init_currently_empty_zone(struct zone *zone,
        zone->initialized = 1;
 }
 
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-
-/*
- * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
- */
-int __meminit __early_pfn_to_nid(unsigned long pfn,
-                                       struct mminit_pfnnid_cache *state)
-{
-       unsigned long start_pfn, end_pfn;
-       int nid;
-
-       if (state->last_start <= pfn && pfn < state->last_end)
-               return state->last_nid;
-
-       nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
-       if (nid != NUMA_NO_NODE) {
-               state->last_start = start_pfn;
-               state->last_end = end_pfn;
-               state->last_nid = nid;
-       }
-
-       return nid;
-}
-#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
-
-/**
- * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
- * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
- * @max_low_pfn: The highest PFN that will be passed to memblock_free_early_nid
- *
- * If an architecture guarantees that all ranges registered contain no holes
- * and may be freed, this this function may be used instead of calling
- * memblock_free_early_nid() manually.
- */
-void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
-{
-       unsigned long start_pfn, end_pfn;
-       int i, this_nid;
-
-       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
-               start_pfn = min(start_pfn, max_low_pfn);
-               end_pfn = min(end_pfn, max_low_pfn);
-
-               if (start_pfn < end_pfn)
-                       memblock_free_early_nid(PFN_PHYS(start_pfn),
-                                       (end_pfn - start_pfn) << PAGE_SHIFT,
-                                       this_nid);
-       }
-}
-
 /**
  * sparse_memory_present_with_active_regions - Call memory_present for each active range
  * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -6470,8 +6452,7 @@ static unsigned long __init zone_spanned_pages_in_node(int nid,
                                        unsigned long node_start_pfn,
                                        unsigned long node_end_pfn,
                                        unsigned long *zone_start_pfn,
-                                       unsigned long *zone_end_pfn,
-                                       unsigned long *ignored)
+                                       unsigned long *zone_end_pfn)
 {
        unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
        unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
@@ -6535,8 +6516,7 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn,
 static unsigned long __init zone_absent_pages_in_node(int nid,
                                        unsigned long zone_type,
                                        unsigned long node_start_pfn,
-                                       unsigned long node_end_pfn,
-                                       unsigned long *ignored)
+                                       unsigned long node_end_pfn)
 {
        unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
        unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
@@ -6583,45 +6563,9 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
        return nr_absent;
 }
 
-#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-static inline unsigned long __init zone_spanned_pages_in_node(int nid,
-                                       unsigned long zone_type,
-                                       unsigned long node_start_pfn,
-                                       unsigned long node_end_pfn,
-                                       unsigned long *zone_start_pfn,
-                                       unsigned long *zone_end_pfn,
-                                       unsigned long *zones_size)
-{
-       unsigned int zone;
-
-       *zone_start_pfn = node_start_pfn;
-       for (zone = 0; zone < zone_type; zone++)
-               *zone_start_pfn += zones_size[zone];
-
-       *zone_end_pfn = *zone_start_pfn + zones_size[zone_type];
-
-       return zones_size[zone_type];
-}
-
-static inline unsigned long __init zone_absent_pages_in_node(int nid,
-                                               unsigned long zone_type,
-                                               unsigned long node_start_pfn,
-                                               unsigned long node_end_pfn,
-                                               unsigned long *zholes_size)
-{
-       if (!zholes_size)
-               return 0;
-
-       return zholes_size[zone_type];
-}
-
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
 static void __init calculate_node_totalpages(struct pglist_data *pgdat,
                                                unsigned long node_start_pfn,
-                                               unsigned long node_end_pfn,
-                                               unsigned long *zones_size,
-                                               unsigned long *zholes_size)
+                                               unsigned long node_end_pfn)
 {
        unsigned long realtotalpages = 0, totalpages = 0;
        enum zone_type i;
@@ -6629,17 +6573,21 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
        for (i = 0; i < MAX_NR_ZONES; i++) {
                struct zone *zone = pgdat->node_zones + i;
                unsigned long zone_start_pfn, zone_end_pfn;
+               unsigned long spanned, absent;
                unsigned long size, real_size;
 
-               size = zone_spanned_pages_in_node(pgdat->node_id, i,
-                                                 node_start_pfn,
-                                                 node_end_pfn,
-                                                 &zone_start_pfn,
-                                                 &zone_end_pfn,
-                                                 zones_size);
-               real_size = size - zone_absent_pages_in_node(pgdat->node_id, i,
-                                                 node_start_pfn, node_end_pfn,
-                                                 zholes_size);
+               spanned = zone_spanned_pages_in_node(pgdat->node_id, i,
+                                                    node_start_pfn,
+                                                    node_end_pfn,
+                                                    &zone_start_pfn,
+                                                    &zone_end_pfn);
+               absent = zone_absent_pages_in_node(pgdat->node_id, i,
+                                                  node_start_pfn,
+                                                  node_end_pfn);
+
+               size = spanned;
+               real_size = size - absent;
+
                if (size)
                        zone->zone_start_pfn = zone_start_pfn;
                else
@@ -6939,10 +6887,8 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
         */
        if (pgdat == NODE_DATA(0)) {
                mem_map = NODE_DATA(0)->node_mem_map;
-#if defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) || defined(CONFIG_FLATMEM)
                if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
                        mem_map -= offset;
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
        }
 #endif
 }
@@ -6959,30 +6905,25 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
 #endif
 
-void __init free_area_init_node(int nid, unsigned long *zones_size,
-                                  unsigned long node_start_pfn,
-                                  unsigned long *zholes_size)
+static void __init free_area_init_node(int nid)
 {
        pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long start_pfn = 0;
        unsigned long end_pfn = 0;
 
        /* pg_data_t should be reset to zero when it's allocated */
-       WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
+       WARN_ON(pgdat->nr_zones || pgdat->kswapd_highest_zoneidx);
+
+       get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
        pgdat->node_id = nid;
-       pgdat->node_start_pfn = node_start_pfn;
+       pgdat->node_start_pfn = start_pfn;
        pgdat->per_cpu_nodestats = NULL;
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-       get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+
        pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
                (u64)start_pfn << PAGE_SHIFT,
                end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
-#else
-       start_pfn = node_start_pfn;
-#endif
-       calculate_node_totalpages(pgdat, start_pfn, end_pfn,
-                                 zones_size, zholes_size);
+       calculate_node_totalpages(pgdat, start_pfn, end_pfn);
 
        alloc_node_mem_map(pgdat);
        pgdat_set_deferred_range(pgdat);
@@ -6990,6 +6931,11 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
        free_area_init_core(pgdat);
 }
 
+void __init free_area_init_memoryless_node(int nid)
+{
+       free_area_init_node(nid);
+}
+
 #if !defined(CONFIG_FLAT_NODE_MEM_MAP)
 /*
  * Initialize all valid struct pages in the range [spfn, epfn) and mark them
@@ -7073,8 +7019,6 @@ static inline void __init init_unavailable_mem(void)
 }
 #endif /* !CONFIG_FLAT_NODE_MEM_MAP */
 
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-
 #if MAX_NUMNODES > 1
 /*
  * Figure out the number of possible node ids.
@@ -7138,24 +7082,6 @@ unsigned long __init node_map_pfn_alignment(void)
        return ~accl_mask + 1;
 }
 
-/* Find the lowest pfn for a node */
-static unsigned long __init find_min_pfn_for_node(int nid)
-{
-       unsigned long min_pfn = ULONG_MAX;
-       unsigned long start_pfn;
-       int i;
-
-       for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
-               min_pfn = min(min_pfn, start_pfn);
-
-       if (min_pfn == ULONG_MAX) {
-               pr_warn("Could not find start_pfn for node %d\n", nid);
-               return 0;
-       }
-
-       return min_pfn;
-}
-
 /**
  * find_min_pfn_with_active_regions - Find the minimum PFN registered
  *
@@ -7164,7 +7090,7 @@ static unsigned long __init find_min_pfn_for_node(int nid)
  */
 unsigned long __init find_min_pfn_with_active_regions(void)
 {
-       return find_min_pfn_for_node(MAX_NUMNODES);
+       return PHYS_PFN(memblock_start_of_DRAM());
 }
 
 /*
@@ -7217,7 +7143,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                        if (!memblock_is_hotpluggable(r))
                                continue;
 
-                       nid = r->nid;
+                       nid = memblock_get_region_node(r);
 
                        usable_startpfn = PFN_DOWN(r->base);
                        zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
@@ -7238,7 +7164,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                        if (memblock_is_mirror(r))
                                continue;
 
-                       nid = r->nid;
+                       nid = memblock_get_region_node(r);
 
                        usable_startpfn = memblock_region_memory_base_pfn(r);
 
@@ -7418,8 +7344,17 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
        }
 }
 
+/*
+ * Some architectures, e.g. ARC, may have ZONE_HIGHMEM below ZONE_NORMAL. For
+ * such cases we allow max_zone_pfn sorted in descending order.
+ */
+bool __weak arch_has_descending_max_zone_pfns(void)
+{
+       return false;
+}
+
 /**
- * free_area_init_nodes - Initialise all pg_data_t and zone data
+ * free_area_init - Initialise all pg_data_t and zone data
  * @max_zone_pfn: an array of max PFNs for each zone
  *
  * This will call free_area_init_node() for each active node in the system.
@@ -7431,10 +7366,11 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
  * starts where the previous one ended. For example, ZONE_DMA32 starts
  * at arch_max_dma_pfn.
  */
-void __init free_area_init_nodes(unsigned long *max_zone_pfn)
+void __init free_area_init(unsigned long *max_zone_pfn)
 {
        unsigned long start_pfn, end_pfn;
-       int i, nid;
+       int i, nid, zone;
+       bool descending;
 
        /* Record where the zone boundaries are */
        memset(arch_zone_lowest_possible_pfn, 0,
@@ -7443,14 +7379,20 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
                                sizeof(arch_zone_highest_possible_pfn));
 
        start_pfn = find_min_pfn_with_active_regions();
+       descending = arch_has_descending_max_zone_pfns();
 
        for (i = 0; i < MAX_NR_ZONES; i++) {
-               if (i == ZONE_MOVABLE)
+               if (descending)
+                       zone = MAX_NR_ZONES - i - 1;
+               else
+                       zone = i;
+
+               if (zone == ZONE_MOVABLE)
                        continue;
 
-               end_pfn = max(max_zone_pfn[i], start_pfn);
-               arch_zone_lowest_possible_pfn[i] = start_pfn;
-               arch_zone_highest_possible_pfn[i] = end_pfn;
+               end_pfn = max(max_zone_pfn[zone], start_pfn);
+               arch_zone_lowest_possible_pfn[zone] = start_pfn;
+               arch_zone_highest_possible_pfn[zone] = end_pfn;
 
                start_pfn = end_pfn;
        }
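An architecture whose zone layout is not low-to-high overrides the weak arch_has_descending_max_zone_pfns() introduced above so that the loop fills arch_zone_*_possible_pfn[] from the highest zone index downward. Modelled on the ARC case the comment cites, an override looks roughly like this (written from memory, so indicative only):

/* arch/arc/mm/init.c (sketch): without PAE40, ARC keeps ZONE_HIGHMEM below
 * ZONE_NORMAL, so the max_zone_pfn array arrives in descending order. */
bool arch_has_descending_max_zone_pfns(void)
{
        return !IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}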
@@ -7503,8 +7445,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
        init_unavailable_mem();
        for_each_online_node(nid) {
                pg_data_t *pgdat = NODE_DATA(nid);
-               free_area_init_node(nid, NULL,
-                               find_min_pfn_for_node(nid), NULL);
+               free_area_init_node(nid);
 
                /* Any memory on that node */
                if (pgdat->node_present_pages)
@@ -7569,8 +7510,6 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
 void adjust_managed_page_count(struct page *page, long count)
 {
        atomic_long_add(count, &page_zone(page)->managed_pages);
@@ -7693,13 +7632,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
        dma_reserve = new_dma_reserve;
 }
 
-void __init free_area_init(unsigned long *zones_size)
-{
-       init_unavailable_mem();
-       free_area_init_node(0, zones_size,
-                       __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
-}
-
 static int page_alloc_cpu_dead(unsigned int cpu)
 {
 
@@ -7817,9 +7749,10 @@ static void setup_per_zone_lowmem_reserve(void)
                                idx--;
                                lower_zone = pgdat->node_zones + idx;
 
-                               if (sysctl_lowmem_reserve_ratio[idx] < 1) {
-                                       sysctl_lowmem_reserve_ratio[idx] = 0;
+                               if (!sysctl_lowmem_reserve_ratio[idx] ||
+                                   !zone_managed_pages(lower_zone)) {
                                        lower_zone->lowmem_reserve[j] = 0;
+                                       continue;
                                } else {
                                        lower_zone->lowmem_reserve[j] =
                                                managed_pages / sysctl_lowmem_reserve_ratio[idx];
@@ -7884,9 +7817,9 @@ static void __setup_per_zone_wmarks(void)
                            mult_frac(zone_managed_pages(zone),
                                      watermark_scale_factor, 10000));
 
+               zone->watermark_boost = 0;
                zone->_watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
                zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
-               zone->watermark_boost = 0;
 
                spin_unlock_irqrestore(&zone->lock, flags);
        }
@@ -8083,7 +8016,15 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
+       int i;
+
        proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               if (sysctl_lowmem_reserve_ratio[i] < 1)
+                       sysctl_lowmem_reserve_ratio[i] = 0;
+       }
+
        setup_per_zone_lowmem_reserve();
        return 0;
 }
@@ -8247,7 +8188,7 @@ void *__init alloc_large_system_hash(const char *tablename,
                                table = memblock_alloc_raw(size,
                                                           SMP_CACHE_BYTES);
                } else if (get_order(size) >= MAX_ORDER || hashdist) {
-                       table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+                       table = __vmalloc(size, gfp_flags);
                        virt = true;
                } else {
                        /*