Merge branch 'for-4.6-ns' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
[linux-2.6-block.git] / mm / page_alloc.c
index d156310aedeb7a7161e2763b326ce3800616bd07..a762be57e46e14efa571b967eb75696c35ebd034 100644 (file)
@@ -308,13 +308,20 @@ static inline bool update_defer_init(pg_data_t *pgdat,
                                unsigned long pfn, unsigned long zone_end,
                                unsigned long *nr_initialised)
 {
+       unsigned long max_initialise;
+
        /* Always populate low zones for address-constrained allocations */
        if (zone_end < pgdat_end_pfn(pgdat))
                return true;
+       /*
+        * Initialise at least 2G of a node, but also take into account
+        * that two large system hashes can take up 1GB for 0.25TB/node.
+        */
+       max_initialise = max(2UL << (30 - PAGE_SHIFT),
+               (pgdat->node_spanned_pages >> 8));
 
-       /* Initialise at least 2G of the highest zone */
        (*nr_initialised)++;
-       if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+       if ((*nr_initialised > max_initialise) &&
            (pfn & (PAGES_PER_SECTION - 1)) == 0) {
                pgdat->first_deferred_pfn = pfn;
                return false;
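
For reference, a minimal standalone sketch (not part of the patch, and assuming 4K pages, i.e. PAGE_SHIFT == 12) of how the new max_initialise bound behaves: the 2G floor dominates on small nodes, while node_spanned_pages >> 8 (about 1G per 0.25TB of node) takes over on large ones.

/* Illustrative only: how max_initialise scales, assuming PAGE_SHIFT == 12. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define GB_TO_PAGES(gb)	((unsigned long)(gb) << (30 - PAGE_SHIFT))

int main(void)
{
	unsigned long node_gb[] = { 64, 256, 1024, 4096 };
	unsigned long floor = 2UL << (30 - PAGE_SHIFT);	/* 2G worth of pages */
	int i;

	for (i = 0; i < 4; i++) {
		unsigned long spanned = GB_TO_PAGES(node_gb[i]);
		unsigned long scaled = spanned >> 8;	/* ~1G per 0.25TB of node */
		unsigned long max_initialise = scaled > floor ? scaled : floor;

		printf("%5luG node -> initialise %luG before deferring\n",
		       node_gb[i], max_initialise >> (30 - PAGE_SHIFT));
	}
	return 0;
}
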
@@ -544,11 +551,11 @@ static int __init debug_guardpage_minorder_setup(char *buf)
        unsigned long res;
 
        if (kstrtoul(buf, 10, &res) < 0 ||  res > MAX_ORDER / 2) {
-               printk(KERN_ERR "Bad debug_guardpage_minorder value\n");
+               pr_err("Bad debug_guardpage_minorder value\n");
                return 0;
        }
        _debug_guardpage_minorder = res;
-       printk(KERN_INFO "Setting debug_guardpage_minorder to %lu\n", res);
+       pr_info("Setting debug_guardpage_minorder to %lu\n", res);
        return 0;
 }
 __setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
@@ -766,7 +773,7 @@ static inline int free_pages_check(struct page *page)
                bad_reason = "nonzero mapcount";
        if (unlikely(page->mapping != NULL))
                bad_reason = "non-NULL mapping";
-       if (unlikely(atomic_read(&page->_count) != 0))
+       if (unlikely(page_ref_count(page) != 0))
                bad_reason = "nonzero _count";
        if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
                bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
@@ -1462,7 +1469,7 @@ static inline int check_new_page(struct page *page)
                bad_reason = "nonzero mapcount";
        if (unlikely(page->mapping != NULL))
                bad_reason = "non-NULL mapping";
-       if (unlikely(atomic_read(&page->_count) != 0))
+       if (unlikely(page_ref_count(page) != 0))
                bad_reason = "nonzero _count";
        if (unlikely(page->flags & __PG_HWPOISON)) {
                bad_reason = "HWPoisoned (hardware-corrupted)";
@@ -2350,19 +2357,11 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
                list_del(&page->lru);
                pcp->count--;
        } else {
-               if (unlikely(gfp_flags & __GFP_NOFAIL)) {
-                       /*
-                        * __GFP_NOFAIL is not to be used in new code.
-                        *
-                        * All __GFP_NOFAIL callers should be fixed so that they
-                        * properly detect and handle allocation failures.
-                        *
-                        * We most definitely don't want callers attempting to
-                        * allocate greater than order-1 page units with
-                        * __GFP_NOFAIL.
-                        */
-                       WARN_ON_ONCE(order > 1);
-               }
+               /*
+                * We most definitely don't want callers attempting to
+                * allocate greater than order-1 page units with __GFP_NOFAIL.
+                */
+               WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
                spin_lock_irqsave(&zone->lock, flags);
 
                page = NULL;
@@ -2859,8 +2858,12 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                         * XXX: Page reclaim didn't yield anything,
                         * and the OOM killer can't be invoked, but
                         * keep looping as per tradition.
+                        *
+                        * But do not keep looping if oom_killer_disable()
+                        * was already called, for the system is trying to
+                        * enter a quiescent state during suspend.
                         */
-                       *did_some_progress = 1;
+                       *did_some_progress = !oom_killer_disabled;
                        goto out;
                }
                if (pm_suspended_storage())
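
For context, this is roughly what the caller does with *did_some_progress once __alloc_pages_may_oom() returns; a simplified sketch of the retry decision in __alloc_pages_slowpath() around this kernel version, not an exact quote of it:

	/* Reclaim has failed us; ask the OOM killer for help. */
	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
	if (page)
		goto got_pg;

	/*
	 * Once oom_killer_disable() is in effect, did_some_progress stays 0,
	 * so the allocation fails here instead of looping and stalling suspend.
	 */
	if (did_some_progress)
		goto retry;
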
@@ -3119,14 +3122,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
                gfp_mask &= ~__GFP_ATOMIC;
 
-       /*
-        * If this allocation cannot block and it is for a specific node, then
-        * fail early.  There's no need to wakeup kswapd or retry for a
-        * speculative node-specific allocation.
-        */
-       if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim)
-               goto nopage;
-
 retry:
        if (gfp_mask & __GFP_KSWAPD_RECLAIM)
                wake_all_kswapds(order, ac);
@@ -3483,7 +3478,7 @@ refill:
                /* Even if we own the page, we do not use atomic_set().
                 * This would break get_page_unless_zero() users.
                 */
-               atomic_add(size - 1, &page->_count);
+               page_ref_add(page, size - 1);
 
                /* reset page count bias and offset to start of new frag */
                nc->pfmemalloc = page_is_pfmemalloc(page);
@@ -3495,7 +3490,7 @@ refill:
        if (unlikely(offset < 0)) {
                page = virt_to_page(nc->va);
 
-               if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
+               if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
                        goto refill;
 
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
@@ -3503,7 +3498,7 @@ refill:
                size = nc->size;
 #endif
                /* OK, page count is 0, we can safely set it */
-               atomic_set(&page->_count, size);
+               set_page_count(page, size);
 
                /* reset page count bias and offset to start of new frag */
                nc->pagecnt_bias = size;
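
The open-coded atomic_*() operations on page->_count above become the page_ref_*() accessors. For readers following the conversion, a simplified sketch of what those helpers boil down to (modelled on include/linux/page_ref.h of this era, which may additionally emit debug tracepoints):

#include <linux/atomic.h>
#include <linux/mm_types.h>

/* Thin wrappers around the page reference count; tracepoints omitted. */
static inline int page_ref_count(struct page *page)
{
	return atomic_read(&page->_count);
}

static inline void set_page_count(struct page *page, int v)
{
	atomic_set(&page->_count, v);
}

static inline void page_ref_add(struct page *page, int nr)
{
	atomic_add(nr, &page->_count);
}

static inline int page_ref_sub_and_test(struct page *page, int nr)
{
	return atomic_sub_and_test(nr, &page->_count);
}
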
@@ -4089,9 +4084,7 @@ static int __parse_numa_zonelist_order(char *s)
        } else if (*s == 'z' || *s == 'Z') {
                user_zonelist_order = ZONELIST_ORDER_ZONE;
        } else {
-               printk(KERN_WARNING
-                       "Ignoring invalid numa_zonelist_order value:  "
-                       "%s\n", s);
+               pr_warn("Ignoring invalid numa_zonelist_order value:  %s\n", s);
                return -EINVAL;
        }
        return 0;
@@ -4555,12 +4548,11 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
        else
                page_group_by_mobility_disabled = 0;
 
-       pr_info("Built %i zonelists in %s order, mobility grouping %s.  "
-               "Total pages: %ld\n",
-                       nr_online_nodes,
-                       zonelist_order_name[current_zonelist_order],
-                       page_group_by_mobility_disabled ? "off" : "on",
-                       vm_total_pages);
+       pr_info("Built %i zonelists in %s order, mobility grouping %s.  Total pages: %ld\n",
+               nr_online_nodes,
+               zonelist_order_name[current_zonelist_order],
+               page_group_by_mobility_disabled ? "off" : "on",
+               vm_total_pages);
 #ifdef CONFIG_NUMA
        pr_info("Policy zone: %s\n", zone_names[policy_zone]);
 #endif
@@ -5476,8 +5468,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                                               "  %s zone: %lu pages used for memmap\n",
                                               zone_names[j], memmap_pages);
                        } else
-                               printk(KERN_WARNING
-                                       "  %s zone: %lu pages exceeds freesize %lu\n",
+                               pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
                                        zone_names[j], memmap_pages, freesize);
                }
 
@@ -5685,8 +5676,7 @@ static unsigned long __init find_min_pfn_for_node(int nid)
                min_pfn = min(min_pfn, start_pfn);
 
        if (min_pfn == ULONG_MAX) {
-               printk(KERN_WARNING
-                       "Could not find start_pfn for node %d\n", nid);
+               pr_warn("Could not find start_pfn for node %d\n", nid);
                return 0;
        }
 
@@ -6158,22 +6148,21 @@ void __init mem_init_print_info(const char *str)
 
 #undef adj_init_size
 
-       pr_info("Memory: %luK/%luK available "
-              "(%luK kernel code, %luK rwdata, %luK rodata, "
-              "%luK init, %luK bss, %luK reserved, %luK cma-reserved"
+       pr_info("Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved"
 #ifdef CONFIG_HIGHMEM
-              ", %luK highmem"
+               ", %luK highmem"
 #endif
-              "%s%s)\n",
-              nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
-              codesize >> 10, datasize >> 10, rosize >> 10,
-              (init_data_size + init_code_size) >> 10, bss_size >> 10,
-              (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10),
-              totalcma_pages << (PAGE_SHIFT-10),
+               "%s%s)\n",
+               nr_free_pages() << (PAGE_SHIFT - 10),
+               physpages << (PAGE_SHIFT - 10),
+               codesize >> 10, datasize >> 10, rosize >> 10,
+               (init_data_size + init_code_size) >> 10, bss_size >> 10,
+               (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10),
+               totalcma_pages << (PAGE_SHIFT - 10),
 #ifdef CONFIG_HIGHMEM
-              totalhigh_pages << (PAGE_SHIFT-10),
+               totalhigh_pages << (PAGE_SHIFT - 10),
 #endif
-              str ? ", " : "", str ? str : "");
+               str ? ", " : "", str ? str : "");
 }
 
 /**
@@ -6705,11 +6694,8 @@ void *__init alloc_large_system_hash(const char *tablename,
        if (!table)
                panic("Failed to allocate %s hash table\n", tablename);
 
-       printk(KERN_INFO "%s hash table entries: %ld (order: %d, %lu bytes)\n",
-              tablename,
-              (1UL << log2qty),
-              ilog2(size) - PAGE_SHIFT,
-              size);
+       pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n",
+               tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size);
 
        if (_hash_shift)
                *_hash_shift = log2qty;
@@ -6860,7 +6846,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                 * This check already skips compound tails of THP
                 * because their page->_count is zero at all times.
                 */
-               if (!atomic_read(&page->_count)) {
+               if (!page_ref_count(page)) {
                        if (PageBuddy(page))
                                iter += (1 << page_order(page)) - 1;
                        continue;
@@ -7210,8 +7196,8 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
                BUG_ON(!PageBuddy(page));
                order = page_order(page);
 #ifdef CONFIG_DEBUG_VM
-               printk(KERN_INFO "remove from free list %lx %d %lx\n",
-                      pfn, 1 << order, end_pfn);
+               pr_info("remove from free list %lx %d %lx\n",
+                       pfn, 1 << order, end_pfn);
 #endif
                list_del(&page->lru);
                rmv_page_order(page);