Merge tag 'devdax-for-5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm...
[linux-2.6-block.git] / mm / memory_hotplug.c
index b37f3a5c48336fb12669faed8d19322a90522259..f767582af4f8c0f28102f2d77d8dc6a667ec3df5 100644 (file)
@@ -47,7 +47,7 @@
  * and restore_online_page_callback() for generic callback restore.
  */
 
-static void generic_online_page(struct page *page);
+static void generic_online_page(struct page *page, unsigned int order);
 
 static online_page_callback_t online_page_callback = generic_online_page;
 static DEFINE_MUTEX(online_page_callback_lock);
@@ -96,6 +96,8 @@ void mem_hotplug_done(void)
        cpus_read_unlock();
 }
 
+u64 max_mem_size = U64_MAX; /* ceiling for start + size in register_memory_resource(), which returns -E2BIG above it */
+
 /* add this memory to iomem resource */
 static struct resource *register_memory_resource(u64 start, u64 size)
 {
@@ -103,6 +105,9 @@ static struct resource *register_memory_resource(u64 start, u64 size)
        unsigned long flags =  IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
        char *resource_name = "System RAM";
 
+       if (start + size > max_mem_size)
+               return ERR_PTR(-E2BIG);
+
        /*
         * Request ownership of the new memory range.  This might be
         * a child of an existing resource that was present but
@@ -653,26 +658,40 @@ void __online_page_free(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__online_page_free);
 
-static void generic_online_page(struct page *page)
+/*
+ * Default online_page_callback: map the 2^order pages starting at @page,
+ * release them through __free_pages_core() and add them to the RAM page
+ * total (and to the highmem total when @page is a highmem page).
+ */
+static void generic_online_page(struct page *page, unsigned int order)
+{
+       const unsigned long nr_pages = 1UL << order;
+
+       kernel_map_pages(page, 1 << order, 1);
+       __free_pages_core(page, order);
+
+       totalram_pages_add(nr_pages);
+#ifdef CONFIG_HIGHMEM
+       if (PageHighMem(page))
+               totalhigh_pages_add(nr_pages);
+#endif
+}
+
+/*
+ * Pass the pfn range [start, start + nr_pages) to online_page_callback in
+ * power-of-two blocks of at most order MAX_ORDER - 1.
+ *
+ * Every block lies entirely inside the range and starts on a pfn aligned
+ * to the block size.
+ *
+ * Returns the number of pages handed to the callback.
+ */
+static int online_pages_blocks(unsigned long start, unsigned long nr_pages)
 {
-       __online_page_set_limits(page);
-       __online_page_increment_counters(page);
-       __online_page_free(page);
+       unsigned long end = start + nr_pages;
+       int order, onlined_pages = 0;
+
+       while (start < end) {
+               order = min(MAX_ORDER - 1,
+                       get_order(PFN_PHYS(end) - PFN_PHYS(start)));
+               /*
+                * get_order() rounds up, so a remainder that is not a power
+                * of two would produce a block reaching past @end.  Shrink
+                * the order until the block fits in what is left and @start
+                * is aligned to it; order 0 always satisfies both.
+                */
+               while (order > 0 &&
+                      ((1UL << order) > end - start ||
+                       (start & ((1UL << order) - 1))))
+                       order--;
+               (*online_page_callback)(pfn_to_page(start), order);
+
+               onlined_pages += (1UL << order);
+               start += (1UL << order);
+       }
+       return onlined_pages;
 }
 
 static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
                        void *arg)
 {
-       unsigned long i;
        unsigned long onlined_pages = *(unsigned long *)arg;
-       struct page *page;
 
        if (PageReserved(pfn_to_page(start_pfn)))
-               for (i = 0; i < nr_pages; i++) {
-                       page = pfn_to_page(start_pfn + i);
-                       (*online_page_callback)(page);
-                       onlined_pages++;
-               }
+               onlined_pages += online_pages_blocks(start_pfn, nr_pages);
 
        online_mem_sections(start_pfn, start_pfn + nr_pages);
 
@@ -686,9 +705,9 @@ static void node_states_check_changes_online(unsigned long nr_pages,
 {
        int nid = zone_to_nid(zone);
 
-       arg->status_change_nid = -1;
-       arg->status_change_nid_normal = -1;
-       arg->status_change_nid_high = -1;
+       arg->status_change_nid = NUMA_NO_NODE;
+       arg->status_change_nid_normal = NUMA_NO_NODE;
+       arg->status_change_nid_high = NUMA_NO_NODE;
 
        if (!node_state(nid, N_MEMORY))
                arg->status_change_nid = nid;
@@ -1185,11 +1204,13 @@ static inline int pageblock_free(struct page *page)
        return PageBuddy(page) && page_order(page) >= pageblock_order;
 }
 
-/* Return the start of the next active pageblock after a given page */
-static struct page *next_active_pageblock(struct page *page)
+/* Return the pfn of the start of the next active pageblock after a given pfn */
+static unsigned long next_active_pageblock(unsigned long pfn)
 {
+       struct page *page = pfn_to_page(pfn);
+
        /* Ensure the starting page is pageblock-aligned */
-       BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+       BUG_ON(pfn & (pageblock_nr_pages - 1));
 
        /* If the entire pageblock is free, move to the end of free page */
        if (pageblock_free(page)) {
@@ -1197,16 +1218,16 @@ static struct page *next_active_pageblock(struct page *page)
                /* be careful. we don't have locks, page_order can be changed.*/
                order = page_order(page);
                if ((order < MAX_ORDER) && (order >= pageblock_order))
-                       return page + (1 << order);
+                       return pfn + (1 << order);
        }
 
-       return page + pageblock_nr_pages;
+       return pfn + pageblock_nr_pages;
 }
 
-static bool is_pageblock_removable_nolock(struct page *page)
+static bool is_pageblock_removable_nolock(unsigned long pfn)
 {
+       struct page *page = pfn_to_page(pfn);
        struct zone *zone;
-       unsigned long pfn;
 
        /*
         * We have to be careful here because we are iterating over memory
@@ -1229,12 +1250,14 @@ static bool is_pageblock_removable_nolock(struct page *page)
 /* Checks if this range of memory is likely to be hot-removable. */
 bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 {
-       struct page *page = pfn_to_page(start_pfn);
-       struct page *end_page = page + nr_pages;
+       unsigned long end_pfn, pfn;
+
+       end_pfn = min(start_pfn + nr_pages,
+                       zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
 
        /* Check the starting page of each pageblock within the range */
-       for (; page < end_page; page = next_active_pageblock(page)) {
-               if (!is_pageblock_removable_nolock(page))
+       for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
+               if (!is_pageblock_removable_nolock(pfn))
                        return false;
                cond_resched();
        }
@@ -1270,6 +1293,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
                                i++;
                        if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
                                continue;
+                       /* Check if we got outside of the zone */
+                       if (zone && !zone_spans_pfn(zone, pfn + i))
+                               return 0;
                        page = pfn_to_page(pfn + i);
                        if (zone && page_zone(page) != zone)
                                return 0;
@@ -1298,23 +1324,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
        unsigned long pfn;
-       struct page *page;
+
        for (pfn = start; pfn < end; pfn++) {
-               if (pfn_valid(pfn)) {
-                       page = pfn_to_page(pfn);
-                       if (PageLRU(page))
-                               return pfn;
-                       if (__PageMovable(page))
-                               return pfn;
-                       if (PageHuge(page)) {
-                               if (hugepage_migration_supported(page_hstate(page)) &&
-                                   page_huge_active(page))
-                                       return pfn;
-                               else
-                                       pfn = round_up(pfn + 1,
-                                               1 << compound_order(page)) - 1;
-                       }
-               }
+               struct page *page, *head;
+               unsigned long skip;
+
+               if (!pfn_valid(pfn))
+                       continue;
+               page = pfn_to_page(pfn);
+               if (PageLRU(page))
+                       return pfn;
+               if (__PageMovable(page))
+                       return pfn;
+
+               if (!PageHuge(page))
+                       continue;
+               head = compound_head(page);
+               if (hugepage_migration_supported(page_hstate(head)) &&
+                   page_huge_active(head))
+                       return pfn;
+               skip = (1 << compound_order(head)) - (page - head);
+               pfn += skip - 1;
        }
        return 0;
 }
@@ -1341,7 +1371,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 {
        unsigned long pfn;
        struct page *page;
-       int not_managed = 0;
        int ret = 0;
        LIST_HEAD(source);
 
@@ -1352,12 +1381,12 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 
                if (PageHuge(page)) {
                        struct page *head = compound_head(page);
-                       pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
                        if (compound_order(head) > PFN_SECTION_SHIFT) {
                                ret = -EBUSY;
                                break;
                        }
-                       isolate_huge_page(page, &source);
+                       pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+                       isolate_huge_page(head, &source);
                        continue;
                } else if (PageTransHuge(page))
                        pfn = page_to_pfn(compound_head(page))
@@ -1389,7 +1418,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                else
                        ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
                if (!ret) { /* Success */
-                       put_page(page);
                        list_add_tail(&page->lru, &source);
                        if (!__PageMovable(page))
                                inc_node_page_state(page, NR_ISOLATED_ANON +
@@ -1398,22 +1426,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                } else {
                        pr_warn("failed to isolate pfn %lx\n", pfn);
                        dump_page(page, "isolation failed");
-                       put_page(page);
-                       /* Because we don't have big zone->lock. we should
-                          check this again here. */
-                       if (page_count(page)) {
-                               not_managed++;
-                               ret = -EBUSY;
-                               break;
-                       }
                }
+               put_page(page);
        }
        if (!list_empty(&source)) {
-               if (not_managed) {
-                       putback_movable_pages(&source);
-                       goto out;
-               }
-
                /* Allocate a new page from the nearest neighbor node */
                ret = migrate_pages(&source, new_node_page, NULL, 0,
                                        MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
@@ -1426,7 +1442,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                        putback_movable_pages(&source);
                }
        }
-out:
+
        return ret;
 }
 
@@ -1496,9 +1512,9 @@ static void node_states_check_changes_offline(unsigned long nr_pages,
        unsigned long present_pages = 0;
        enum zone_type zt;
 
-       arg->status_change_nid = -1;
-       arg->status_change_nid_normal = -1;
-       arg->status_change_nid_high = -1;
+       arg->status_change_nid = NUMA_NO_NODE;
+       arg->status_change_nid_normal = NUMA_NO_NODE;
+       arg->status_change_nid_high = NUMA_NO_NODE;
 
        /*
         * Check whether node_states[N_NORMAL_MEMORY] will be changed.
@@ -1573,7 +1589,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
           we assume this for now. .*/
        if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
                                  &valid_end)) {
-               mem_hotplug_done();
                ret = -EINVAL;
                reason = "multizone range";
                goto failed_removal;
@@ -1588,7 +1603,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
                                       MIGRATE_MOVABLE,
                                       SKIP_HWPOISON | REPORT_FAILURE);
        if (ret) {
-               mem_hotplug_done();
                reason = "failure to isolate range";
                goto failed_removal;
        }
@@ -1614,7 +1628,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
 
                        cond_resched();
                        lru_add_drain_all();
-                       drain_all_pages(zone);
 
                        pfn = scan_movable_pages(pfn, end_pfn);
                        if (pfn) {