mm: update get_user_pages_longterm to migrate pages allocated from CMA region

author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Tue, 5 Mar 2019 23:47:44 +0000 (15:47 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 6 Mar 2019 05:07:19 +0000 (21:07 -0800)
author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Tue, 5 Mar 2019 23:47:44 +0000 (15:47 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 6 Mar 2019 05:07:19 +0000 (21:07 -0800)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index 54c317c8355fc5ee329baaed4216da808adf3c34..ea35263eb76b76e796f2f3ffaa3378585b98db82 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -371,6 +371,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
                                 nodemask_t *nmask);
  struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
                                 unsigned long address);
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+                                    int nid, nodemask_t *nmask);
  int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                         pgoff_t idx);
  
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 80bb6408fe73a8744c733362abf5446b237156ab..20ec56f8e2bbd929e86909c60c644686289f3e5c 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1536,7 +1536,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
                     unsigned int gup_flags, struct page **pages, int *locked);
  long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
                     struct page **pages, unsigned int gup_flags);
-#ifdef CONFIG_FS_DAX
+
+#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
  long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
                             unsigned int gup_flags, struct page **pages,
                             struct vm_area_struct **vmas);
diff --git a/mm/gup.c b/mm/gup.c

index 75029649baca4ac834732b931cb9f743cdc58afb..22291db50013d89d9427a2c50003dca0a25abbcf 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -13,6 +13,9 @@
  #include <linux/sched/signal.h>
  #include <linux/rwsem.h>
  #include <linux/hugetlb.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/sched/mm.h>
  
  #include <asm/mmu_context.h>
  #include <asm/pgtable.h>
@@ -1126,7 +1129,167 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
  }
  EXPORT_SYMBOL(get_user_pages);
  
+#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
+
  #ifdef CONFIG_FS_DAX
+static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+       long i;
+       struct vm_area_struct *vma_prev = NULL;
+
+       for (i = 0; i < nr_pages; i++) {
+               struct vm_area_struct *vma = vmas[i];
+
+               if (vma == vma_prev)
+                       continue;
+
+               vma_prev = vma;
+
+               if (vma_is_fsdax(vma))
+                       return true;
+       }
+       return false;
+}
+#else
+static inline bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+       return false;
+}
+#endif
+
+#ifdef CONFIG_CMA
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+       /*
+        * We want to make sure we allocate the new page from the same node
+        * as the source page.
+        */
+       int nid = page_to_nid(page);
+       /*
+        * Trying to allocate a page for migration. Ignore allocation
+        * failure warnings. We don't force __GFP_THISNODE here because
+        * this is the node that holds the CMA reservation, and in some
+        * cases such nodes have very little memory available for
+        * non-movable allocations.
+        */
+       gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+
+       if (PageHighMem(page))
+               gfp_mask |= __GFP_HIGHMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+       if (PageHuge(page)) {
+               struct hstate *h = page_hstate(page);
+               /*
+                * We don't want to dequeue from the pool because pool pages will
+                * mostly be from the CMA region.
+                */
+               return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+       }
+#endif
+       if (PageTransHuge(page)) {
+               struct page *thp;
+               /*
+                * ignore allocation failure warnings
+                */
+               gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
+
+               /*
+                * Remove the movable mask so that we don't allocate from
+                * CMA area again.
+                */
+               thp_gfpmask &= ~__GFP_MOVABLE;
+               thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+               if (!thp)
+                       return NULL;
+               prep_transhuge_page(thp);
+               return thp;
+       }
+
+       return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                       unsigned int gup_flags,
+                                       struct page **pages,
+                                       struct vm_area_struct **vmas)
+{
+       long i;
+       bool drain_allow = true;
+       bool migrate_allow = true;
+       LIST_HEAD(cma_page_list);
+
+check_again:
+       for (i = 0; i < nr_pages; i++) {
+               /*
+                * If we get a page from the CMA region, since we are going to
+                * be pinning these entries, we might as well move them out
+                * of the CMA region if possible.
+                */
+               if (is_migrate_cma_page(pages[i])) {
+
+                       struct page *head = compound_head(pages[i]);
+
+                       if (PageHuge(head)) {
+                               isolate_huge_page(head, &cma_page_list);
+                       } else {
+                               if (!PageLRU(head) && drain_allow) {
+                                       lru_add_drain_all();
+                                       drain_allow = false;
+                               }
+
+                               if (!isolate_lru_page(head)) {
+                                       list_add_tail(&head->lru, &cma_page_list);
+                                       mod_node_page_state(page_pgdat(head),
+                                                           NR_ISOLATED_ANON +
+                                                           page_is_file_cache(head),
+                                                           hpage_nr_pages(head));
+                               }
+                       }
+               }
+       }
+
+       if (!list_empty(&cma_page_list)) {
+               /*
+                * drop the above get_user_pages reference.
+                */
+               for (i = 0; i < nr_pages; i++)
+                       put_page(pages[i]);
+
+               if (migrate_pages(&cma_page_list, new_non_cma_page,
+                                 NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+                       /*
+                        * Some of the pages failed migration. Redo get_user_pages
+                        * without attempting migration again.
+                        */
+                       migrate_allow = false;
+
+                       if (!list_empty(&cma_page_list))
+                               putback_movable_pages(&cma_page_list);
+               }
+               /*
+                * Take the page references again; if migration succeeded, loop to
+                * migrate any remaining CMA pages which we failed to isolate earlier.
+                */
+               nr_pages = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+               if ((nr_pages > 0) && migrate_allow) {
+                       drain_allow = true;
+                       goto check_again;
+               }
+       }
+
+       return nr_pages;
+}
+#else
+static inline long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                              unsigned int gup_flags,
+                                              struct page **pages,
+                                              struct vm_area_struct **vmas)
+{
+       return nr_pages;
+}
+#endif
+
  /*
   * This is the same as get_user_pages() in that it assumes we are
   * operating on the current task's mm, but it goes further to validate
@@ -1140,11 +1303,11 @@ EXPORT_SYMBOL(get_user_pages);
   * Contrast this to iov_iter_get_pages() usages which are transient.
   */
  long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
-               unsigned int gup_flags, struct page **pages,
-               struct vm_area_struct **vmas_arg)
+                            unsigned int gup_flags, struct page **pages,
+                            struct vm_area_struct **vmas_arg)
  {
         struct vm_area_struct **vmas = vmas_arg;
-       struct vm_area_struct *vma_prev = NULL;
+       unsigned long flags;
         long rc, i;
  
         if (!pages)
@@ -1157,31 +1320,20 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
                         return -ENOMEM;
         }
  
+       flags = memalloc_nocma_save();
         rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+       memalloc_nocma_restore(flags);
+       if (rc < 0)
+               goto out;
  
-       for (i = 0; i < rc; i++) {
-               struct vm_area_struct *vma = vmas[i];
-
-               if (vma == vma_prev)
-                       continue;
-
-               vma_prev = vma;
-
-               if (vma_is_fsdax(vma))
-                       break;
-       }
-
-       /*
-        * Either get_user_pages() failed, or the vma validation
-        * succeeded, in either case we don't need to put_page() before
-        * returning.
-        */
-       if (i >= rc)
+       if (check_dax_vmas(vmas, rc)) {
+               for (i = 0; i < rc; i++)
+                       put_page(pages[i]);
+               rc = -EOPNOTSUPP;
                 goto out;
+       }
  
-       for (i = 0; i < rc; i++)
-               put_page(pages[i]);
-       rc = -EOPNOTSUPP;
+       rc = check_and_migrate_cma_pages(start, rc, gup_flags, pages, vmas);
  out:
         if (vmas != vmas_arg)
                 kfree(vmas);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 0c7848fccf9393f4e7202df0e4ba1e2da2ccde6f..97b1e0290c66d48737cda50ccea6bbcc1782c8fc 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1587,8 +1587,8 @@ out_unlock:
         return page;
  }
  
-static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
-               int nid, nodemask_t *nmask)
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+                                    int nid, nodemask_t *nmask)
  {
         struct page *page;
author	Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
	Tue, 5 Mar 2019 23:47:44 +0000 (15:47 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 6 Mar 2019 05:07:19 +0000 (21:07 -0800)
include/linux/hugetlb.h		patch \| blob \| blame \| history
include/linux/mm.h		patch \| blob \| blame \| history
mm/gup.c		patch \| blob \| blame \| history
mm/hugetlb.c		patch \| blob \| blame \| history