Merge branch 'hugepage-fallbacks' (hugepage patches from David Rientjes)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Sep 2019 21:26:47 +0000 (14:26 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Sep 2019 21:26:47 +0000 (14:26 -0700)
Merge hugepage allocation updates from David Rientjes:
 "We (mostly Linus, Andrea, and myself) have been discussing offlist how
  to implement a sane default allocation strategy for hugepages on NUMA
  platforms.

  With these reverts in place, the page allocator will happily allocate
  a remote hugepage immediately rather than try to make a local hugepage
  available. This incurs a substantial performance degradation when
  memory compaction would have otherwise made a local hugepage
  available.

  This series reverts those reverts and attempts to propose a more sane
  default allocation strategy specifically for hugepages. Andrea
  acknowledges this is likely to fix the swap storms that he originally
  reported that resulted in the patches that removed __GFP_THISNODE from
  hugepage allocations.

  The immediate goal is to return 5.3 to the behavior the kernel has
  implemented over the past several years so that remote hugepages are
  not immediately allocated when local hugepages could have been made
  available because the increased access latency is untenable.

  The next goal is to introduce a sane default allocation strategy for
  hugepage allocations in general regardless of the configuration of
  the system so that we prevent thrashing of local memory when
  compaction is unlikely to succeed and can prefer remote hugepages over
  remote native pages when the local node is low on memory."

Note on timing: this reverts the hugepage VM behavior changes that got
introduced fairly late in the 5.3 cycle, and that fixed a huge
performance regression for certain loads that had been around since
4.18.

Andrea had this note:

 "The regression of 4.18 was that it was taking hours to start a VM
  where 3.10 was only taking a few seconds, I reported all the details
  on lkml when it was finally tracked down in August 2018.

     https://lore.kernel.org/linux-mm/20180820032640.9896-2-aarcange@redhat.com/

  __GFP_THISNODE in MADV_HUGEPAGE made the above enterprise vfio
  workload degrade like in the "current upstream" above. And it still
  would have been that bad as above until 5.3-rc5"

where the bad behavior ends up happening as you fill up a local node,
and without that change, you'd get into the nasty swap storm behavior
due to reclaim and compaction working overtime to make room for more
local hugepages.

As a result 5.3 got the two performance fix reverts in rc5.

However, David Rientjes then noted that those performance fixes in turn
regressed performance for other loads - although not quite to the same
degree.  He suggested reverting the reverts and instead replacing them
with two small changes to how hugepage allocations are done (patch
descriptions rephrased by me; see the sketch after this list):

 - "avoid expensive reclaim when compaction may not succeed": just admit
   that the allocation failed when you're trying to allocate a huge-page
   and compaction wasn't successful.

 - "allow hugepage fallback to remote nodes when madvised": when that
   node-local huge-page allocation failed, retry without forcing the
   local node.

but by then I judged it too late to replace the fixes for a 5.3 release.
So 5.3 was released with behavior that harked back to the pre-4.18 logic.
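
In rough C, the combined effect of those two changes looks something
like the sketch below.  This is a hedged, simplified userspace model
(stub allocator, made-up helper names such as alloc_on_node()), not the
kernel code itself; the real logic lives in the mm/page_alloc.c and
mm/mempolicy.c hunks further down in this diff.

  /*
   * Hypothetical model of the hugepage allocation policy described
   * above -- not actual kernel code.
   */
  #include <stdbool.h>
  #include <stddef.h>
  #include <stdio.h>

  enum compact_result { COMPACT_SKIPPED, COMPACT_DEFERRED, COMPACT_SUCCESS };

  /* Stub allocator: pretend the preferred node is already full. */
  static void *alloc_on_node(int nid, bool this_node_only)
  {
          (void)nid;
          (void)this_node_only;
          return NULL;
  }

  static void *alloc_hugepage(int local_nid, bool madvised,
                              enum compact_result compact_result)
  {
          void *page = NULL;

          /*
           * "avoid expensive reclaim when compaction may not succeed":
           * if compaction was skipped (not enough order-0 pages) or was
           * recently deferred, don't thrash local memory with reclaim --
           * just treat the node-local attempt as failed.
           */
          if (compact_result != COMPACT_SKIPPED &&
              compact_result != COMPACT_DEFERRED)
                  page = alloc_on_node(local_nid, true);  /* __GFP_THISNODE */

          /*
           * "allow hugepage fallback to remote nodes when madvised":
           * in the kernel this is keyed off the gfp mask allowing direct
           * reclaim (MADV_HUGEPAGE or defrag=always), modelled here as a
           * plain flag.  Otherwise the caller just falls back to local
           * base pages.
           */
          if (!page && madvised)
                  page = alloc_on_node(local_nid, false); /* any node */

          return page;    /* NULL => caller falls back to base pages */
  }

  int main(void)
  {
          printf("%p\n", alloc_hugepage(0, true, COMPACT_SKIPPED));
          return 0;
  }

In that model a non-madvised mapping that can't get a cheap local
hugepage simply gets base pages, while a madvised one prefers a remote
hugepage over remote base pages.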

But now we're in the merge window for 5.4, and we can see if this
alternate model fixes not just the horrendous swap storm behavior, but
also the performance regression that the late reverts caused.

Fingers crossed.

* emailed patches from David Rientjes <rientjes@google.com>:
  mm, page_alloc: allow hugepage fallback to remote nodes when madvised
  mm, page_alloc: avoid expensive reclaim when compaction may not succeed
  Revert "Revert "Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask""
  Revert "Revert "mm, thp: restore node-local hugepage allocations""

mm/huge_memory.c
mm/mempolicy.c
mm/page_alloc.c
mm/shmem.c

diff --combined mm/huge_memory.c
index 73fc517c08d222723b3e2a987775bd83dc6a4697,aec462cc5d4632e01301f55eb7687a1419fea351..c5cb6dcd6c69664c4e9c71d02c20ecae53362e9b
@@@ -496,25 -496,11 +496,25 @@@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, stru
        return pmd;
  }
  
 -static inline struct list_head *page_deferred_list(struct page *page)
 +#ifdef CONFIG_MEMCG
 +static inline struct deferred_split *get_deferred_split_queue(struct page *page)
  {
 -      /* ->lru in the tail pages is occupied by compound_head. */
 -      return &page[2].deferred_list;
 +      struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
 +      struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
 +
 +      if (memcg)
 +              return &memcg->deferred_split_queue;
 +      else
 +              return &pgdat->deferred_split_queue;
 +}
 +#else
 +static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 +{
 +      struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
 +
 +      return &pgdat->deferred_split_queue;
  }
 +#endif
  
  void prep_transhuge_page(struct page *page)
  {
@@@ -659,40 -645,30 +659,30 @@@ release
   *        available
   * never: never stall for any thp allocation
   */
- static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+ static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
  {
        const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-       gfp_t this_node = 0;
- #ifdef CONFIG_NUMA
-       struct mempolicy *pol;
-       /*
-        * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
-        * specified, to express a general desire to stay on the current
-        * node for optimistic allocation attempts. If the defrag mode
-        * and/or madvise hint requires the direct reclaim then we prefer
-        * to fallback to other node rather than node reclaim because that
-        * can lead to excessive reclaim even though there is free memory
-        * on other nodes. We expect that NUMA preferences are specified
-        * by memory policies.
-        */
-       pol = get_vma_policy(vma, addr);
-       if (pol->mode != MPOL_BIND)
-               this_node = __GFP_THISNODE;
-       mpol_cond_put(pol);
- #endif
  
+       /* Always do synchronous compaction */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
                return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+       /* Kick kcompactd and fail quickly */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
+               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+       /* Synchronous compaction if madvised, otherwise kick kcompactd */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            __GFP_KSWAPD_RECLAIM | this_node);
+               return GFP_TRANSHUGE_LIGHT |
+                       (vma_madvised ? __GFP_DIRECT_RECLAIM :
+                                       __GFP_KSWAPD_RECLAIM);
+       /* Only do synchronous compaction if madvised */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            this_node);
-       return GFP_TRANSHUGE_LIGHT | this_node;
+               return GFP_TRANSHUGE_LIGHT |
+                      (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+       return GFP_TRANSHUGE_LIGHT;
  }
  
  /* Caller must hold page table lock. */
@@@ -764,8 -740,8 +754,8 @@@ vm_fault_t do_huge_pmd_anonymous_page(s
                        pte_free(vma->vm_mm, pgtable);
                return ret;
        }
-       gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+       gfp = alloc_hugepage_direct_gfpmask(vma);
+       page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
        if (unlikely(!page)) {
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@@ -1372,9 -1348,8 +1362,8 @@@ vm_fault_t do_huge_pmd_wp_page(struct v
  alloc:
        if (__transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow()) {
-               huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-               new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-                               haddr, numa_node_id());
+               huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+               new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
        } else
                new_page = NULL;
  
@@@ -2511,8 -2486,6 +2500,8 @@@ static void __split_huge_page(struct pa
        struct page *head = compound_head(page);
        pg_data_t *pgdat = page_pgdat(head);
        struct lruvec *lruvec;
 +      struct address_space *swap_cache = NULL;
 +      unsigned long offset = 0;
        int i;
  
        lruvec = mem_cgroup_page_lruvec(head, pgdat);
        /* complete memcg works before add pages to LRU */
        mem_cgroup_split_huge_fixup(head);
  
 +      if (PageAnon(head) && PageSwapCache(head)) {
 +              swp_entry_t entry = { .val = page_private(head) };
 +
 +              offset = swp_offset(entry);
 +              swap_cache = swap_address_space(entry);
 +              xa_lock(&swap_cache->i_pages);
 +      }
 +
        for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
                __split_huge_page_tail(head, i, lruvec, list);
                /* Some pages can be beyond i_size: drop them from page cache */
                        if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
                                shmem_uncharge(head->mapping->host, 1);
                        put_page(head + i);
 +              } else if (!PageAnon(page)) {
 +                      __xa_store(&head->mapping->i_pages, head[i].index,
 +                                      head + i, 0);
 +              } else if (swap_cache) {
 +                      __xa_store(&swap_cache->i_pages, offset + i,
 +                                      head + i, 0);
                }
        }
  
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
                /* Additional pin to swap cache */
 -              if (PageSwapCache(head))
 +              if (PageSwapCache(head)) {
                        page_ref_add(head, 2);
 -              else
 +                      xa_unlock(&swap_cache->i_pages);
 +              } else {
                        page_ref_inc(head);
 +              }
        } else {
                /* Additional pin to page cache */
                page_ref_add(head, 2);
@@@ -2705,7 -2662,6 +2694,7 @@@ int split_huge_page_to_list(struct pag
  {
        struct page *head = compound_head(page);
        struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
 +      struct deferred_split *ds_queue = get_deferred_split_queue(page);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
        int count, mapcount, extra_pins, ret;
        }
  
        /* Prevent deferred_split_scan() touching ->_refcount */
 -      spin_lock(&pgdata->split_queue_lock);
 +      spin_lock(&ds_queue->split_queue_lock);
        count = page_count(head);
        mapcount = total_mapcount(head);
        if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
                if (!list_empty(page_deferred_list(head))) {
 -                      pgdata->split_queue_len--;
 +                      ds_queue->split_queue_len--;
                        list_del(page_deferred_list(head));
                }
                if (mapping)
                        __dec_node_page_state(page, NR_SHMEM_THPS);
 -              spin_unlock(&pgdata->split_queue_lock);
 +              spin_unlock(&ds_queue->split_queue_lock);
                __split_huge_page(page, list, end, flags);
                if (PageSwapCache(head)) {
                        swp_entry_t entry = { .val = page_private(head) };
                        dump_page(page, "total_mapcount(head) > 0");
                        BUG();
                }
 -              spin_unlock(&pgdata->split_queue_lock);
 +              spin_unlock(&ds_queue->split_queue_lock);
  fail:         if (mapping)
                        xa_unlock(&mapping->i_pages);
                spin_unlock_irqrestore(&pgdata->lru_lock, flags);
  
  void free_transhuge_page(struct page *page)
  {
 -      struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
 +      struct deferred_split *ds_queue = get_deferred_split_queue(page);
        unsigned long flags;
  
 -      spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 +      spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (!list_empty(page_deferred_list(page))) {
 -              pgdata->split_queue_len--;
 +              ds_queue->split_queue_len--;
                list_del(page_deferred_list(page));
        }
 -      spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 +      spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
        free_compound_page(page);
  }
  
  void deferred_split_huge_page(struct page *page)
  {
 -      struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
 +      struct deferred_split *ds_queue = get_deferred_split_queue(page);
 +#ifdef CONFIG_MEMCG
 +      struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
 +#endif
        unsigned long flags;
  
        VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  
 -      spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 +      /*
 +       * The try_to_unmap() in page reclaim path might reach here too,
 +       * this may cause a race condition to corrupt deferred split queue.
 +       * And, if page reclaim is already handling the same page, it is
 +       * unnecessary to handle it again in shrinker.
 +       *
 +       * Check PageSwapCache to determine if the page is being
 +       * handled by page reclaim since THP swap would add the page into
 +       * swap cache before calling try_to_unmap().
 +       */
 +      if (PageSwapCache(page))
 +              return;
 +
 +      spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        if (list_empty(page_deferred_list(page))) {
                count_vm_event(THP_DEFERRED_SPLIT_PAGE);
 -              list_add_tail(page_deferred_list(page), &pgdata->split_queue);
 -              pgdata->split_queue_len++;
 +              list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
 +              ds_queue->split_queue_len++;
 +#ifdef CONFIG_MEMCG
 +              if (memcg)
 +                      memcg_set_shrinker_bit(memcg, page_to_nid(page),
 +                                             deferred_split_shrinker.id);
 +#endif
        }
 -      spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 +      spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
  }
  
  static unsigned long deferred_split_count(struct shrinker *shrink,
                struct shrink_control *sc)
  {
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
 -      return READ_ONCE(pgdata->split_queue_len);
 +      struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
 +
 +#ifdef CONFIG_MEMCG
 +      if (sc->memcg)
 +              ds_queue = &sc->memcg->deferred_split_queue;
 +#endif
 +      return READ_ONCE(ds_queue->split_queue_len);
  }
  
  static unsigned long deferred_split_scan(struct shrinker *shrink,
                struct shrink_control *sc)
  {
        struct pglist_data *pgdata = NODE_DATA(sc->nid);
 +      struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
        unsigned long flags;
        LIST_HEAD(list), *pos, *next;
        struct page *page;
        int split = 0;
  
 -      spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 +#ifdef CONFIG_MEMCG
 +      if (sc->memcg)
 +              ds_queue = &sc->memcg->deferred_split_queue;
 +#endif
 +
 +      spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
        /* Take pin on all head pages to avoid freeing them under us */
 -      list_for_each_safe(pos, next, &pgdata->split_queue) {
 +      list_for_each_safe(pos, next, &ds_queue->split_queue) {
                page = list_entry((void *)pos, struct page, mapping);
                page = compound_head(page);
                if (get_page_unless_zero(page)) {
                } else {
                        /* We lost race with put_compound_page() */
                        list_del_init(page_deferred_list(page));
 -                      pgdata->split_queue_len--;
 +                      ds_queue->split_queue_len--;
                }
                if (!--sc->nr_to_scan)
                        break;
        }
 -      spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 +      spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
  
        list_for_each_safe(pos, next, &list) {
                page = list_entry((void *)pos, struct page, mapping);
@@@ -2947,15 -2870,15 +2936,15 @@@ next
                put_page(page);
        }
  
 -      spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 -      list_splice_tail(&list, &pgdata->split_queue);
 -      spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 +      spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 +      list_splice_tail(&list, &ds_queue->split_queue);
 +      spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
  
        /*
         * Stop shrinker if we didn't split any page, but the queue is empty.
         * This can happen if pages were freed under us.
         */
 -      if (!split && list_empty(&pgdata->split_queue))
 +      if (!split && list_empty(&ds_queue->split_queue))
                return SHRINK_STOP;
        return split;
  }
@@@ -2964,8 -2887,7 +2953,8 @@@ static struct shrinker deferred_split_s
        .count_objects = deferred_split_count,
        .scan_objects = deferred_split_scan,
        .seeks = DEFAULT_SEEKS,
 -      .flags = SHRINKER_NUMA_AWARE,
 +      .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE |
 +               SHRINKER_NONSLAB,
  };
  
  #ifdef CONFIG_DEBUG_FS
diff --combined mm/mempolicy.c
index de27d08b1ff8d286ce0eb463041eefc8a746d869,8caab1f81a52efc3b4d5fe222433b0754dfd511f..4ae967bcf95481bc5082904e92426320fa67d64b
@@@ -68,7 -68,7 +68,7 @@@
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/mempolicy.h>
 -#include <linux/mm.h>
 +#include <linux/pagewalk.h>
  #include <linux/highmem.h>
  #include <linux/hugetlb.h>
  #include <linux/kernel.h>
@@@ -655,12 -655,6 +655,12 @@@ static int queue_pages_test_walk(unsign
        return 1;
  }
  
 +static const struct mm_walk_ops queue_pages_walk_ops = {
 +      .hugetlb_entry          = queue_pages_hugetlb,
 +      .pmd_entry              = queue_pages_pte_range,
 +      .test_walk              = queue_pages_test_walk,
 +};
 +
  /*
   * Walk through page tables and collect pages to be migrated.
   *
@@@ -685,8 -679,15 +685,8 @@@ queue_pages_range(struct mm_struct *mm
                .nmask = nodes,
                .prev = NULL,
        };
 -      struct mm_walk queue_pages_walk = {
 -              .hugetlb_entry = queue_pages_hugetlb,
 -              .pmd_entry = queue_pages_pte_range,
 -              .test_walk = queue_pages_test_walk,
 -              .mm = mm,
 -              .private = &qp,
 -      };
  
 -      return walk_page_range(start, end, &queue_pages_walk);
 +      return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
  }
  
  /*
@@@ -1179,8 -1180,8 +1179,8 @@@ static struct page *new_page(struct pag
        } else if (PageTransHuge(page)) {
                struct page *thp;
  
-               thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-                               address, numa_node_id());
+               thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+                                        HPAGE_PMD_ORDER);
                if (!thp)
                        return NULL;
                prep_transhuge_page(thp);
@@@ -1405,7 -1406,6 +1405,7 @@@ static long kernel_mbind(unsigned long 
        int err;
        unsigned short mode_flags;
  
 +      start = untagged_addr(start);
        mode_flags = mode & MPOL_MODE_FLAGS;
        mode &= ~MPOL_MODE_FLAGS;
        if (mode >= MPOL_MAX)
@@@ -1513,6 -1513,10 +1513,6 @@@ static int kernel_migrate_pages(pid_t p
        if (nodes_empty(*new))
                goto out_put;
  
 -      nodes_and(*new, *new, node_states[N_MEMORY]);
 -      if (nodes_empty(*new))
 -              goto out_put;
 -
        err = security_task_movememory(task);
        if (err)
                goto out_put;
@@@ -1559,8 -1563,6 +1559,8 @@@ static int kernel_get_mempolicy(int __u
        int uninitialized_var(pval);
        nodemask_t nodes;
  
 +      addr = untagged_addr(addr);
 +
        if (nmask != NULL && maxnode < nr_node_ids)
                return -EINVAL;
  
@@@ -1732,7 -1734,7 +1732,7 @@@ struct mempolicy *__get_vma_policy(stru
   * freeing by another task.  It is the caller's responsibility to free the
   * extra reference for shared policies.
   */
- struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+ static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
                                                unsigned long addr)
  {
        struct mempolicy *pol = __get_vma_policy(vma, addr);
@@@ -2081,6 -2083,7 +2081,7 @@@ static struct page *alloc_page_interlea
   *    @vma:  Pointer to VMA or NULL if not available.
   *    @addr: Virtual Address of the allocation. Must be inside the VMA.
   *    @node: Which node to prefer for allocation (modulo policy).
+  *    @hugepage: for hugepages try only the preferred node if possible
   *
   *    This function allocates a page from the kernel page pool and applies
   *    a NUMA policy associated with the VMA or the current process.
   */
  struct page *
  alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-               unsigned long addr, int node)
+               unsigned long addr, int node, bool hugepage)
  {
        struct mempolicy *pol;
        struct page *page;
                mpol_cond_put(pol);
                page = alloc_page_interleave(gfp, order, nid);
                goto out;
+       }
+       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+               int hpage_node = node;
+               /*
+                * For hugepage allocation and non-interleave policy which
+                * allows the current node (or other explicitly preferred
+                * node) we only try to allocate from the current/preferred
+                * node and don't fall back to other nodes, as the cost of
+                * remote accesses would likely offset THP benefits.
+                *
+                * If the policy is interleave, or does not allow the current
+                * node in its nodemask, we allocate the standard way.
+                */
+               if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+                       hpage_node = pol->v.preferred_node;
+               nmask = policy_nodemask(gfp, pol);
+               if (!nmask || node_isset(hpage_node, *nmask)) {
+                       mpol_cond_put(pol);
+                       page = __alloc_pages_node(hpage_node,
+                                               gfp | __GFP_THISNODE, order);
+                       /*
+                        * If hugepage allocations are configured to always
+                        * synchronous compact or the vma has been madvised
+                        * to prefer hugepage backing, retry allowing remote
+                        * memory as well.
+                        */
+                       if (!page && (gfp & __GFP_DIRECT_RECLAIM))
+                               page = __alloc_pages_node(hpage_node,
+                                               gfp | __GFP_NORETRY, order);
+                       goto out;
+               }
        }
  
        nmask = policy_nodemask(gfp, pol);
diff --combined mm/page_alloc.c
index 3334a769eb91e1c1cc374560125c8c64e32da979,87cbd92065e53cd45d1412b200f96e8769896aaf..15c2050c629b1d8aacb2f36aac7ac09c54c95449
@@@ -670,7 -670,6 +670,7 @@@ out
  
  void free_compound_page(struct page *page)
  {
 +      mem_cgroup_uncharge(page);
        __free_pages_ok(page, compound_order(page));
  }
  
@@@ -3512,7 -3511,7 +3512,7 @@@ bool zone_watermark_ok_safe(struct zon
  static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
  {
        return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
 -                              RECLAIM_DISTANCE;
 +                              node_reclaim_distance;
  }
  #else /* CONFIG_NUMA */
  static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
@@@ -3955,23 -3954,15 +3955,23 @@@ should_compact_retry(struct alloc_conte
        if (compaction_failed(compact_result))
                goto check_priority;
  
 +      /*
 +       * compaction was skipped because there are not enough order-0 pages
 +       * to work with, so we retry only if it looks like reclaim can help.
 +       */
 +      if (compaction_needs_reclaim(compact_result)) {
 +              ret = compaction_zonelist_suitable(ac, order, alloc_flags);
 +              goto out;
 +      }
 +
        /*
         * make sure the compaction wasn't deferred or didn't bail out early
         * due to locks contention before we declare that we should give up.
 -       * But do not retry if the given zonelist is not suitable for
 -       * compaction.
 +       * But the next retry should use a higher priority if allowed, so
 +       * we don't just keep bailing out endlessly.
         */
        if (compaction_withdrawn(compact_result)) {
 -              ret = compaction_zonelist_suitable(ac, order, alloc_flags);
 -              goto out;
 +              goto check_priority;
        }
  
        /*
@@@ -4467,6 -4458,28 +4467,28 @@@ retry_cpuset
                if (page)
                        goto got_pg;
  
+                if (order >= pageblock_order && (gfp_mask & __GFP_IO)) {
+                       /*
+                        * If allocating entire pageblock(s) and compaction
+                        * failed because all zones are below low watermarks
+                        * or is prohibited because it recently failed at this
+                        * order, fail immediately.
+                        *
+                        * Reclaim is
+                        *  - potentially very expensive because zones are far
+                        *    below their low watermarks or this is part of very
+                        *    bursty high order allocations,
+                        *  - not guaranteed to help because isolate_freepages()
+                        *    may not iterate over freed pages as part of its
+                        *    linear scan, and
+                        *  - unlikely to make entire pageblocks free on its
+                        *    own.
+                        */
+                       if (compact_result == COMPACT_SKIPPED ||
+                           compact_result == COMPACT_DEFERRED)
+                               goto nopage;
+               }
                /*
                 * Checks for costly allocations with __GFP_NORETRY, which
                 * includes THP page fault allocations
@@@ -5980,7 -5993,7 +6002,7 @@@ void __ref memmap_init_zone_device(stru
                }
        }
  
 -      pr_info("%s initialised, %lu pages in %ums\n", dev_name(pgmap->dev),
 +      pr_info("%s initialised %lu pages in %ums\n", __func__,
                size, jiffies_to_msecs(jiffies - start));
  }
  
@@@ -6647,11 -6660,9 +6669,11 @@@ static unsigned long __init calc_memmap
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  static void pgdat_init_split_queue(struct pglist_data *pgdat)
  {
 -      spin_lock_init(&pgdat->split_queue_lock);
 -      INIT_LIST_HEAD(&pgdat->split_queue);
 -      pgdat->split_queue_len = 0;
 +      struct deferred_split *ds_queue = &pgdat->deferred_split_queue;
 +
 +      spin_lock_init(&ds_queue->split_queue_lock);
 +      INIT_LIST_HEAD(&ds_queue->split_queue);
 +      ds_queue->split_queue_len = 0;
  }
  #else
  static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
@@@ -8207,7 -8218,7 +8229,7 @@@ bool has_unmovable_pages(struct zone *z
                        if (!hugepage_migration_supported(page_hstate(head)))
                                goto unmovable;
  
 -                      skip_pages = (1 << compound_order(head)) - (page - head);
 +                      skip_pages = compound_nr(head) - (page - head);
                        iter += skip_pages - 1;
                        continue;
                }
diff --combined mm/shmem.c
index 30ce722c23fa976cbc79ed6fdadd73d556b92881,626d8c74b973f173d3062ee118580b649d35073a..cd570cc79c76ab9873ce123dd7fd1d0e4412c0bc
@@@ -37,7 -37,6 +37,7 @@@
  #include <linux/khugepaged.h>
  #include <linux/hugetlb.h>
  #include <linux/frontswap.h>
 +#include <linux/fs_parser.h>
  
  #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
  
@@@ -108,20 -107,6 +108,20 @@@ struct shmem_falloc 
        pgoff_t nr_unswapped;   /* how often writepage refused to swap out */
  };
  
 +struct shmem_options {
 +      unsigned long long blocks;
 +      unsigned long long inodes;
 +      struct mempolicy *mpol;
 +      kuid_t uid;
 +      kgid_t gid;
 +      umode_t mode;
 +      int huge;
 +      int seen;
 +#define SHMEM_SEEN_BLOCKS 1
 +#define SHMEM_SEEN_INODES 2
 +#define SHMEM_SEEN_HUGE 4
 +};
 +
  #ifdef CONFIG_TMPFS
  static unsigned long shmem_default_max_blocks(void)
  {
@@@ -609,7 -594,7 +609,7 @@@ static int shmem_add_to_page_cache(stru
  {
        XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
        unsigned long i = 0;
 -      unsigned long nr = 1UL << compound_order(page);
 +      unsigned long nr = compound_nr(page);
  
        VM_BUG_ON_PAGE(PageTail(page), page);
        VM_BUG_ON_PAGE(index != round_down(index, nr), page);
                if (xas_error(&xas))
                        goto unlock;
  next:
 -              xas_store(&xas, page + i);
 +              xas_store(&xas, page);
                if (++i < nr) {
                        xas_next(&xas);
                        goto next;
@@@ -1481,7 -1466,7 +1481,7 @@@ static struct page *shmem_alloc_hugepag
  
        shmem_pseudo_vma_init(&pvma, info, hindex);
        page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
        shmem_pseudo_vma_destroy(&pvma);
        if (page)
                prep_transhuge_page(page);
@@@ -1734,7 -1719,7 +1734,7 @@@ unlock
   * vm. If we swap it in we mark it dirty since we also free the swap
   * entry since a page cannot live in both the swap and page cache.
   *
 - * fault_mm and fault_type are only supplied by shmem_fault:
 + * vmf and fault_type are only supplied by shmem_fault:
   * otherwise they are NULL.
   */
  static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
@@@ -1884,7 -1869,7 +1884,7 @@@ alloc_nohuge
        lru_cache_add_anon(page);
  
        spin_lock_irq(&info->lock);
 -      info->alloced += 1 << compound_order(page);
 +      info->alloced += compound_nr(page);
        inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
        shmem_recalc_inode(inode);
        spin_unlock_irq(&info->lock);
@@@ -1925,7 -1910,7 +1925,7 @@@ clear
                struct page *head = compound_head(page);
                int i;
  
 -              for (i = 0; i < (1 << compound_order(head)); i++) {
 +              for (i = 0; i < compound_nr(head); i++) {
                        clear_highpage(head + i);
                        flush_dcache_page(head + i);
                }
         * Error recovery.
         */
  unacct:
 -      shmem_inode_unacct_blocks(inode, 1 << compound_order(page));
 +      shmem_inode_unacct_blocks(inode, compound_nr(page));
  
        if (PageTransHuge(page)) {
                unlock_page(page);
@@@ -3364,126 -3349,16 +3364,126 @@@ static const struct export_operations s
        .fh_to_dentry   = shmem_fh_to_dentry,
  };
  
 -static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 -                             bool remount)
 +enum shmem_param {
 +      Opt_gid,
 +      Opt_huge,
 +      Opt_mode,
 +      Opt_mpol,
 +      Opt_nr_blocks,
 +      Opt_nr_inodes,
 +      Opt_size,
 +      Opt_uid,
 +};
 +
 +static const struct fs_parameter_spec shmem_param_specs[] = {
 +      fsparam_u32   ("gid",           Opt_gid),
 +      fsparam_enum  ("huge",          Opt_huge),
 +      fsparam_u32oct("mode",          Opt_mode),
 +      fsparam_string("mpol",          Opt_mpol),
 +      fsparam_string("nr_blocks",     Opt_nr_blocks),
 +      fsparam_string("nr_inodes",     Opt_nr_inodes),
 +      fsparam_string("size",          Opt_size),
 +      fsparam_u32   ("uid",           Opt_uid),
 +      {}
 +};
 +
 +static const struct fs_parameter_enum shmem_param_enums[] = {
 +      { Opt_huge,     "never",        SHMEM_HUGE_NEVER },
 +      { Opt_huge,     "always",       SHMEM_HUGE_ALWAYS },
 +      { Opt_huge,     "within_size",  SHMEM_HUGE_WITHIN_SIZE },
 +      { Opt_huge,     "advise",       SHMEM_HUGE_ADVISE },
 +      {}
 +};
 +
 +const struct fs_parameter_description shmem_fs_parameters = {
 +      .name           = "tmpfs",
 +      .specs          = shmem_param_specs,
 +      .enums          = shmem_param_enums,
 +};
 +
 +static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 +{
 +      struct shmem_options *ctx = fc->fs_private;
 +      struct fs_parse_result result;
 +      unsigned long long size;
 +      char *rest;
 +      int opt;
 +
 +      opt = fs_parse(fc, &shmem_fs_parameters, param, &result);
 +      if (opt < 0)
 +              return opt;
 +
 +      switch (opt) {
 +      case Opt_size:
 +              size = memparse(param->string, &rest);
 +              if (*rest == '%') {
 +                      size <<= PAGE_SHIFT;
 +                      size *= totalram_pages();
 +                      do_div(size, 100);
 +                      rest++;
 +              }
 +              if (*rest)
 +                      goto bad_value;
 +              ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE);
 +              ctx->seen |= SHMEM_SEEN_BLOCKS;
 +              break;
 +      case Opt_nr_blocks:
 +              ctx->blocks = memparse(param->string, &rest);
 +              if (*rest)
 +                      goto bad_value;
 +              ctx->seen |= SHMEM_SEEN_BLOCKS;
 +              break;
 +      case Opt_nr_inodes:
 +              ctx->inodes = memparse(param->string, &rest);
 +              if (*rest)
 +                      goto bad_value;
 +              ctx->seen |= SHMEM_SEEN_INODES;
 +              break;
 +      case Opt_mode:
 +              ctx->mode = result.uint_32 & 07777;
 +              break;
 +      case Opt_uid:
 +              ctx->uid = make_kuid(current_user_ns(), result.uint_32);
 +              if (!uid_valid(ctx->uid))
 +                      goto bad_value;
 +              break;
 +      case Opt_gid:
 +              ctx->gid = make_kgid(current_user_ns(), result.uint_32);
 +              if (!gid_valid(ctx->gid))
 +                      goto bad_value;
 +              break;
 +      case Opt_huge:
 +              ctx->huge = result.uint_32;
 +              if (ctx->huge != SHMEM_HUGE_NEVER &&
 +                  !(IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
 +                    has_transparent_hugepage()))
 +                      goto unsupported_parameter;
 +              ctx->seen |= SHMEM_SEEN_HUGE;
 +              break;
 +      case Opt_mpol:
 +              if (IS_ENABLED(CONFIG_NUMA)) {
 +                      mpol_put(ctx->mpol);
 +                      ctx->mpol = NULL;
 +                      if (mpol_parse_str(param->string, &ctx->mpol))
 +                              goto bad_value;
 +                      break;
 +              }
 +              goto unsupported_parameter;
 +      }
 +      return 0;
 +
 +unsupported_parameter:
 +      return invalf(fc, "tmpfs: Unsupported parameter '%s'", param->key);
 +bad_value:
 +      return invalf(fc, "tmpfs: Bad value for '%s'", param->key);
 +}
 +
 +static int shmem_parse_options(struct fs_context *fc, void *data)
  {
 -      char *this_char, *value, *rest;
 -      struct mempolicy *mpol = NULL;
 -      uid_t uid;
 -      gid_t gid;
 +      char *options = data;
  
        while (options != NULL) {
 -              this_char = options;
 +              char *this_char = options;
                for (;;) {
                        /*
                         * NUL-terminate this option: unfortunately,
                                break;
                        }
                }
 -              if (!*this_char)
 -                      continue;
 -              if ((value = strchr(this_char,'=')) != NULL) {
 -                      *value++ = 0;
 -              } else {
 -                      pr_err("tmpfs: No value for mount option '%s'\n",
 -                             this_char);
 -                      goto error;
 -              }
 -
 -              if (!strcmp(this_char,"size")) {
 -                      unsigned long long size;
 -                      size = memparse(value,&rest);
 -                      if (*rest == '%') {
 -                              size <<= PAGE_SHIFT;
 -                              size *= totalram_pages();
 -                              do_div(size, 100);
 -                              rest++;
 +              if (*this_char) {
 +                      char *value = strchr(this_char,'=');
 +                      size_t len = 0;
 +                      int err;
 +
 +                      if (value) {
 +                              *value++ = '\0';
 +                              len = strlen(value);
                        }
 -                      if (*rest)
 -                              goto bad_val;
 -                      sbinfo->max_blocks =
 -                              DIV_ROUND_UP(size, PAGE_SIZE);
 -              } else if (!strcmp(this_char,"nr_blocks")) {
 -                      sbinfo->max_blocks = memparse(value, &rest);
 -                      if (*rest)
 -                              goto bad_val;
 -              } else if (!strcmp(this_char,"nr_inodes")) {
 -                      sbinfo->max_inodes = memparse(value, &rest);
 -                      if (*rest)
 -                              goto bad_val;
 -              } else if (!strcmp(this_char,"mode")) {
 -                      if (remount)
 -                              continue;
 -                      sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
 -                      if (*rest)
 -                              goto bad_val;
 -              } else if (!strcmp(this_char,"uid")) {
 -                      if (remount)
 -                              continue;
 -                      uid = simple_strtoul(value, &rest, 0);
 -                      if (*rest)
 -                              goto bad_val;
 -                      sbinfo->uid = make_kuid(current_user_ns(), uid);
 -                      if (!uid_valid(sbinfo->uid))
 -                              goto bad_val;
 -              } else if (!strcmp(this_char,"gid")) {
 -                      if (remount)
 -                              continue;
 -                      gid = simple_strtoul(value, &rest, 0);
 -                      if (*rest)
 -                              goto bad_val;
 -                      sbinfo->gid = make_kgid(current_user_ns(), gid);
 -                      if (!gid_valid(sbinfo->gid))
 -                              goto bad_val;
 -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
 -              } else if (!strcmp(this_char, "huge")) {
 -                      int huge;
 -                      huge = shmem_parse_huge(value);
 -                      if (huge < 0)
 -                              goto bad_val;
 -                      if (!has_transparent_hugepage() &&
 -                                      huge != SHMEM_HUGE_NEVER)
 -                              goto bad_val;
 -                      sbinfo->huge = huge;
 -#endif
 -#ifdef CONFIG_NUMA
 -              } else if (!strcmp(this_char,"mpol")) {
 -                      mpol_put(mpol);
 -                      mpol = NULL;
 -                      if (mpol_parse_str(value, &mpol))
 -                              goto bad_val;
 -#endif
 -              } else {
 -                      pr_err("tmpfs: Bad mount option %s\n", this_char);
 -                      goto error;
 +                      err = vfs_parse_fs_string(fc, this_char, value, len);
 +                      if (err < 0)
 +                              return err;
                }
        }
 -      sbinfo->mpol = mpol;
        return 0;
 -
 -bad_val:
 -      pr_err("tmpfs: Bad value '%s' for mount option '%s'\n",
 -             value, this_char);
 -error:
 -      mpol_put(mpol);
 -      return 1;
 -
  }
  
 -static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 +/*
 + * Reconfigure a shmem filesystem.
 + *
 + * Note that we disallow change from limited->unlimited blocks/inodes while any
 + * are in use; but we must separately disallow unlimited->limited, because in
 + * that case we have no record of how much is already in use.
 + */
 +static int shmem_reconfigure(struct fs_context *fc)
  {
 -      struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 -      struct shmem_sb_info config = *sbinfo;
 +      struct shmem_options *ctx = fc->fs_private;
 +      struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
        unsigned long inodes;
 -      int error = -EINVAL;
 -
 -      config.mpol = NULL;
 -      if (shmem_parse_options(data, &config, true))
 -              return error;
 +      const char *err;
  
        spin_lock(&sbinfo->stat_lock);
        inodes = sbinfo->max_inodes - sbinfo->free_inodes;
 -      if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
 -              goto out;
 -      if (config.max_inodes < inodes)
 -              goto out;
 -      /*
 -       * Those tests disallow limited->unlimited while any are in use;
 -       * but we must separately disallow unlimited->limited, because
 -       * in that case we have no record of how much is already in use.
 -       */
 -      if (config.max_blocks && !sbinfo->max_blocks)
 -              goto out;
 -      if (config.max_inodes && !sbinfo->max_inodes)
 -              goto out;
 +      if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
 +              if (!sbinfo->max_blocks) {
 +                      err = "Cannot retroactively limit size";
 +                      goto out;
 +              }
 +              if (percpu_counter_compare(&sbinfo->used_blocks,
 +                                         ctx->blocks) > 0) {
 +                      err = "Too small a size for current use";
 +                      goto out;
 +              }
 +      }
 +      if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) {
 +              if (!sbinfo->max_inodes) {
 +                      err = "Cannot retroactively limit inodes";
 +                      goto out;
 +              }
 +              if (ctx->inodes < inodes) {
 +                      err = "Too few inodes for current use";
 +                      goto out;
 +              }
 +      }
  
 -      error = 0;
 -      sbinfo->huge = config.huge;
 -      sbinfo->max_blocks  = config.max_blocks;
 -      sbinfo->max_inodes  = config.max_inodes;
 -      sbinfo->free_inodes = config.max_inodes - inodes;
 +      if (ctx->seen & SHMEM_SEEN_HUGE)
 +              sbinfo->huge = ctx->huge;
 +      if (ctx->seen & SHMEM_SEEN_BLOCKS)
 +              sbinfo->max_blocks  = ctx->blocks;
 +      if (ctx->seen & SHMEM_SEEN_INODES) {
 +              sbinfo->max_inodes  = ctx->inodes;
 +              sbinfo->free_inodes = ctx->inodes - inodes;
 +      }
  
        /*
         * Preserve previous mempolicy unless mpol remount option was specified.
         */
 -      if (config.mpol) {
 +      if (ctx->mpol) {
                mpol_put(sbinfo->mpol);
 -              sbinfo->mpol = config.mpol;     /* transfers initial ref */
 +              sbinfo->mpol = ctx->mpol;       /* transfers initial ref */
 +              ctx->mpol = NULL;
        }
 +      spin_unlock(&sbinfo->stat_lock);
 +      return 0;
  out:
        spin_unlock(&sbinfo->stat_lock);
 -      return error;
 +      return invalf(fc, "tmpfs: %s", err);
  }
  
  static int shmem_show_options(struct seq_file *seq, struct dentry *root)
@@@ -3616,9 -3547,8 +3616,9 @@@ static void shmem_put_super(struct supe
        sb->s_fs_info = NULL;
  }
  
 -int shmem_fill_super(struct super_block *sb, void *data, int silent)
 +static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
  {
 +      struct shmem_options *ctx = fc->fs_private;
        struct inode *inode;
        struct shmem_sb_info *sbinfo;
        int err = -ENOMEM;
        if (!sbinfo)
                return -ENOMEM;
  
 -      sbinfo->mode = 0777 | S_ISVTX;
 -      sbinfo->uid = current_fsuid();
 -      sbinfo->gid = current_fsgid();
        sb->s_fs_info = sbinfo;
  
  #ifdef CONFIG_TMPFS
         * but the internal instance is left unlimited.
         */
        if (!(sb->s_flags & SB_KERNMOUNT)) {
 -              sbinfo->max_blocks = shmem_default_max_blocks();
 -              sbinfo->max_inodes = shmem_default_max_inodes();
 -              if (shmem_parse_options(data, sbinfo, false)) {
 -                      err = -EINVAL;
 -                      goto failed;
 -              }
 +              if (!(ctx->seen & SHMEM_SEEN_BLOCKS))
 +                      ctx->blocks = shmem_default_max_blocks();
 +              if (!(ctx->seen & SHMEM_SEEN_INODES))
 +                      ctx->inodes = shmem_default_max_inodes();
        } else {
                sb->s_flags |= SB_NOUSER;
        }
  #else
        sb->s_flags |= SB_NOUSER;
  #endif
 +      sbinfo->max_blocks = ctx->blocks;
 +      sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
 +      sbinfo->uid = ctx->uid;
 +      sbinfo->gid = ctx->gid;
 +      sbinfo->mode = ctx->mode;
 +      sbinfo->huge = ctx->huge;
 +      sbinfo->mpol = ctx->mpol;
 +      ctx->mpol = NULL;
  
        spin_lock_init(&sbinfo->stat_lock);
        if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
                goto failed;
 -      sbinfo->free_inodes = sbinfo->max_inodes;
        spin_lock_init(&sbinfo->shrinklist_lock);
        INIT_LIST_HEAD(&sbinfo->shrinklist);
  
@@@ -3694,31 -3622,6 +3694,31 @@@ failed
        return err;
  }
  
 +static int shmem_get_tree(struct fs_context *fc)
 +{
 +      return get_tree_nodev(fc, shmem_fill_super);
 +}
 +
 +static void shmem_free_fc(struct fs_context *fc)
 +{
 +      struct shmem_options *ctx = fc->fs_private;
 +
 +      if (ctx) {
 +              mpol_put(ctx->mpol);
 +              kfree(ctx);
 +      }
 +}
 +
 +static const struct fs_context_operations shmem_fs_context_ops = {
 +      .free                   = shmem_free_fc,
 +      .get_tree               = shmem_get_tree,
 +#ifdef CONFIG_TMPFS
 +      .parse_monolithic       = shmem_parse_options,
 +      .parse_param            = shmem_parse_one,
 +      .reconfigure            = shmem_reconfigure,
 +#endif
 +};
 +
  static struct kmem_cache *shmem_inode_cachep;
  
  static struct inode *shmem_alloc_inode(struct super_block *sb)
@@@ -3835,6 -3738,7 +3835,6 @@@ static const struct super_operations sh
        .destroy_inode  = shmem_destroy_inode,
  #ifdef CONFIG_TMPFS
        .statfs         = shmem_statfs,
 -      .remount_fs     = shmem_remount_fs,
        .show_options   = shmem_show_options,
  #endif
        .evict_inode    = shmem_evict_inode,
@@@ -3855,30 -3759,16 +3855,30 @@@ static const struct vm_operations_struc
  #endif
  };
  
 -static struct dentry *shmem_mount(struct file_system_type *fs_type,
 -      int flags, const char *dev_name, void *data)
 +int shmem_init_fs_context(struct fs_context *fc)
  {
 -      return mount_nodev(fs_type, flags, data, shmem_fill_super);
 +      struct shmem_options *ctx;
 +
 +      ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL);
 +      if (!ctx)
 +              return -ENOMEM;
 +
 +      ctx->mode = 0777 | S_ISVTX;
 +      ctx->uid = current_fsuid();
 +      ctx->gid = current_fsgid();
 +
 +      fc->fs_private = ctx;
 +      fc->ops = &shmem_fs_context_ops;
 +      return 0;
  }
  
  static struct file_system_type shmem_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "tmpfs",
 -      .mount          = shmem_mount,
 +      .init_fs_context = shmem_init_fs_context,
 +#ifdef CONFIG_TMPFS
 +      .parameters     = &shmem_fs_parameters,
 +#endif
        .kill_sb        = kill_litter_super,
        .fs_flags       = FS_USERNS_MOUNT,
  };
@@@ -4022,8 -3912,7 +4022,8 @@@ bool shmem_huge_enabled(struct vm_area_
  
  static struct file_system_type shmem_fs_type = {
        .name           = "tmpfs",
 -      .mount          = ramfs_mount,
 +      .init_fs_context = ramfs_init_fs_context,
 +      .parameters     = &ramfs_fs_parameters,
        .kill_sb        = kill_litter_super,
        .fs_flags       = FS_USERNS_MOUNT,
  };