Merge branch 'akpm' (patches from Andrew)

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 3 Aug 2019 16:20:49 +0000 (09:20 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 3 Aug 2019 16:20:49 +0000 (09:20 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 3 Aug 2019 16:20:49 +0000 (09:20 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 3 Aug 2019 16:20:49 +0000 (09:20 -0700)
diff --git a/arch/mips/vdso/vdso.h b/arch/mips/vdso/vdso.h

index 14b1931be69c3acf78afc794d4b3f0a3bbac5ecc..b65b169778e31478fcc507c12c88fcb1436cdf6b 100644 (file)
--- a/arch/mips/vdso/vdso.h
+++ b/arch/mips/vdso/vdso.h
@@ -9,6 +9,7 @@
  #if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT)
  
  /* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */
+#define BUILD_VDSO32_64
  #undef CONFIG_64BIT
  #define CONFIG_32BIT 1
  #ifndef __ASSEMBLY__
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c

index 0e28270b0fd81046b2b2a434b0dd65edb4b282a6..aad6be5c0af0a5f4da721eed1fd8b77cd25f3b94 100644 (file)
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2204,6 +2204,12 @@ int __init acpi_scan_init(void)
         acpi_gpe_apply_masked_gpes();
         acpi_update_all_gpes();
  
+       /*
+        * Although we call __add_memory() that is documented to require the
+        * device_hotplug_lock, it is not necessary here because this is an
+        * early code when userspace or any other code path cannot trigger
+        * hotplug/hotunplug operations.
+        */
         mutex_lock(&acpi_scan_lock);
         /*
          * Enumerate devices in the ACPI namespace.
diff --git a/fs/coredump.c b/fs/coredump.c

index e42e17e55bfd5512bee600f6bd435e66e3f90147..b1ea7dfbd1494bd8d990ae03ee1bc21277dbd636 100644 (file)
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -7,6 +7,7 @@
  #include <linux/stat.h>
  #include <linux/fcntl.h>
  #include <linux/swap.h>
+#include <linux/ctype.h>
  #include <linux/string.h>
  #include <linux/init.h>
  #include <linux/pagemap.h>
@@ -187,11 +188,13 @@ put_exe_file:
   * name into corename, which must have space for at least
   * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
   */
-static int format_corename(struct core_name *cn, struct coredump_params *cprm)
+static int format_corename(struct core_name *cn, struct coredump_params *cprm,
+                          size_t **argv, int *argc)
  {
         const struct cred *cred = current_cred();
         const char *pat_ptr = core_pattern;
         int ispipe = (*pat_ptr == '|');
+       bool was_space = false;
         int pid_in_pattern = 0;
         int err = 0;
  
@@ -201,12 +204,35 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
                 return -ENOMEM;
         cn->corename[0] = '\0';
  
-       if (ispipe)
+       if (ispipe) {
+               int argvs = sizeof(core_pattern) / 2;
+               (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
+               if (!(*argv))
+                       return -ENOMEM;
+               (*argv)[(*argc)++] = 0;
                 ++pat_ptr;
+       }
  
         /* Repeat as long as we have more pattern to process and more output
            space */
         while (*pat_ptr) {
+               /*
+                * Split on spaces before doing template expansion so that
+                * %e and %E don't get split if they have spaces in them
+                */
+               if (ispipe) {
+                       if (isspace(*pat_ptr)) {
+                               was_space = true;
+                               pat_ptr++;
+                               continue;
+                       } else if (was_space) {
+                               was_space = false;
+                               err = cn_printf(cn, "%c", '\0');
+                               if (err)
+                                       return err;
+                               (*argv)[(*argc)++] = cn->used;
+                       }
+               }
                 if (*pat_ptr != '%') {
                         err = cn_printf(cn, "%c", *pat_ptr++);
                 } else {
@@ -546,6 +572,8 @@ void do_coredump(const kernel_siginfo_t *siginfo)
         struct cred *cred;
         int retval = 0;
         int ispipe;
+       size_t *argv = NULL;
+       int argc = 0;
         struct files_struct *displaced;
         /* require nonrelative corefile path and be extra careful */
         bool need_suid_safe = false;
@@ -592,9 +620,10 @@ void do_coredump(const kernel_siginfo_t *siginfo)
  
         old_cred = override_creds(cred);
  
-       ispipe = format_corename(&cn, &cprm);
+       ispipe = format_corename(&cn, &cprm, &argv, &argc);
  
         if (ispipe) {
+               int argi;
                 int dump_count;
                 char **helper_argv;
                 struct subprocess_info *sub_info;
@@ -637,12 +666,16 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                         goto fail_dropcount;
                 }
  
-               helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
+               helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
+                                           GFP_KERNEL);
                 if (!helper_argv) {
                         printk(KERN_WARNING "%s failed to allocate memory\n",
                                __func__);
                         goto fail_dropcount;
                 }
+               for (argi = 0; argi < argc; argi++)
+                       helper_argv[argi] = cn.corename + argv[argi];
+               helper_argv[argi] = NULL;
  
                 retval = -ENOMEM;
                 sub_info = call_usermodehelper_setup(helper_argv[0],
@@ -652,7 +685,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                         retval = call_usermodehelper_exec(sub_info,
                                                           UMH_WAIT_EXEC);
  
-               argv_free(helper_argv);
+               kfree(helper_argv);
                 if (retval) {
                         printk(KERN_INFO "Core dump to |%s pipe failed\n",
                                cn.corename);
@@ -766,6 +799,7 @@ fail_dropcount:
         if (ispipe)
                 atomic_dec(&core_dump_count);
  fail_unlock:
+       kfree(argv);
         kfree(cn.corename);
         coredump_finish(mm, core_dumped);
         revert_creds(old_cred);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c

index 385f3aaa244809b0d442d746fb4e76bd391d7bb2..90c830e3758e2dea9af508efed08b48a8509553f 100644 (file)
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3825,7 +3825,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
         int low_bucket = 0, bucket, high_bucket;
         struct ocfs2_xattr_bucket *search;
-       u32 last_hash;
         u64 blkno, lower_blkno = 0;
  
         search = ocfs2_xattr_bucket_new(inode);
@@ -3869,8 +3868,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
                 if (xh->xh_count)
                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
  
-               last_hash = le32_to_cpu(xe->xe_name_hash);
-
                 /* record lower_blkno which may be the insert place. */
                 lower_blkno = blkno;
  
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h

index c64bea7a52bebd5c7332203e1e3a78bfa69b84c3..e9f20b813a699a3fb7630fe76aba3d1ab60c892e 100644 (file)
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -7,24 +7,6 @@
  #include <linux/compiler.h>
  #include <linux/log2.h>
  
-/*
- * Runtime evaluation of get_order()
- */
-static inline __attribute_const__
-int __get_order(unsigned long size)
-{
-       int order;
-
-       size--;
-       size >>= PAGE_SHIFT;
-#if BITS_PER_LONG == 32
-       order = fls(size);
-#else
-       order = fls64(size);
-#endif
-       return order;
-}
-
  /**
   * get_order - Determine the allocation order of a memory size
   * @size: The size for which to get the order
@@ -43,19 +25,27 @@ int __get_order(unsigned long size)
   * to hold an object of the specified size.
   *
   * The result is undefined if the size is 0.
- *
- * This function may be used to initialise variables with compile time
- * evaluations of constants.
   */
-#define get_order(n)                                           \
-(                                                              \
-       __builtin_constant_p(n) ? (                             \
-               ((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :     \
-               (((n) < (1UL << PAGE_SHIFT)) ? 0 :              \
-                ilog2((n) - 1) - PAGE_SHIFT + 1)               \
-       ) :                                                     \
-       __get_order(n)                                          \
-)
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+       if (__builtin_constant_p(size)) {
+               if (!size)
+                       return BITS_PER_LONG - PAGE_SHIFT;
+
+               if (size < (1UL << PAGE_SHIFT))
+                       return 0;
+
+               return ilog2((size) - 1) - PAGE_SHIFT + 1;
+       }
+
+       size--;
+       size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+       return fls(size);
+#else
+       return fls64(size);
+#endif
+}
  
  #endif /* __ASSEMBLY__ */
  
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h

index 1dda31825ec4ae78c8852f15288a80c4a651bcf3..71283739ffd239c5790dd57f408411451fa68220 100644 (file)
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -32,6 +32,7 @@
  
  #endif /* CONFIG_SPARSEMEM */
  
+#ifndef BUILD_VDSO32_64
  /*
   * page->flags layout:
   *
@@ -76,20 +77,22 @@
  #define LAST_CPUPID_SHIFT 0
  #endif
  
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#ifdef CONFIG_KASAN_SW_TAGS
+#define KASAN_TAG_WIDTH 8
+#else
+#define KASAN_TAG_WIDTH 0
+#endif
+
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \
+       <= BITS_PER_LONG - NR_PAGEFLAGS
  #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
  #else
  #define LAST_CPUPID_WIDTH 0
  #endif
  
-#ifdef CONFIG_KASAN_SW_TAGS
-#define KASAN_TAG_WIDTH 8
  #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \
         > BITS_PER_LONG - NR_PAGEFLAGS
-#error "KASAN: not enough bits in page flags for tag"
-#endif
-#else
-#define KASAN_TAG_WIDTH 0
+#error "Not enough bits in page flags"
  #endif
  
  /*
@@ -104,4 +107,5 @@
  #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
  #endif
  
+#endif
  #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/kernel/Makefile b/kernel/Makefile

index a8d923b5481ba91d3cb15209eda7838fe1793ead..ef0d95a190b4179848c52041f2d1cfb6b9fbff77 100644 (file)
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -111,7 +111,6 @@ obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
  obj-$(CONFIG_TORTURE_TEST) += torture.o
  
  obj-$(CONFIG_HAS_IOMEM) += iomem.o
-obj-$(CONFIG_ZONE_DEVICE) += memremap.o
  obj-$(CONFIG_RSEQ) += rseq.o
  
  obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
diff --git a/kernel/memremap.c b/kernel/memremap.c

deleted file mode 100644 (file)

index 6ee03a8..0000000
--- a/kernel/memremap.c
+++ /dev/null
@@ -1,405 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
-#include <linux/device.h>
-#include <linux/io.h>
-#include <linux/kasan.h>
-#include <linux/memory_hotplug.h>
-#include <linux/mm.h>
-#include <linux/pfn_t.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
-#include <linux/types.h>
-#include <linux/wait_bit.h>
-#include <linux/xarray.h>
-
-static DEFINE_XARRAY(pgmap_array);
-#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
-#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
-
-#ifdef CONFIG_DEV_PAGEMAP_OPS
-DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
-EXPORT_SYMBOL(devmap_managed_key);
-static atomic_t devmap_managed_enable;
-
-static void devmap_managed_enable_put(void *data)
-{
-       if (atomic_dec_and_test(&devmap_managed_enable))
-               static_branch_disable(&devmap_managed_key);
-}
-
-static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
-{
-       if (!pgmap->ops || !pgmap->ops->page_free) {
-               WARN(1, "Missing page_free method\n");
-               return -EINVAL;
-       }
-
-       if (atomic_inc_return(&devmap_managed_enable) == 1)
-               static_branch_enable(&devmap_managed_key);
-       return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL);
-}
-#else
-static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
-{
-       return -EINVAL;
-}
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
-
-static void pgmap_array_delete(struct resource *res)
-{
-       xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
-                       NULL, GFP_KERNEL);
-       synchronize_rcu();
-}
-
-static unsigned long pfn_first(struct dev_pagemap *pgmap)
-{
-       return PHYS_PFN(pgmap->res.start) +
-               vmem_altmap_offset(pgmap_altmap(pgmap));
-}
-
-static unsigned long pfn_end(struct dev_pagemap *pgmap)
-{
-       const struct resource *res = &pgmap->res;
-
-       return (res->start + resource_size(res)) >> PAGE_SHIFT;
-}
-
-static unsigned long pfn_next(unsigned long pfn)
-{
-       if (pfn % 1024 == 0)
-               cond_resched();
-       return pfn + 1;
-}
-
-#define for_each_device_pfn(pfn, map) \
-       for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
-
-static void dev_pagemap_kill(struct dev_pagemap *pgmap)
-{
-       if (pgmap->ops && pgmap->ops->kill)
-               pgmap->ops->kill(pgmap);
-       else
-               percpu_ref_kill(pgmap->ref);
-}
-
-static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
-{
-       if (pgmap->ops && pgmap->ops->cleanup) {
-               pgmap->ops->cleanup(pgmap);
-       } else {
-               wait_for_completion(&pgmap->done);
-               percpu_ref_exit(pgmap->ref);
-       }
-}
-
-static void devm_memremap_pages_release(void *data)
-{
-       struct dev_pagemap *pgmap = data;
-       struct device *dev = pgmap->dev;
-       struct resource *res = &pgmap->res;
-       unsigned long pfn;
-       int nid;
-
-       dev_pagemap_kill(pgmap);
-       for_each_device_pfn(pfn, pgmap)
-               put_page(pfn_to_page(pfn));
-       dev_pagemap_cleanup(pgmap);
-
-       /* pages are dead and unused, undo the arch mapping */
-       nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
-
-       mem_hotplug_begin();
-       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-               pfn = PHYS_PFN(res->start);
-               __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
-                                PHYS_PFN(resource_size(res)), NULL);
-       } else {
-               arch_remove_memory(nid, res->start, resource_size(res),
-                               pgmap_altmap(pgmap));
-               kasan_remove_zero_shadow(__va(res->start), resource_size(res));
-       }
-       mem_hotplug_done();
-
-       untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
-       pgmap_array_delete(res);
-       dev_WARN_ONCE(dev, pgmap->altmap.alloc,
-                     "%s: failed to free all reserved pages\n", __func__);
-}
-
-static void dev_pagemap_percpu_release(struct percpu_ref *ref)
-{
-       struct dev_pagemap *pgmap =
-               container_of(ref, struct dev_pagemap, internal_ref);
-
-       complete(&pgmap->done);
-}
-
-/**
- * devm_memremap_pages - remap and provide memmap backing for the given resource
- * @dev: hosting device for @res
- * @pgmap: pointer to a struct dev_pagemap
- *
- * Notes:
- * 1/ At a minimum the res and type members of @pgmap must be initialized
- *    by the caller before passing it to this function
- *
- * 2/ The altmap field may optionally be initialized, in which case
- *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
- *
- * 3/ The ref field may optionally be provided, in which pgmap->ref must be
- *    'live' on entry and will be killed and reaped at
- *    devm_memremap_pages_release() time, or if this routine fails.
- *
- * 4/ res is expected to be a host memory range that could feasibly be
- *    treated as a "System RAM" range, i.e. not a device mmio range, but
- *    this is not enforced.
- */
-void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
-{
-       struct resource *res = &pgmap->res;
-       struct dev_pagemap *conflict_pgmap;
-       struct mhp_restrictions restrictions = {
-               /*
-                * We do not want any optional features only our own memmap
-                */
-               .altmap = pgmap_altmap(pgmap),
-       };
-       pgprot_t pgprot = PAGE_KERNEL;
-       int error, nid, is_ram;
-       bool need_devmap_managed = true;
-
-       switch (pgmap->type) {
-       case MEMORY_DEVICE_PRIVATE:
-               if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
-                       WARN(1, "Device private memory not supported\n");
-                       return ERR_PTR(-EINVAL);
-               }
-               if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
-                       WARN(1, "Missing migrate_to_ram method\n");
-                       return ERR_PTR(-EINVAL);
-               }
-               break;
-       case MEMORY_DEVICE_FS_DAX:
-               if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
-                   IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
-                       WARN(1, "File system DAX not supported\n");
-                       return ERR_PTR(-EINVAL);
-               }
-               break;
-       case MEMORY_DEVICE_DEVDAX:
-       case MEMORY_DEVICE_PCI_P2PDMA:
-               need_devmap_managed = false;
-               break;
-       default:
-               WARN(1, "Invalid pgmap type %d\n", pgmap->type);
-               break;
-       }
-
-       if (!pgmap->ref) {
-               if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
-                       return ERR_PTR(-EINVAL);
-
-               init_completion(&pgmap->done);
-               error = percpu_ref_init(&pgmap->internal_ref,
-                               dev_pagemap_percpu_release, 0, GFP_KERNEL);
-               if (error)
-                       return ERR_PTR(error);
-               pgmap->ref = &pgmap->internal_ref;
-       } else {
-               if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
-                       WARN(1, "Missing reference count teardown definition\n");
-                       return ERR_PTR(-EINVAL);
-               }
-       }
-
-       if (need_devmap_managed) {
-               error = devmap_managed_enable_get(dev, pgmap);
-               if (error)
-                       return ERR_PTR(error);
-       }
-
-       conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
-       if (conflict_pgmap) {
-               dev_WARN(dev, "Conflicting mapping in same section\n");
-               put_dev_pagemap(conflict_pgmap);
-               error = -ENOMEM;
-               goto err_array;
-       }
-
-       conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
-       if (conflict_pgmap) {
-               dev_WARN(dev, "Conflicting mapping in same section\n");
-               put_dev_pagemap(conflict_pgmap);
-               error = -ENOMEM;
-               goto err_array;
-       }
-
-       is_ram = region_intersects(res->start, resource_size(res),
-               IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
-
-       if (is_ram != REGION_DISJOINT) {
-               WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
-                               is_ram == REGION_MIXED ? "mixed" : "ram", res);
-               error = -ENXIO;
-               goto err_array;
-       }
-
-       pgmap->dev = dev;
-
-       error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
-                               PHYS_PFN(res->end), pgmap, GFP_KERNEL));
-       if (error)
-               goto err_array;
-
-       nid = dev_to_node(dev);
-       if (nid < 0)
-               nid = numa_mem_id();
-
-       error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0,
-                       resource_size(res));
-       if (error)
-               goto err_pfn_remap;
-
-       mem_hotplug_begin();
-
-       /*
-        * For device private memory we call add_pages() as we only need to
-        * allocate and initialize struct page for the device memory. More-
-        * over the device memory is un-accessible thus we do not want to
-        * create a linear mapping for the memory like arch_add_memory()
-        * would do.
-        *
-        * For all other device memory types, which are accessible by
-        * the CPU, we do want the linear mapping and thus use
-        * arch_add_memory().
-        */
-       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-               error = add_pages(nid, PHYS_PFN(res->start),
-                               PHYS_PFN(resource_size(res)), &restrictions);
-       } else {
-               error = kasan_add_zero_shadow(__va(res->start), resource_size(res));
-               if (error) {
-                       mem_hotplug_done();
-                       goto err_kasan;
-               }
-
-               error = arch_add_memory(nid, res->start, resource_size(res),
-                                       &restrictions);
-       }
-
-       if (!error) {
-               struct zone *zone;
-
-               zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
-               move_pfn_range_to_zone(zone, PHYS_PFN(res->start),
-                               PHYS_PFN(resource_size(res)), restrictions.altmap);
-       }
-
-       mem_hotplug_done();
-       if (error)
-               goto err_add_memory;
-
-       /*
-        * Initialization of the pages has been deferred until now in order
-        * to allow us to do the work while not holding the hotplug lock.
-        */
-       memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
-                               PHYS_PFN(res->start),
-                               PHYS_PFN(resource_size(res)), pgmap);
-       percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
-
-       error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
-                       pgmap);
-       if (error)
-               return ERR_PTR(error);
-
-       return __va(res->start);
-
- err_add_memory:
-       kasan_remove_zero_shadow(__va(res->start), resource_size(res));
- err_kasan:
-       untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
- err_pfn_remap:
-       pgmap_array_delete(res);
- err_array:
-       dev_pagemap_kill(pgmap);
-       dev_pagemap_cleanup(pgmap);
-       return ERR_PTR(error);
-}
-EXPORT_SYMBOL_GPL(devm_memremap_pages);
-
-void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
-{
-       devm_release_action(dev, devm_memremap_pages_release, pgmap);
-}
-EXPORT_SYMBOL_GPL(devm_memunmap_pages);
-
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
-{
-       /* number of pfns from base where pfn_to_page() is valid */
-       if (altmap)
-               return altmap->reserve + altmap->free;
-       return 0;
-}
-
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
-{
-       altmap->alloc -= nr_pfns;
-}
-
-/**
- * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
- * @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
- * is non-NULL but does not cover @pfn the reference to it will be released.
- */
-struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
-               struct dev_pagemap *pgmap)
-{
-       resource_size_t phys = PFN_PHYS(pfn);
-
-       /*
-        * In the cached case we're already holding a live reference.
-        */
-       if (pgmap) {
-               if (phys >= pgmap->res.start && phys <= pgmap->res.end)
-                       return pgmap;
-               put_dev_pagemap(pgmap);
-       }
-
-       /* fall back to slow path lookup */
-       rcu_read_lock();
-       pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
-       if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
-               pgmap = NULL;
-       rcu_read_unlock();
-
-       return pgmap;
-}
-EXPORT_SYMBOL_GPL(get_dev_pagemap);
-
-#ifdef CONFIG_DEV_PAGEMAP_OPS
-void __put_devmap_managed_page(struct page *page)
-{
-       int count = page_ref_dec_return(page);
-
-       /*
-        * If refcount is 1 then page is freed and refcount is stable as nobody
-        * holds a reference on the page.
-        */
-       if (count == 1) {
-               /* Clear Active bit in case of parallel mark_page_accessed */
-               __ClearPageActive(page);
-               __ClearPageWaiters(page);
-
-               mem_cgroup_uncharge(page);
-
-               page->pgmap->ops->page_free(page);
-       } else if (!count)
-               __put_page(page);
-}
-EXPORT_SYMBOL(__put_devmap_managed_page);
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
diff --git a/kernel/signal.c b/kernel/signal.c

index 349f5a67f100e8bc33ed4ac44f717de482817776..e667be6907d71333c44546cd43ebfdde6c9eab3c 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -349,7 +349,7 @@ void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask)
   * @task has %JOBCTL_STOP_PENDING set and is participating in a group stop.
   * Group stop states are cleared and the group stop count is consumed if
   * %JOBCTL_STOP_CONSUME was set.  If the consumption completes the group
- * stop, the appropriate %SIGNAL_* flags are set.
+ * stop, the appropriate `SIGNAL_*` flags are set.
   *
   * CONTEXT:
   * Must be called with @task->sighand->siglock held.
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan

index 4fafba1a923b67a2650a0265025e247829d3f9c6..7fa97a8b571778a1619940ab10a9a4bc9e96f823 100644 (file)
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -106,7 +106,6 @@ endchoice
  
  config KASAN_STACK_ENABLE
         bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST
-       default !(CLANG_VERSION < 90000)
         depends on KASAN
         help
           The LLVM stack address sanitizer has a know problem that
@@ -115,11 +114,11 @@ config KASAN_STACK_ENABLE
           Disabling asan-stack makes it safe to run kernels build
           with clang-8 with KASAN enabled, though it loses some of
           the functionality.
-         This feature is always disabled when compile-testing with clang-8
-         or earlier to avoid cluttering the output in stack overflow
-         warnings, but clang-8 users can still enable it for builds without
-         CONFIG_COMPILE_TEST.  On gcc and later clang versions it is
-         assumed to always be safe to use and enabled by default.
+         This feature is always disabled when compile-testing with clang
+         to avoid cluttering the output in stack overflow warnings,
+         but clang users can still enable it for builds without
+         CONFIG_COMPILE_TEST.  On gcc it is assumed to always be safe
+         to use and enabled by default.
  
  config KASAN_STACK
         int
diff --git a/lib/Makefile b/lib/Makefile

index 095601ce371dabd7a7e5b3501c61ac4b6516d91b..29c02a924973afefc9a95cac47a6dfbcced66993 100644 (file)
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -279,7 +279,8 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
  obj-$(CONFIG_UBSAN) += ubsan.o
  
  UBSAN_SANITIZE_ubsan.o := n
-CFLAGS_ubsan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+KASAN_SANITIZE_ubsan.o := n
+CFLAGS_ubsan.o := $(call cc-option, -fno-stack-protector) $(DISABLE_STACKLEAK_PLUGIN)
  
  obj-$(CONFIG_SBITMAP) += sbitmap.o
  
diff --git a/lib/test_meminit.c b/lib/test_meminit.c

index 62d19f270cad4c75b9582abb5397e987c0aaca1c..9729f271d15041ac1930eb85922b81ba094a574f 100644 (file)
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -222,7 +222,7 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor,
                  * Copy the buffer to check that it's not wiped on
                  * free().
                  */
-               buf_copy = kmalloc(size, GFP_KERNEL);
+               buf_copy = kmalloc(size, GFP_ATOMIC);
                 if (buf_copy)
                         memcpy(buf_copy, buf, size);
  
diff --git a/mm/Makefile b/mm/Makefile

index 338e528ad4366f982298e9a8e59eba5e308fab32..d0b295c3b764bb753e4081b5847d6cef577f114d 100644 (file)
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -102,5 +102,6 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
  obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
  obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
  obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
+obj-$(CONFIG_ZONE_DEVICE) += memremap.o
  obj-$(CONFIG_HMM_MIRROR) += hmm.o
  obj-$(CONFIG_MEMFD_CREATE) += memfd.o
diff --git a/mm/compaction.c b/mm/compaction.c

index 9e1b9acb116b9b3efaabd8ee02538c20456b4714..952dc2fb24e50a26bee9621965ec6070b5d13346 100644 (file)
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -842,13 +842,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
  
                 /*
                  * Periodically drop the lock (if held) regardless of its
-                * contention, to give chance to IRQs. Abort async compaction
-                * if contended.
+                * contention, to give chance to IRQs. Abort completely if
+                * a fatal signal is pending.
                  */
                 if (!(low_pfn % SWAP_CLUSTER_MAX)
                     && compact_unlock_should_abort(&pgdat->lru_lock,
-                                           flags, &locked, cc))
-                       break;
+                                           flags, &locked, cc)) {
+                       low_pfn = 0;
+                       goto fatal_pending;
+               }
  
                 if (!pfn_valid_within(low_pfn))
                         goto isolate_fail;
@@ -1060,6 +1062,7 @@ isolate_abort:
         trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
                                                 nr_scanned, nr_isolated);
  
+fatal_pending:
         cc->total_migrate_scanned += nr_scanned;
         if (nr_isolated)
                 count_compact_events(COMPACTISOLATED, nr_isolated);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c

index dbbd518fb6b3e91dc298956f7abe842361369952..6e9e8cca663e477d328584ac3142d8234d8e3e05 100644 (file)
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -114,7 +114,7 @@
  /* GFP bitmask for kmemleak internal allocations */
  #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
                                  __GFP_NORETRY | __GFP_NOMEMALLOC | \
-                                __GFP_NOWARN | __GFP_NOFAIL)
+                                __GFP_NOWARN)
  
  /* scanning area inside a memory block */
  struct kmemleak_scan_area {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index 2a9bbddb0e55427125e90dde4795b6460cdbdae1..c73f0991316511fb5471d3382f2d719c6cbfd759 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -132,7 +132,6 @@ static void release_memory_resource(struct resource *res)
                 return;
         release_resource(res);
         kfree(res);
-       return;
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
@@ -979,7 +978,6 @@ static void rollback_node_hotadd(int nid)
         arch_refresh_nodedata(nid, NULL);
         free_percpu(pgdat->per_cpu_nodestats);
         arch_free_nodedata(pgdat);
-       return;
  }
  
  
diff --git a/mm/memremap.c b/mm/memremap.c

new file mode 100644 (file)

index 0000000..6ee03a8
--- /dev/null
+++ b/mm/memremap.c
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kasan.h>
+#include <linux/memory_hotplug.h>
+#include <linux/mm.h>
+#include <linux/pfn_t.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/types.h>
+#include <linux/wait_bit.h>
+#include <linux/xarray.h>
+
+static DEFINE_XARRAY(pgmap_array);
+#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
+#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
+
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL(devmap_managed_key);
+static atomic_t devmap_managed_enable;
+
+static void devmap_managed_enable_put(void *data)
+{
+       if (atomic_dec_and_test(&devmap_managed_enable))
+               static_branch_disable(&devmap_managed_key);
+}
+
+static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+{
+       if (!pgmap->ops || !pgmap->ops->page_free) {
+               WARN(1, "Missing page_free method\n");
+               return -EINVAL;
+       }
+
+       if (atomic_inc_return(&devmap_managed_enable) == 1)
+               static_branch_enable(&devmap_managed_key);
+       return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL);
+}
+#else
+static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+{
+       return -EINVAL;
+}
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
+
+static void pgmap_array_delete(struct resource *res)
+{
+       xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
+                       NULL, GFP_KERNEL);
+       synchronize_rcu();
+}
+
+static unsigned long pfn_first(struct dev_pagemap *pgmap)
+{
+       return PHYS_PFN(pgmap->res.start) +
+               vmem_altmap_offset(pgmap_altmap(pgmap));
+}
+
+static unsigned long pfn_end(struct dev_pagemap *pgmap)
+{
+       const struct resource *res = &pgmap->res;
+
+       return (res->start + resource_size(res)) >> PAGE_SHIFT;
+}
+
+static unsigned long pfn_next(unsigned long pfn)
+{
+       if (pfn % 1024 == 0)
+               cond_resched();
+       return pfn + 1;
+}
+
+#define for_each_device_pfn(pfn, map) \
+       for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
+
+static void dev_pagemap_kill(struct dev_pagemap *pgmap)
+{
+       if (pgmap->ops && pgmap->ops->kill)
+               pgmap->ops->kill(pgmap);
+       else
+               percpu_ref_kill(pgmap->ref);
+}
+
+static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
+{
+       if (pgmap->ops && pgmap->ops->cleanup) {
+               pgmap->ops->cleanup(pgmap);
+       } else {
+               wait_for_completion(&pgmap->done);
+               percpu_ref_exit(pgmap->ref);
+       }
+}
+
+static void devm_memremap_pages_release(void *data)
+{
+       struct dev_pagemap *pgmap = data;
+       struct device *dev = pgmap->dev;
+       struct resource *res = &pgmap->res;
+       unsigned long pfn;
+       int nid;
+
+       dev_pagemap_kill(pgmap);
+       for_each_device_pfn(pfn, pgmap)
+               put_page(pfn_to_page(pfn));
+       dev_pagemap_cleanup(pgmap);
+
+       /* pages are dead and unused, undo the arch mapping */
+       nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
+
+       mem_hotplug_begin();
+       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+               pfn = PHYS_PFN(res->start);
+               __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
+                                PHYS_PFN(resource_size(res)), NULL);
+       } else {
+               arch_remove_memory(nid, res->start, resource_size(res),
+                               pgmap_altmap(pgmap));
+               kasan_remove_zero_shadow(__va(res->start), resource_size(res));
+       }
+       mem_hotplug_done();
+
+       untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
+       pgmap_array_delete(res);
+       dev_WARN_ONCE(dev, pgmap->altmap.alloc,
+                     "%s: failed to free all reserved pages\n", __func__);
+}
+
+static void dev_pagemap_percpu_release(struct percpu_ref *ref)
+{
+       struct dev_pagemap *pgmap =
+               container_of(ref, struct dev_pagemap, internal_ref);
+
+       complete(&pgmap->done);
+}
+
+/**
+ * devm_memremap_pages - remap and provide memmap backing for the given resource
+ * @dev: hosting device for @res
+ * @pgmap: pointer to a struct dev_pagemap
+ *
+ * Notes:
+ * 1/ At a minimum the res and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case
+ *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
+ *
+ * 3/ The ref field may optionally be provided, in which pgmap->ref must be
+ *    'live' on entry and will be killed and reaped at
+ *    devm_memremap_pages_release() time, or if this routine fails.
+ *
+ * 4/ res is expected to be a host memory range that could feasibly be
+ *    treated as a "System RAM" range, i.e. not a device mmio range, but
+ *    this is not enforced.
+ */
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+       struct resource *res = &pgmap->res;
+       struct dev_pagemap *conflict_pgmap;
+       struct mhp_restrictions restrictions = {
+               /*
+                * We do not want any optional features only our own memmap
+                */
+               .altmap = pgmap_altmap(pgmap),
+       };
+       pgprot_t pgprot = PAGE_KERNEL;
+       int error, nid, is_ram;
+       bool need_devmap_managed = true;
+
+       switch (pgmap->type) {
+       case MEMORY_DEVICE_PRIVATE:
+               if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
+                       WARN(1, "Device private memory not supported\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
+                       WARN(1, "Missing migrate_to_ram method\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               break;
+       case MEMORY_DEVICE_FS_DAX:
+               if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
+                   IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
+                       WARN(1, "File system DAX not supported\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               break;
+       case MEMORY_DEVICE_DEVDAX:
+       case MEMORY_DEVICE_PCI_P2PDMA:
+               need_devmap_managed = false;
+               break;
+       default:
+               WARN(1, "Invalid pgmap type %d\n", pgmap->type);
+               break;
+       }
+
+       if (!pgmap->ref) {
+               if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
+                       return ERR_PTR(-EINVAL);
+
+               init_completion(&pgmap->done);
+               error = percpu_ref_init(&pgmap->internal_ref,
+                               dev_pagemap_percpu_release, 0, GFP_KERNEL);
+               if (error)
+                       return ERR_PTR(error);
+               pgmap->ref = &pgmap->internal_ref;
+       } else {
+               if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
+                       WARN(1, "Missing reference count teardown definition\n");
+                       return ERR_PTR(-EINVAL);
+               }
+       }
+
+       if (need_devmap_managed) {
+               error = devmap_managed_enable_get(dev, pgmap);
+               if (error)
+                       return ERR_PTR(error);
+       }
+
+       conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
+       if (conflict_pgmap) {
+               dev_WARN(dev, "Conflicting mapping in same section\n");
+               put_dev_pagemap(conflict_pgmap);
+               error = -ENOMEM;
+               goto err_array;
+       }
+
+       conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
+       if (conflict_pgmap) {
+               dev_WARN(dev, "Conflicting mapping in same section\n");
+               put_dev_pagemap(conflict_pgmap);
+               error = -ENOMEM;
+               goto err_array;
+       }
+
+       is_ram = region_intersects(res->start, resource_size(res),
+               IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
+
+       if (is_ram != REGION_DISJOINT) {
+               WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
+                               is_ram == REGION_MIXED ? "mixed" : "ram", res);
+               error = -ENXIO;
+               goto err_array;
+       }
+
+       pgmap->dev = dev;
+
+       error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
+                               PHYS_PFN(res->end), pgmap, GFP_KERNEL));
+       if (error)
+               goto err_array;
+
+       nid = dev_to_node(dev);
+       if (nid < 0)
+               nid = numa_mem_id();
+
+       error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0,
+                       resource_size(res));
+       if (error)
+               goto err_pfn_remap;
+
+       mem_hotplug_begin();
+
+       /*
+        * For device private memory we call add_pages() as we only need to
+        * allocate and initialize struct page for the device memory. More-
+        * over the device memory is un-accessible thus we do not want to
+        * create a linear mapping for the memory like arch_add_memory()
+        * would do.
+        *
+        * For all other device memory types, which are accessible by
+        * the CPU, we do want the linear mapping and thus use
+        * arch_add_memory().
+        */
+       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+               error = add_pages(nid, PHYS_PFN(res->start),
+                               PHYS_PFN(resource_size(res)), &restrictions);
+       } else {
+               error = kasan_add_zero_shadow(__va(res->start), resource_size(res));
+               if (error) {
+                       mem_hotplug_done();
+                       goto err_kasan;
+               }
+
+               error = arch_add_memory(nid, res->start, resource_size(res),
+                                       &restrictions);
+       }
+
+       if (!error) {
+               struct zone *zone;
+
+               zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
+               move_pfn_range_to_zone(zone, PHYS_PFN(res->start),
+                               PHYS_PFN(resource_size(res)), restrictions.altmap);
+       }
+
+       mem_hotplug_done();
+       if (error)
+               goto err_add_memory;
+
+       /*
+        * Initialization of the pages has been deferred until now in order
+        * to allow us to do the work while not holding the hotplug lock.
+        */
+       memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
+                               PHYS_PFN(res->start),
+                               PHYS_PFN(resource_size(res)), pgmap);
+       percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
+
+       error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
+                       pgmap);
+       if (error)
+               return ERR_PTR(error);
+
+       return __va(res->start);
+
+ err_add_memory:
+       kasan_remove_zero_shadow(__va(res->start), resource_size(res));
+ err_kasan:
+       untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
+ err_pfn_remap:
+       pgmap_array_delete(res);
+ err_array:
+       dev_pagemap_kill(pgmap);
+       dev_pagemap_cleanup(pgmap);
+       return ERR_PTR(error);
+}
+EXPORT_SYMBOL_GPL(devm_memremap_pages);
+
+void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+       devm_release_action(dev, devm_memremap_pages_release, pgmap);
+}
+EXPORT_SYMBOL_GPL(devm_memunmap_pages);
+
+unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
+{
+       /* number of pfns from base where pfn_to_page() is valid */
+       if (altmap)
+               return altmap->reserve + altmap->free;
+       return 0;
+}
+
+void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
+{
+       altmap->alloc -= nr_pfns;
+}
+
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to lookup page_map
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
+ * is non-NULL but does not cover @pfn the reference to it will be released.
+ */
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+               struct dev_pagemap *pgmap)
+{
+       resource_size_t phys = PFN_PHYS(pfn);
+
+       /*
+        * In the cached case we're already holding a live reference.
+        */
+       if (pgmap) {
+               if (phys >= pgmap->res.start && phys <= pgmap->res.end)
+                       return pgmap;
+               put_dev_pagemap(pgmap);
+       }
+
+       /* fall back to slow path lookup */
+       rcu_read_lock();
+       pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
+       if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+               pgmap = NULL;
+       rcu_read_unlock();
+
+       return pgmap;
+}
+EXPORT_SYMBOL_GPL(get_dev_pagemap);
+
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+void __put_devmap_managed_page(struct page *page)
+{
+       int count = page_ref_dec_return(page);
+
+       /*
+        * If refcount is 1 then page is freed and refcount is stable as nobody
+        * holds a reference on the page.
+        */
+       if (count == 1) {
+               /* Clear Active bit in case of parallel mark_page_accessed */
+               __ClearPageActive(page);
+               __ClearPageWaiters(page);
+
+               mem_cgroup_uncharge(page);
+
+               page->pgmap->ops->page_free(page);
+       } else if (!count)
+               __put_page(page);
+}
+EXPORT_SYMBOL(__put_devmap_managed_page);
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
diff --git a/mm/migrate.c b/mm/migrate.c

index 8992741f10aad4bb1e56c4635ef84aac57176550..a42858d8e00b7bae8d43e9746fb786a16a64e809 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -767,12 +767,12 @@ recheck_buffers:
                         }
                         bh = bh->b_this_page;
                 } while (bh != head);
-               spin_unlock(&mapping->private_lock);
                 if (busy) {
                         if (invalidated) {
                                 rc = -EAGAIN;
                                 goto unlock_buffers;
                         }
+                       spin_unlock(&mapping->private_lock);
                         invalidate_bh_lrus();
                         invalidated = true;
                         goto recheck_buffers;
@@ -805,6 +805,8 @@ recheck_buffers:
  
         rc = MIGRATEPAGE_SUCCESS;
  unlock_buffers:
+       if (check_refs)
+               spin_unlock(&mapping->private_lock);
         bh = head;
         do {
                 unlock_buffer(bh);
@@ -2338,16 +2340,13 @@ next:
  static void migrate_vma_collect(struct migrate_vma *migrate)
  {
         struct mmu_notifier_range range;
-       struct mm_walk mm_walk;
-
-       mm_walk.pmd_entry = migrate_vma_collect_pmd;
-       mm_walk.pte_entry = NULL;
-       mm_walk.pte_hole = migrate_vma_collect_hole;
-       mm_walk.hugetlb_entry = NULL;
-       mm_walk.test_walk = NULL;
-       mm_walk.vma = migrate->vma;
-       mm_walk.mm = migrate->vma->vm_mm;
-       mm_walk.private = migrate;
+       struct mm_walk mm_walk = {
+               .pmd_entry = migrate_vma_collect_pmd,
+               .pte_hole = migrate_vma_collect_hole,
+               .vma = migrate->vma,
+               .mm = migrate->vma->vm_mm,
+               .private = migrate,
+       };
  
         mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm_walk.mm,
                                 migrate->start,
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 44df66a98f2adbbd2c18c38dec8eed9e45b4a6ce..dbdc46a84f63089de614a2807ef62a70f7c14968 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -699,7 +699,14 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
         unsigned long ret, freed = 0;
         struct shrinker *shrinker;
  
-       if (!mem_cgroup_is_root(memcg))
+       /*
+        * The root memcg might be allocated even though memcg is disabled
+        * via "cgroup_disable=memory" boot parameter.  This could make
+        * mem_cgroup_is_root() return false, then just run memcg slab
+        * shrink, but skip global shrink.  This may result in premature
+        * oom.
+        */
+       if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
                 return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
  
         if (!down_read_trylock(&shrinker_rwsem))
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c

index 4c223266299aa7c90c1288bcbed7fd891f802f3d..bdb69599c4bdc2526943cf9a690e1ef0697872dd 100644 (file)
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -191,8 +191,7 @@ int cg_find_unified_root(char *root, size_t len)
                 strtok(NULL, delim);
                 strtok(NULL, delim);
  
-               if (strcmp(fs, "cgroup") == 0 &&
-                   strcmp(type, "cgroup2") == 0) {
+               if (strcmp(type, "cgroup2") == 0) {
                         strncpy(root, mount, len);
                         return 0;
                 }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 3 Aug 2019 16:20:49 +0000 (09:20 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 3 Aug 2019 16:20:49 +0000 (09:20 -0700)
arch/mips/vdso/vdso.h		patch \| blob \| blame \| history
drivers/acpi/scan.c		patch \| blob \| blame \| history
fs/coredump.c		patch \| blob \| blame \| history
fs/ocfs2/xattr.c		patch \| blob \| blame \| history
include/asm-generic/getorder.h		patch \| blob \| blame \| history
include/linux/page-flags-layout.h		patch \| blob \| blame \| history
kernel/Makefile		patch \| blob \| blame \| history
kernel/memremap.c	[deleted file]	patch \| blob \| blame \| history
kernel/signal.c		patch \| blob \| blame \| history
lib/Kconfig.kasan		patch \| blob \| blame \| history
lib/Makefile		patch \| blob \| blame \| history
lib/test_meminit.c		patch \| blob \| blame \| history
mm/Makefile		patch \| blob \| blame \| history
mm/compaction.c		patch \| blob \| blame \| history
mm/kmemleak.c		patch \| blob \| blame \| history
mm/memory_hotplug.c		patch \| blob \| blame \| history
mm/memremap.c	[new file with mode: 0644]	patch \| blob
mm/migrate.c		patch \| blob \| blame \| history
mm/vmscan.c		patch \| blob \| blame \| history
tools/testing/selftests/cgroup/cgroup_util.c		patch \| blob \| blame \| history