Merge branch 'akpm' (patches from Andrew)
author    Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 3 Aug 2019 16:20:49 +0000 (09:20 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Sat, 3 Aug 2019 16:20:49 +0000 (09:20 -0700)
Merge misc fixes from Andrew Morton:
 "17 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  drivers/acpi/scan.c: document why we don't need the device_hotplug_lock
  memremap: move from kernel/ to mm/
  lib/test_meminit.c: use GFP_ATOMIC in RCU critical section
  asm-generic: fix -Wtype-limits compiler warnings
  cgroup: kselftest: relax fs_spec checks
  mm/memory_hotplug.c: remove unneeded return for void function
  mm/migrate.c: initialize pud_entry in migrate_vma()
  coredump: split pipe command whitespace before expanding template
  page flags: prioritize kasan bits over last-cpupid
  ubsan: build ubsan.c more conservatively
  kasan: remove clang version check for KASAN_STACK
  mm: compaction: avoid 100% CPU usage during compaction when a task is killed
  mm: migrate: fix reference check race between __find_get_block() and migration
  mm: vmscan: check if mem cgroup is disabled or not before calling memcg slab shrinker
  ocfs2: remove set but not used variable 'last_hash'
  Revert "kmemleak: allow to coexist with fault injection"
  kernel/signal.c: fix a kernel-doc markup

20 files changed:
arch/mips/vdso/vdso.h
drivers/acpi/scan.c
fs/coredump.c
fs/ocfs2/xattr.c
include/asm-generic/getorder.h
include/linux/page-flags-layout.h
kernel/Makefile
kernel/memremap.c [deleted file]
kernel/signal.c
lib/Kconfig.kasan
lib/Makefile
lib/test_meminit.c
mm/Makefile
mm/compaction.c
mm/kmemleak.c
mm/memory_hotplug.c
mm/memremap.c [new file with mode: 0644]
mm/migrate.c
mm/vmscan.c
tools/testing/selftests/cgroup/cgroup_util.c

diff --git a/arch/mips/vdso/vdso.h b/arch/mips/vdso/vdso.h
index 14b1931be69c3acf78afc794d4b3f0a3bbac5ecc..b65b169778e31478fcc507c12c88fcb1436cdf6b 100644
@@ -9,6 +9,7 @@
 #if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT)
 
 /* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */
+#define BUILD_VDSO32_64
 #undef CONFIG_64BIT
 #define CONFIG_32BIT 1
 #ifndef __ASSEMBLY__
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 0e28270b0fd81046b2b2a434b0dd65edb4b282a6..aad6be5c0af0a5f4da721eed1fd8b77cd25f3b94 100644
@@ -2204,6 +2204,12 @@ int __init acpi_scan_init(void)
        acpi_gpe_apply_masked_gpes();
        acpi_update_all_gpes();
 
+       /*
+        * Although we call __add_memory() that is documented to require the
+        * device_hotplug_lock, it is not necessary here because this is
+        * early code, when userspace or any other code path cannot trigger
+        * hotplug/hotunplug operations.
+        */
        mutex_lock(&acpi_scan_lock);
        /*
         * Enumerate devices in the ACPI namespace.
diff --git a/fs/coredump.c b/fs/coredump.c
index e42e17e55bfd5512bee600f6bd435e66e3f90147..b1ea7dfbd1494bd8d990ae03ee1bc21277dbd636 100644
@@ -7,6 +7,7 @@
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/swap.h>
+#include <linux/ctype.h>
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
@@ -187,11 +188,13 @@ put_exe_file:
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(struct core_name *cn, struct coredump_params *cprm)
+static int format_corename(struct core_name *cn, struct coredump_params *cprm,
+                          size_t **argv, int *argc)
 {
        const struct cred *cred = current_cred();
        const char *pat_ptr = core_pattern;
        int ispipe = (*pat_ptr == '|');
+       bool was_space = false;
        int pid_in_pattern = 0;
        int err = 0;
 
@@ -201,12 +204,35 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
                return -ENOMEM;
        cn->corename[0] = '\0';
 
-       if (ispipe)
+       if (ispipe) {
+               int argvs = sizeof(core_pattern) / 2;
+               (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
+               if (!(*argv))
+                       return -ENOMEM;
+               (*argv)[(*argc)++] = 0;
                ++pat_ptr;
+       }
 
        /* Repeat as long as we have more pattern to process and more output
           space */
        while (*pat_ptr) {
+               /*
+                * Split on spaces before doing template expansion so that
+                * %e and %E don't get split if they have spaces in them
+                */
+               if (ispipe) {
+                       if (isspace(*pat_ptr)) {
+                               was_space = true;
+                               pat_ptr++;
+                               continue;
+                       } else if (was_space) {
+                               was_space = false;
+                               err = cn_printf(cn, "%c", '\0');
+                               if (err)
+                                       return err;
+                               (*argv)[(*argc)++] = cn->used;
+                       }
+               }
                if (*pat_ptr != '%') {
                        err = cn_printf(cn, "%c", *pat_ptr++);
                } else {
@@ -546,6 +572,8 @@ void do_coredump(const kernel_siginfo_t *siginfo)
        struct cred *cred;
        int retval = 0;
        int ispipe;
+       size_t *argv = NULL;
+       int argc = 0;
        struct files_struct *displaced;
        /* require nonrelative corefile path and be extra careful */
        bool need_suid_safe = false;
@@ -592,9 +620,10 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 
        old_cred = override_creds(cred);
 
-       ispipe = format_corename(&cn, &cprm);
+       ispipe = format_corename(&cn, &cprm, &argv, &argc);
 
        if (ispipe) {
+               int argi;
                int dump_count;
                char **helper_argv;
                struct subprocess_info *sub_info;
@@ -637,12 +666,16 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                        goto fail_dropcount;
                }
 
-               helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
+               helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
+                                           GFP_KERNEL);
                if (!helper_argv) {
                        printk(KERN_WARNING "%s failed to allocate memory\n",
                               __func__);
                        goto fail_dropcount;
                }
+               for (argi = 0; argi < argc; argi++)
+                       helper_argv[argi] = cn.corename + argv[argi];
+               helper_argv[argi] = NULL;
 
                retval = -ENOMEM;
                sub_info = call_usermodehelper_setup(helper_argv[0],
@@ -652,7 +685,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                        retval = call_usermodehelper_exec(sub_info,
                                                          UMH_WAIT_EXEC);
 
-               argv_free(helper_argv);
+               kfree(helper_argv);
                if (retval) {
                        printk(KERN_INFO "Core dump to |%s pipe failed\n",
                               cn.corename);
@@ -766,6 +799,7 @@ fail_dropcount:
        if (ispipe)
                atomic_dec(&core_dump_count);
 fail_unlock:
+       kfree(argv);
        kfree(cn.corename);
        coredump_finish(mm, core_dumped);
        revert_creds(old_cred);
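
The coredump change above splits the pipe command on spaces before
template expansion and records each argument's offset into cn->corename,
instead of expanding first and then re-splitting with argv_split(),
which broke helper invocation whenever %e or %E expanded to a name
containing spaces. A minimal user-space sketch of the idea (the pattern,
names, and buffer sizes are illustrative, not the kernel's):

#include <stdio.h>

int main(void)
{
        const char *pattern = "|/bin/dump %e"; /* hypothetical core_pattern */
        const char *comm = "two words";        /* executable name with a space */
        char corename[256];
        size_t argv_off[16];
        size_t used = 0;
        const char *p;
        int argc = 0, i;

        argv_off[argc++] = 0;
        for (p = pattern + 1; *p; p++) {
                if (*p == ' ') {                /* split on pattern spaces... */
                        corename[used++] = '\0';
                        argv_off[argc++] = used;
                } else if (p[0] == '%' && p[1] == 'e') {
                        /* ...then expand, so embedded spaces survive */
                        used += sprintf(corename + used, "%s", comm);
                        p++;
                } else {
                        corename[used++] = *p;
                }
        }
        corename[used] = '\0';

        for (i = 0; i < argc; i++)      /* "/bin/dump", then "two words" */
                printf("argv[%d] = \"%s\"\n", i, corename + argv_off[i]);
        return 0;
}
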
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 385f3aaa244809b0d442d746fb4e76bd391d7bb2..90c830e3758e2dea9af508efed08b48a8509553f 100644
@@ -3825,7 +3825,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
        int low_bucket = 0, bucket, high_bucket;
        struct ocfs2_xattr_bucket *search;
-       u32 last_hash;
        u64 blkno, lower_blkno = 0;
 
        search = ocfs2_xattr_bucket_new(inode);
@@ -3869,8 +3868,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
                if (xh->xh_count)
                        xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
 
-               last_hash = le32_to_cpu(xe->xe_name_hash);
-
                /* record lower_blkno which may be the insert place. */
                lower_blkno = blkno;
 
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index c64bea7a52bebd5c7332203e1e3a78bfa69b84c3..e9f20b813a699a3fb7630fe76aba3d1ab60c892e 100644
@@ -7,24 +7,6 @@
 #include <linux/compiler.h>
 #include <linux/log2.h>
 
-/*
- * Runtime evaluation of get_order()
- */
-static inline __attribute_const__
-int __get_order(unsigned long size)
-{
-       int order;
-
-       size--;
-       size >>= PAGE_SHIFT;
-#if BITS_PER_LONG == 32
-       order = fls(size);
-#else
-       order = fls64(size);
-#endif
-       return order;
-}
-
 /**
  * get_order - Determine the allocation order of a memory size
  * @size: The size for which to get the order
@@ -43,19 +25,27 @@ int __get_order(unsigned long size)
  * to hold an object of the specified size.
  *
  * The result is undefined if the size is 0.
- *
- * This function may be used to initialise variables with compile time
- * evaluations of constants.
  */
-#define get_order(n)                                           \
-(                                                              \
-       __builtin_constant_p(n) ? (                             \
-               ((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :     \
-               (((n) < (1UL << PAGE_SHIFT)) ? 0 :              \
-                ilog2((n) - 1) - PAGE_SHIFT + 1)               \
-       ) :                                                     \
-       __get_order(n)                                          \
-)
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+       if (__builtin_constant_p(size)) {
+               if (!size)
+                       return BITS_PER_LONG - PAGE_SHIFT;
+
+               if (size < (1UL << PAGE_SHIFT))
+                       return 0;
+
+               return ilog2((size) - 1) - PAGE_SHIFT + 1;
+       }
+
+       size--;
+       size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+       return fls(size);
+#else
+       return fls64(size);
+#endif
+}
 
 #endif /* __ASSEMBLY__ */
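
With the macro rewritten as a real inline function, the compile-time and
runtime paths of get_order() keep giving the same answers, and the macro
expansion that tripped -Wtype-limits on some call sites is gone. A quick
user-space check of the runtime path, assuming 4 KiB pages
(PAGE_SHIFT == 12) and a portable stand-in for the kernel's fls():

#include <stdio.h>

#define PAGE_SHIFT 12

static int fls_ul(unsigned long x)      /* last set bit, 1-based */
{
        int r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

static int get_order(unsigned long size)
{
        size--;
        size >>= PAGE_SHIFT;
        return fls_ul(size);
}

int main(void)
{
        /* expected: 4095 -> 0, 4096 -> 0, 4097 -> 1, 8192 -> 1, 8193 -> 2 */
        unsigned long sizes[] = { 4095, 4096, 4097, 8192, 8193 };
        int i;

        for (i = 0; i < 5; i++)
                printf("get_order(%lu) = %d\n", sizes[i], get_order(sizes[i]));
        return 0;
}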
 
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 1dda31825ec4ae78c8852f15288a80c4a651bcf3..71283739ffd239c5790dd57f408411451fa68220 100644
@@ -32,6 +32,7 @@
 
 #endif /* CONFIG_SPARSEMEM */
 
+#ifndef BUILD_VDSO32_64
 /*
  * page->flags layout:
  *
 #define LAST_CPUPID_SHIFT 0
 #endif
 
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#ifdef CONFIG_KASAN_SW_TAGS
+#define KASAN_TAG_WIDTH 8
+#else
+#define KASAN_TAG_WIDTH 0
+#endif
+
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \
+       <= BITS_PER_LONG - NR_PAGEFLAGS
 #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
 #else
 #define LAST_CPUPID_WIDTH 0
 #endif
 
-#ifdef CONFIG_KASAN_SW_TAGS
-#define KASAN_TAG_WIDTH 8
 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \
        > BITS_PER_LONG - NR_PAGEFLAGS
-#error "KASAN: not enough bits in page flags for tag"
-#endif
-#else
-#define KASAN_TAG_WIDTH 0
+#error "Not enough bits in page flags"
 #endif
 
 /*
 #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
 #endif
 
+#endif
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
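
The reordering matters because page->flags has a fixed bit budget:
KASAN's eight tag bits are now reserved before deciding whether
last_cpupid fits, so a tight configuration falls back to
LAST_CPUPID_NOT_IN_PAGE_FLAGS instead of hitting the #error. A toy
rendering of the budget check; every width below is an assumed,
illustrative value, not one from a real config:

#include <stdio.h>

int main(void)
{
        int bits_per_long = 64, nr_pageflags = 25;      /* assumed */
        int sections = 0, zones = 3, nodes = 10;        /* assumed */
        int last_cpupid_shift = 8 + 12; /* pid bits + cpu bits, assumed */
        int kasan_tag_width = 8;        /* CONFIG_KASAN_SW_TAGS */
        int budget = bits_per_long - nr_pageflags;
        int need = sections + zones + nodes + last_cpupid_shift +
                   kasan_tag_width;

        /* 41 > 39 here, so last_cpupid gives way rather than KASAN
         * failing the build */
        printf("need %d bits, have %d: last_cpupid %s page->flags\n",
               need, budget, need <= budget ? "fits in" : "falls out of");
        return 0;
}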
diff --git a/kernel/Makefile b/kernel/Makefile
index a8d923b5481ba91d3cb15209eda7838fe1793ead..ef0d95a190b4179848c52041f2d1cfb6b9fbff77 100644
@@ -111,7 +111,6 @@ obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
 
 obj-$(CONFIG_HAS_IOMEM) += iomem.o
-obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 obj-$(CONFIG_RSEQ) += rseq.o
 
 obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
diff --git a/kernel/memremap.c b/kernel/memremap.c
deleted file mode 100644
index 6ee03a8..0000000
--- a/kernel/memremap.c
+++ /dev/null
@@ -1,405 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
-#include <linux/device.h>
-#include <linux/io.h>
-#include <linux/kasan.h>
-#include <linux/memory_hotplug.h>
-#include <linux/mm.h>
-#include <linux/pfn_t.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
-#include <linux/types.h>
-#include <linux/wait_bit.h>
-#include <linux/xarray.h>
-
-static DEFINE_XARRAY(pgmap_array);
-#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
-#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
-
-#ifdef CONFIG_DEV_PAGEMAP_OPS
-DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
-EXPORT_SYMBOL(devmap_managed_key);
-static atomic_t devmap_managed_enable;
-
-static void devmap_managed_enable_put(void *data)
-{
-       if (atomic_dec_and_test(&devmap_managed_enable))
-               static_branch_disable(&devmap_managed_key);
-}
-
-static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
-{
-       if (!pgmap->ops || !pgmap->ops->page_free) {
-               WARN(1, "Missing page_free method\n");
-               return -EINVAL;
-       }
-
-       if (atomic_inc_return(&devmap_managed_enable) == 1)
-               static_branch_enable(&devmap_managed_key);
-       return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL);
-}
-#else
-static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
-{
-       return -EINVAL;
-}
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
-
-static void pgmap_array_delete(struct resource *res)
-{
-       xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
-                       NULL, GFP_KERNEL);
-       synchronize_rcu();
-}
-
-static unsigned long pfn_first(struct dev_pagemap *pgmap)
-{
-       return PHYS_PFN(pgmap->res.start) +
-               vmem_altmap_offset(pgmap_altmap(pgmap));
-}
-
-static unsigned long pfn_end(struct dev_pagemap *pgmap)
-{
-       const struct resource *res = &pgmap->res;
-
-       return (res->start + resource_size(res)) >> PAGE_SHIFT;
-}
-
-static unsigned long pfn_next(unsigned long pfn)
-{
-       if (pfn % 1024 == 0)
-               cond_resched();
-       return pfn + 1;
-}
-
-#define for_each_device_pfn(pfn, map) \
-       for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
-
-static void dev_pagemap_kill(struct dev_pagemap *pgmap)
-{
-       if (pgmap->ops && pgmap->ops->kill)
-               pgmap->ops->kill(pgmap);
-       else
-               percpu_ref_kill(pgmap->ref);
-}
-
-static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
-{
-       if (pgmap->ops && pgmap->ops->cleanup) {
-               pgmap->ops->cleanup(pgmap);
-       } else {
-               wait_for_completion(&pgmap->done);
-               percpu_ref_exit(pgmap->ref);
-       }
-}
-
-static void devm_memremap_pages_release(void *data)
-{
-       struct dev_pagemap *pgmap = data;
-       struct device *dev = pgmap->dev;
-       struct resource *res = &pgmap->res;
-       unsigned long pfn;
-       int nid;
-
-       dev_pagemap_kill(pgmap);
-       for_each_device_pfn(pfn, pgmap)
-               put_page(pfn_to_page(pfn));
-       dev_pagemap_cleanup(pgmap);
-
-       /* pages are dead and unused, undo the arch mapping */
-       nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
-
-       mem_hotplug_begin();
-       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-               pfn = PHYS_PFN(res->start);
-               __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
-                                PHYS_PFN(resource_size(res)), NULL);
-       } else {
-               arch_remove_memory(nid, res->start, resource_size(res),
-                               pgmap_altmap(pgmap));
-               kasan_remove_zero_shadow(__va(res->start), resource_size(res));
-       }
-       mem_hotplug_done();
-
-       untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
-       pgmap_array_delete(res);
-       dev_WARN_ONCE(dev, pgmap->altmap.alloc,
-                     "%s: failed to free all reserved pages\n", __func__);
-}
-
-static void dev_pagemap_percpu_release(struct percpu_ref *ref)
-{
-       struct dev_pagemap *pgmap =
-               container_of(ref, struct dev_pagemap, internal_ref);
-
-       complete(&pgmap->done);
-}
-
-/**
- * devm_memremap_pages - remap and provide memmap backing for the given resource
- * @dev: hosting device for @res
- * @pgmap: pointer to a struct dev_pagemap
- *
- * Notes:
- * 1/ At a minimum the res and type members of @pgmap must be initialized
- *    by the caller before passing it to this function
- *
- * 2/ The altmap field may optionally be initialized, in which case
- *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
- *
- * 3/ The ref field may optionally be provided, in which case pgmap->ref must be
- *    'live' on entry and will be killed and reaped at
- *    devm_memremap_pages_release() time, or if this routine fails.
- *
- * 4/ res is expected to be a host memory range that could feasibly be
- *    treated as a "System RAM" range, i.e. not a device mmio range, but
- *    this is not enforced.
- */
-void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
-{
-       struct resource *res = &pgmap->res;
-       struct dev_pagemap *conflict_pgmap;
-       struct mhp_restrictions restrictions = {
-               /*
-                * We do not want any optional features, only our own memmap
-                */
-               .altmap = pgmap_altmap(pgmap),
-       };
-       pgprot_t pgprot = PAGE_KERNEL;
-       int error, nid, is_ram;
-       bool need_devmap_managed = true;
-
-       switch (pgmap->type) {
-       case MEMORY_DEVICE_PRIVATE:
-               if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
-                       WARN(1, "Device private memory not supported\n");
-                       return ERR_PTR(-EINVAL);
-               }
-               if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
-                       WARN(1, "Missing migrate_to_ram method\n");
-                       return ERR_PTR(-EINVAL);
-               }
-               break;
-       case MEMORY_DEVICE_FS_DAX:
-               if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
-                   IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
-                       WARN(1, "File system DAX not supported\n");
-                       return ERR_PTR(-EINVAL);
-               }
-               break;
-       case MEMORY_DEVICE_DEVDAX:
-       case MEMORY_DEVICE_PCI_P2PDMA:
-               need_devmap_managed = false;
-               break;
-       default:
-               WARN(1, "Invalid pgmap type %d\n", pgmap->type);
-               break;
-       }
-
-       if (!pgmap->ref) {
-               if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
-                       return ERR_PTR(-EINVAL);
-
-               init_completion(&pgmap->done);
-               error = percpu_ref_init(&pgmap->internal_ref,
-                               dev_pagemap_percpu_release, 0, GFP_KERNEL);
-               if (error)
-                       return ERR_PTR(error);
-               pgmap->ref = &pgmap->internal_ref;
-       } else {
-               if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
-                       WARN(1, "Missing reference count teardown definition\n");
-                       return ERR_PTR(-EINVAL);
-               }
-       }
-
-       if (need_devmap_managed) {
-               error = devmap_managed_enable_get(dev, pgmap);
-               if (error)
-                       return ERR_PTR(error);
-       }
-
-       conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
-       if (conflict_pgmap) {
-               dev_WARN(dev, "Conflicting mapping in same section\n");
-               put_dev_pagemap(conflict_pgmap);
-               error = -ENOMEM;
-               goto err_array;
-       }
-
-       conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
-       if (conflict_pgmap) {
-               dev_WARN(dev, "Conflicting mapping in same section\n");
-               put_dev_pagemap(conflict_pgmap);
-               error = -ENOMEM;
-               goto err_array;
-       }
-
-       is_ram = region_intersects(res->start, resource_size(res),
-               IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
-
-       if (is_ram != REGION_DISJOINT) {
-               WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
-                               is_ram == REGION_MIXED ? "mixed" : "ram", res);
-               error = -ENXIO;
-               goto err_array;
-       }
-
-       pgmap->dev = dev;
-
-       error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
-                               PHYS_PFN(res->end), pgmap, GFP_KERNEL));
-       if (error)
-               goto err_array;
-
-       nid = dev_to_node(dev);
-       if (nid < 0)
-               nid = numa_mem_id();
-
-       error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0,
-                       resource_size(res));
-       if (error)
-               goto err_pfn_remap;
-
-       mem_hotplug_begin();
-
-       /*
-        * For device private memory we call add_pages() as we only need to
-        * allocate and initialize struct page for the device memory. More-
-        * over the device memory is un-accessible thus we do not want to
-        * create a linear mapping for the memory like arch_add_memory()
-        * would do.
-        *
-        * For all other device memory types, which are accessible by
-        * the CPU, we do want the linear mapping and thus use
-        * arch_add_memory().
-        */
-       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-               error = add_pages(nid, PHYS_PFN(res->start),
-                               PHYS_PFN(resource_size(res)), &restrictions);
-       } else {
-               error = kasan_add_zero_shadow(__va(res->start), resource_size(res));
-               if (error) {
-                       mem_hotplug_done();
-                       goto err_kasan;
-               }
-
-               error = arch_add_memory(nid, res->start, resource_size(res),
-                                       &restrictions);
-       }
-
-       if (!error) {
-               struct zone *zone;
-
-               zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
-               move_pfn_range_to_zone(zone, PHYS_PFN(res->start),
-                               PHYS_PFN(resource_size(res)), restrictions.altmap);
-       }
-
-       mem_hotplug_done();
-       if (error)
-               goto err_add_memory;
-
-       /*
-        * Initialization of the pages has been deferred until now in order
-        * to allow us to do the work while not holding the hotplug lock.
-        */
-       memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
-                               PHYS_PFN(res->start),
-                               PHYS_PFN(resource_size(res)), pgmap);
-       percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
-
-       error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
-                       pgmap);
-       if (error)
-               return ERR_PTR(error);
-
-       return __va(res->start);
-
- err_add_memory:
-       kasan_remove_zero_shadow(__va(res->start), resource_size(res));
- err_kasan:
-       untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
- err_pfn_remap:
-       pgmap_array_delete(res);
- err_array:
-       dev_pagemap_kill(pgmap);
-       dev_pagemap_cleanup(pgmap);
-       return ERR_PTR(error);
-}
-EXPORT_SYMBOL_GPL(devm_memremap_pages);
-
-void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
-{
-       devm_release_action(dev, devm_memremap_pages_release, pgmap);
-}
-EXPORT_SYMBOL_GPL(devm_memunmap_pages);
-
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
-{
-       /* number of pfns from base where pfn_to_page() is valid */
-       if (altmap)
-               return altmap->reserve + altmap->free;
-       return 0;
-}
-
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
-{
-       altmap->alloc -= nr_pfns;
-}
-
-/**
- * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
- * @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
- * is non-NULL but does not cover @pfn the reference to it will be released.
- */
-struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
-               struct dev_pagemap *pgmap)
-{
-       resource_size_t phys = PFN_PHYS(pfn);
-
-       /*
-        * In the cached case we're already holding a live reference.
-        */
-       if (pgmap) {
-               if (phys >= pgmap->res.start && phys <= pgmap->res.end)
-                       return pgmap;
-               put_dev_pagemap(pgmap);
-       }
-
-       /* fall back to slow path lookup */
-       rcu_read_lock();
-       pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
-       if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
-               pgmap = NULL;
-       rcu_read_unlock();
-
-       return pgmap;
-}
-EXPORT_SYMBOL_GPL(get_dev_pagemap);
-
-#ifdef CONFIG_DEV_PAGEMAP_OPS
-void __put_devmap_managed_page(struct page *page)
-{
-       int count = page_ref_dec_return(page);
-
-       /*
-        * If refcount is 1 then page is freed and refcount is stable as nobody
-        * holds a reference on the page.
-        */
-       if (count == 1) {
-               /* Clear Active bit in case of parallel mark_page_accessed */
-               __ClearPageActive(page);
-               __ClearPageWaiters(page);
-
-               mem_cgroup_uncharge(page);
-
-               page->pgmap->ops->page_free(page);
-       } else if (!count)
-               __put_page(page);
-}
-EXPORT_SYMBOL(__put_devmap_managed_page);
-#endif /* CONFIG_DEV_PAGEMAP_OPS */
diff --git a/kernel/signal.c b/kernel/signal.c
index 349f5a67f100e8bc33ed4ac44f717de482817776..e667be6907d71333c44546cd43ebfdde6c9eab3c 100644
@@ -349,7 +349,7 @@ void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask)
  * @task has %JOBCTL_STOP_PENDING set and is participating in a group stop.
  * Group stop states are cleared and the group stop count is consumed if
  * %JOBCTL_STOP_CONSUME was set.  If the consumption completes the group
- * stop, the appropriate %SIGNAL_* flags are set.
+ * stop, the appropriate `SIGNAL_*` flags are set.
  *
  * CONTEXT:
  * Must be called with @task->sighand->siglock held.
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 4fafba1a923b67a2650a0265025e247829d3f9c6..7fa97a8b571778a1619940ab10a9a4bc9e96f823 100644
@@ -106,7 +106,6 @@ endchoice
 
 config KASAN_STACK_ENABLE
        bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST
-       default !(CLANG_VERSION < 90000)
        depends on KASAN
        help
          The LLVM stack address sanitizer has a known problem that
@@ -115,11 +114,11 @@ config KASAN_STACK_ENABLE
          Disabling asan-stack makes it safe to run kernels built
          with clang-8 with KASAN enabled, though it loses some of
          the functionality.
-         This feature is always disabled when compile-testing with clang-8
-         or earlier to avoid cluttering the output in stack overflow
-         warnings, but clang-8 users can still enable it for builds without
-         CONFIG_COMPILE_TEST.  On gcc and later clang versions it is
-         assumed to always be safe to use and enabled by default.
+         This feature is always disabled when compile-testing with clang
+         to avoid cluttering the output in stack overflow warnings,
+         but clang users can still enable it for builds without
+         CONFIG_COMPILE_TEST.  On gcc it is assumed to always be safe
+         to use and enabled by default.
 
 config KASAN_STACK
        int
diff --git a/lib/Makefile b/lib/Makefile
index 095601ce371dabd7a7e5b3501c61ac4b6516d91b..29c02a924973afefc9a95cac47a6dfbcced66993 100644
@@ -279,7 +279,8 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
 obj-$(CONFIG_UBSAN) += ubsan.o
 
 UBSAN_SANITIZE_ubsan.o := n
-CFLAGS_ubsan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+KASAN_SANITIZE_ubsan.o := n
+CFLAGS_ubsan.o := $(call cc-option, -fno-stack-protector) $(DISABLE_STACKLEAK_PLUGIN)
 
 obj-$(CONFIG_SBITMAP) += sbitmap.o
 
diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index 62d19f270cad4c75b9582abb5397e987c0aaca1c..9729f271d15041ac1930eb85922b81ba094a574f 100644
@@ -222,7 +222,7 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor,
                 * Copy the buffer to check that it's not wiped on
                 * free().
                 */
-               buf_copy = kmalloc(size, GFP_KERNEL);
+               buf_copy = kmalloc(size, GFP_ATOMIC);
                if (buf_copy)
                        memcpy(buf_copy, buf, size);
 
diff --git a/mm/Makefile b/mm/Makefile
index 338e528ad4366f982298e9a8e59eba5e308fab32..d0b295c3b764bb753e4081b5847d6cef577f114d 100644
@@ -102,5 +102,6 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
 obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
 obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
+obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 obj-$(CONFIG_HMM_MIRROR) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 9e1b9acb116b9b3efaabd8ee02538c20456b4714..952dc2fb24e50a26bee9621965ec6070b5d13346 100644
@@ -842,13 +842,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
                /*
                 * Periodically drop the lock (if held) regardless of its
-                * contention, to give chance to IRQs. Abort async compaction
-                * if contended.
+                * contention, to give chance to IRQs. Abort completely if
+                * a fatal signal is pending.
                 */
                if (!(low_pfn % SWAP_CLUSTER_MAX)
                    && compact_unlock_should_abort(&pgdat->lru_lock,
-                                           flags, &locked, cc))
-                       break;
+                                           flags, &locked, cc)) {
+                       low_pfn = 0;
+                       goto fatal_pending;
+               }
 
                if (!pfn_valid_within(low_pfn))
                        goto isolate_fail;
@@ -1060,6 +1062,7 @@ isolate_abort:
        trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
                                                nr_scanned, nr_isolated);
 
+fatal_pending:
        cc->total_migrate_scanned += nr_scanned;
        if (nr_isolated)
                count_compact_events(COMPACTISOLATED, nr_isolated);
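
With a plain break, the partially advanced scan position was still
reported as progress, so a killed task could spin at 100% CPU retrying
compaction; returning low_pfn == 0 makes the abort terminal, and the new
fatal_pending label still accounts the pages already scanned. The
underlying pattern, sketched here with a hypothetical scan loop, is that
long pfn walks should poll for a fatal signal and give up entirely:

#include <linux/sched/signal.h>
#include <linux/swap.h>

static int scan_pfn_range(unsigned long start, unsigned long end)
{
        unsigned long pfn;

        for (pfn = start; pfn < end; pfn++) {
                /* poll only every SWAP_CLUSTER_MAX pfns to keep it cheap */
                if (!(pfn % SWAP_CLUSTER_MAX) && fatal_signal_pending(current))
                        return -EINTR;  /* killed: abandon the whole scan */
                /* ... examine one pfn ... */
        }
        return 0;
}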
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index dbbd518fb6b3e91dc298956f7abe842361369952..6e9e8cca663e477d328584ac3142d8234d8e3e05 100644
 /* GFP bitmask for kmemleak internal allocations */
 #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
                                 __GFP_NORETRY | __GFP_NOMEMALLOC | \
-                                __GFP_NOWARN | __GFP_NOFAIL)
+                                __GFP_NOWARN)
 
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2a9bbddb0e55427125e90dde4795b6460cdbdae1..c73f0991316511fb5471d3382f2d719c6cbfd759 100644
@@ -132,7 +132,6 @@ static void release_memory_resource(struct resource *res)
                return;
        release_resource(res);
        kfree(res);
-       return;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
@@ -979,7 +978,6 @@ static void rollback_node_hotadd(int nid)
        arch_refresh_nodedata(nid, NULL);
        free_percpu(pgdat->per_cpu_nodestats);
        arch_free_nodedata(pgdat);
-       return;
 }
 
 
diff --git a/mm/memremap.c b/mm/memremap.c
new file mode 100644
index 0000000..6ee03a8
--- /dev/null
+++ b/mm/memremap.c
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kasan.h>
+#include <linux/memory_hotplug.h>
+#include <linux/mm.h>
+#include <linux/pfn_t.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/types.h>
+#include <linux/wait_bit.h>
+#include <linux/xarray.h>
+
+static DEFINE_XARRAY(pgmap_array);
+#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
+#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
+
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL(devmap_managed_key);
+static atomic_t devmap_managed_enable;
+
+static void devmap_managed_enable_put(void *data)
+{
+       if (atomic_dec_and_test(&devmap_managed_enable))
+               static_branch_disable(&devmap_managed_key);
+}
+
+static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+{
+       if (!pgmap->ops || !pgmap->ops->page_free) {
+               WARN(1, "Missing page_free method\n");
+               return -EINVAL;
+       }
+
+       if (atomic_inc_return(&devmap_managed_enable) == 1)
+               static_branch_enable(&devmap_managed_key);
+       return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL);
+}
+#else
+static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+{
+       return -EINVAL;
+}
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
+
+static void pgmap_array_delete(struct resource *res)
+{
+       xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
+                       NULL, GFP_KERNEL);
+       synchronize_rcu();
+}
+
+static unsigned long pfn_first(struct dev_pagemap *pgmap)
+{
+       return PHYS_PFN(pgmap->res.start) +
+               vmem_altmap_offset(pgmap_altmap(pgmap));
+}
+
+static unsigned long pfn_end(struct dev_pagemap *pgmap)
+{
+       const struct resource *res = &pgmap->res;
+
+       return (res->start + resource_size(res)) >> PAGE_SHIFT;
+}
+
+static unsigned long pfn_next(unsigned long pfn)
+{
+       if (pfn % 1024 == 0)
+               cond_resched();
+       return pfn + 1;
+}
+
+#define for_each_device_pfn(pfn, map) \
+       for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
+
+static void dev_pagemap_kill(struct dev_pagemap *pgmap)
+{
+       if (pgmap->ops && pgmap->ops->kill)
+               pgmap->ops->kill(pgmap);
+       else
+               percpu_ref_kill(pgmap->ref);
+}
+
+static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
+{
+       if (pgmap->ops && pgmap->ops->cleanup) {
+               pgmap->ops->cleanup(pgmap);
+       } else {
+               wait_for_completion(&pgmap->done);
+               percpu_ref_exit(pgmap->ref);
+       }
+}
+
+static void devm_memremap_pages_release(void *data)
+{
+       struct dev_pagemap *pgmap = data;
+       struct device *dev = pgmap->dev;
+       struct resource *res = &pgmap->res;
+       unsigned long pfn;
+       int nid;
+
+       dev_pagemap_kill(pgmap);
+       for_each_device_pfn(pfn, pgmap)
+               put_page(pfn_to_page(pfn));
+       dev_pagemap_cleanup(pgmap);
+
+       /* pages are dead and unused, undo the arch mapping */
+       nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
+
+       mem_hotplug_begin();
+       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+               pfn = PHYS_PFN(res->start);
+               __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
+                                PHYS_PFN(resource_size(res)), NULL);
+       } else {
+               arch_remove_memory(nid, res->start, resource_size(res),
+                               pgmap_altmap(pgmap));
+               kasan_remove_zero_shadow(__va(res->start), resource_size(res));
+       }
+       mem_hotplug_done();
+
+       untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
+       pgmap_array_delete(res);
+       dev_WARN_ONCE(dev, pgmap->altmap.alloc,
+                     "%s: failed to free all reserved pages\n", __func__);
+}
+
+static void dev_pagemap_percpu_release(struct percpu_ref *ref)
+{
+       struct dev_pagemap *pgmap =
+               container_of(ref, struct dev_pagemap, internal_ref);
+
+       complete(&pgmap->done);
+}
+
+/**
+ * devm_memremap_pages - remap and provide memmap backing for the given resource
+ * @dev: hosting device for @res
+ * @pgmap: pointer to a struct dev_pagemap
+ *
+ * Notes:
+ * 1/ At a minimum the res and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case
+ *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
+ *
+ * 3/ The ref field may optionally be provided, in which case pgmap->ref must be
+ *    'live' on entry and will be killed and reaped at
+ *    devm_memremap_pages_release() time, or if this routine fails.
+ *
+ * 4/ res is expected to be a host memory range that could feasibly be
+ *    treated as a "System RAM" range, i.e. not a device mmio range, but
+ *    this is not enforced.
+ */
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+       struct resource *res = &pgmap->res;
+       struct dev_pagemap *conflict_pgmap;
+       struct mhp_restrictions restrictions = {
+               /*
+                * We do not want any optional features, only our own memmap
+                */
+               .altmap = pgmap_altmap(pgmap),
+       };
+       pgprot_t pgprot = PAGE_KERNEL;
+       int error, nid, is_ram;
+       bool need_devmap_managed = true;
+
+       switch (pgmap->type) {
+       case MEMORY_DEVICE_PRIVATE:
+               if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
+                       WARN(1, "Device private memory not supported\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
+                       WARN(1, "Missing migrate_to_ram method\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               break;
+       case MEMORY_DEVICE_FS_DAX:
+               if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
+                   IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
+                       WARN(1, "File system DAX not supported\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               break;
+       case MEMORY_DEVICE_DEVDAX:
+       case MEMORY_DEVICE_PCI_P2PDMA:
+               need_devmap_managed = false;
+               break;
+       default:
+               WARN(1, "Invalid pgmap type %d\n", pgmap->type);
+               break;
+       }
+
+       if (!pgmap->ref) {
+               if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
+                       return ERR_PTR(-EINVAL);
+
+               init_completion(&pgmap->done);
+               error = percpu_ref_init(&pgmap->internal_ref,
+                               dev_pagemap_percpu_release, 0, GFP_KERNEL);
+               if (error)
+                       return ERR_PTR(error);
+               pgmap->ref = &pgmap->internal_ref;
+       } else {
+               if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
+                       WARN(1, "Missing reference count teardown definition\n");
+                       return ERR_PTR(-EINVAL);
+               }
+       }
+
+       if (need_devmap_managed) {
+               error = devmap_managed_enable_get(dev, pgmap);
+               if (error)
+                       return ERR_PTR(error);
+       }
+
+       conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
+       if (conflict_pgmap) {
+               dev_WARN(dev, "Conflicting mapping in same section\n");
+               put_dev_pagemap(conflict_pgmap);
+               error = -ENOMEM;
+               goto err_array;
+       }
+
+       conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
+       if (conflict_pgmap) {
+               dev_WARN(dev, "Conflicting mapping in same section\n");
+               put_dev_pagemap(conflict_pgmap);
+               error = -ENOMEM;
+               goto err_array;
+       }
+
+       is_ram = region_intersects(res->start, resource_size(res),
+               IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
+
+       if (is_ram != REGION_DISJOINT) {
+               WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
+                               is_ram == REGION_MIXED ? "mixed" : "ram", res);
+               error = -ENXIO;
+               goto err_array;
+       }
+
+       pgmap->dev = dev;
+
+       error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
+                               PHYS_PFN(res->end), pgmap, GFP_KERNEL));
+       if (error)
+               goto err_array;
+
+       nid = dev_to_node(dev);
+       if (nid < 0)
+               nid = numa_mem_id();
+
+       error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0,
+                       resource_size(res));
+       if (error)
+               goto err_pfn_remap;
+
+       mem_hotplug_begin();
+
+       /*
+        * For device private memory we call add_pages() as we only need to
+        * allocate and initialize struct page for the device memory. More-
+        * over the device memory is un-accessible thus we do not want to
+        * create a linear mapping for the memory like arch_add_memory()
+        * would do.
+        *
+        * For all other device memory types, which are accessible by
+        * the CPU, we do want the linear mapping and thus use
+        * arch_add_memory().
+        */
+       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+               error = add_pages(nid, PHYS_PFN(res->start),
+                               PHYS_PFN(resource_size(res)), &restrictions);
+       } else {
+               error = kasan_add_zero_shadow(__va(res->start), resource_size(res));
+               if (error) {
+                       mem_hotplug_done();
+                       goto err_kasan;
+               }
+
+               error = arch_add_memory(nid, res->start, resource_size(res),
+                                       &restrictions);
+       }
+
+       if (!error) {
+               struct zone *zone;
+
+               zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
+               move_pfn_range_to_zone(zone, PHYS_PFN(res->start),
+                               PHYS_PFN(resource_size(res)), restrictions.altmap);
+       }
+
+       mem_hotplug_done();
+       if (error)
+               goto err_add_memory;
+
+       /*
+        * Initialization of the pages has been deferred until now in order
+        * to allow us to do the work while not holding the hotplug lock.
+        */
+       memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
+                               PHYS_PFN(res->start),
+                               PHYS_PFN(resource_size(res)), pgmap);
+       percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
+
+       error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
+                       pgmap);
+       if (error)
+               return ERR_PTR(error);
+
+       return __va(res->start);
+
+ err_add_memory:
+       kasan_remove_zero_shadow(__va(res->start), resource_size(res));
+ err_kasan:
+       untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
+ err_pfn_remap:
+       pgmap_array_delete(res);
+ err_array:
+       dev_pagemap_kill(pgmap);
+       dev_pagemap_cleanup(pgmap);
+       return ERR_PTR(error);
+}
+EXPORT_SYMBOL_GPL(devm_memremap_pages);
+
+void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+       devm_release_action(dev, devm_memremap_pages_release, pgmap);
+}
+EXPORT_SYMBOL_GPL(devm_memunmap_pages);
+
+unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
+{
+       /* number of pfns from base where pfn_to_page() is valid */
+       if (altmap)
+               return altmap->reserve + altmap->free;
+       return 0;
+}
+
+void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
+{
+       altmap->alloc -= nr_pfns;
+}
+
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to lookup page_map
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
+ * is non-NULL but does not cover @pfn the reference to it will be released.
+ */
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+               struct dev_pagemap *pgmap)
+{
+       resource_size_t phys = PFN_PHYS(pfn);
+
+       /*
+        * In the cached case we're already holding a live reference.
+        */
+       if (pgmap) {
+               if (phys >= pgmap->res.start && phys <= pgmap->res.end)
+                       return pgmap;
+               put_dev_pagemap(pgmap);
+       }
+
+       /* fall back to slow path lookup */
+       rcu_read_lock();
+       pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
+       if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+               pgmap = NULL;
+       rcu_read_unlock();
+
+       return pgmap;
+}
+EXPORT_SYMBOL_GPL(get_dev_pagemap);
+
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+void __put_devmap_managed_page(struct page *page)
+{
+       int count = page_ref_dec_return(page);
+
+       /*
+        * If refcount is 1 then page is freed and refcount is stable as nobody
+        * holds a reference on the page.
+        */
+       if (count == 1) {
+               /* Clear Active bit in case of parallel mark_page_accessed */
+               __ClearPageActive(page);
+               __ClearPageWaiters(page);
+
+               mem_cgroup_uncharge(page);
+
+               page->pgmap->ops->page_free(page);
+       } else if (!count)
+               __put_page(page);
+}
+EXPORT_SYMBOL(__put_devmap_managed_page);
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
diff --git a/mm/migrate.c b/mm/migrate.c
index 8992741f10aad4bb1e56c4635ef84aac57176550..a42858d8e00b7bae8d43e9746fb786a16a64e809 100644
@@ -767,12 +767,12 @@ recheck_buffers:
                        }
                        bh = bh->b_this_page;
                } while (bh != head);
-               spin_unlock(&mapping->private_lock);
                if (busy) {
                        if (invalidated) {
                                rc = -EAGAIN;
                                goto unlock_buffers;
                        }
+                       spin_unlock(&mapping->private_lock);
                        invalidate_bh_lrus();
                        invalidated = true;
                        goto recheck_buffers;
@@ -805,6 +805,8 @@ recheck_buffers:
 
        rc = MIGRATEPAGE_SUCCESS;
 unlock_buffers:
+       if (check_refs)
+               spin_unlock(&mapping->private_lock);
        bh = head;
        do {
                unlock_buffer(bh);
@@ -2338,16 +2340,13 @@ next:
 static void migrate_vma_collect(struct migrate_vma *migrate)
 {
        struct mmu_notifier_range range;
-       struct mm_walk mm_walk;
-
-       mm_walk.pmd_entry = migrate_vma_collect_pmd;
-       mm_walk.pte_entry = NULL;
-       mm_walk.pte_hole = migrate_vma_collect_hole;
-       mm_walk.hugetlb_entry = NULL;
-       mm_walk.test_walk = NULL;
-       mm_walk.vma = migrate->vma;
-       mm_walk.mm = migrate->vma->vm_mm;
-       mm_walk.private = migrate;
+       struct mm_walk mm_walk = {
+               .pmd_entry = migrate_vma_collect_pmd,
+               .pte_hole = migrate_vma_collect_hole,
+               .vma = migrate->vma,
+               .mm = migrate->vma->vm_mm,
+               .private = migrate,
+       };
 
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm_walk.mm,
                                migrate->start,
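
The switch to a designated initializer is the substance of the fix:
struct mm_walk had grown a pud_entry callback that the old
field-by-field assignments never set, leaving stack garbage the page
walker could call through. C guarantees that members omitted from an
initializer are zero-initialized, as a small user-space demo
(hypothetical struct, same language rule) shows:

#include <stdio.h>

struct walk_ops {
        void *pmd_entry;
        void *pud_entry;        /* newer member old code never assigned */
        void *pte_hole;
};

int main(void)
{
        struct walk_ops w = {
                .pmd_entry = (void *)0x1,
                .pte_hole = (void *)0x2,
                /* .pud_entry not named: zeroed per C99 6.7.8p21 */
        };

        printf("pud_entry = %p\n", w.pud_entry);        /* a null pointer */
        return 0;
}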
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 44df66a98f2adbbd2c18c38dec8eed9e45b4a6ce..dbdc46a84f63089de614a2807ef62a70f7c14968 100644
@@ -699,7 +699,14 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
        unsigned long ret, freed = 0;
        struct shrinker *shrinker;
 
-       if (!mem_cgroup_is_root(memcg))
+       /*
+        * The root memcg might be allocated even though memcg is disabled
+        * via "cgroup_disable=memory" boot parameter.  This could make
+        * mem_cgroup_is_root() return false, then just run memcg slab
+        * shrink, but skip global shrink.  This may result in premature
+        * oom.
+        */
+       if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
                return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
 
        if (!down_read_trylock(&shrinker_rwsem))
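
The extra mem_cgroup_disabled() test matters on "cgroup_disable=memory"
boots: a root memcg can still be allocated there, mem_cgroup_is_root()
may then return false, and every shrink_slab() call would take the
per-memcg path while the global shrink never ran, which can end in
premature OOM. A stand-in decision table (plain booleans instead of the
real predicates):

#include <stdbool.h>
#include <stdio.h>

static const char *pick_shrinker(bool memcg_disabled, bool memcg_is_root)
{
        if (!memcg_disabled && !memcg_is_root)
                return "shrink_slab_memcg";     /* per-memcg slab shrink */
        return "global shrink";
}

int main(void)
{
        /* memcg disabled used to take the memcg path; now it goes global */
        printf("%s\n", pick_shrinker(true, false));     /* global shrink */
        printf("%s\n", pick_shrinker(false, false));    /* shrink_slab_memcg */
        return 0;
}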
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 4c223266299aa7c90c1288bcbed7fd891f802f3d..bdb69599c4bdc2526943cf9a690e1ef0697872dd 100644
@@ -191,8 +191,7 @@ int cg_find_unified_root(char *root, size_t len)
                strtok(NULL, delim);
                strtok(NULL, delim);
 
-               if (strcmp(fs, "cgroup") == 0 &&
-                   strcmp(type, "cgroup2") == 0) {
+               if (strcmp(type, "cgroup2") == 0) {
                        strncpy(root, mount, len);
                        return 0;
                }
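
The dropped comparison checked fs_spec, the first /proc/mounts field,
which is only whatever string the mounter happened to pass; the fs_type
column is what reliably identifies a cgroup2 mount. Illustrative mount
line and a matching strtok() walk (both invented for the example):

#include <stdio.h>
#include <string.h>

int main(void)
{
        /* fs_spec may be "cgroup2", "none", anything; type is what counts */
        char line[] = "none /mnt/cgroup2 cgroup2 rw 0 0";
        char *fs_spec = strtok(line, " ");
        char *mount = strtok(NULL, " ");
        char *type = strtok(NULL, " ");

        if (type && strcmp(type, "cgroup2") == 0)
                printf("cgroup2 root: %s (fs_spec was \"%s\")\n",
                       mount, fs_spec);
        return 0;
}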