mm: move MAP_SYNC to asm-generic/mman-common.h
[linux-2.6-block.git] / kernel / memremap.c
index 6e1970719dc23dac7a164560883c14ab7b09c50e..bea6f887adad100f7419efcb71c589578c5f6a9e 100644 (file)
 #include <linux/types.h>
 #include <linux/wait_bit.h>
 #include <linux/xarray.h>
-#include <linux/hmm.h>
 
 static DEFINE_XARRAY(pgmap_array);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
 #define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
-vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
-                      unsigned long addr,
-                      swp_entry_t entry,
-                      unsigned int flags,
-                      pmd_t *pmdp)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL(devmap_managed_key);
+static atomic_t devmap_managed_enable;
+
+static void devmap_managed_enable_put(void *data)
 {
-       struct page *page = device_private_entry_to_page(entry);
-       struct hmm_devmem *devmem;
+       if (atomic_dec_and_test(&devmap_managed_enable))
+               static_branch_disable(&devmap_managed_key);
+}
 
-       devmem = container_of(page->pgmap, typeof(*devmem), pagemap);
+static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+{
+       if (!pgmap->ops || !pgmap->ops->page_free) {
+               WARN(1, "Missing page_free method\n");
+               return -EINVAL;
+       }
 
-       /*
-        * The page_fault() callback must migrate page back to system memory
-        * so that CPU can access it. This might fail for various reasons
-        * (device issue, device was unsafely unplugged, ...). When such
-        * error conditions happen, the callback must return VM_FAULT_SIGBUS.
-        *
-        * Note that because memory cgroup charges are accounted to the device
-        * memory, this should never fail because of memory restrictions (but
-        * allocation of regular system page might still fail because we are
-        * out of memory).
-        *
-        * There is a more in-depth description of what that callback can and
-        * cannot do, in include/linux/memremap.h
-        */
-       return devmem->page_fault(vma, addr, page, flags, pmdp);
+       if (atomic_inc_return(&devmap_managed_enable) == 1)
+               static_branch_enable(&devmap_managed_key);
+       return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL);
 }
-#endif /* CONFIG_DEVICE_PRIVATE */
+#else
+static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+{
+       return -EINVAL;
+}
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static void pgmap_array_delete(struct resource *res)
 {
@@ -56,14 +54,8 @@ static void pgmap_array_delete(struct resource *res)
 
 static unsigned long pfn_first(struct dev_pagemap *pgmap)
 {
-       const struct resource *res = &pgmap->res;
-       struct vmem_altmap *altmap = &pgmap->altmap;
-       unsigned long pfn;
-
-       pfn = res->start >> PAGE_SHIFT;
-       if (pgmap->altmap_valid)
-               pfn += vmem_altmap_offset(altmap);
-       return pfn;
+       return (pgmap->res.start >> PAGE_SHIFT) +
+               vmem_altmap_offset(pgmap_altmap(pgmap));
 }
 
 static unsigned long pfn_end(struct dev_pagemap *pgmap)
@@ -83,6 +75,24 @@ static unsigned long pfn_next(unsigned long pfn)
 #define for_each_device_pfn(pfn, map) \
        for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
 
+static void dev_pagemap_kill(struct dev_pagemap *pgmap)
+{
+       if (pgmap->ops && pgmap->ops->kill)
+               pgmap->ops->kill(pgmap);
+       else
+               percpu_ref_kill(pgmap->ref);
+}
+
+static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
+{
+       if (pgmap->ops && pgmap->ops->cleanup) {
+               pgmap->ops->cleanup(pgmap);
+       } else {
+               wait_for_completion(&pgmap->done);
+               percpu_ref_exit(pgmap->ref);
+       }
+}
+
 static void devm_memremap_pages_release(void *data)
 {
        struct dev_pagemap *pgmap = data;
@@ -92,10 +102,10 @@ static void devm_memremap_pages_release(void *data)
        unsigned long pfn;
        int nid;
 
-       pgmap->kill(pgmap->ref);
+       dev_pagemap_kill(pgmap);
        for_each_device_pfn(pfn, pgmap)
                put_page(pfn_to_page(pfn));
-       pgmap->cleanup(pgmap->ref);
+       dev_pagemap_cleanup(pgmap);
 
        /* pages are dead and unused, undo the arch mapping */
        align_start = res->start & ~(SECTION_SIZE - 1);
@@ -111,7 +121,7 @@ static void devm_memremap_pages_release(void *data)
                                align_size >> PAGE_SHIFT, NULL);
        } else {
                arch_remove_memory(nid, align_start, align_size,
-                               pgmap->altmap_valid ? &pgmap->altmap : NULL);
+                               pgmap_altmap(pgmap));
                kasan_remove_zero_shadow(__va(align_start), align_size);
        }
        mem_hotplug_done();
@@ -122,20 +132,29 @@ static void devm_memremap_pages_release(void *data)
                      "%s: failed to free all reserved pages\n", __func__);
 }
 
+static void dev_pagemap_percpu_release(struct percpu_ref *ref)
+{
+       struct dev_pagemap *pgmap =
+               container_of(ref, struct dev_pagemap, internal_ref);
+
+       complete(&pgmap->done);
+}
+
 /**
  * devm_memremap_pages - remap and provide memmap backing for the given resource
  * @dev: hosting device for @res
  * @pgmap: pointer to a struct dev_pagemap
  *
  * Notes:
- * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
+ * 1/ At a minimum the res and type members of @pgmap must be initialized
  *    by the caller before passing it to this function
  *
- * 2/ The altmap field may optionally be initialized, in which case altmap_valid
- *    must be set to true
+ * 2/ The altmap field may optionally be initialized, in which case
+ *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
  *
- * 3/ pgmap->ref must be 'live' on entry and will be killed and reaped
- *    at devm_memremap_pages_release() time, or if this routine fails.
+ * 3/ The ref field may optionally be provided, in which pgmap->ref must be
+ *    'live' on entry and will be killed and reaped at
+ *    devm_memremap_pages_release() time, or if this routine fails.
  *
  * 4/ res is expected to be a host memory range that could feasibly be
  *    treated as a "System RAM" range, i.e. not a device mmio range, but
@@ -144,22 +163,66 @@ static void devm_memremap_pages_release(void *data)
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
        resource_size_t align_start, align_size, align_end;
-       struct vmem_altmap *altmap = pgmap->altmap_valid ?
-                       &pgmap->altmap : NULL;
        struct resource *res = &pgmap->res;
        struct dev_pagemap *conflict_pgmap;
        struct mhp_restrictions restrictions = {
                /*
                 * We do not want any optional features only our own memmap
                */
-               .altmap = altmap,
+               .altmap = pgmap_altmap(pgmap),
        };
        pgprot_t pgprot = PAGE_KERNEL;
        int error, nid, is_ram;
+       bool need_devmap_managed = true;
+
+       switch (pgmap->type) {
+       case MEMORY_DEVICE_PRIVATE:
+               if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
+                       WARN(1, "Device private memory not supported\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
+                       WARN(1, "Missing migrate_to_ram method\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               break;
+       case MEMORY_DEVICE_FS_DAX:
+               if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
+                   IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
+                       WARN(1, "File system DAX not supported\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               break;
+       case MEMORY_DEVICE_DEVDAX:
+       case MEMORY_DEVICE_PCI_P2PDMA:
+               need_devmap_managed = false;
+               break;
+       default:
+               WARN(1, "Invalid pgmap type %d\n", pgmap->type);
+               break;
+       }
+
+       if (!pgmap->ref) {
+               if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
+                       return ERR_PTR(-EINVAL);
+
+               init_completion(&pgmap->done);
+               error = percpu_ref_init(&pgmap->internal_ref,
+                               dev_pagemap_percpu_release, 0, GFP_KERNEL);
+               if (error)
+                       return ERR_PTR(error);
+               pgmap->ref = &pgmap->internal_ref;
+       } else {
+               if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
+                       WARN(1, "Missing reference count teardown definition\n");
+                       return ERR_PTR(-EINVAL);
+               }
+       }
 
-       if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) {
-               WARN(1, "Missing reference count teardown definition\n");
-               return ERR_PTR(-EINVAL);
+       if (need_devmap_managed) {
+               error = devmap_managed_enable_get(dev, pgmap);
+               if (error)
+                       return ERR_PTR(error);
        }
 
        align_start = res->start & ~(SECTION_SIZE - 1);
@@ -241,7 +304,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 
                zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
                move_pfn_range_to_zone(zone, align_start >> PAGE_SHIFT,
-                               align_size >> PAGE_SHIFT, altmap);
+                               align_size >> PAGE_SHIFT, pgmap_altmap(pgmap));
        }
 
        mem_hotplug_done();
@@ -271,9 +334,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
  err_pfn_remap:
        pgmap_array_delete(res);
  err_array:
-       pgmap->kill(pgmap->ref);
-       pgmap->cleanup(pgmap->ref);
-
+       dev_pagemap_kill(pgmap);
+       dev_pagemap_cleanup(pgmap);
        return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
@@ -287,7 +349,9 @@ EXPORT_SYMBOL_GPL(devm_memunmap_pages);
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
 {
        /* number of pfns from base where pfn_to_page() is valid */
-       return altmap->reserve + altmap->free;
+       if (altmap)
+               return altmap->reserve + altmap->free;
+       return 0;
 }
 
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
@@ -329,28 +393,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 EXPORT_SYMBOL_GPL(get_dev_pagemap);
 
 #ifdef CONFIG_DEV_PAGEMAP_OPS
-DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
-EXPORT_SYMBOL(devmap_managed_key);
-static atomic_t devmap_enable;
-
-/*
- * Toggle the static key for ->page_free() callbacks when dev_pagemap
- * pages go idle.
- */
-void dev_pagemap_get_ops(void)
-{
-       if (atomic_inc_return(&devmap_enable) == 1)
-               static_branch_enable(&devmap_managed_key);
-}
-EXPORT_SYMBOL_GPL(dev_pagemap_get_ops);
-
-void dev_pagemap_put_ops(void)
-{
-       if (atomic_dec_and_test(&devmap_enable))
-               static_branch_disable(&devmap_managed_key);
-}
-EXPORT_SYMBOL_GPL(dev_pagemap_put_ops);
-
 void __put_devmap_managed_page(struct page *page)
 {
        int count = page_ref_dec_return(page);
@@ -366,7 +408,7 @@ void __put_devmap_managed_page(struct page *page)
 
                mem_cgroup_uncharge(page);
 
-               page->pgmap->page_free(page, page->pgmap->data);
+               page->pgmap->ops->page_free(page);
        } else if (!count)
                __put_page(page);
 }