Merge tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-block.git] / fs / userfaultfd.c
index 40f9e1a2ebdd6d2d10a84167898af571ce2201e2..0fd96d6e39ce826070820fabeff4bc8fce28944b 100644 (file)
 #include <linux/swapops.h>
 #include <linux/miscdevice.h>
 
-int sysctl_unprivileged_userfaultfd __read_mostly;
+static int sysctl_unprivileged_userfaultfd __read_mostly;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table vm_userfaultfd_table[] = {     /* registered under "vm" by userfaultfd_init() */
+       {
+               .procname       = "unprivileged_userfaultfd",
+               .data           = &sysctl_unprivileged_userfaultfd,
+               .maxlen         = sizeof(sysctl_unprivileged_userfaultfd),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax, /* range-checked int handler; bounds are .extra1/.extra2 */
+               .extra1         = SYSCTL_ZERO,  /* lower bound for proc_dointvec_minmax */
+               .extra2         = SYSCTL_ONE,   /* upper bound for proc_dointvec_minmax */
+       },
+       { }     /* empty terminating entry */
+};
+#endif
 
 static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
 
@@ -108,6 +123,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
        return ctx->features & UFFD_FEATURE_INITIALIZED;
 }
 
+/*
+ * Whether WP_UNPOPULATED is enabled on the vma's uffd context (false if
+ * the vma has no context).  The result is only meaningful when the vma
+ * is anonymous and userfaultfd_wp()==true on it.
+ */
+bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma)
+{
+       struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+       if (!ctx)       /* no uffd context attached to this vma */
+               return false;
+
+       return ctx->features & UFFD_FEATURE_WP_UNPOPULATED;
+}
+
 static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
                                     vm_flags_t flags)
 {
@@ -1629,7 +1659,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 
                /* Reset ptes for the whole vma range if wr-protected */
                if (userfaultfd_wp(vma))
-                       uffd_wp_range(mm, vma, start, vma_end - start, false);
+                       uffd_wp_range(vma, start, vma_end - start, false);
 
                new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
                prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
@@ -1714,6 +1744,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
        struct uffdio_copy uffdio_copy;
        struct uffdio_copy __user *user_uffdio_copy;
        struct userfaultfd_wake_range range;
+       uffd_flags_t flags = 0;
 
        user_uffdio_copy = (struct uffdio_copy __user *) arg;
 
@@ -1740,10 +1771,12 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
                goto out;
        if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
                goto out;
+       if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP)
+               flags |= MFILL_ATOMIC_WP;
        if (mmget_not_zero(ctx->mm)) {
-               ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
-                                  uffdio_copy.len, &ctx->mmap_changing,
-                                  uffdio_copy.mode);
+               ret = mfill_atomic_copy(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
+                                       uffdio_copy.len, &ctx->mmap_changing,
+                                       flags);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
@@ -1793,9 +1826,9 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
                goto out;
 
        if (mmget_not_zero(ctx->mm)) {
-               ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
-                                    uffdio_zeropage.range.len,
-                                    &ctx->mmap_changing);
+               ret = mfill_atomic_zeropage(ctx->mm, uffdio_zeropage.range.start,
+                                          uffdio_zeropage.range.len,
+                                          &ctx->mmap_changing);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
@@ -1875,6 +1908,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
        struct uffdio_continue uffdio_continue;
        struct uffdio_continue __user *user_uffdio_continue;
        struct userfaultfd_wake_range range;
+       uffd_flags_t flags = 0;
 
        user_uffdio_continue = (struct uffdio_continue __user *)arg;
 
@@ -1899,13 +1933,16 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
            uffdio_continue.range.start) {
                goto out;
        }
-       if (uffdio_continue.mode & ~UFFDIO_CONTINUE_MODE_DONTWAKE)
+       if (uffdio_continue.mode & ~(UFFDIO_CONTINUE_MODE_DONTWAKE |
+                                    UFFDIO_CONTINUE_MODE_WP))
                goto out;
+       if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_WP)
+               flags |= MFILL_ATOMIC_WP;
 
        if (mmget_not_zero(ctx->mm)) {
-               ret = mcopy_continue(ctx->mm, uffdio_continue.range.start,
-                                    uffdio_continue.range.len,
-                                    &ctx->mmap_changing);
+               ret = mfill_atomic_continue(ctx->mm, uffdio_continue.range.start,
+                                           uffdio_continue.range.len,
+                                           &ctx->mmap_changing, flags);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
@@ -1973,6 +2010,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 #endif
 #ifndef CONFIG_PTE_MARKER_UFFD_WP
        uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+       uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
 #endif
        uffdio_api.ioctls = UFFD_API_IOCTLS;
        ret = -EFAULT;
@@ -2180,6 +2218,9 @@ static int __init userfaultfd_init(void)
                                                0,
                                                SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                                init_once_userfaultfd_ctx);
+#ifdef CONFIG_SYSCTL
+       register_sysctl_init("vm", vm_userfaultfd_table);
+#endif
        return 0;
 }
 __initcall(userfaultfd_init);