mm: lock vma explicitly before doing vm_flags_reset and vm_flags_reset_once
author Suren Baghdasaryan <surenb@google.com>
Fri, 4 Aug 2023 15:27:22 +0000 (08:27 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Mon, 21 Aug 2023 20:37:46 +0000 (13:37 -0700)
Implicit vma locking inside vm_flags_reset() and vm_flags_reset_once() is
not obvious and makes it hard to understand where vma locking is happening.
Also, in some cases (like in dup_userfaultfd()) the vma should be locked
earlier than the vm_flags modification. To make locking more visible,
change these functions to assert that the vma write lock is taken, and
explicitly lock the vma beforehand. Fix the userfaultfd functions, which
should lock the vma earlier.

Link: https://lkml.kernel.org/r/20230804152724.3090321-5-surenb@google.com
Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
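
As an illustration of the calling convention this patch introduces, here is a
hypothetical caller sketch (not part of the patch; the function name is made
up): callers that rewrite vma->vm_flags now take the per-VMA write lock
themselves, since vm_flags_reset() and vm_flags_reset_once() only assert that
the lock is already held.

        /*
         * Hypothetical caller sketch: clear some vm_flags bits under the new
         * convention.  vma_start_write() must be called with mmap_lock held
         * for writing; vm_flags_reset() then only asserts the vma lock.
         */
        static void example_clear_vm_flags(struct vm_area_struct *vma,
                                           vm_flags_t to_clear)
        {
                mmap_assert_write_locked(vma->vm_mm);   /* caller holds mmap_lock for write */
                vma_start_write(vma);                   /* explicit per-VMA write lock */
                vm_flags_reset(vma, vma->vm_flags & ~to_clear);
        }
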
arch/powerpc/kvm/book3s_hv_uvmem.c
fs/userfaultfd.c
include/linux/mm.h
mm/madvise.c
mm/mlock.c
mm/mprotect.c

arch/powerpc/kvm/book3s_hv_uvmem.c
index 709ebd578394b67db5d55718d18f7c1d29109975..e2d6f9327f778e74fcc042331e2fd87593ca4ee3 100644 (file)
@@ -410,6 +410,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
                        ret = H_STATE;
                        break;
                }
+               vma_start_write(vma);
                /* Copy vm_flags to avoid partial modifications in ksm_madvise */
                vm_flags = vma->vm_flags;
                ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
fs/userfaultfd.c
index 9854d44ae18ecc6f422dfed69ee35874f8d572a1..70bd2951b68d62d3e103aa347f6448f33a521b10 100644 (file)
@@ -667,6 +667,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                mmap_write_lock(mm);
                for_each_vma(vmi, vma) {
                        if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
+                               vma_start_write(vma);
                                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
                                userfaultfd_set_vm_flags(vma,
                                                         vma->vm_flags & ~__VM_UFFD_FLAGS);
@@ -702,6 +703,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 
        octx = vma->vm_userfaultfd_ctx.ctx;
        if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
+               vma_start_write(vma);
                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
                userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
                return 0;
@@ -783,6 +785,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
                atomic_inc(&ctx->mmap_changing);
        } else {
                /* Drop uffd context if remap feature not enabled */
+               vma_start_write(vma);
                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
                userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
        }
@@ -940,6 +943,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
                        prev = vma;
                }
 
+               vma_start_write(vma);
                userfaultfd_set_vm_flags(vma, new_flags);
                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
        }
@@ -1511,6 +1515,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                 * the next vma was merged into the current one and
                 * the current one has not been updated yet.
                 */
+               vma_start_write(vma);
                userfaultfd_set_vm_flags(vma, new_flags);
                vma->vm_userfaultfd_ctx.ctx = ctx;
 
@@ -1694,6 +1699,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
                 * the next vma was merged into the current one and
                 * the current one has not been updated yet.
                 */
+               vma_start_write(vma);
                userfaultfd_set_vm_flags(vma, new_flags);
                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
 
include/linux/mm.h
index 49eafc62b4e65ff6d8788742f5bcdc5169f87352..5a6ff914009004fccf1a462f442f81f11d16291f 100644 (file)
@@ -774,18 +774,22 @@ static inline void vm_flags_init(struct vm_area_struct *vma,
        ACCESS_PRIVATE(vma, __vm_flags) = flags;
 }
 
-/* Use when VMA is part of the VMA tree and modifications need coordination */
+/*
+ * Use when VMA is part of the VMA tree and modifications need coordination
+ * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
+ * it should be locked explicitly beforehand.
+ */
 static inline void vm_flags_reset(struct vm_area_struct *vma,
                                  vm_flags_t flags)
 {
-       vma_start_write(vma);
+       vma_assert_write_locked(vma);
        vm_flags_init(vma, flags);
 }
 
 static inline void vm_flags_reset_once(struct vm_area_struct *vma,
                                       vm_flags_t flags)
 {
-       vma_start_write(vma);
+       vma_assert_write_locked(vma);
        WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
 }
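
For reference, the assertion these helpers now rely on looks roughly like the
sketch below when CONFIG_PER_VMA_LOCK is enabled (paraphrased from the mm.h of
this era, not guaranteed to match the exact upstream lines; without
CONFIG_PER_VMA_LOCK the helper reduces to the mmap_lock check):

        static inline void vma_assert_write_locked(struct vm_area_struct *vma)
        {
                mmap_assert_write_locked(vma->vm_mm);
                /* The vma is write-locked when its lock sequence matches the mm's. */
                VM_BUG_ON_VMA(vma->vm_lock_seq != vma->vm_mm->mm_lock_seq, vma);
        }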
 
mm/madvise.c
index da65f8bd9ac33b80c1f1bf58ce4502f8e7de4f0f..8498f700c284f0209b1308e0ab76df33bd62264c 100644 (file)
@@ -173,9 +173,8 @@ static int madvise_update_vma(struct vm_area_struct *vma,
        }
 
 success:
-       /*
-        * vm_flags is protected by the mmap_lock held in write mode.
-        */
+       /* vm_flags is protected by the mmap_lock held in write mode. */
+       vma_start_write(vma);
        vm_flags_reset(vma, new_flags);
        if (!vma->vm_file || vma_is_anon_shmem(vma)) {
                error = replace_anon_vma_name(vma, anon_name);
mm/mlock.c
index 0a0c996c5c21403a2dd91d0f6fdd4bf5064ca247..1746600a2e14702a86c34c9ff98f58fa6851be2d 100644 (file)
@@ -386,6 +386,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
         */
        if (newflags & VM_LOCKED)
                newflags |= VM_IO;
+       vma_start_write(vma);
        vm_flags_reset_once(vma, newflags);
 
        lru_add_drain();
@@ -460,9 +461,9 @@ success:
         * It's okay if try_to_unmap_one unmaps a page just after we
         * set VM_LOCKED, populate_vma_page_range will bring it back.
         */
-
        if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
                /* No work to do, and mlocking twice would be wrong */
+               vma_start_write(vma);
                vm_flags_reset(vma, newflags);
        } else {
                mlock_vma_pages_range(vma, start, end, newflags);
mm/mprotect.c
index 3f36c88a238e973210a5220f90dbcd0f8818f2e7..7cd7f644da800d7d10cd1a9dc016dd02b04157b0 100644 (file)
@@ -656,6 +656,7 @@ success:
         * vm_flags and vm_page_prot are protected by the mmap_lock
         * held in write mode.
         */
+       vma_start_write(vma);
        vm_flags_reset(vma, newflags);
        if (vma_wants_manual_pte_write_upgrade(vma))
                mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;