mm/madvise: remove redundant mmap_lock operations from process_madvise()
author SeongJae Park <sj@kernel.org>
Thu, 6 Feb 2025 06:15:17 +0000 (22:15 -0800)
committer Andrew Morton <akpm@linux-foundation.org>
Mon, 17 Mar 2025 05:06:04 +0000 (22:06 -0700)
Optimize away redundant mmap lock operations in process_madvise() by taking
the mmap lock once up front, and then doing the remaining work for all
ranges inside the loop.
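
For illustration only, a minimal userspace C sketch of the restructuring
(the mutex, range struct and apply_advice() below are simplified stand-ins,
not the kernel's madvise_lock()/madvise_do_behavior() helpers): instead of
paying one lock/unlock round trip per range, the lock is taken once around
the whole batch.

	/*
	 * Simplified model of the batching change; the lock and per-range
	 * work are hypothetical stand-ins, not kernel APIs.
	 */
	#include <pthread.h>
	#include <stddef.h>
	#include <stdio.h>

	static pthread_mutex_t fake_mmap_lock = PTHREAD_MUTEX_INITIALIZER;

	struct range { unsigned long start; size_t len; };

	/* Stand-in for the per-range madvise work. */
	static int apply_advice(const struct range *r)
	{
		printf("advise [%#lx, +%zu)\n", r->start, r->len);
		return 0;
	}

	/* Old shape: each iteration locks and unlocks. */
	static int advise_per_range(const struct range *ranges, size_t n)
	{
		int ret = 0;

		for (size_t i = 0; i < n && !ret; i++) {
			pthread_mutex_lock(&fake_mmap_lock);
			ret = apply_advice(&ranges[i]);
			pthread_mutex_unlock(&fake_mmap_lock);
		}
		return ret;
	}

	/* New shape: one lock/unlock pair around the whole batch. */
	static int advise_batched(const struct range *ranges, size_t n)
	{
		int ret = 0;

		pthread_mutex_lock(&fake_mmap_lock);
		for (size_t i = 0; i < n && !ret; i++)
			ret = apply_advice(&ranges[i]);
		pthread_mutex_unlock(&fake_mmap_lock);
		return ret;
	}

	int main(void)
	{
		const struct range ranges[] = {
			{ 0x1000, 4096 },
			{ 0x5000, 8192 },
		};

		advise_per_range(ranges, 2);
		advise_batched(ranges, 2);
		return 0;
	}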

[akpm@linux-foundation.org: update comment, per Lorenzo]
Link: https://lkml.kernel.org/r/20250206061517.2958-5-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Liam R. Howlett <howlett@gmail.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/mm/madvise.c b/mm/madvise.c
index 6e31e3202d7191744483725252996d08d51cc788..6ecead476a80958b7067bb101504090e976a36e5 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1778,16 +1778,33 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
 
        total_len = iov_iter_count(iter);
 
+       ret = madvise_lock(mm, behavior);
+       if (ret)
+               return ret;
+
        while (iov_iter_count(iter)) {
-               ret = do_madvise(mm, (unsigned long)iter_iov_addr(iter),
-                                iter_iov_len(iter), behavior);
+               unsigned long start = (unsigned long)iter_iov_addr(iter);
+               size_t len_in = iter_iov_len(iter);
+               size_t len;
+
+               if (!is_valid_madvise(start, len_in, behavior)) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               len = PAGE_ALIGN(len_in);
+               if (start + len == start)
+                       ret = 0;
+               else
+                       ret = madvise_do_behavior(mm, start, len_in, len,
+                                       behavior);
                /*
                 * An madvise operation is attempting to restart the syscall,
                 * but we cannot proceed as it would not be correct to repeat
                 * the operation in aggregate, and would be surprising to the
                 * user.
                 *
-                * As we have already dropped locks, it is safe to just loop and
+                * We drop and reacquire locks so it is safe to just loop and
                 * try again. We check for fatal signals in case we need exit
                 * early anyway.
                 */
@@ -1796,12 +1813,17 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
                                ret = -EINTR;
                                break;
                        }
+
+                       /* Drop and reacquire lock to unwind race. */
+                       madvise_unlock(mm, behavior);
+                       madvise_lock(mm, behavior);
                        continue;
                }
                if (ret < 0)
                        break;
                iov_iter_advance(iter, iter_iov_len(iter));
        }
+       madvise_unlock(mm, behavior);
 
        ret = (total_len - iov_iter_count(iter)) ? : ret;