memcg: mm_update_next_owner: kill the "retry" logic
authorOleg Nesterov <oleg@redhat.com>
Wed, 26 Jun 2024 15:29:24 +0000 (17:29 +0200)
committerAndrew Morton <akpm@linux-foundation.org>
Fri, 5 Jul 2024 01:05:57 +0000 (18:05 -0700)
Add the new helper, try_to_set_owner(), which tries to update mm->owner
once we see c->mm == mm.  This way mm_update_next_owner() doesn't need to
restart the list_for_each_entry/for_each_process loops from the very
beginning if it races with exit/exec, it can just continue.

Unlike the current code, try_to_set_owner() re-checks tsk->mm == mm before
it drops tasklist_lock, so it doesn't need get/put_task_struct().

Link: https://lkml.kernel.org/r/20240626152924.GA17933@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jinliang Zheng <alexjlzheng@tencent.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Tycho Andersen <tandersen@netflix.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
kernel/exit.c

index 81fcee45d6302e9a4dce57613323d0bfe6758344..877fae2cc705bd68cdefbe324333f01309b1ac88 100644 (file)
@@ -439,6 +439,23 @@ static void coredump_task_exit(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_MEMCG
+/* drops tasklist_lock if succeeds */
+static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
+{
+       bool ret = false;
+
+       task_lock(tsk);
+       if (likely(tsk->mm == mm)) {
+               /* tsk can't pass exit_mm/exec_mmap and exit */
+               read_unlock(&tasklist_lock);
+               WRITE_ONCE(mm->owner, tsk);
+               lru_gen_migrate_mm(mm);
+               ret = true;
+       }
+       task_unlock(tsk);
+       return ret;
+}
+
 /*
  * A task is exiting.   If it owned this mm, find a new owner for the mm.
  */
@@ -446,7 +463,6 @@ void mm_update_next_owner(struct mm_struct *mm)
 {
        struct task_struct *c, *g, *p = current;
 
-retry:
        /*
         * If the exiting or execing task is not the owner, it's
         * someone else's problem.
@@ -468,16 +484,16 @@ retry:
         * Search in the children
         */
        list_for_each_entry(c, &p->children, sibling) {
-               if (c->mm == mm)
-                       goto assign_new_owner;
+               if (c->mm == mm && try_to_set_owner(c, mm))
+                       goto ret;
        }
 
        /*
         * Search in the siblings
         */
        list_for_each_entry(c, &p->real_parent->children, sibling) {
-               if (c->mm == mm)
-                       goto assign_new_owner;
+               if (c->mm == mm && try_to_set_owner(c, mm))
+                       goto ret;
        }
 
        /*
@@ -489,9 +505,11 @@ retry:
                if (g->flags & PF_KTHREAD)
                        continue;
                for_each_thread(g, c) {
-                       if (c->mm == mm)
-                               goto assign_new_owner;
-                       if (c->mm)
+                       struct mm_struct *c_mm = READ_ONCE(c->mm);
+                       if (c_mm == mm) {
+                               if (try_to_set_owner(c, mm))
+                                       goto ret;
+                       } else if (c_mm)
                                break;
                }
        }
@@ -502,30 +520,9 @@ retry:
         * ptrace or page migration (get_task_mm()).  Mark owner as NULL.
         */
        WRITE_ONCE(mm->owner, NULL);
+ ret:
        return;
 
-assign_new_owner:
-       BUG_ON(c == p);
-       get_task_struct(c);
-       /*
-        * The task_lock protects c->mm from changing.
-        * We always want mm->owner->mm == mm
-        */
-       task_lock(c);
-       /*
-        * Delay read_unlock() till we have the task_lock()
-        * to ensure that c does not slip away underneath us
-        */
-       read_unlock(&tasklist_lock);
-       if (c->mm != mm) {
-               task_unlock(c);
-               put_task_struct(c);
-               goto retry;
-       }
-       WRITE_ONCE(mm->owner, c);
-       lru_gen_migrate_mm(mm);
-       task_unlock(c);
-       put_task_struct(c);
 }
 #endif /* CONFIG_MEMCG */