mm: rcu-protected get_mm_exe_file()
author Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Thu, 16 Apr 2015 19:47:56 +0000 (12:47 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Apr 2015 13:04:07 +0000 (09:04 -0400)
This patch removes mm->mmap_sem from the read side of mm->exe_file.
It also kills dup_mm_exe_file() and moves exe_file duplication into
dup_mmap(), where both mmap_sems are locked.

[akpm@linux-foundation.org: fix comment typo]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/file.c
include/linux/fs.h
include/linux/mm_types.h
kernel/fork.c

index ee738ea028fadab5d742445c6e48c151cb53f612..93c5f89c248b07b6fba50fd31f4bd36ba9520fef 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -638,8 +638,7 @@ static struct file *__fget(unsigned int fd, fmode_t mask)
        file = fcheck_files(files, fd);
        if (file) {
                /* File object ref couldn't be taken */
-               if ((file->f_mode & mask) ||
-                   !atomic_long_inc_not_zero(&file->f_count))
+               if ((file->f_mode & mask) || !get_file_rcu(file))
                        file = NULL;
        }
        rcu_read_unlock();
index f4fc60727b8da5a1c0e5e0dc1085013949a8c2d8..6bf7ab7c15737675a9f814cc305c90c96ac056ca 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -870,6 +870,7 @@ static inline struct file *get_file(struct file *f)
        atomic_long_inc(&f->f_count);
        return f;
 }
+#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
 #define file_count(x)  atomic_long_read(&(x)->f_count)
 
index 590630eb59ba3c1a2181075d4dd30a26477e05a9..8d37e26a1007c6493990093e7ce8a03f2b201419 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -429,7 +429,7 @@ struct mm_struct {
 #endif
 
        /* store ref to file /proc/<pid>/exe symlink points to */
-       struct file *exe_file;
+       struct file __rcu *exe_file;
 #ifdef CONFIG_MMU_NOTIFIER
        struct mmu_notifier_mm *mmu_notifier_mm;
 #endif
index 8807a129711bf78053bd9b07ad6fe36bb9c2f592..259202637531e9b10f3a985f41bd2490afd23c54 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -403,6 +403,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         */
        down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
+       /* No ordering required: file already has been exposed. */
+       RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+
        mm->total_vm = oldmm->total_vm;
        mm->shared_vm = oldmm->shared_vm;
        mm->exec_vm = oldmm->exec_vm;
@@ -528,7 +531,13 @@ static inline void mm_free_pgd(struct mm_struct *mm)
        pgd_free(mm, mm->pgd);
 }
 #else
-#define dup_mmap(mm, oldmm)    (0)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+       down_write(&oldmm->mmap_sem);
+       RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+       up_write(&oldmm->mmap_sem);
+       return 0;
+}
 #define mm_alloc_pgd(mm)       (0)
 #define mm_free_pgd(mm)
 #endif /* CONFIG_MMU */
@@ -697,35 +706,46 @@ void mmput(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmput);
 
+/**
+ * set_mm_exe_file - change a reference to the mm's executable file
+ *
+ * This changes mm's executable file (shown as symlink /proc/[pid]/exe).
+ *
+ * Main users are mmput(), sys_execve() and sys_prctl(PR_SET_MM_MAP/EXE_FILE).
+ * Callers prevent concurrent invocations: in mmput() no users are left alive,
+ * in execve the task is single-threaded, prctl holds mmap_sem exclusively.
+ */
 void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 {
+       struct file *old_exe_file = rcu_dereference_protected(mm->exe_file,
+                       !atomic_read(&mm->mm_users) || current->in_execve ||
+                       lockdep_is_held(&mm->mmap_sem));
+
        if (new_exe_file)
                get_file(new_exe_file);
-       if (mm->exe_file)
-               fput(mm->exe_file);
-       mm->exe_file = new_exe_file;
+       rcu_assign_pointer(mm->exe_file, new_exe_file);
+       if (old_exe_file)
+               fput(old_exe_file);
 }
 
+/**
+ * get_mm_exe_file - acquire a reference to the mm's executable file
+ *
+ * Returns %NULL if mm has no associated executable file.
+ * User must release file via fput().
+ */
 struct file *get_mm_exe_file(struct mm_struct *mm)
 {
        struct file *exe_file;
 
-       /* We need mmap_sem to protect against races with removal of exe_file */
-       down_read(&mm->mmap_sem);
-       exe_file = mm->exe_file;
-       if (exe_file)
-               get_file(exe_file);
-       up_read(&mm->mmap_sem);
+       rcu_read_lock();
+       exe_file = rcu_dereference(mm->exe_file);
+       if (exe_file && !get_file_rcu(exe_file))
+               exe_file = NULL;
+       rcu_read_unlock();
        return exe_file;
 }
 
-static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
-{
-       /* It's safe to write the exe_file pointer without exe_file_lock because
-        * this is called during fork when the task is not yet in /proc */
-       newmm->exe_file = get_mm_exe_file(oldmm);
-}
-
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
@@ -887,8 +907,6 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
        if (!mm_init(mm, tsk))
                goto fail_nomem;
 
-       dup_mm_exe_file(oldmm, mm);
-
        err = dup_mmap(mm, oldmm);
        if (err)
                goto free_pt;