mount: add a comment about concurrent changes with statmount()/listmount()
authorChristian Brauner <brauner@kernel.org>
Wed, 16 Apr 2025 08:19:05 +0000 (10:19 +0200)
committerChristian Brauner <brauner@kernel.org>
Fri, 23 May 2025 12:20:44 +0000 (14:20 +0200)
Add some comments in there highlighting a few non-obvious assumptions.

Link: https://lore.kernel.org/20250416-zerknirschen-aluminium-14a55639076f@brauner
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/namespace.c

index 14935a0500a2068e08bb8a53787b1d6e404e474f..ddb0a688633c1e278af7a50b719d3e918dd700af 100644 (file)
@@ -5831,13 +5831,29 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
                return err;
 
        s->root = root;
-       s->idmap = mnt_idmap(s->mnt);
-       if (s->mask & STATMOUNT_SB_BASIC)
-               statmount_sb_basic(s);
 
+       /*
+        * Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap
+        * can change concurrently as we only hold the read-side of the
+        * namespace semaphore and mount properties may change with only
+        * the mount lock held.
+        *
+        * We could sample the mount lock sequence counter to detect
+        * those changes and retry. But it's not worth it. Worst that
+        * happens is that the mnt->mnt_idmap pointer is already changed
+        * while mnt->mnt_flags isn't or vica versa. So what.
+        *
+        * Both mnt->mnt_flags and mnt->mnt_idmap are set and retrieved
+        * via READ_ONCE()/WRITE_ONCE() and guard against theoretical
+        * torn read/write. That's all we care about right now.
+        */
+       s->idmap = mnt_idmap(s->mnt);
        if (s->mask & STATMOUNT_MNT_BASIC)
                statmount_mnt_basic(s);
 
+       if (s->mask & STATMOUNT_SB_BASIC)
+               statmount_sb_basic(s);
+
        if (s->mask & STATMOUNT_PROPAGATE_FROM)
                statmount_propagate_from(s);
 
@@ -6149,6 +6165,10 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
            !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
                return -ENOENT;
 
+       /*
+        * We only need to guard against mount topology changes as
+        * listmount() doesn't care about any mount properties.
+        */
        scoped_guard(rwsem_read, &namespace_sem)
                ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
                                   nr_mnt_ids, (flags & LISTMOUNT_REVERSE));