net: add exit_batch_rtnl() method
author Eric Dumazet <edumazet@google.com>
Tue, 6 Feb 2024 14:42:57 +0000 (14:42 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 8 Feb 2024 02:55:10 +0000 (18:55 -0800)
Many (struct pernet_operations)->exit_batch() methods have
to acquire rtnl.

In the presence of rtnl mutex pressure, this makes cleanup_net()
very slow.

This patch adds a new exit_batch_rtnl() method to reduce
the number of rtnl acquisitions from cleanup_net().

exit_batch_rtnl() handlers are called while rtnl is locked,
and devices to be killed can be queued in a list provided
as their second argument.

A single unregister_netdevice_many() is called right
before rtnl is released.

exit_batch_rtnl() handlers are called before ->exit() and
->exit_batch() handlers.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Antoine Tenart <atenart@kernel.org>
Link: https://lore.kernel.org/r/20240206144313.2050392-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/net_namespace.h
net/core/net_namespace.c

index cd0c2eedbb5e9ddcbd5e0a37e2eb7e0cf57495d5..20c34bd7a07783a9a13696fd74b41eff1ff860a8 100644 (file)
@@ -448,6 +448,9 @@ struct pernet_operations {
        void (*pre_exit)(struct net *net);
        void (*exit)(struct net *net);
        void (*exit_batch)(struct list_head *net_exit_list);
+       /* Following method is called with RTNL held. */
+       void (*exit_batch_rtnl)(struct list_head *net_exit_list,
+                               struct list_head *dev_kill_list);
        unsigned int *id;
        size_t size;
 };
index 72799533426b6162256d7c4eef355af96c66e844..233ec0cdd0111d5ca21c6f8a66f4c1f3fbc4657b 100644 (file)
@@ -318,8 +318,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 {
        /* Must be called with pernet_ops_rwsem held */
        const struct pernet_operations *ops, *saved_ops;
-       int error = 0;
        LIST_HEAD(net_exit_list);
+       LIST_HEAD(dev_kill_list);
+       int error = 0;
 
        refcount_set(&net->ns.count, 1);
        ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
@@ -357,6 +358,15 @@ out_undo:
 
        synchronize_rcu();
 
+       ops = saved_ops;
+       rtnl_lock();
+       list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
+               if (ops->exit_batch_rtnl)
+                       ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+       }
+       unregister_netdevice_many(&dev_kill_list);
+       rtnl_unlock();
+
        ops = saved_ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);
@@ -573,6 +583,7 @@ static void cleanup_net(struct work_struct *work)
        struct net *net, *tmp, *last;
        struct llist_node *net_kill_list;
        LIST_HEAD(net_exit_list);
+       LIST_HEAD(dev_kill_list);
 
        /* Atomically snapshot the list of namespaces to cleanup */
        net_kill_list = llist_del_all(&cleanup_list);
@@ -613,6 +624,14 @@ static void cleanup_net(struct work_struct *work)
         */
        synchronize_rcu();
 
+       rtnl_lock();
+       list_for_each_entry_reverse(ops, &pernet_list, list) {
+               if (ops->exit_batch_rtnl)
+                       ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
+       }
+       unregister_netdevice_many(&dev_kill_list);
+       rtnl_unlock();
+
        /* Run all of the network namespace exit methods */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);
@@ -1193,7 +1212,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_
 {
        ops_pre_exit_list(ops, net_exit_list);
        synchronize_rcu();
+
+       if (ops->exit_batch_rtnl) {
+               LIST_HEAD(dev_kill_list);
+
+               rtnl_lock();
+               ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
+               unregister_netdevice_many(&dev_kill_list);
+               rtnl_unlock();
+       }
        ops_exit_list(ops, net_exit_list);
+
        ops_free_list(ops, net_exit_list);
 }