net-sysfs: prevent uncleared queues from being re-added
author: Antoine Tenart <atenart@kernel.org>
Tue, 4 Feb 2025 17:03:12 +0000 (18:03 +0100)
committer: Jakub Kicinski <kuba@kernel.org>
Thu, 6 Feb 2025 01:49:08 +0000 (17:49 -0800)
With the (upcoming) removal of the rtnl_trylock/restart_syscall logic
and because of how Tx/Rx queues are implemented (and their
requirements), it might happen that a queue is re-added before having
the chance to be cleared. In such a rare case, do not complete the queue
addition operation.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Link: https://patch.msgid.link/20250204170314.146022-4-atenart@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/core/net-sysfs.c

index 0b7ee260613d597b7d8473ee846a9014567f1990..027af27517fa31b3cf8c695e325c9b5e0a175187 100644 (file)
@@ -1210,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
        struct kobject *kobj = &queue->kobj;
        int error = 0;
 
+       /* Rx queues are cleared in rx_queue_release to allow later
+        * re-registration. This is triggered when their kobj refcount is
+        * dropped.
+        *
+        * If a queue is removed while both a read (or write) operation and
+        * the re-addition of the same queue are pending (waiting on
+        * rtnl_lock), the re-addition might execute before the read,
+        * preventing the initial removal from ever completing (the queue's
+        * kobj refcount won't drop enough because of the pending read). In
+        * such a rare case, return to allow the removal operation to complete.
+        */
+       if (unlikely(kobj->state_initialized)) {
+               netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
+               return -EAGAIN;
+       }
+
        /* Kobject_put later will trigger rx_queue_release call which
         * decreases dev refcount: Take that reference here
         */
@@ -1898,6 +1914,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
        struct kobject *kobj = &queue->kobj;
        int error = 0;
 
+       /* Tx queues are cleared in netdev_queue_release to allow later
+        * re-registration. This is triggered when their kobj refcount is
+        * dropped.
+        *
+        * If a queue is removed while both a read (or write) operation and
+        * the re-addition of the same queue are pending (waiting on
+        * rtnl_lock), the re-addition might execute before the read,
+        * preventing the initial removal from ever completing (the queue's
+        * kobj refcount won't drop enough because of the pending read). In
+        * such a rare case, return to allow the removal operation to complete.
+        */
+       if (unlikely(kobj->state_initialized)) {
+               netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
+               return -EAGAIN;
+       }
+
        /* Kobject_put later will trigger netdev_queue_release call
         * which decreases dev refcount: Take that reference here
         */