Cosmin reports an issue with ipv6_add_dev being called from
NETDEV_CHANGE notifier:
[ 3455.008776] ? ipv6_add_dev+0x370/0x620
[ 3455.010097] ipv6_find_idev+0x96/0xe0
[ 3455.010725] addrconf_add_dev+0x1e/0xa0
[ 3455.011382] addrconf_init_auto_addrs+0xb0/0x720
[ 3455.013537] addrconf_notify+0x35f/0x8d0
[ 3455.014214] notifier_call_chain+0x38/0xf0
[ 3455.014903] netdev_state_change+0x65/0x90
[ 3455.015586] linkwatch_do_dev+0x5a/0x70
[ 3455.016238] rtnl_getlink+0x241/0x3e0
[ 3455.019046] rtnetlink_rcv_msg+0x177/0x5e0
Similarly, linkwatch might get to ipv6_add_dev without ops lock:
[ 3456.656261] ? ipv6_add_dev+0x370/0x620
[ 3456.660039] ipv6_find_idev+0x96/0xe0
[ 3456.660445] addrconf_add_dev+0x1e/0xa0
[ 3456.660861] addrconf_init_auto_addrs+0xb0/0x720
[ 3456.661803] addrconf_notify+0x35f/0x8d0
[ 3456.662236] notifier_call_chain+0x38/0xf0
[ 3456.662676] netdev_state_change+0x65/0x90
[ 3456.663112] linkwatch_do_dev+0x5a/0x70
[ 3456.663529] __linkwatch_run_queue+0xeb/0x200
[ 3456.663990] linkwatch_event+0x21/0x30
[ 3456.664399] process_one_work+0x211/0x610
[ 3456.664828] worker_thread+0x1cc/0x380
[ 3456.665691] kthread+0xf4/0x210
Reclassify NETDEV_CHANGE as a notifier that consistently runs under the
instance lock.
Link: https://lore.kernel.org/netdev/aac073de8beec3e531c86c101b274d434741c28e.camel@nvidia.com/
Reported-by: Cosmin Ratiu <cratiu@nvidia.com>
Tested-by: Cosmin Ratiu <cratiu@nvidia.com>
Fixes:
ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations")
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250404161122.3907628-1-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Devices drivers are encouraged to rely on the instance lock where possible.
For the (mostly software) drivers that need to interact with the core stack,
-there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g.,
-``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle
-acquiring the instance lock themselves, while the ``netif_xxx`` functions
-assume that the driver has already acquired the instance lock.
+there are two sets of interfaces: ``dev_xxx``/``netdev_xxx`` and ``netif_xxx``
+(e.g., ``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx``/``netdev_xxx``
+functions handle acquiring the instance lock themselves, while the
+``netif_xxx`` functions assume that the driver has already acquired
+the instance lock.
Notifiers and netdev instance lock
==================================
running under the lock:
* ``NETDEV_REGISTER``
* ``NETDEV_UP``
+* ``NETDEV_CHANGE``
The following notifiers are running without the lock:
* ``NETDEV_UNREGISTER``
* pending work list (if queued).
*/
void linkwatch_sync_dev(struct net_device *dev);
+void __linkwatch_sync_dev(struct net_device *dev);
/**
* netif_carrier_ok - test if carrier present
int dev_set_promiscuity(struct net_device *dev, int inc);
int netif_set_allmulti(struct net_device *dev, int inc, bool notify);
int dev_set_allmulti(struct net_device *dev, int inc);
+void netif_state_change(struct net_device *dev);
void netdev_state_change(struct net_device *dev);
void __netdev_notify_peers(struct net_device *dev);
void netdev_notify_peers(struct net_device *dev);
return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group);
}
-void netdev_set_operstate(struct net_device *dev, int newstate);
+void netif_set_operstate(struct net_device *dev, int newstate);
#endif /* __LINUX_RTNETLINK_H */
}
EXPORT_SYMBOL(netdev_features_change);
-/**
- * netdev_state_change - device changes state
- * @dev: device to cause notification
- *
- * Called to indicate a device has changed state. This function calls
- * the notifier chains for netdev_chain and sends a NEWLINK message
- * to the routing socket.
- */
-void netdev_state_change(struct net_device *dev)
+void netif_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
struct netdev_notifier_change_info change_info = {
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL);
}
}
-EXPORT_SYMBOL(netdev_state_change);
/**
* __netdev_notify_peers - notify network peers about existence of @dev,
return ret;
}
EXPORT_SYMBOL_GPL(dev_xdp_propagate);
+
+/**
+ * netdev_state_change() - device changes state
+ * @dev: device to cause notification
+ *
+ * Called to indicate a device has changed state. This function calls
+ * the notifier chains for netdev_chain and sends a NEWLINK message
+ * to the routing socket.
+ */
+void netdev_state_change(struct net_device *dev)
+{
+ netdev_lock_ops(dev);
+ netif_state_change(dev);
+ netdev_unlock_ops(dev);
+}
+EXPORT_SYMBOL(netdev_state_change);
else
dev_deactivate(dev);
- netdev_state_change(dev);
+ netif_state_change(dev);
}
/* Note: our callers are responsible for calling netdev_tracker_free().
* This is the reason we use __dev_put() instead of dev_put().
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
spin_unlock_irq(&lweventlist_lock);
+ netdev_lock_ops(dev);
linkwatch_do_dev(dev);
+ netdev_unlock_ops(dev);
do_dev--;
spin_lock_irq(&lweventlist_lock);
}
spin_unlock_irq(&lweventlist_lock);
}
-void linkwatch_sync_dev(struct net_device *dev)
+static bool linkwatch_clean_dev(struct net_device *dev)
{
unsigned long flags;
- int clean = 0;
+ bool clean = false;
spin_lock_irqsave(&lweventlist_lock, flags);
if (!list_empty(&dev->link_watch_list)) {
list_del_init(&dev->link_watch_list);
- clean = 1;
+ clean = true;
/* We must release netdev tracker under
* the spinlock protection.
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
}
spin_unlock_irqrestore(&lweventlist_lock, flags);
- if (clean)
+
+ return clean;
+}
+
+void __linkwatch_sync_dev(struct net_device *dev)
+{
+ netdev_ops_assert_locked(dev);
+
+ if (linkwatch_clean_dev(dev))
linkwatch_do_dev(dev);
}
+void linkwatch_sync_dev(struct net_device *dev)
+{
+ if (linkwatch_clean_dev(dev)) {
+ netdev_lock_ops(dev);
+ linkwatch_do_dev(dev);
+ netdev_unlock_ops(dev);
+ }
+}
/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
switch (cmd) {
case NETDEV_REGISTER:
case NETDEV_UP:
+ case NETDEV_CHANGE:
netdev_ops_assert_locked(dev);
fallthrough;
case NETDEV_DOWN:
case NETDEV_REBOOT:
- case NETDEV_CHANGE:
case NETDEV_UNREGISTER:
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
-void netdev_set_operstate(struct net_device *dev, int newstate)
+void netif_set_operstate(struct net_device *dev, int newstate)
{
unsigned int old = READ_ONCE(dev->operstate);
return;
} while (!try_cmpxchg(&dev->operstate, &old, newstate));
- netdev_state_change(dev);
+ netif_state_change(dev);
}
-EXPORT_SYMBOL(netdev_set_operstate);
+EXPORT_SYMBOL(netif_set_operstate);
static void set_operstate(struct net_device *dev, unsigned char transition)
{
break;
}
- netdev_set_operstate(dev, operstate);
+ netif_set_operstate(dev, operstate);
}
static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
errout:
if (status & DO_SETLINK_MODIFIED) {
if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
- netdev_state_change(dev);
+ netif_state_change(dev);
if (err < 0)
net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
nla_len(tb[IFLA_BROADCAST]));
if (tb[IFLA_TXQLEN])
dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
- if (tb[IFLA_OPERSTATE])
+ if (tb[IFLA_OPERSTATE]) {
+ netdev_lock_ops(dev);
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+ netdev_unlock_ops(dev);
+ }
if (tb[IFLA_LINKMODE])
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
u32 ethtool_op_get_link(struct net_device *dev)
{
/* Synchronize carrier state with link watch, see also rtnl_getlink() */
- linkwatch_sync_dev(dev);
+ __linkwatch_sync_dev(dev);
return netif_carrier_ok(dev) ? 1 : 0;
}
struct net_device *dev = master->dev;
if (!is_admin_up(dev)) {
- netdev_set_operstate(dev, IF_OPER_DOWN);
+ netif_set_operstate(dev, IF_OPER_DOWN);
return;
}
if (has_carrier)
- netdev_set_operstate(dev, IF_OPER_UP);
+ netif_set_operstate(dev, IF_OPER_UP);
else
- netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
+ netif_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
}
static bool hsr_check_carrier(struct hsr_port *master)