net: phy: transfer phy_config_inband() locking responsibility to phylink
authorVladimir Oltean <vladimir.oltean@nxp.com>
Thu, 4 Sep 2025 12:52:38 +0000 (15:52 +0300)
committerJakub Kicinski <kuba@kernel.org>
Sat, 6 Sep 2025 00:49:43 +0000 (17:49 -0700)
Problem description
===================

Lockdep reports a possible circular locking dependency (AB/BA) between
&pl->state_mutex and &phy->lock, as follows.

phylink_resolve() // acquires &pl->state_mutex
-> phylink_major_config()
   -> phy_config_inband() // acquires &pl->phydev->lock

whereas all the other call sites where &pl->state_mutex and
&pl->phydev->lock have the locking scheme reversed. Everywhere else,
&pl->phydev->lock is acquired at the top level, and &pl->state_mutex at
the lower level. A clear example is phylink_bringup_phy().

The outlier is the newly introduced phy_config_inband() and the existing
lock order is the correct one. To understand why it cannot be the other
way around, it is sufficient to consider phylink_phy_change(), phylink's
callback from the PHY device's phy->phy_link_change() virtual method,
invoked by the PHY state machine.

phy_link_up() and phy_link_down(), the (indirect) callers of
phylink_phy_change(), are called with &phydev->lock acquired.
Then phylink_phy_change() acquires its own &pl->state_mutex, to
serialize changes made to its pl->phy_state and pl->link_config.
So all other instances of &pl->state_mutex and &phydev->lock must be
consistent with this order.

Problem impact
==============

I think the kernel runs a serious deadlock risk if an existing
phylink_resolve() thread, which results in a phy_config_inband() call,
is concurrent with a phy_link_up() or phy_link_down() call, which will
deadlock on &pl->state_mutex in phylink_phy_change(). Practically
speaking, the impact may be limited by the slow speed of the medium
auto-negotiation protocol, which makes it unlikely for the current state
to still be unresolved when a new one is detected, but I think the
problem is there. Nonetheless, the problem was discovered using lockdep.

Proposed solution
=================

Practically speaking, the phy_config_inband() requirement of having
phydev->lock acquired must transfer to the caller (phylink is the only
caller). There, it must bubble up until immediately before
&pl->state_mutex is acquired, for the cases where that takes place.

Solution details, considerations, notes
=======================================

This is the phy_config_inband() call graph:

                          sfp_upstream_ops :: connect_phy()
                          |
                          v
                          phylink_sfp_connect_phy()
                          |
                          v
                          phylink_sfp_config_phy()
                          |
                          |   sfp_upstream_ops :: module_insert()
                          |   |
                          |   v
                          |   phylink_sfp_module_insert()
                          |   |
                          |   |   sfp_upstream_ops :: module_start()
                          |   |   |
                          |   |   v
                          |   |   phylink_sfp_module_start()
                          |   |   |
                          |   v   v
                          |   phylink_sfp_config_optical()
 phylink_start()          |   |
   |   phylink_resume()   v   v
   |   |  phylink_sfp_set_config()
   |   |  |
   v   v  v
 phylink_mac_initial_config()
   |   phylink_resolve()
   |   |  phylink_ethtool_ksettings_set()
   v   v  v
   phylink_major_config()
            |
            v
    phy_config_inband()

phylink_major_config() caller #1, phylink_mac_initial_config(), does not
acquire &pl->state_mutex nor do its callers. It must acquire
&pl->phydev->lock prior to calling phylink_major_config().

phylink_major_config() caller #2, phylink_resolve() acquires
&pl->state_mutex, thus also needs to acquire &pl->phydev->lock.

phylink_major_config() caller #3, phylink_ethtool_ksettings_set(), is
completely uninteresting, because it only calls phylink_major_config()
if pl->phydev is NULL (otherwise it calls phy_ethtool_ksettings_set()).
We need to change nothing there.

Other solutions
===============

The lock inversion between &pl->state_mutex and &pl->phydev->lock has
occurred at least once before, as seen in commit c718af2d00a3 ("net:
phylink: fix ethtool -A with attached PHYs"). The solution there was to
simply not call phy_set_asym_pause() under the &pl->state_mutex. That
cannot be extended to our case though, where the phy_config_inband()
call is much deeper inside the &pl->state_mutex section.

Fixes: 5fd0f1a02e75 ("net: phylink: add negotiation of in-band capabilities")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://patch.msgid.link/20250904125238.193990-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/phy/phy.c
drivers/net/phy/phylink.c

index 13df28445f0201dda26b9e4a2819feacd4977094..c02da57a4da5e385744de2990a6acc4e8b72a9f5 100644 (file)
@@ -1065,23 +1065,19 @@ EXPORT_SYMBOL_GPL(phy_inband_caps);
  */
 int phy_config_inband(struct phy_device *phydev, unsigned int modes)
 {
-       int err;
+       lockdep_assert_held(&phydev->lock);
 
        if (!!(modes & LINK_INBAND_DISABLE) +
            !!(modes & LINK_INBAND_ENABLE) +
            !!(modes & LINK_INBAND_BYPASS) != 1)
                return -EINVAL;
 
-       mutex_lock(&phydev->lock);
        if (!phydev->drv)
-               err = -EIO;
+               return -EIO;
        else if (!phydev->drv->config_inband)
-               err = -EOPNOTSUPP;
-       else
-               err = phydev->drv->config_inband(phydev, modes);
-       mutex_unlock(&phydev->lock);
+               return -EOPNOTSUPP;
 
-       return err;
+       return phydev->drv->config_inband(phydev, modes);
 }
 EXPORT_SYMBOL(phy_config_inband);
 
index aa17ad2622fc7c7b74f8c69cce0ee7c29e2119fd..1988b7d2089a6c4f98deef9b9afa4ce41aef8e7d 100644 (file)
@@ -1434,6 +1434,7 @@ static void phylink_get_fixed_state(struct phylink *pl,
 static void phylink_mac_initial_config(struct phylink *pl, bool force_restart)
 {
        struct phylink_link_state link_state;
+       struct phy_device *phy = pl->phydev;
 
        switch (pl->req_link_an_mode) {
        case MLO_AN_PHY:
@@ -1457,7 +1458,11 @@ static void phylink_mac_initial_config(struct phylink *pl, bool force_restart)
        link_state.link = false;
 
        phylink_apply_manual_flow(pl, &link_state);
+       if (phy)
+               mutex_lock(&phy->lock);
        phylink_major_config(pl, force_restart, &link_state);
+       if (phy)
+               mutex_unlock(&phy->lock);
 }
 
 static const char *phylink_pause_to_str(int pause)
@@ -1598,6 +1603,8 @@ static void phylink_resolve(struct work_struct *w)
 
        mutex_lock(&pl->phydev_mutex);
        phy = pl->phydev;
+       if (phy)
+               mutex_lock(&phy->lock);
        mutex_lock(&pl->state_mutex);
        cur_link_state = phylink_link_is_up(pl);
 
@@ -1699,6 +1706,8 @@ static void phylink_resolve(struct work_struct *w)
                queue_work(system_power_efficient_wq, &pl->resolve);
        }
        mutex_unlock(&pl->state_mutex);
+       if (phy)
+               mutex_unlock(&phy->lock);
        mutex_unlock(&pl->phydev_mutex);
 }