net/mlx4_core: Manage interface state for Reset flow cases
author Yishai Hadas <yishaih@mellanox.com>
Sun, 25 Jan 2015 14:59:40 +0000 (16:59 +0200)
committer David S. Miller <davem@davemloft.net>
Sun, 25 Jan 2015 22:43:14 +0000 (14:43 -0800)
We need to manage the interface state to synchronize between the reset flow
and other related cases such as remove_one. This has to be done to prevent
certain races. For example, if the software stack is already down as a result
of an unload call, remove_one should skip the unload phase.

Implement the remove_one case; handling of AER and other cases comes next.

The interface can be up or down. Upon remove_one, the state will include an
extra bit indicating that the device is being cleaned up, forcing other tasks
to finish before the final cleanup.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx4/catas.c
drivers/net/ethernet/mellanox/mlx4/intf.c
drivers/net/ethernet/mellanox/mlx4/main.c
include/linux/mlx4/device.h

index 63f14ffcc90604765d86ed207aada714a8b65e60..3fcf3cfaedfc26c7c15edb027c6aa1e45a37e541 100644 (file)
@@ -122,8 +122,14 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist)
        int err = 0;
 
        mlx4_enter_error_state(persist);
-       err = mlx4_restart_one(persist->pdev);
-       mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err);
+       mutex_lock(&persist->interface_state_mutex);
+       if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
+           !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
+               err = mlx4_restart_one(persist->pdev);
+               mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
+                         err);
+       }
+       mutex_unlock(&persist->interface_state_mutex);
 }
 
 static void dump_err_buf(struct mlx4_dev *dev)
@@ -211,6 +217,9 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev)
                iounmap(priv->catas_err.map);
                priv->catas_err.map = NULL;
        }
+
+       if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION)
+               flush_workqueue(dev->persist->catas_wq);
 }
 
 int  mlx4_catas_init(struct mlx4_dev *dev)
index 116895ac8b353afa4f461564ae8bac8529803be2..fba0b96a6f28f6c8d8a8bb4afc243e526fec831b 100644 (file)
@@ -138,6 +138,7 @@ int mlx4_register_device(struct mlx4_dev *dev)
 
        mutex_lock(&intf_mutex);
 
+       dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP;
        list_add_tail(&priv->dev_list, &dev_list);
        list_for_each_entry(intf, &intf_list, list)
                mlx4_add_device(intf, priv);
@@ -162,6 +163,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
                mlx4_remove_device(intf, priv);
 
        list_del(&priv->dev_list);
+       dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP;
 
        mutex_unlock(&intf_mutex);
 }
index dc2d910fcc88bbbb3ad129fee7fad78ae5d1285a..d59cae5da3f03fd5242b2df19f9e41191c9f59ca 100644 (file)
@@ -3114,6 +3114,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        pci_set_drvdata(pdev, dev->persist);
        priv->pci_dev_data = id->driver_data;
        mutex_init(&dev->persist->device_state_mutex);
+       mutex_init(&dev->persist->interface_state_mutex);
 
        ret =  __mlx4_init_one(pdev, id->driver_data, priv);
        if (ret) {
@@ -3232,7 +3233,17 @@ static void mlx4_remove_one(struct pci_dev *pdev)
        struct mlx4_dev  *dev  = persist->dev;
        struct mlx4_priv *priv = mlx4_priv(dev);
 
-       mlx4_unload_one(pdev);
+       mutex_lock(&persist->interface_state_mutex);
+       persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
+       mutex_unlock(&persist->interface_state_mutex);
+
+       /* device marked to be under deletion running now without the lock
+        * letting other tasks to be terminated
+        */
+       if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+               mlx4_unload_one(pdev);
+       else
+               mlx4_info(dev, "%s: interface is down\n", __func__);
        mlx4_catas_end(dev);
        pci_release_regions(pdev);
        pci_disable_device(pdev);
index 7d5d317cb7a66000ec43faf6b708d0bc43bcdf6b..33f9ca71925c0cb521e12c149fdbcad37e9251b7 100644 (file)
@@ -416,6 +416,11 @@ enum {
        MLX4_DEVICE_STATE_INTERNAL_ERROR        = 1 << 1,
 };
 
+enum {
+       MLX4_INTERFACE_STATE_UP         = 1 << 0,
+       MLX4_INTERFACE_STATE_DELETION   = 1 << 1,
+};
+
 #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
                             MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK)
 
@@ -760,6 +765,8 @@ struct mlx4_dev_persistent {
        struct workqueue_struct *catas_wq;
        struct mutex    device_state_mutex; /* protect HW state */
        u8              state;
+       struct mutex    interface_state_mutex; /* protect SW state */
+       u8      interface_state;
 };
 
 struct mlx4_dev {