net/mlx5: Parallelize vhca event handling
author Wei Zhang <weizhang@nvidia.com>
Thu, 12 Oct 2023 19:27:36 +0000 (12:27 -0700)
committer Saeed Mahameed <saeedm@nvidia.com>
Sat, 14 Oct 2023 17:16:29 +0000 (10:16 -0700)
At present, the mlx5 driver has a general-purpose
event handler which handles not only vhca events
but also many other events. This creates a major
bottleneck because the event handler is
implemented as a single-threaded workqueue, so all
events are forced to be handled serially
even when an application tries to create multiple
SFs simultaneously.

Introduce a dedicated vhca event handler which
manages the parallel creation of SFs.

Signed-off-by: Wei Zhang <weizhang@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/events.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c
include/linux/mlx5/driver.h

index 3ec892d51f57d2b8834ddfd4116ea4031dfac4c8..d91ea53eb394d10d45d03522203126e48d942ffa 100644 (file)
@@ -441,8 +441,3 @@ int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int ev
 
        return blocking_notifier_call_chain(&events->sw_nh, event, data);
 }
-
-void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work)
-{
-       queue_work(dev->priv.events->wq, work);
-}
index 19ffd1816474a2153a3e9919d579152dfc835ee3..d348a7f9511f1a1ec019d90ba0f9308872389610 100644 (file)
@@ -159,6 +159,8 @@ enum mlx5_semaphore_space_address {
 
 #define MLX5_DEFAULT_PROF       2
 #define MLX5_SF_PROF           3
+#define MLX5_NUM_FW_CMD_THREADS 8
+#define MLX5_DEV_MAX_WQS       MLX5_NUM_FW_CMD_THREADS
 
 static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed,
                                      size_t item_size, size_t num_items,
@@ -347,7 +349,6 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap
 #define mlx5_vport_get_other_func_general_cap(dev, vport, out)         \
        mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL)
 
-void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
 static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
 {
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
index d908fba968f083168b67ec19fe9e56dec04bbf92..c6fd729de8b2499b24ea5d1b70198179a43ef657 100644 (file)
@@ -21,6 +21,15 @@ struct mlx5_vhca_event_work {
        struct mlx5_vhca_state_event event;
 };
 
+struct mlx5_vhca_event_handler {
+       struct workqueue_struct *wq;
+};
+
+struct mlx5_vhca_events {
+       struct mlx5_core_dev *dev;
+       struct mlx5_vhca_event_handler handler[MLX5_DEV_MAX_WQS];
+};
+
 int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, u32 *out, u32 outlen)
 {
        u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {};
@@ -99,6 +108,12 @@ static void mlx5_vhca_state_work_handler(struct work_struct *_work)
        kfree(work);
 }
 
+static void
+mlx5_vhca_events_work_enqueue(struct mlx5_core_dev *dev, int idx, struct work_struct *work)
+{
+       queue_work(dev->priv.vhca_events->handler[idx].wq, work);
+}
+
 static int
 mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, void *data)
 {
@@ -106,6 +121,7 @@ mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, v
                                mlx5_nb_cof(nb, struct mlx5_vhca_state_notifier, nb);
        struct mlx5_vhca_event_work *work;
        struct mlx5_eqe *eqe = data;
+       int wq_idx;
 
        work = kzalloc(sizeof(*work), GFP_ATOMIC);
        if (!work)
@@ -113,7 +129,8 @@ mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, v
        INIT_WORK(&work->work, &mlx5_vhca_state_work_handler);
        work->notifier = notifier;
        work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id);
-       mlx5_events_work_enqueue(notifier->dev, &work->work);
+       wq_idx = work->event.function_id % MLX5_DEV_MAX_WQS;
+       mlx5_vhca_events_work_enqueue(notifier->dev, wq_idx, &work->work);
        return NOTIFY_OK;
 }
 
@@ -132,28 +149,62 @@ void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap)
 int mlx5_vhca_event_init(struct mlx5_core_dev *dev)
 {
        struct mlx5_vhca_state_notifier *notifier;
+       char wq_name[MLX5_CMD_WQ_MAX_NAME];
+       struct mlx5_vhca_events *events;
+       int err, i;
 
        if (!mlx5_vhca_event_supported(dev))
                return 0;
 
-       notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);
-       if (!notifier)
+       events = kzalloc(sizeof(*events), GFP_KERNEL);
+       if (!events)
                return -ENOMEM;
 
+       events->dev = dev;
+       dev->priv.vhca_events = events;
+       for (i = 0; i < MLX5_DEV_MAX_WQS; i++) {
+               snprintf(wq_name, MLX5_CMD_WQ_MAX_NAME, "mlx5_vhca_event%d", i);
+               events->handler[i].wq = create_singlethread_workqueue(wq_name);
+               if (!events->handler[i].wq) {
+                       err = -ENOMEM;
+                       goto err_create_wq;
+               }
+       }
+
+       notifier = kzalloc(sizeof(*notifier), GFP_KERNEL);
+       if (!notifier) {
+               err = -ENOMEM;
+               goto err_notifier;
+       }
+
        dev->priv.vhca_state_notifier = notifier;
        notifier->dev = dev;
        BLOCKING_INIT_NOTIFIER_HEAD(&notifier->n_head);
        MLX5_NB_INIT(&notifier->nb, mlx5_vhca_state_change_notifier, VHCA_STATE_CHANGE);
        return 0;
+
+err_notifier:
+err_create_wq:
+       for (--i; i >= 0; i--)
+               destroy_workqueue(events->handler[i].wq);
+       kfree(events);
+       return err;
 }
 
 void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev)
 {
+       struct mlx5_vhca_events *vhca_events;
+       int i;
+
        if (!mlx5_vhca_event_supported(dev))
                return;
 
        kfree(dev->priv.vhca_state_notifier);
        dev->priv.vhca_state_notifier = NULL;
+       vhca_events = dev->priv.vhca_events;
+       for (i = 0; i < MLX5_DEV_MAX_WQS; i++)
+               destroy_workqueue(vhca_events->handler[i].wq);
+       kvfree(vhca_events);
 }
 
 void mlx5_vhca_event_start(struct mlx5_core_dev *dev)
index 52e982bc0f50d3e61e4681eef5646eed97a3a475..7968c5ee85c41345d287a1070bd4a3f833722f7b 100644 (file)
@@ -615,6 +615,7 @@ struct mlx5_priv {
        int                     adev_idx;
        int                     sw_vhca_id;
        struct mlx5_events      *events;
+       struct mlx5_vhca_events *vhca_events;
 
        struct mlx5_flow_steering *steering;
        struct mlx5_mpfs        *mpfs;