Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox...
author Saeed Mahameed <saeedm@mellanox.com>
Mon, 10 Dec 2018 23:43:47 +0000 (15:43 -0800)
committer Saeed Mahameed <saeedm@mellanox.com>
Mon, 10 Dec 2018 23:50:50 +0000 (15:50 -0800)
mlx5-next is a shared branch with the rdma subtree, used to avoid mlx5 rdma
vs. netdev conflicts.

Highlights:

1) RDMA ODP (On Demand Paging) improvements and move of the ODP logic into
the mlx5 RDMA driver
2) Improved mlx5 core driver and device events handling, and provided an API
for upper layers to subscribe to device events (see the sketch after this
list)
3) RDMA-only code cleanup from mlx5 core
4) Added a helper to get the CQE opcode
5) Reworked handling of port module events
6) Shared mlx5_ifc.h updates to avoid conflicts
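
The notifier-based event API replaces the old per-interface .event callback.
A minimal, illustrative sketch of how an upper-layer driver could subscribe
to core device events; the handler body and the my_* names are hypothetical,
while the registration calls and event type match the ones used in this
series:

	/* Hypothetical consumer of the mlx5 core event notifier API. */
	static int my_mdev_event(struct notifier_block *nb,
				 unsigned long event, void *param)
	{
		switch (event) {
		case MLX5_EVENT_TYPE_PORT_CHANGE:
			/* param points at the raw struct mlx5_eqe */
			break;
		default:
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block my_nb = {
		.notifier_call = my_mdev_event,
	};

	/* given a struct mlx5_core_dev *mdev: */
	mlx5_notifier_register(mdev, &my_nb);
	/* ... */
	mlx5_notifier_unregister(mdev, &my_nb);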

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
57 files changed:
drivers/infiniband/core/umem_odp.c
drivers/infiniband/hw/mlx5/Makefile
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/ib_rep.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mlx5/srq.h [new file with mode: 0644]
drivers/infiniband/hw/mlx5/srq_cmd.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/events.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/net/ethernet/mellanox/mlx5/core/port.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/srq.c [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/transobj.c
drivers/net/ethernet/mellanox/mlx5/core/wq.h
include/linux/mlx5/cq.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/eq.h [new file with mode: 0644]
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/port.h
include/linux/mlx5/qp.h
include/linux/mlx5/srq.h [deleted file]
include/linux/mlx5/transobj.h

index 676c1fd1119d80a17d4542d035a319300332842f..9608681224e668d92ebe1c4b1b2004e61a930160 100644 (file)
@@ -647,8 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
                                flags, local_page_list, NULL, NULL);
                up_read(&owning_mm->mmap_sem);
 
-               if (npages < 0)
+               if (npages < 0) {
+                       if (npages != -EAGAIN)
+                               pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
+                       else
+                               pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
                        break;
+               }
 
                bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
                mutex_lock(&umem_odp->umem_mutex);
@@ -666,8 +671,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
                        ret = ib_umem_odp_map_dma_single_page(
                                        umem_odp, k, local_page_list[j],
                                        access_mask, current_seq);
-                       if (ret < 0)
+                       if (ret < 0) {
+                               if (ret != -EAGAIN)
+                                       pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
+                               else
+                                       pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
                                break;
+                       }
 
                        p = page_to_phys(local_page_list[j]);
                        k++;
index b8e4b15e2674b963428d137b00978b732179dffe..33f5adb14e4ef17075a9c9a3aaa6d5741cc0c0de 100644 (file)
@@ -1,6 +1,8 @@
 obj-$(CONFIG_MLX5_INFINIBAND)  += mlx5_ib.o
 
-mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
+mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
+               srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
+               cong.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
 mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
index 7d769b5538b4a275796dcfd8b7545152239667f5..26ab9041f94aa81c35a3e777e1a5e32e2ae991fc 100644 (file)
@@ -35,6 +35,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_cache.h>
 #include "mlx5_ib.h"
+#include "srq.h"
 
 static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
 {
@@ -81,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
 
        cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
 
-       if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
+       if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
            !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
                return cqe;
        } else {
@@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                struct mlx5_core_srq *msrq = NULL;
 
                if (qp->ibqp.xrcd) {
-                       msrq = mlx5_core_get_srq(dev->mdev,
-                                                be32_to_cpu(cqe->srqn));
+                       msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
                        srq = to_mibsrq(msrq);
                } else {
                        srq = to_msrq(qp->ibqp.srq);
@@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
        }
        wc->byte_len = be32_to_cpu(cqe->byte_cnt);
 
-       switch (cqe->op_own >> 4) {
+       switch (get_cqe_opcode(cqe)) {
        case MLX5_CQE_RESP_WR_IMM:
                wc->opcode      = IB_WC_RECV_RDMA_WITH_IMM;
                wc->wc_flags    = IB_WC_WITH_IMM;
@@ -537,7 +537,7 @@ repoll:
         */
        rmb();
 
-       opcode = cqe64->op_own >> 4;
+       opcode = get_cqe_opcode(cqe64);
        if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
                if (likely(cq->resize_buf)) {
                        free_cq_buf(dev, &cq->buf);
@@ -1295,7 +1295,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
                return -EINVAL;
        }
 
-       while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
+       while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
                dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
                                             (i + 1) & cq->resize_buf->nent);
                dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
index 584ff2ea7810465a1fb5971bfb8a517d1dcd013c..8a682d86d63471d2af4dbfefbf14c5b5c2333ac8 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include "ib_rep.h"
+#include "srq.h"
 
 static const struct mlx5_ib_profile rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_INIT,
@@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_rep_roce_init,
                     mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+                    mlx5_init_srq_table,
+                    mlx5_cleanup_srq_table),
        STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
                     mlx5_ib_stage_dev_res_init,
                     mlx5_ib_stage_dev_res_cleanup),
index 3569fda07e07f47b9286b7e1251c2716f9169203..0eeefff09c1eba8918d301cdb39408b457cf7f68 100644 (file)
@@ -60,6 +60,7 @@
 #include "mlx5_ib.h"
 #include "ib_rep.h"
 #include "cmd.h"
+#include "srq.h"
 #include <linux/mlx5/fs_helpers.h>
 #include <linux/mlx5/accel.h>
 #include <rdma/uverbs_std_types.h>
@@ -82,10 +83,13 @@ static char mlx5_version[] =
 
 struct mlx5_ib_event_work {
        struct work_struct      work;
-       struct mlx5_core_dev    *dev;
-       void                    *context;
-       enum mlx5_dev_event     event;
-       unsigned long           param;
+       union {
+               struct mlx5_ib_dev            *dev;
+               struct mlx5_ib_multiport_info *mpi;
+       };
+       bool                    is_slave;
+       unsigned int            event;
+       void                    *param;
 };
 
 enum {
@@ -2669,11 +2673,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
                         ntohs(ib_spec->gre.val.protocol));
 
                memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
-                                   gre_key_h),
+                                   gre_key.nvgre.hi),
                       &ib_spec->gre.mask.key,
                       sizeof(ib_spec->gre.mask.key));
                memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
-                                   gre_key_h),
+                                   gre_key.nvgre.hi),
                       &ib_spec->gre.val.key,
                       sizeof(ib_spec->gre.val.key));
                break;
@@ -4226,6 +4230,63 @@ static void delay_drop_handler(struct work_struct *work)
        mutex_unlock(&delay_drop->lock);
 }
 
+static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+                                struct ib_event *ibev)
+{
+       switch (eqe->sub_type) {
+       case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+               schedule_work(&ibdev->delay_drop.delay_drop_work);
+               break;
+       default: /* do nothing */
+               return;
+       }
+}
+
+static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+                             struct ib_event *ibev)
+{
+       u8 port = (eqe->data.port.port >> 4) & 0xf;
+
+       ibev->element.port_num = port;
+
+       switch (eqe->sub_type) {
+       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+       case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+               /* In RoCE, port up/down events are handled in
+                * mlx5_netdev_event().
+                */
+               if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+                                           IB_LINK_LAYER_ETHERNET)
+                       return -EINVAL;
+
+               ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
+                               IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+               break;
+
+       case MLX5_PORT_CHANGE_SUBTYPE_LID:
+               ibev->event = IB_EVENT_LID_CHANGE;
+               break;
+
+       case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+               ibev->event = IB_EVENT_PKEY_CHANGE;
+               schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
+               break;
+
+       case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+               ibev->event = IB_EVENT_GID_CHANGE;
+               break;
+
+       case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+               ibev->event = IB_EVENT_CLIENT_REREGISTER;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static void mlx5_ib_handle_event(struct work_struct *_work)
 {
        struct mlx5_ib_event_work *work =
@@ -4233,65 +4294,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
        struct mlx5_ib_dev *ibdev;
        struct ib_event ibev;
        bool fatal = false;
-       u8 port = (u8)work->param;
 
-       if (mlx5_core_is_mp_slave(work->dev)) {
-               ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
+       if (work->is_slave) {
+               ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
                if (!ibdev)
                        goto out;
        } else {
-               ibdev = work->context;
+               ibdev = work->dev;
        }
 
        switch (work->event) {
        case MLX5_DEV_EVENT_SYS_ERROR:
                ibev.event = IB_EVENT_DEVICE_FATAL;
                mlx5_ib_handle_internal_error(ibdev);
+               ibev.element.port_num  = (u8)(unsigned long)work->param;
                fatal = true;
                break;
-
-       case MLX5_DEV_EVENT_PORT_UP:
-       case MLX5_DEV_EVENT_PORT_DOWN:
-       case MLX5_DEV_EVENT_PORT_INITIALIZED:
-               /* In RoCE, port up/down events are handled in
-                * mlx5_netdev_event().
-                */
-               if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
-                       IB_LINK_LAYER_ETHERNET)
+       case MLX5_EVENT_TYPE_PORT_CHANGE:
+               if (handle_port_change(ibdev, work->param, &ibev))
                        goto out;
-
-               ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
-                            IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
                break;
-
-       case MLX5_DEV_EVENT_LID_CHANGE:
-               ibev.event = IB_EVENT_LID_CHANGE;
-               break;
-
-       case MLX5_DEV_EVENT_PKEY_CHANGE:
-               ibev.event = IB_EVENT_PKEY_CHANGE;
-               schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
-               break;
-
-       case MLX5_DEV_EVENT_GUID_CHANGE:
-               ibev.event = IB_EVENT_GID_CHANGE;
-               break;
-
-       case MLX5_DEV_EVENT_CLIENT_REREG:
-               ibev.event = IB_EVENT_CLIENT_REREGISTER;
-               break;
-       case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
-               schedule_work(&ibdev->delay_drop.delay_drop_work);
-               goto out;
+       case MLX5_EVENT_TYPE_GENERAL_EVENT:
+               handle_general_event(ibdev, work->param, &ibev);
+               /* fall through */
        default:
                goto out;
        }
 
-       ibev.device           = &ibdev->ib_dev;
-       ibev.element.port_num = port;
+       ibev.device = &ibdev->ib_dev;
 
-       if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
-               mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
+       if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
+               mlx5_ib_warn(ibdev, "warning: event on port %d\n",  ibev.element.port_num);
                goto out;
        }
 
@@ -4304,22 +4337,43 @@ out:
        kfree(work);
 }
 
-static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
-                         enum mlx5_dev_event event, unsigned long param)
+static int mlx5_ib_event(struct notifier_block *nb,
+                        unsigned long event, void *param)
 {
        struct mlx5_ib_event_work *work;
 
        work = kmalloc(sizeof(*work), GFP_ATOMIC);
        if (!work)
-               return;
+               return NOTIFY_DONE;
 
        INIT_WORK(&work->work, mlx5_ib_handle_event);
-       work->dev = dev;
+       work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
+       work->is_slave = false;
        work->param = param;
-       work->context = context;
        work->event = event;
 
        queue_work(mlx5_ib_event_wq, &work->work);
+
+       return NOTIFY_OK;
+}
+
+static int mlx5_ib_event_slave_port(struct notifier_block *nb,
+                                   unsigned long event, void *param)
+{
+       struct mlx5_ib_event_work *work;
+
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work)
+               return NOTIFY_DONE;
+
+       INIT_WORK(&work->work, mlx5_ib_handle_event);
+       work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
+       work->is_slave = true;
+       work->param = param;
+       work->event = event;
+       queue_work(mlx5_ib_event_wq, &work->work);
+
+       return NOTIFY_OK;
 }
 
 static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -5330,7 +5384,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
 
-       return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
+       return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector);
 }
 
 /* The mlx5_ib_multiport_mutex should be held when calling this function */
@@ -5350,6 +5404,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
                spin_unlock(&port->mp.mpi_lock);
                return;
        }
+
+       if (mpi->mdev_events.notifier_call)
+               mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+       mpi->mdev_events.notifier_call = NULL;
+
        mpi->ibdev = NULL;
 
        spin_unlock(&port->mp.mpi_lock);
@@ -5405,6 +5464,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
 
        ibdev->port[port_num].mp.mpi = mpi;
        mpi->ibdev = ibdev;
+       mpi->mdev_events.notifier_call = NULL;
        spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
 
        err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
@@ -5422,6 +5482,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
                goto unbind;
        }
 
+       mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
+       mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
+
        err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
        if (err)
                goto unbind;
@@ -5694,8 +5757,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
        dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
        dev->ib_dev.local_dma_lkey      = 0 /* not supported for now */;
        dev->ib_dev.phys_port_cnt       = dev->num_ports;
-       dev->ib_dev.num_comp_vectors    =
-               dev->mdev->priv.eq_table.num_comp_vectors;
+       dev->ib_dev.num_comp_vectors    = mlx5_comp_vectors_count(mdev);
        dev->ib_dev.dev.parent          = &mdev->pdev->dev;
 
        mutex_init(&dev->cap_mask_mutex);
@@ -6034,6 +6096,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
        return mlx5_ib_odp_init_one(dev);
 }
 
+void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_odp_cleanup_one(dev);
+}
+
 int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
@@ -6152,6 +6219,34 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
        mlx5_ib_unregister_vport_reps(dev);
 }
 
+static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
+{
+       dev->mdev_events.notifier_call = mlx5_ib_event;
+       mlx5_notifier_register(dev->mdev, &dev->mdev_events);
+       return 0;
+}
+
+static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
+}
+
+static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
+{
+       int uid;
+
+       uid = mlx5_ib_devx_create(dev);
+       if (uid > 0)
+               dev->devx_whitelist_uid = uid;
+
+       return 0;
+}
+static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+       if (dev->devx_whitelist_uid)
+               mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+}
+
 void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
                      const struct mlx5_ib_profile *profile,
                      int stage)
@@ -6163,8 +6258,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
                        profile->stage[stage].cleanup(dev);
        }
 
-       if (dev->devx_whitelist_uid)
-               mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
        ib_dealloc_device((struct ib_device *)dev);
 }
 
@@ -6173,7 +6266,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
 {
        int err;
        int i;
-       int uid;
 
        for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
                if (profile->stage[i].init) {
@@ -6183,10 +6275,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
                }
        }
 
-       uid = mlx5_ib_devx_create(dev);
-       if (uid > 0)
-               dev->devx_whitelist_uid = uid;
-
        dev->profile = profile;
        dev->ib_active = true;
 
@@ -6214,12 +6302,18 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_roce_init,
                     mlx5_ib_stage_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+                    mlx5_init_srq_table,
+                    mlx5_cleanup_srq_table),
        STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
                     mlx5_ib_stage_dev_res_init,
                     mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+                    mlx5_ib_stage_dev_notifier_init,
+                    mlx5_ib_stage_dev_notifier_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_ODP,
                     mlx5_ib_stage_odp_init,
-                    NULL),
+                    mlx5_ib_stage_odp_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
                     mlx5_ib_stage_counters_init,
                     mlx5_ib_stage_counters_cleanup),
@@ -6238,6 +6332,9 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_SPECS,
                     mlx5_ib_stage_populate_specs,
                     NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+                    mlx5_ib_stage_devx_init,
+                    mlx5_ib_stage_devx_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
                     mlx5_ib_stage_ib_reg_init,
                     mlx5_ib_stage_ib_reg_cleanup),
@@ -6265,9 +6362,15 @@ static const struct mlx5_ib_profile nic_rep_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_rep_roce_init,
                     mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+                    mlx5_init_srq_table,
+                    mlx5_cleanup_srq_table),
        STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
                     mlx5_ib_stage_dev_res_init,
                     mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+                    mlx5_ib_stage_dev_notifier_init,
+                    mlx5_ib_stage_dev_notifier_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
                     mlx5_ib_stage_counters_init,
                     mlx5_ib_stage_counters_cleanup),
@@ -6388,10 +6491,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 static struct mlx5_interface mlx5_ib_interface = {
        .add            = mlx5_ib_add,
        .remove         = mlx5_ib_remove,
-       .event          = mlx5_ib_event,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       .pfault         = mlx5_ib_pfault,
-#endif
        .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
 };
 
index b651a7a6fde9e6d3a44bcde7372d40afb66c3e36..861b68f2e33085da501fe2ed0ddc6253e3286b2a 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
 #include <linux/mlx5/fs.h>
 #include <linux/types.h>
 #include <linux/mlx5/transobj.h>
@@ -50,6 +49,8 @@
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
 
+#include "srq.h"
+
 #define mlx5_ib_dbg(_dev, format, arg...)                                      \
        dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,      \
                __LINE__, current->pid, ##arg)
@@ -774,7 +775,9 @@ enum mlx5_ib_stages {
        MLX5_IB_STAGE_CAPS,
        MLX5_IB_STAGE_NON_DEFAULT_CB,
        MLX5_IB_STAGE_ROCE,
+       MLX5_IB_STAGE_SRQ,
        MLX5_IB_STAGE_DEVICE_RESOURCES,
+       MLX5_IB_STAGE_DEVICE_NOTIFIER,
        MLX5_IB_STAGE_ODP,
        MLX5_IB_STAGE_COUNTERS,
        MLX5_IB_STAGE_CONG_DEBUGFS,
@@ -782,6 +785,7 @@ enum mlx5_ib_stages {
        MLX5_IB_STAGE_BFREG,
        MLX5_IB_STAGE_PRE_IB_REG_UMR,
        MLX5_IB_STAGE_SPECS,
+       MLX5_IB_STAGE_WHITELIST_UID,
        MLX5_IB_STAGE_IB_REG,
        MLX5_IB_STAGE_POST_IB_REG_UMR,
        MLX5_IB_STAGE_DELAY_DROP,
@@ -806,6 +810,7 @@ struct mlx5_ib_multiport_info {
        struct list_head list;
        struct mlx5_ib_dev *ibdev;
        struct mlx5_core_dev *mdev;
+       struct notifier_block mdev_events;
        struct completion unref_comp;
        u64 sys_image_guid;
        u32 mdev_refcnt;
@@ -880,10 +885,20 @@ struct mlx5_ib_lb_state {
        bool                    enabled;
 };
 
+struct mlx5_ib_pf_eq {
+       struct mlx5_ib_dev *dev;
+       struct mlx5_eq *core;
+       struct work_struct work;
+       spinlock_t lock; /* Pagefaults spinlock */
+       struct workqueue_struct *wq;
+       mempool_t *pool;
+};
+
 struct mlx5_ib_dev {
        struct ib_device                ib_dev;
        const struct uverbs_object_tree_def *driver_trees[7];
        struct mlx5_core_dev            *mdev;
+       struct notifier_block           mdev_events;
        struct mlx5_roce                roce[MLX5_MAX_PORTS];
        int                             num_ports;
        /* serialize update of capability mask
@@ -902,6 +917,8 @@ struct mlx5_ib_dev {
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        struct ib_odp_caps      odp_caps;
        u64                     odp_max_size;
+       struct mlx5_ib_pf_eq    odp_pf_eq;
+
        /*
         * Sleepable RCU that prevents destruction of MRs while they are still
         * being used by a page fault handler.
@@ -927,6 +944,7 @@ struct mlx5_ib_dev {
        u64                     sys_image_guid;
        struct mlx5_memic       memic;
        u16                     devx_whitelist_uid;
+       struct mlx5_srq_table   srq_table;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1158,9 +1176,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
-void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
-                   struct mlx5_pagefault *pfault);
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
 int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
 void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
@@ -1175,6 +1192,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 }
 
 static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
 static inline void mlx5_ib_odp_cleanup(void)                               {}
 static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
index 2cc3d69ab6f64dde00ee48c82ff93c5edca697f4..4ead8c0fff5ad46d6d68f676194badda71938f4f 100644 (file)
 #include "mlx5_ib.h"
 #include "cmd.h"
 
+#include <linux/mlx5/eq.h>
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+       u32                     bytes_committed;
+       u32                     token;
+       u8                      event_subtype;
+       u8                      type;
+       union {
+               /* Initiator or send message responder pagefault details. */
+               struct {
+                       /* Received packet size, only valid for responders. */
+                       u32     packet_size;
+                       /*
+                        * Number of resource holding WQE, depends on type.
+                        */
+                       u32     wq_num;
+                       /*
+                        * WQE index. Refers to either the send queue or
+                        * receive queue, according to event_subtype.
+                        */
+                       u16     wqe_index;
+               } wqe;
+               /* RDMA responder pagefault details */
+               struct {
+                       u32     r_key;
+                       /*
+                        * Received packet size, minimal size page fault
+                        * resolution required for forward progress.
+                        */
+                       u32     packet_size;
+                       u32     rdma_op_len;
+                       u64     rdma_va;
+               } rdma;
+       };
+
+       struct mlx5_ib_pf_eq    *eq;
+       struct work_struct      work;
+};
+
 #define MAX_PREFETCH_LEN (4*1024*1024U)
 
 /* Timeout in ms to wait for an active mmu notifier to complete when handling
@@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 {
        int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
                     pfault->wqe.wq_num : pfault->token;
-       int ret = mlx5_core_page_fault_resume(dev->mdev,
-                                             pfault->token,
-                                             wq_num,
-                                             pfault->type,
-                                             error);
-       if (ret)
-               mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
-                           wq_num);
+       u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { };
+       u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)]   = { };
+       int err;
+
+       MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
+       MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type);
+       MLX5_SET(page_fault_resume_in, in, token, pfault->token);
+       MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
+       MLX5_SET(page_fault_resume_in, in, error, !!error);
+
+       err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+       if (err)
+               mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
+                           wq_num, err);
 }
 
 static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
@@ -607,8 +653,8 @@ out:
                        if (!wait_for_completion_timeout(
                                        &odp->notifier_completion,
                                        timeout)) {
-                               mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
-                                            current_seq, odp->notifiers_seq);
+                               mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+                                            current_seq, odp->notifiers_seq, odp->notifiers_count);
                        }
                } else {
                        /* The MR is being killed, kill the QP as well. */
@@ -1026,16 +1072,31 @@ invalid_transport_or_opcode:
        return 0;
 }
 
-static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
-                                             u32 wq_num)
+static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
+                                                      u32 wq_num, int pf_type)
 {
-       struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
+       enum mlx5_res_type res_type;
 
-       if (!mqp) {
-               mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
+       switch (pf_type) {
+       case MLX5_WQE_PF_TYPE_RMP:
+               res_type = MLX5_RES_SRQ;
+               break;
+       case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
+       case MLX5_WQE_PF_TYPE_RESP:
+       case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
+               res_type = MLX5_RES_QP;
+               break;
+       default:
                return NULL;
        }
 
+       return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
+}
+
+static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
+{
+       struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res;
+
        return to_mibqp(mqp);
 }
 
@@ -1049,18 +1110,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
        int resume_with_error = 1;
        u16 wqe_index = pfault->wqe.wqe_index;
        int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
+       struct mlx5_core_rsc_common *res;
        struct mlx5_ib_qp *qp;
 
+       res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
+       if (!res) {
+               mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num);
+               return;
+       }
+
+       switch (res->res) {
+       case MLX5_RES_QP:
+               qp = res_to_qp(res);
+               break;
+       default:
+               mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
+               goto resolve_page_fault;
+       }
+
        buffer = (char *)__get_free_page(GFP_KERNEL);
        if (!buffer) {
                mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
                goto resolve_page_fault;
        }
 
-       qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
-       if (!qp)
-               goto resolve_page_fault;
-
        ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
                                    PAGE_SIZE, &qp->trans_qp.base);
        if (ret < 0) {
@@ -1100,6 +1173,7 @@ resolve_page_fault:
        mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
                    pfault->wqe.wq_num, resume_with_error,
                    pfault->type);
+       mlx5_core_res_put(res);
        free_page((unsigned long)buffer);
 }
 
@@ -1178,10 +1252,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
        }
 }
 
-void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
-                   struct mlx5_pagefault *pfault)
+static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
 {
-       struct mlx5_ib_dev *dev = context;
        u8 event_subtype = pfault->event_subtype;
 
        switch (event_subtype) {
@@ -1198,6 +1270,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
        }
 }
 
+static void mlx5_ib_eqe_pf_action(struct work_struct *work)
+{
+       struct mlx5_pagefault *pfault = container_of(work,
+                                                    struct mlx5_pagefault,
+                                                    work);
+       struct mlx5_ib_pf_eq *eq = pfault->eq;
+
+       mlx5_ib_pfault(eq->dev, pfault);
+       mempool_free(pfault, eq->pool);
+}
+
+static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
+{
+       struct mlx5_eqe_page_fault *pf_eqe;
+       struct mlx5_pagefault *pfault;
+       struct mlx5_eqe *eqe;
+       int cc = 0;
+
+       while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) {
+               pfault = mempool_alloc(eq->pool, GFP_ATOMIC);
+               if (!pfault) {
+                       schedule_work(&eq->work);
+                       break;
+               }
+
+               pf_eqe = &eqe->data.page_fault;
+               pfault->event_subtype = eqe->sub_type;
+               pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
+
+               mlx5_ib_dbg(eq->dev,
+                           "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
+                           eqe->sub_type, pfault->bytes_committed);
+
+               switch (eqe->sub_type) {
+               case MLX5_PFAULT_SUBTYPE_RDMA:
+                       /* RDMA based event */
+                       pfault->type =
+                               be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
+                       pfault->token =
+                               be32_to_cpu(pf_eqe->rdma.pftype_token) &
+                               MLX5_24BIT_MASK;
+                       pfault->rdma.r_key =
+                               be32_to_cpu(pf_eqe->rdma.r_key);
+                       pfault->rdma.packet_size =
+                               be16_to_cpu(pf_eqe->rdma.packet_length);
+                       pfault->rdma.rdma_op_len =
+                               be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+                       pfault->rdma.rdma_va =
+                               be64_to_cpu(pf_eqe->rdma.rdma_va);
+                       mlx5_ib_dbg(eq->dev,
+                                   "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
+                                   pfault->type, pfault->token,
+                                   pfault->rdma.r_key);
+                       mlx5_ib_dbg(eq->dev,
+                                   "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
+                                   pfault->rdma.rdma_op_len,
+                                   pfault->rdma.rdma_va);
+                       break;
+
+               case MLX5_PFAULT_SUBTYPE_WQE:
+                       /* WQE based event */
+                       pfault->type =
+                               (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
+                       pfault->token =
+                               be32_to_cpu(pf_eqe->wqe.token);
+                       pfault->wqe.wq_num =
+                               be32_to_cpu(pf_eqe->wqe.pftype_wq) &
+                               MLX5_24BIT_MASK;
+                       pfault->wqe.wqe_index =
+                               be16_to_cpu(pf_eqe->wqe.wqe_index);
+                       pfault->wqe.packet_size =
+                               be16_to_cpu(pf_eqe->wqe.packet_length);
+                       mlx5_ib_dbg(eq->dev,
+                                   "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+                                   pfault->type, pfault->token,
+                                   pfault->wqe.wq_num,
+                                   pfault->wqe.wqe_index);
+                       break;
+
+               default:
+                       mlx5_ib_warn(eq->dev,
+                                    "Unsupported page fault event sub-type: 0x%02hhx\n",
+                                    eqe->sub_type);
+                       /* Unsupported page faults should still be
+                        * resolved by the page fault handler
+                        */
+               }
+
+               pfault->eq = eq;
+               INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action);
+               queue_work(eq->wq, &pfault->work);
+
+               cc = mlx5_eq_update_cc(eq->core, ++cc);
+       }
+
+       mlx5_eq_update_ci(eq->core, cc, 1);
+}
+
+static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+{
+       struct mlx5_ib_pf_eq *eq = eq_ptr;
+       unsigned long flags;
+
+       if (spin_trylock_irqsave(&eq->lock, flags)) {
+               mlx5_ib_eq_pf_process(eq);
+               spin_unlock_irqrestore(&eq->lock, flags);
+       } else {
+               schedule_work(&eq->work);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/* mempool_refill() was proposed but unfortunately wasn't accepted
+ * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
+ * Cheap workaround.
+ */
+static void mempool_refill(mempool_t *pool)
+{
+       while (pool->curr_nr < pool->min_nr)
+               mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
+}
+
+static void mlx5_ib_eq_pf_action(struct work_struct *work)
+{
+       struct mlx5_ib_pf_eq *eq =
+               container_of(work, struct mlx5_ib_pf_eq, work);
+
+       mempool_refill(eq->pool);
+
+       spin_lock_irq(&eq->lock);
+       mlx5_ib_eq_pf_process(eq);
+       spin_unlock_irq(&eq->lock);
+}
+
+enum {
+       MLX5_IB_NUM_PF_EQE      = 0x1000,
+       MLX5_IB_NUM_PF_DRAIN    = 64,
+};
+
+static int
+mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+       struct mlx5_eq_param param = {};
+       int err;
+
+       INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
+       spin_lock_init(&eq->lock);
+       eq->dev = dev;
+
+       eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
+                                              sizeof(struct mlx5_pagefault));
+       if (!eq->pool)
+               return -ENOMEM;
+
+       eq->wq = alloc_workqueue("mlx5_ib_page_fault",
+                                WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
+                                MLX5_NUM_CMD_EQE);
+       if (!eq->wq) {
+               err = -ENOMEM;
+               goto err_mempool;
+       }
+
+       param = (struct mlx5_eq_param) {
+               .index = MLX5_EQ_PFAULT_IDX,
+               .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+               .nent = MLX5_IB_NUM_PF_EQE,
+               .context = eq,
+               .handler = mlx5_ib_eq_pf_int
+       };
+       eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+       if (IS_ERR(eq->core)) {
+               err = PTR_ERR(eq->core);
+               goto err_wq;
+       }
+
+       return 0;
+err_wq:
+       destroy_workqueue(eq->wq);
+err_mempool:
+       mempool_destroy(eq->pool);
+       return err;
+}
+
+static int
+mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+       int err;
+
+       err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
+       cancel_work_sync(&eq->work);
+       destroy_workqueue(eq->wq);
+       mempool_destroy(eq->pool);
+
+       return err;
+}
+
 void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
 {
        if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
@@ -1226,7 +1495,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
 
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
 {
-       int ret;
+       int ret = 0;
 
        if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
                ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
@@ -1236,7 +1505,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
                }
        }
 
-       return 0;
+       if (!MLX5_CAP_GEN(dev->mdev, pg))
+               return ret;
+
+       ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
+
+       return ret;
+}
+
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
+{
+       if (!MLX5_CAP_GEN(dev->mdev, pg))
+               return;
+
+       mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
 }
 
 int mlx5_ib_odp_init(void)
@@ -1246,4 +1528,3 @@ int mlx5_ib_odp_init(void)
 
        return 0;
 }
-
index d012e7dbcc38150053a5fc71fca8f41f3ea5421f..91dcd3918d96f70c6ea83635bad333eec85f5d00 100644 (file)
@@ -1,46 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2018, Mellanox Technologies inc.  All rights reserved.
  */
 
 #include <linux/module.h>
 #include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
 #include <linux/slab.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
-
 #include "mlx5_ib.h"
-
-/* not supported currently */
-static int srq_signature;
+#include "srq.h"
 
 static void *get_wqe(struct mlx5_ib_srq *srq, int n)
 {
@@ -202,7 +171,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
                err = -ENOMEM;
                goto err_in;
        }
-       srq->wq_sig = !!srq_signature;
+       srq->wq_sig = 0;
 
        in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
@@ -327,7 +296,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 
        in.pd = to_mpd(pd)->pdn;
        in.db_record = srq->db.dma;
-       err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
+       err = mlx5_cmd_create_srq(dev, &srq->msrq, &in);
        kvfree(in.pas);
        if (err) {
                mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
@@ -351,7 +320,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
        return &srq->ibsrq;
 
 err_core:
-       mlx5_core_destroy_srq(dev->mdev, &srq->msrq);
+       mlx5_cmd_destroy_srq(dev, &srq->msrq);
 
 err_usr_kern_srq:
        if (pd->uobject)
@@ -381,7 +350,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                        return -EINVAL;
 
                mutex_lock(&srq->mutex);
-               ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1);
+               ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1);
                mutex_unlock(&srq->mutex);
 
                if (ret)
@@ -402,7 +371,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
        if (!out)
                return -ENOMEM;
 
-       ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out);
+       ret = mlx5_cmd_query_srq(dev, &srq->msrq, out);
        if (ret)
                goto out_box;
 
@@ -420,7 +389,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
        struct mlx5_ib_dev *dev = to_mdev(srq->device);
        struct mlx5_ib_srq *msrq = to_msrq(srq);
 
-       mlx5_core_destroy_srq(dev->mdev, &msrq->msrq);
+       mlx5_cmd_destroy_srq(dev, &msrq->msrq);
 
        if (srq->uobject) {
                mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
new file mode 100644 (file)
index 0000000..75eb583
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef MLX5_IB_SRQ_H
+#define MLX5_IB_SRQ_H
+
+enum {
+       MLX5_SRQ_FLAG_ERR    = (1 << 0),
+       MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
+       MLX5_SRQ_FLAG_RNDV   = (1 << 2),
+};
+
+struct mlx5_srq_attr {
+       u32 type;
+       u32 flags;
+       u32 log_size;
+       u32 wqe_shift;
+       u32 log_page_size;
+       u32 wqe_cnt;
+       u32 srqn;
+       u32 xrcd;
+       u32 page_offset;
+       u32 cqn;
+       u32 pd;
+       u32 lwm;
+       u32 user_index;
+       u64 db_record;
+       __be64 *pas;
+       u32 tm_log_list_size;
+       u32 tm_next_tag;
+       u32 tm_hw_phase_cnt;
+       u32 tm_sw_phase_cnt;
+       u16 uid;
+};
+
+struct mlx5_ib_dev;
+
+struct mlx5_core_srq {
+       struct mlx5_core_rsc_common common; /* must be first */
+       u32 srqn;
+       int max;
+       size_t max_gs;
+       size_t max_avail_gather;
+       int wqe_shift;
+       void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e);
+
+       atomic_t refcount;
+       struct completion free;
+       u16 uid;
+};
+
+struct mlx5_srq_table {
+       struct notifier_block nb;
+       /* protect radix tree
+        */
+       spinlock_t lock;
+       struct radix_tree_root tree;
+};
+
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                       struct mlx5_srq_attr *in);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                      struct mlx5_srq_attr *out);
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                    u16 lwm, int is_srq);
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn);
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev);
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev);
+#endif /* MLX5_IB_SRQ_H */
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
new file mode 100644 (file)
index 0000000..7aaaffb
--- /dev/null
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_ib.h"
+#include "srq.h"
+
+static int get_pas_size(struct mlx5_srq_attr *in)
+{
+       u32 log_page_size = in->log_page_size + 12;
+       u32 log_srq_size  = in->log_size;
+       u32 log_rq_stride = in->wqe_shift;
+       u32 page_offset   = in->page_offset;
+       u32 po_quanta     = 1 << (log_page_size - 6);
+       u32 rq_sz         = 1 << (log_srq_size + 4 + log_rq_stride);
+       u32 page_size     = 1 << log_page_size;
+       u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
+       u32 rq_num_pas    = DIV_ROUND_UP(rq_sz_po, page_size);
+
+       return rq_num_pas * sizeof(u64);
+}
+
+static void set_wq(void *wq, struct mlx5_srq_attr *in)
+{
+       MLX5_SET(wq,   wq, wq_signature,  !!(in->flags
+                & MLX5_SRQ_FLAG_WQ_SIG));
+       MLX5_SET(wq,   wq, log_wq_pg_sz,  in->log_page_size);
+       MLX5_SET(wq,   wq, log_wq_stride, in->wqe_shift + 4);
+       MLX5_SET(wq,   wq, log_wq_sz,     in->log_size);
+       MLX5_SET(wq,   wq, page_offset,   in->page_offset);
+       MLX5_SET(wq,   wq, lwm,           in->lwm);
+       MLX5_SET(wq,   wq, pd,            in->pd);
+       MLX5_SET64(wq, wq, dbr_addr,      in->db_record);
+}
+
+static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+       MLX5_SET(srqc,   srqc, wq_signature,  !!(in->flags
+                & MLX5_SRQ_FLAG_WQ_SIG));
+       MLX5_SET(srqc,   srqc, log_page_size, in->log_page_size);
+       MLX5_SET(srqc,   srqc, log_rq_stride, in->wqe_shift);
+       MLX5_SET(srqc,   srqc, log_srq_size,  in->log_size);
+       MLX5_SET(srqc,   srqc, page_offset,   in->page_offset);
+       MLX5_SET(srqc,   srqc, lwm,           in->lwm);
+       MLX5_SET(srqc,   srqc, pd,            in->pd);
+       MLX5_SET64(srqc, srqc, dbr_addr,      in->db_record);
+       MLX5_SET(srqc,   srqc, xrcd,          in->xrcd);
+       MLX5_SET(srqc,   srqc, cqn,           in->cqn);
+}
+
+static void get_wq(void *wq, struct mlx5_srq_attr *in)
+{
+       if (MLX5_GET(wq, wq, wq_signature))
+               in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+       in->log_page_size = MLX5_GET(wq,   wq, log_wq_pg_sz);
+       in->wqe_shift     = MLX5_GET(wq,   wq, log_wq_stride) - 4;
+       in->log_size      = MLX5_GET(wq,   wq, log_wq_sz);
+       in->page_offset   = MLX5_GET(wq,   wq, page_offset);
+       in->lwm           = MLX5_GET(wq,   wq, lwm);
+       in->pd            = MLX5_GET(wq,   wq, pd);
+       in->db_record     = MLX5_GET64(wq, wq, dbr_addr);
+}
+
+static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+       if (MLX5_GET(srqc, srqc, wq_signature))
+               in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+       in->log_page_size = MLX5_GET(srqc,   srqc, log_page_size);
+       in->wqe_shift     = MLX5_GET(srqc,   srqc, log_rq_stride);
+       in->log_size      = MLX5_GET(srqc,   srqc, log_srq_size);
+       in->page_offset   = MLX5_GET(srqc,   srqc, page_offset);
+       in->lwm           = MLX5_GET(srqc,   srqc, lwm);
+       in->pd            = MLX5_GET(srqc,   srqc, pd);
+       in->db_record     = MLX5_GET64(srqc, srqc, dbr_addr);
+}
+
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
+{
+       struct mlx5_srq_table *table = &dev->srq_table;
+       struct mlx5_core_srq *srq;
+
+       spin_lock(&table->lock);
+
+       srq = radix_tree_lookup(&table->tree, srqn);
+       if (srq)
+               atomic_inc(&srq->refcount);
+
+       spin_unlock(&table->lock);
+
+       return srq;
+}
+
+static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                         struct mlx5_srq_attr *in)
+{
+       u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
+       void *create_in;
+       void *srqc;
+       void *pas;
+       int pas_size;
+       int inlen;
+       int err;
+
+       pas_size  = get_pas_size(in);
+       inlen     = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
+       create_in = kvzalloc(inlen, GFP_KERNEL);
+       if (!create_in)
+               return -ENOMEM;
+
+       MLX5_SET(create_srq_in, create_in, uid, in->uid);
+       srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
+       pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
+
+       set_srqc(srqc, in);
+       memcpy(pas, in->pas, pas_size);
+
+       MLX5_SET(create_srq_in, create_in, opcode,
+                MLX5_CMD_OP_CREATE_SRQ);
+
+       err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+                           sizeof(create_out));
+       kvfree(create_in);
+       if (!err) {
+               srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+               srq->uid = in->uid;
+       }
+
+       return err;
+}
+
+static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+       u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
+       u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
+
+       MLX5_SET(destroy_srq_in, srq_in, opcode,
+                MLX5_CMD_OP_DESTROY_SRQ);
+       MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
+       MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
+
+       return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+                            sizeof(srq_out));
+}
+
+static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                      u16 lwm, int is_srq)
+{
+       u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+       u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+
+       MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
+       MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
+       MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
+       MLX5_SET(arm_rq_in, srq_in, lwm,      lwm);
+       MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
+
+       return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+                            sizeof(srq_out));
+}
+
+static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                        struct mlx5_srq_attr *out)
+{
+       u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
+       u32 *srq_out;
+       void *srqc;
+       int err;
+
+       srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
+       if (!srq_out)
+               return -ENOMEM;
+
+       MLX5_SET(query_srq_in, srq_in, opcode,
+                MLX5_CMD_OP_QUERY_SRQ);
+       MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
+       err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+                           MLX5_ST_SZ_BYTES(query_srq_out));
+       if (err)
+               goto out;
+
+       srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
+       get_srqc(srqc, out);
+       if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
+               out->flags |= MLX5_SRQ_FLAG_ERR;
+out:
+       kvfree(srq_out);
+       return err;
+}
+
+static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+                             struct mlx5_core_srq *srq,
+                             struct mlx5_srq_attr *in)
+{
+       u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
+       void *create_in;
+       void *xrc_srqc;
+       void *pas;
+       int pas_size;
+       int inlen;
+       int err;
+
+       pas_size  = get_pas_size(in);
+       inlen     = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
+       create_in = kvzalloc(inlen, GFP_KERNEL);
+       if (!create_in)
+               return -ENOMEM;
+
+       MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
+       xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
+                               xrc_srq_context_entry);
+       pas      = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
+
+       set_srqc(xrc_srqc, in);
+       MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
+       memcpy(pas, in->pas, pas_size);
+       MLX5_SET(create_xrc_srq_in, create_in, opcode,
+                MLX5_CMD_OP_CREATE_XRC_SRQ);
+
+       memset(create_out, 0, sizeof(create_out));
+       err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+                           sizeof(create_out));
+       if (err)
+               goto out;
+
+       srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+       srq->uid = in->uid;
+out:
+       kvfree(create_in);
+       return err;
+}
+
+static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+                              struct mlx5_core_srq *srq)
+{
+       u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
+       u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
+
+       MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
+                MLX5_CMD_OP_DESTROY_XRC_SRQ);
+       MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+       MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
+
+       return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
+                            xrcsrq_out, sizeof(xrcsrq_out));
+}
+
+static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                          u16 lwm)
+{
+       u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
+       u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
+
+       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
+       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod,   MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm,      lwm);
+       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
+
+       return  mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
+                             xrcsrq_out, sizeof(xrcsrq_out));
+}
+
+static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+                            struct mlx5_core_srq *srq,
+                            struct mlx5_srq_attr *out)
+{
+       u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
+       u32 *xrcsrq_out;
+       void *xrc_srqc;
+       int err;
+
+       xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
+       if (!xrcsrq_out)
+               return -ENOMEM;
+       memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
+
+       MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
+                MLX5_CMD_OP_QUERY_XRC_SRQ);
+       MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+
+       err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
+                           xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+       if (err)
+               goto out;
+
+       xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
+                               xrc_srq_context_entry);
+       get_srqc(xrc_srqc, out);
+       if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
+               out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+       kvfree(xrcsrq_out);
+       return err;
+}
+
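+/*
+ * When the device reports a non-zero ISSI, a plain SRQ is backed by an RMP
+ * object; the helpers below mirror the SRQ commands using the RMP command set.
+ */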
+static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                         struct mlx5_srq_attr *in)
+{
+       void *create_out = NULL;
+       void *create_in = NULL;
+       void *rmpc;
+       void *wq;
+       int pas_size;
+       int outlen;
+       int inlen;
+       int err;
+
+       pas_size = get_pas_size(in);
+       inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
+       outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
+       create_in = kvzalloc(inlen, GFP_KERNEL);
+       create_out = kvzalloc(outlen, GFP_KERNEL);
+       if (!create_in || !create_out) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
+       wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+       MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+       MLX5_SET(create_rmp_in, create_in, uid, in->uid);
+       set_wq(wq, in);
+       memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
+
+       MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
+       err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
+       if (!err) {
+               srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn);
+               srq->uid = in->uid;
+       }
+
+out:
+       kvfree(create_in);
+       kvfree(create_out);
+       return err;
+}
+
+static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+       u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {};
+       u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
+
+       MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+       MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
+       MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                      u16 lwm)
+{
+       void *out = NULL;
+       void *in = NULL;
+       void *rmpc;
+       void *wq;
+       void *bitmask;
+       int outlen;
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_rmp_in);
+       outlen = MLX5_ST_SZ_BYTES(modify_rmp_out);
+
+       in = kvzalloc(inlen, GFP_KERNEL);
+       out = kvzalloc(outlen, GFP_KERNEL);
+       if (!in || !out) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       rmpc =    MLX5_ADDR_OF(modify_rmp_in,   in,   ctx);
+       bitmask = MLX5_ADDR_OF(modify_rmp_in,   in,   bitmask);
+       wq   =    MLX5_ADDR_OF(rmpc,            rmpc, wq);
+
+       MLX5_SET(modify_rmp_in, in,      rmp_state, MLX5_RMPC_STATE_RDY);
+       MLX5_SET(modify_rmp_in, in,      rmpn,      srq->srqn);
+       MLX5_SET(modify_rmp_in, in, uid, srq->uid);
+       MLX5_SET(wq,            wq,      lwm,       lwm);
+       MLX5_SET(rmp_bitmask,   bitmask, lwm,       1);
+       MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+       MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
+
+       err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen);
+
+out:
+       kvfree(in);
+       kvfree(out);
+       return err;
+}
+
+static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                        struct mlx5_srq_attr *out)
+{
+       u32 *rmp_out = NULL;
+       u32 *rmp_in = NULL;
+       void *rmpc;
+       int outlen;
+       int inlen;
+       int err;
+
+       outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
+       inlen = MLX5_ST_SZ_BYTES(query_rmp_in);
+
+       rmp_out = kvzalloc(outlen, GFP_KERNEL);
+       rmp_in = kvzalloc(inlen, GFP_KERNEL);
+       if (!rmp_out || !rmp_in) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP);
+       MLX5_SET(query_rmp_in, rmp_in, rmpn,   srq->srqn);
+       err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen);
+       if (err)
+               goto out;
+
+       rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
+       get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
+       if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
+               out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+       kvfree(rmp_out);
+       kvfree(rmp_in);
+       return err;
+}
+
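+/*
+ * XRQs back tag-matching SRQs (IB_SRQT_TM); create_xrq_cmd() also programs
+ * the tag-matching list size and, when requested, the rendezvous offload.
+ */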
+static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                         struct mlx5_srq_attr *in)
+{
+       u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
+       void *create_in;
+       void *xrqc;
+       void *wq;
+       int pas_size;
+       int inlen;
+       int err;
+
+       pas_size = get_pas_size(in);
+       inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
+       create_in = kvzalloc(inlen, GFP_KERNEL);
+       if (!create_in)
+               return -ENOMEM;
+
+       xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
+       wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+
+       set_wq(wq, in);
+       memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
+
+       if (in->type == IB_SRQT_TM) {
+               MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
+               if (in->flags & MLX5_SRQ_FLAG_RNDV)
+                       MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
+               MLX5_SET(xrqc, xrqc,
+                        tag_matching_topology_context.log_matching_list_sz,
+                        in->tm_log_list_size);
+       }
+       MLX5_SET(xrqc, xrqc, user_index, in->user_index);
+       MLX5_SET(xrqc, xrqc, cqn, in->cqn);
+       MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+       MLX5_SET(create_xrq_in, create_in, uid, in->uid);
+       err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+                           sizeof(create_out));
+       kvfree(create_in);
+       if (!err) {
+               srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+               srq->uid = in->uid;
+       }
+
+       return err;
+}
+
+static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+       u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
+
+       MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+       MLX5_SET(destroy_xrq_in, in, xrqn,   srq->srqn);
+       MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
+
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int arm_xrq_cmd(struct mlx5_ib_dev *dev,
+                      struct mlx5_core_srq *srq,
+                      u16 lwm)
+{
+       u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+
+       MLX5_SET(arm_rq_in, in, opcode,     MLX5_CMD_OP_ARM_RQ);
+       MLX5_SET(arm_rq_in, in, op_mod,     MLX5_ARM_RQ_IN_OP_MOD_XRQ);
+       MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+       MLX5_SET(arm_rq_in, in, lwm,        lwm);
+       MLX5_SET(arm_rq_in, in, uid, srq->uid);
+
+       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                        struct mlx5_srq_attr *out)
+{
+       u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
+       u32 *xrq_out;
+       int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
+       void *xrqc;
+       int err;
+
+       xrq_out = kvzalloc(outlen, GFP_KERNEL);
+       if (!xrq_out)
+               return -ENOMEM;
+
+       MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
+       MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
+
+       err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen);
+       if (err)
+               goto out;
+
+       xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
+       get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
+       if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
+               out->flags |= MLX5_SRQ_FLAG_ERR;
+       out->tm_next_tag =
+               MLX5_GET(xrqc, xrqc,
+                        tag_matching_topology_context.append_next_index);
+       out->tm_hw_phase_cnt =
+               MLX5_GET(xrqc, xrqc,
+                        tag_matching_topology_context.hw_phase_cnt);
+       out->tm_sw_phase_cnt =
+               MLX5_GET(xrqc, xrqc,
+                        tag_matching_topology_context.sw_phase_cnt);
+
+out:
+       kvfree(xrq_out);
+       return err;
+}
+
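+/*
+ * The *_split() helpers pick the command set: legacy SRQ commands when ISSI
+ * is zero, otherwise dispatch on the resource type chosen at create time.
+ */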
+static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                           struct mlx5_srq_attr *in)
+{
+       if (!dev->mdev->issi)
+               return create_srq_cmd(dev, srq, in);
+       switch (srq->common.res) {
+       case MLX5_RES_XSRQ:
+               return create_xrc_srq_cmd(dev, srq, in);
+       case MLX5_RES_XRQ:
+               return create_xrq_cmd(dev, srq, in);
+       default:
+               return create_rmp_cmd(dev, srq, in);
+       }
+}
+
+static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+       if (!dev->mdev->issi)
+               return destroy_srq_cmd(dev, srq);
+       switch (srq->common.res) {
+       case MLX5_RES_XSRQ:
+               return destroy_xrc_srq_cmd(dev, srq);
+       case MLX5_RES_XRQ:
+               return destroy_xrq_cmd(dev, srq);
+       default:
+               return destroy_rmp_cmd(dev, srq);
+       }
+}
+
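+/*
+ * Created SRQs are kept in a radix tree keyed by srqn so the event notifier
+ * can map an EQE back to its mlx5_core_srq; the refcount and completion let
+ * destroy wait for any handler still using the SRQ.
+ */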
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                       struct mlx5_srq_attr *in)
+{
+       struct mlx5_srq_table *table = &dev->srq_table;
+       int err;
+
+       switch (in->type) {
+       case IB_SRQT_XRC:
+               srq->common.res = MLX5_RES_XSRQ;
+               break;
+       case IB_SRQT_TM:
+               srq->common.res = MLX5_RES_XRQ;
+               break;
+       default:
+               srq->common.res = MLX5_RES_SRQ;
+       }
+
+       err = create_srq_split(dev, srq, in);
+       if (err)
+               return err;
+
+       atomic_set(&srq->refcount, 1);
+       init_completion(&srq->free);
+
+       spin_lock_irq(&table->lock);
+       err = radix_tree_insert(&table->tree, srq->srqn, srq);
+       spin_unlock_irq(&table->lock);
+       if (err)
+               goto err_destroy_srq_split;
+
+       return 0;
+
+err_destroy_srq_split:
+       destroy_srq_split(dev, srq);
+
+       return err;
+}
+
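+/*
+ * Destroy removes the SRQ from the tree first so no new lookups can find it,
+ * issues the destroy command, then drops the initial reference and waits for
+ * outstanding users to finish.
+ */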
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+       struct mlx5_srq_table *table = &dev->srq_table;
+       struct mlx5_core_srq *tmp;
+       int err;
+
+       spin_lock_irq(&table->lock);
+       tmp = radix_tree_delete(&table->tree, srq->srqn);
+       spin_unlock_irq(&table->lock);
+       if (!tmp || tmp != srq)
+               return -EINVAL;
+
+       err = destroy_srq_split(dev, srq);
+       if (err)
+               return err;
+
+       if (atomic_dec_and_test(&srq->refcount))
+               complete(&srq->free);
+       wait_for_completion(&srq->free);
+
+       return 0;
+}
+
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                      struct mlx5_srq_attr *out)
+{
+       if (!dev->mdev->issi)
+               return query_srq_cmd(dev, srq, out);
+       switch (srq->common.res) {
+       case MLX5_RES_XSRQ:
+               return query_xrc_srq_cmd(dev, srq, out);
+       case MLX5_RES_XRQ:
+               return query_xrq_cmd(dev, srq, out);
+       default:
+               return query_rmp_cmd(dev, srq, out);
+       }
+}
+
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+                    u16 lwm, int is_srq)
+{
+       if (!dev->mdev->issi)
+               return arm_srq_cmd(dev, srq, lwm, is_srq);
+       switch (srq->common.res) {
+       case MLX5_RES_XSRQ:
+               return arm_xrc_srq_cmd(dev, srq, lwm);
+       case MLX5_RES_XRQ:
+               return arm_xrq_cmd(dev, srq, lwm);
+       default:
+               return arm_rmp_cmd(dev, srq, lwm);
+       }
+}
+
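+/*
+ * Only SRQ catastrophic error and RQ limit events are handled here; the
+ * notifier takes a reference under the table lock before invoking
+ * srq->event() so the SRQ cannot be freed underneath it.
+ */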
+static int srq_event_notifier(struct notifier_block *nb,
+                             unsigned long type, void *data)
+{
+       struct mlx5_srq_table *table;
+       struct mlx5_core_srq *srq;
+       struct mlx5_eqe *eqe;
+       u32 srqn;
+
+       if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR &&
+           type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)
+               return NOTIFY_DONE;
+
+       table = container_of(nb, struct mlx5_srq_table, nb);
+
+       eqe = data;
+       srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+
+       spin_lock(&table->lock);
+
+       srq = radix_tree_lookup(&table->tree, srqn);
+       if (srq)
+               atomic_inc(&srq->refcount);
+
+       spin_unlock(&table->lock);
+
+       if (!srq)
+               return NOTIFY_OK;
+
+       srq->event(srq, eqe->type);
+
+       if (atomic_dec_and_test(&srq->refcount))
+               complete(&srq->free);
+
+       return NOTIFY_OK;
+}
+
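+/* Table init/cleanup register/unregister the notifier on the core device */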
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_srq_table *table = &dev->srq_table;
+
+       memset(table, 0, sizeof(*table));
+       spin_lock_init(&table->lock);
+       INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+
+       table->nb.notifier_call = srq_event_notifier;
+       mlx5_notifier_register(dev->mdev, &table->nb);
+
+       return 0;
+}
+
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_srq_table *table = &dev->srq_table;
+
+       mlx5_notifier_unregister(dev->mdev, &table->nb);
+}
index d324a3884462914bd91ffb94c132756e2f056282..d499b3d003480ecdbb74a5c5bf58f705cda22728 100644 (file)
@@ -12,9 +12,9 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 # mlx5 core basic
 #
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
-               health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
+               health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
                mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-               fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o  \
+               fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
                diag/fs_tracepoint.o diag/fw_tracer.o
 
 #
index a5a0823e5ada8fa2484c5c88ab766d349731c3c3..d3125cdf69dbfbf81d3c0a6cb045b898d04bc4cb 100644 (file)
 #include <linux/random.h>
 #include <linux/io-mapping.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
 #include <linux/debugfs.h>
 
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
        CMD_IF_REV = 5,
@@ -313,6 +315,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_FPGA_DESTROY_QP:
        case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
        case MLX5_CMD_OP_DEALLOC_MEMIC:
+       case MLX5_CMD_OP_PAGE_FAULT_RESUME:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -326,7 +329,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_CREATE_MKEY:
        case MLX5_CMD_OP_QUERY_MKEY:
        case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
-       case MLX5_CMD_OP_PAGE_FAULT_RESUME:
        case MLX5_CMD_OP_CREATE_EQ:
        case MLX5_CMD_OP_QUERY_EQ:
        case MLX5_CMD_OP_GEN_EQE:
@@ -371,6 +373,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
+       case MLX5_CMD_OP_SET_MONITOR_COUNTER:
+       case MLX5_CMD_OP_ARM_MONITOR_COUNTER:
        case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
        case MLX5_CMD_OP_QUERY_RATE_LIMIT:
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
@@ -520,6 +524,8 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+       MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER);
+       MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER);
        MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
@@ -805,6 +811,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
        return MLX5_GET(mbox_in, in->first.data, opcode);
 }
 
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+
 static void cb_timeout_handler(struct work_struct *work)
 {
        struct delayed_work *dwork = container_of(work, struct delayed_work,
@@ -1412,14 +1420,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
                up(&cmd->sem);
 }
 
+static int cmd_comp_notifier(struct notifier_block *nb,
+                            unsigned long type, void *data)
+{
+       struct mlx5_core_dev *dev;
+       struct mlx5_cmd *cmd;
+       struct mlx5_eqe *eqe;
+
+       cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb);
+       dev = container_of(cmd, struct mlx5_core_dev, cmd);
+       eqe = data;
+
+       mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+
+       return NOTIFY_OK;
+}
+
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
 {
+       MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD);
+       mlx5_eq_notifier_register(dev, &dev->cmd.nb);
        mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
 }
 
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
 {
        mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+       mlx5_eq_notifier_unregister(dev, &dev->cmd.nb);
 }
 
 static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1435,7 +1461,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
        }
 }
 
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
        struct mlx5_cmd_work_ent *ent;
@@ -1533,7 +1559,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
                }
        }
 }
-EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
+{
+       unsigned long flags;
+       u64 vector;
+
+       /* wait for pending handlers to complete */
+       mlx5_eq_synchronize_cmd_irq(dev);
+       spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+       vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+       if (!vector)
+               goto no_trig;
+
+       vector |= MLX5_TRIGGERED_CMD_COMP;
+       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+       mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+       mlx5_cmd_comp_handler(dev, vector, true);
+       return;
+
+no_trig:
+       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
 
 static int status_to_err(u8 status)
 {
index 4b85abb5c9f7936ebc6b70b6a05328f6270c668d..713a17ee37518e8e017d4c35456c439fa5beb004 100644 (file)
@@ -38,6 +38,7 @@
 #include <rdma/ib_verbs.h>
 #include <linux/mlx5/cq.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 #define TASKLET_MAX_TIME 2
 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
@@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
-       struct mlx5_eq *eq;
+       struct mlx5_eq_comp *eq;
        int err;
 
-       eq = mlx5_eqn2eq(dev, eqn);
+       eq = mlx5_eqn2comp_eq(dev, eqn);
        if (IS_ERR(eq))
                return PTR_ERR(eq);
 
@@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        INIT_LIST_HEAD(&cq->tasklet_ctx.list);
 
        /* Add to comp EQ CQ tree to recv comp events */
-       err = mlx5_eq_add_cq(eq, cq);
+       err = mlx5_eq_add_cq(&eq->core, cq);
        if (err)
                goto err_cmd;
 
        /* Add to async EQ CQ tree to recv async events */
-       err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+       err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq);
        if (err)
                goto err_cq_add;
 
@@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        return 0;
 
 err_cq_add:
-       mlx5_eq_del_cq(eq, cq);
+       mlx5_eq_del_cq(&eq->core, cq);
 err_cmd:
        memset(din, 0, sizeof(din));
        memset(dout, 0, sizeof(dout));
@@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
        u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
        int err;
 
-       err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+       err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
        if (err)
                return err;
 
-       err = mlx5_eq_del_cq(cq->eq, cq);
+       err = mlx5_eq_del_cq(&cq->eq->core, cq);
        if (err)
                return err;
 
index 90fabd612b6cd84f1420afa151cc6c3b0103acfb..a11e22d0b0ccbda0674ba7873cfc57fab8ebedd7 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
        QP_PID,
@@ -349,6 +350,16 @@ out:
        return param;
 }
 
+static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                             u32 *out, int outlen)
+{
+       u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
+
+       MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+       MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
 static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
                         int index)
 {
index 37ba7c78859db17aa7ecfa76648ca54adb71790b..ebc046fa97d353bce5f9802eb1889faa755bede3 100644 (file)
@@ -45,75 +45,11 @@ struct mlx5_device_context {
        unsigned long           state;
 };
 
-struct mlx5_delayed_event {
-       struct list_head        list;
-       struct mlx5_core_dev    *dev;
-       enum mlx5_dev_event     event;
-       unsigned long           param;
-};
-
 enum {
        MLX5_INTERFACE_ADDED,
        MLX5_INTERFACE_ATTACHED,
 };
 
-static void add_delayed_event(struct mlx5_priv *priv,
-                             struct mlx5_core_dev *dev,
-                             enum mlx5_dev_event event,
-                             unsigned long param)
-{
-       struct mlx5_delayed_event *delayed_event;
-
-       delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
-       if (!delayed_event) {
-               mlx5_core_err(dev, "event %d is missed\n", event);
-               return;
-       }
-
-       mlx5_core_dbg(dev, "Accumulating event %d\n", event);
-       delayed_event->dev = dev;
-       delayed_event->event = event;
-       delayed_event->param = param;
-       list_add_tail(&delayed_event->list, &priv->waiting_events_list);
-}
-
-static void delayed_event_release(struct mlx5_device_context *dev_ctx,
-                                 struct mlx5_priv *priv)
-{
-       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-       struct mlx5_delayed_event *de;
-       struct mlx5_delayed_event *n;
-       struct list_head temp;
-
-       INIT_LIST_HEAD(&temp);
-
-       spin_lock_irq(&priv->ctx_lock);
-
-       priv->is_accum_events = false;
-       list_splice_init(&priv->waiting_events_list, &temp);
-       if (!dev_ctx->context)
-               goto out;
-       list_for_each_entry_safe(de, n, &temp, list)
-               dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
-
-out:
-       spin_unlock_irq(&priv->ctx_lock);
-
-       list_for_each_entry_safe(de, n, &temp, list) {
-               list_del(&de->list);
-               kfree(de);
-       }
-}
-
-/* accumulating events that can come after mlx5_ib calls to
- * ib_register_device, till adding that interface to the events list.
- */
-static void delayed_event_start(struct mlx5_priv *priv)
-{
-       spin_lock_irq(&priv->ctx_lock);
-       priv->is_accum_events = true;
-       spin_unlock_irq(&priv->ctx_lock);
-}
 
 void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 {
@@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 
        dev_ctx->intf = intf;
 
-       delayed_event_start(priv);
-
        dev_ctx->context = intf->add(dev);
        if (dev_ctx->context) {
                set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 
                spin_lock_irq(&priv->ctx_lock);
                list_add_tail(&dev_ctx->list, &priv->ctx_list);
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-               if (dev_ctx->intf->pfault) {
-                       if (priv->pfault) {
-                               mlx5_core_err(dev, "multiple page fault handlers not supported");
-                       } else {
-                               priv->pfault_ctx = dev_ctx->context;
-                               priv->pfault = dev_ctx->intf->pfault;
-                       }
-               }
-#endif
                spin_unlock_irq(&priv->ctx_lock);
        }
 
-       delayed_event_release(dev_ctx, priv);
-
        if (!dev_ctx->context)
                kfree(dev_ctx);
 }
@@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
        if (!dev_ctx)
                return;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       spin_lock_irq(&priv->ctx_lock);
-       if (priv->pfault == dev_ctx->intf->pfault)
-               priv->pfault = NULL;
-       spin_unlock_irq(&priv->ctx_lock);
-
-       synchronize_srcu(&priv->pfault_srcu);
-#endif
-
        spin_lock_irq(&priv->ctx_lock);
        list_del(&dev_ctx->list);
        spin_unlock_irq(&priv->ctx_lock);
@@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
        if (!dev_ctx)
                return;
 
-       delayed_event_start(priv);
        if (intf->attach) {
                if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
-                       goto out;
+                       return;
                if (intf->attach(dev, dev_ctx->context))
-                       goto out;
-
+                       return;
                set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
        } else {
                if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
-                       goto out;
+                       return;
                dev_ctx->context = intf->add(dev);
                if (!dev_ctx->context)
-                       goto out;
-
+                       return;
                set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
        }
-
-out:
-       delayed_event_release(dev_ctx, priv);
 }
 
 void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -350,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
        mutex_unlock(&mlx5_intf_mutex);
 }
 
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
-{
-       struct mlx5_priv *priv = &mdev->priv;
-       struct mlx5_device_context *dev_ctx;
-       unsigned long flags;
-       void *result = NULL;
-
-       spin_lock_irqsave(&priv->ctx_lock, flags);
-
-       list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
-               if ((dev_ctx->intf->protocol == protocol) &&
-                   dev_ctx->intf->get_dev) {
-                       result = dev_ctx->intf->get_dev(dev_ctx->context);
-                       break;
-               }
-
-       spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
-       return result;
-}
-EXPORT_SYMBOL(mlx5_get_protocol_dev);
-
 /* Must be called with intf_mutex held */
 void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
 {
@@ -422,44 +306,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
        return res;
 }
 
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-                    unsigned long param)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_device_context *dev_ctx;
-       unsigned long flags;
-
-       spin_lock_irqsave(&priv->ctx_lock, flags);
-
-       if (priv->is_accum_events)
-               add_delayed_event(priv, dev, event, param);
-
-       /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
-        * still in priv->ctx_list. In this case, only notify the dev_ctx if its
-        * ADDED or ATTACHED bit are set.
-        */
-       list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-               if (dev_ctx->intf->event &&
-                   (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
-                    test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
-                       dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
-       spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
-                         struct mlx5_pagefault *pfault)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       int srcu_idx;
-
-       srcu_idx = srcu_read_lock(&priv->pfault_srcu);
-       if (priv->pfault)
-               priv->pfault(dev, priv->pfault_ctx, pfault);
-       srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
-}
-#endif
 
 void mlx5_dev_list_lock(void)
 {
index 0f11fff32a9b9215f9ea31169fd2c437ef7a9cde..424457ff9759f090db64e02eee6f253e6e0b3f86 100644 (file)
@@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p,
        PRINT_MASKED_VAL(name, p, format);                 \
 }
        DECLARE_MASK_VAL(u64, gre_key) = {
-               .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
-                    MLX5_GET(fte_match_set_misc, mask, gre_key_l),
-               .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
-                    MLX5_GET(fte_match_set_misc, value, gre_key_l)};
+               .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 |
+                    MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo),
+               .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 |
+                    MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)};
 
        PRINT_MASKED_VAL(gre_key, p, "%llu");
        PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
index d4ec93bde4dedbaeca4bb5976c705a3ea6b83f82..6999f4486e9ec786424be5dd9f72d91e333c0086 100644 (file)
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 #define CREATE_TRACE_POINTS
+#include "lib/eq.h"
 #include "fw_tracer.h"
 #include "fw_tracer_tracepoint.h"
 
@@ -846,9 +847,9 @@ free_tracer:
        return ERR_PTR(err);
 }
 
-/* Create HW resources + start tracer
- * must be called before Async EQ is created
- */
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
+
+/* Create HW resources + start tracer */
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 {
        struct mlx5_core_dev *dev;
@@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
                goto err_dealloc_pd;
        }
 
+       MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
+       mlx5_eq_notifier_register(dev, &tracer->nb);
+
        mlx5_fw_tracer_start(tracer);
 
        return 0;
@@ -883,9 +887,7 @@ err_dealloc_pd:
        return err;
 }
 
-/* Stop tracer + Cleanup HW resources
- * must be called after Async EQ is destroyed
- */
+/* Stop tracer + Cleanup HW resources */
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 {
        if (IS_ERR_OR_NULL(tracer))
@@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 
        mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
                      tracer->owner);
-
+       mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
        cancel_work_sync(&tracer->ownership_change_work);
        cancel_work_sync(&tracer->handle_traces_work);
 
@@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
        kfree(tracer);
 }
 
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
 {
-       struct mlx5_fw_tracer *tracer = dev->tracer;
-
-       if (!tracer)
-               return;
+       struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
+       struct mlx5_core_dev *dev = tracer->dev;
+       struct mlx5_eqe *eqe = data;
 
        switch (eqe->sub_type) {
        case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
@@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
                mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
                              eqe->sub_type);
        }
+
+       return NOTIFY_OK;
 }
 
 EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
index 0347f2dd5cee1263617496543aa9269e0d9193cf..a8b8747f2b61142ded720a7e573f68b5be39fa14 100644 (file)
@@ -55,6 +55,7 @@
 
 struct mlx5_fw_tracer {
        struct mlx5_core_dev *dev;
+       struct mlx5_nb        nb;
        bool owner;
        u8   trc_ver;
        struct workqueue_struct *work_queue;
@@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 
 #endif
index 8760d10ad5a113f1017994e30eecce0014c5cbb8..9ea0f853c3bb82e0e3d821b9fba8b6869bd7dc8c 100644 (file)
@@ -176,8 +176,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 {
        return is_kdump_kernel() ?
                MLX5E_MIN_NUM_CHANNELS :
-               min_t(int, mdev->priv.eq_table.num_comp_vectors,
-                     MLX5E_MAX_NUM_CHANNELS);
+               min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
 }
 
 /* Use this function to get max num channels after netdev was created */
@@ -629,7 +628,6 @@ struct mlx5e_channel_stats {
 } ____cacheline_aligned_in_smp;
 
 enum {
-       MLX5E_STATE_ASYNC_EVENTS_ENABLED,
        MLX5E_STATE_OPENED,
        MLX5E_STATE_DESTROYING,
 };
@@ -696,6 +694,8 @@ struct mlx5e_priv {
        struct hwtstamp_config     tstamp;
        u16                        q_counter;
        u16                        drop_rq_q_counter;
+       struct notifier_block      events_nb;
+
 #ifdef CONFIG_MLX5_CORE_EN_DCB
        struct mlx5e_dcbx          dcbx;
 #endif
index 544c287c648139b9cbcd519d1f1e057baef45e3b..06b1c0172a7bd2c4596ceb5ea76570225e7c5ec6 100644 (file)
@@ -49,6 +49,7 @@
 #include "lib/clock.h"
 #include "en/port.h"
 #include "en/xdp.h"
+#include "lib/eq.h"
 
 struct mlx5e_rq_param {
        u32                     rqc[MLX5_ST_SZ_DW(rqc)];
@@ -293,33 +294,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
        queue_work(priv->wq, &priv->update_stats_work);
 }
 
-static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
-                             enum mlx5_dev_event event, unsigned long param)
+static int async_event(struct notifier_block *nb, unsigned long event, void *data)
 {
-       struct mlx5e_priv *priv = vpriv;
+       struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
+       struct mlx5_eqe   *eqe = data;
 
-       if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
-               return;
+       if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
+               return NOTIFY_DONE;
 
-       switch (event) {
-       case MLX5_DEV_EVENT_PORT_UP:
-       case MLX5_DEV_EVENT_PORT_DOWN:
+       switch (eqe->sub_type) {
+       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
                queue_work(priv->wq, &priv->update_carrier_work);
                break;
        default:
-               break;
+               return NOTIFY_DONE;
        }
+
+       return NOTIFY_OK;
 }
 
 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
-       set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+       priv->events_nb.notifier_call = async_event;
+       mlx5_notifier_register(priv->mdev, &priv->events_nb);
 }
 
 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
-       clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
-       synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+       mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
 }
 
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
@@ -1763,11 +1766,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
        mlx5e_free_cq(cq);
 }
 
-static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
-{
-       return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
-}
-
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
                             struct mlx5e_params *params,
                             struct mlx5e_channel_param *cparam)
@@ -1918,9 +1916,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
                              struct mlx5e_channel_param *cparam,
                              struct mlx5e_channel **cp)
 {
+       int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
        struct net_dim_cq_moder icocq_moder = {0, 0};
        struct net_device *netdev = priv->netdev;
-       int cpu = mlx5e_get_cpu(priv, ix);
        struct mlx5e_channel *c;
        unsigned int irq;
        int err;
@@ -4137,17 +4135,17 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
                                        struct mlx5e_txqsq *sq)
 {
-       struct mlx5_eq *eq = sq->cq.mcq.eq;
+       struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
        u32 eqe_count;
 
        netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
-                  eq->eqn, eq->cons_index, eq->irqn);
+                  eq->core.eqn, eq->core.cons_index, eq->core.irqn);
 
        eqe_count = mlx5_eq_poll_irq_disabled(eq);
        if (!eqe_count)
                return false;
 
-       netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
+       netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
        sq->channel->stats->eq_rearm++;
        return true;
 }
@@ -4988,7 +4986,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
        netif_carrier_off(netdev);
 
 #ifdef CONFIG_MLX5_EN_ARFS
-       netdev->rx_cpu_rmap = mdev->rmap;
+       netdev->rx_cpu_rmap =  mlx5_eq_table_get_rmap(mdev);
 #endif
 
        return 0;
@@ -5200,21 +5198,12 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
        kfree(ppriv);
 }
 
-static void *mlx5e_get_netdev(void *vpriv)
-{
-       struct mlx5e_priv *priv = vpriv;
-
-       return priv->netdev;
-}
-
 static struct mlx5_interface mlx5e_interface = {
        .add       = mlx5e_add,
        .remove    = mlx5e_remove,
        .attach    = mlx5e_attach,
        .detach    = mlx5e_detach,
-       .event     = mlx5e_async_event,
        .protocol  = MLX5_INTERFACE_PROTOCOL_ETH,
-       .get_dev   = mlx5e_get_netdev,
 };
 
 void mlx5e_init(void)
index 624eed345b5d2b19fa5ed54935667b41090383f8..a75aad03559368381841f1389a4b0d1f42f90d74 100644 (file)
@@ -554,9 +554,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
 
        mlx5_cqwq_pop(&cq->wq);
 
-       if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
+       if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
                netdev_WARN_ONCE(cq->channel->netdev,
-                                "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own);
+                                "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
                return;
        }
 
@@ -898,7 +898,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
        prefetchw(va); /* xdp_frame data area */
        prefetch(data);
 
-       if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+       if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
                rq->stats->wqe_err++;
                return NULL;
        }
@@ -930,7 +930,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
        u16 byte_cnt     = cqe_bcnt - headlen;
        struct sk_buff *skb;
 
-       if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+       if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
                rq->stats->wqe_err++;
                return NULL;
        }
@@ -1154,7 +1154,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
        wi->consumed_strides += cstrides;
 
-       if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+       if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
                rq->stats->wqe_err++;
                goto mpwrq_cqe_out;
        }
index 40b60e958cfd9c5a0f1eae5a58d468c605d7587b..75d30fa637d655640397d9b69e3e69cdf6ddf23c 100644 (file)
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include "lib/mlx5.h"
 #include "en.h"
 #include "en_accel/ipsec.h"
 #include "en_accel/tls.h"
@@ -1088,13 +1089,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
 }
 
 static const struct counter_desc mlx5e_pme_status_desc[] = {
-       { "module_unplug", 8 },
+       { "module_unplug",       sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED },
 };
 
 static const struct counter_desc mlx5e_pme_error_desc[] = {
-       { "module_bus_stuck", 16 },       /* bus stuck (I2C or data shorted) */
-       { "module_high_temp", 48 },       /* high temperature */
-       { "module_bad_shorted", 56 },    /* bad or shorted cable/module */
+       { "module_bus_stuck",    sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK },
+       { "module_high_temp",    sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE },
+       { "module_bad_shorted",  sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE },
 };
 
 #define NUM_PME_STATUS_STATS           ARRAY_SIZE(mlx5e_pme_status_desc)
@@ -1122,15 +1123,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
 static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
                                    int idx)
 {
-       struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+       struct mlx5_pme_stats pme_stats;
        int i;
 
+       mlx5_get_pme_stats(priv->mdev, &pme_stats);
+
        for (i = 0; i < NUM_PME_STATUS_STATS; i++)
-               data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+               data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters,
                                                   mlx5e_pme_status_desc, i);
 
        for (i = 0; i < NUM_PME_ERR_STATS; i++)
-               data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+               data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters,
                                                   mlx5e_pme_error_desc, i);
 
        return idx;
index 9afdf955f2bc3cf9f60cc8c5db1b8c4cb2d6b5f6..f201965b888f0812991aeb285b48a4b8e8a39e05 100644 (file)
@@ -507,7 +507,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
                wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
-               if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+               if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
                        if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
                                              &sq->state)) {
                                mlx5e_dump_error_cqe(sq,
index c1e1a16a9b07d4335bb4cdc3b29bdea3673b8fa2..ee04aab65a9f2e09764486c02b78a353709acd0a 100644 (file)
  */
 
 #include <linux/interrupt.h>
+#include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
 #include <linux/mlx5/cmd.h>
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "fpga/core.h"
 #include "eswitch.h"
 #include "lib/clock.h"
 #include "diag/fw_tracer.h"
 
 enum {
-       MLX5_EQE_SIZE           = sizeof(struct mlx5_eqe),
        MLX5_EQE_OWNER_INIT_VAL = 0x1,
 };
 
@@ -55,14 +57,32 @@ enum {
 };
 
 enum {
-       MLX5_NUM_SPARE_EQE      = 0x80,
-       MLX5_NUM_ASYNC_EQE      = 0x1000,
-       MLX5_NUM_CMD_EQE        = 32,
-       MLX5_NUM_PF_DRAIN       = 64,
+       MLX5_EQ_DOORBEL_OFFSET  = 0x40,
 };
 
-enum {
-       MLX5_EQ_DOORBEL_OFFSET  = 0x40,
+struct mlx5_irq_info {
+       cpumask_var_t mask;
+       char name[MLX5_MAX_IRQ_NAME];
+       void *context; /* dev_id provided to request_irq */
+};
+
+struct mlx5_eq_table {
+       struct list_head        comp_eqs_list;
+       struct mlx5_eq          pages_eq;
+       struct mlx5_eq          cmd_eq;
+       struct mlx5_eq          async_eq;
+
+       struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
+
+       /* Since CQ DB is stored in async_eq */
+       struct mlx5_nb          cq_err_nb;
+
+       struct mutex            lock; /* sync async eqs creations */
+       int                     num_comp_vectors;
+       struct mlx5_irq_info    *irq_info;
+#ifdef CONFIG_RFS_ACCEL
+       struct cpu_rmap         *rmap;
+#endif
 };
 
 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)          | \
@@ -78,17 +98,6 @@ enum {
                               (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)       | \
                               (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
 
-struct map_eq_in {
-       u64     mask;
-       u32     reserved;
-       u32     unmap_eqn;
-};
-
-struct cre_des_eq {
-       u8      reserved[15];
-       u8      eqn;
-};
-
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
        u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
@@ -99,213 +108,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
-{
-       return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
-}
-
-static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
-{
-       struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
-
-       return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
-}
-
-static const char *eqe_type_str(u8 type)
-{
-       switch (type) {
-       case MLX5_EVENT_TYPE_COMP:
-               return "MLX5_EVENT_TYPE_COMP";
-       case MLX5_EVENT_TYPE_PATH_MIG:
-               return "MLX5_EVENT_TYPE_PATH_MIG";
-       case MLX5_EVENT_TYPE_COMM_EST:
-               return "MLX5_EVENT_TYPE_COMM_EST";
-       case MLX5_EVENT_TYPE_SQ_DRAINED:
-               return "MLX5_EVENT_TYPE_SQ_DRAINED";
-       case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-               return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
-       case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
-               return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
-       case MLX5_EVENT_TYPE_CQ_ERROR:
-               return "MLX5_EVENT_TYPE_CQ_ERROR";
-       case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-               return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
-       case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-               return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
-       case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-               return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
-       case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-               return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
-       case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-               return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
-       case MLX5_EVENT_TYPE_INTERNAL_ERROR:
-               return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
-       case MLX5_EVENT_TYPE_PORT_CHANGE:
-               return "MLX5_EVENT_TYPE_PORT_CHANGE";
-       case MLX5_EVENT_TYPE_GPIO_EVENT:
-               return "MLX5_EVENT_TYPE_GPIO_EVENT";
-       case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
-               return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
-       case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
-               return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
-       case MLX5_EVENT_TYPE_REMOTE_CONFIG:
-               return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
-       case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
-               return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
-       case MLX5_EVENT_TYPE_STALL_EVENT:
-               return "MLX5_EVENT_TYPE_STALL_EVENT";
-       case MLX5_EVENT_TYPE_CMD:
-               return "MLX5_EVENT_TYPE_CMD";
-       case MLX5_EVENT_TYPE_PAGE_REQUEST:
-               return "MLX5_EVENT_TYPE_PAGE_REQUEST";
-       case MLX5_EVENT_TYPE_PAGE_FAULT:
-               return "MLX5_EVENT_TYPE_PAGE_FAULT";
-       case MLX5_EVENT_TYPE_PPS_EVENT:
-               return "MLX5_EVENT_TYPE_PPS_EVENT";
-       case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
-               return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
-       case MLX5_EVENT_TYPE_FPGA_ERROR:
-               return "MLX5_EVENT_TYPE_FPGA_ERROR";
-       case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
-               return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
-       case MLX5_EVENT_TYPE_GENERAL_EVENT:
-               return "MLX5_EVENT_TYPE_GENERAL_EVENT";
-       case MLX5_EVENT_TYPE_DEVICE_TRACER:
-               return "MLX5_EVENT_TYPE_DEVICE_TRACER";
-       default:
-               return "Unrecognized event";
-       }
-}
-
-static enum mlx5_dev_event port_subtype_event(u8 subtype)
-{
-       switch (subtype) {
-       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-               return MLX5_DEV_EVENT_PORT_DOWN;
-       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-               return MLX5_DEV_EVENT_PORT_UP;
-       case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
-               return MLX5_DEV_EVENT_PORT_INITIALIZED;
-       case MLX5_PORT_CHANGE_SUBTYPE_LID:
-               return MLX5_DEV_EVENT_LID_CHANGE;
-       case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
-               return MLX5_DEV_EVENT_PKEY_CHANGE;
-       case MLX5_PORT_CHANGE_SUBTYPE_GUID:
-               return MLX5_DEV_EVENT_GUID_CHANGE;
-       case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
-               return MLX5_DEV_EVENT_CLIENT_REREG;
-       }
-       return -1;
-}
-
-static void eq_update_ci(struct mlx5_eq *eq, int arm)
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
 {
-       __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
-       u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
-
-       __raw_writel((__force u32)cpu_to_be32(val), addr);
-       /* We still want ordering, just not swabbing, so add a barrier */
-       mb();
-}
+       struct mlx5_cq_table *table = &eq->cq_table;
+       struct mlx5_core_cq *cq = NULL;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void eqe_pf_action(struct work_struct *work)
-{
-       struct mlx5_pagefault *pfault = container_of(work,
-                                                    struct mlx5_pagefault,
-                                                    work);
-       struct mlx5_eq *eq = pfault->eq;
+       spin_lock(&table->lock);
+       cq = radix_tree_lookup(&table->tree, cqn);
+       if (likely(cq))
+               mlx5_cq_hold(cq);
+       spin_unlock(&table->lock);
 
-       mlx5_core_page_fault(eq->dev, pfault);
-       mempool_free(pfault, eq->pf_ctx.pool);
+       return cq;
 }
 
-static void eq_pf_process(struct mlx5_eq *eq)
+static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
 {
-       struct mlx5_core_dev *dev = eq->dev;
-       struct mlx5_eqe_page_fault *pf_eqe;
-       struct mlx5_pagefault *pfault;
+       struct mlx5_eq_comp *eq_comp = eq_ptr;
+       struct mlx5_eq *eq = eq_ptr;
        struct mlx5_eqe *eqe;
        int set_ci = 0;
+       u32 cqn = -1;
 
        while ((eqe = next_eqe_sw(eq))) {
-               pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
-               if (!pfault) {
-                       schedule_work(&eq->pf_ctx.work);
-                       break;
-               }
-
+               struct mlx5_core_cq *cq;
+               /* Make sure we read EQ entry contents after we've
+                * checked the ownership bit.
+                */
                dma_rmb();
-               pf_eqe = &eqe->data.page_fault;
-               pfault->event_subtype = eqe->sub_type;
-               pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
-               mlx5_core_dbg(dev,
-                             "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
-                             eqe->sub_type, pfault->bytes_committed);
-
-               switch (eqe->sub_type) {
-               case MLX5_PFAULT_SUBTYPE_RDMA:
-                       /* RDMA based event */
-                       pfault->type =
-                               be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
-                       pfault->token =
-                               be32_to_cpu(pf_eqe->rdma.pftype_token) &
-                               MLX5_24BIT_MASK;
-                       pfault->rdma.r_key =
-                               be32_to_cpu(pf_eqe->rdma.r_key);
-                       pfault->rdma.packet_size =
-                               be16_to_cpu(pf_eqe->rdma.packet_length);
-                       pfault->rdma.rdma_op_len =
-                               be32_to_cpu(pf_eqe->rdma.rdma_op_len);
-                       pfault->rdma.rdma_va =
-                               be64_to_cpu(pf_eqe->rdma.rdma_va);
-                       mlx5_core_dbg(dev,
-                                     "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
-                                     pfault->type, pfault->token,
-                                     pfault->rdma.r_key);
-                       mlx5_core_dbg(dev,
-                                     "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
-                                     pfault->rdma.rdma_op_len,
-                                     pfault->rdma.rdma_va);
-                       break;
-
-               case MLX5_PFAULT_SUBTYPE_WQE:
-                       /* WQE based event */
-                       pfault->type =
-                               (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
-                       pfault->token =
-                               be32_to_cpu(pf_eqe->wqe.token);
-                       pfault->wqe.wq_num =
-                               be32_to_cpu(pf_eqe->wqe.pftype_wq) &
-                               MLX5_24BIT_MASK;
-                       pfault->wqe.wqe_index =
-                               be16_to_cpu(pf_eqe->wqe.wqe_index);
-                       pfault->wqe.packet_size =
-                               be16_to_cpu(pf_eqe->wqe.packet_length);
-                       mlx5_core_dbg(dev,
-                                     "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
-                                     pfault->type, pfault->token,
-                                     pfault->wqe.wq_num,
-                                     pfault->wqe.wqe_index);
-                       break;
-
-               default:
-                       mlx5_core_warn(dev,
-                                      "Unsupported page fault event sub-type: 0x%02hhx\n",
-                                      eqe->sub_type);
-                       /* Unsupported page faults should still be
-                        * resolved by the page fault handler
-                        */
+               /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
+               cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+
+               cq = mlx5_eq_cq_get(eq, cqn);
+               if (likely(cq)) {
+                       ++cq->arm_sn;
+                       cq->comp(cq);
+                       mlx5_cq_put(cq);
+               } else {
+                       mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
                }
 
-               pfault->eq = eq;
-               INIT_WORK(&pfault->work, eqe_pf_action);
-               queue_work(eq->pf_ctx.wq, &pfault->work);
-
                ++eq->cons_index;
                ++set_ci;
 
+               /* The HCA will think the queue has overflowed if we
+                * don't tell it we've been processing events.  We
+                * create our EQs with MLX5_NUM_SPARE_EQE extra
+                * entries, so we must update our consumer index at
+                * least that often.
+                */
                if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
                        eq_update_ci(eq, 0);
                        set_ci = 0;
@@ -313,165 +165,41 @@ static void eq_pf_process(struct mlx5_eq *eq)
        }
 
        eq_update_ci(eq, 1);
-}
 
-static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
-{
-       struct mlx5_eq *eq = eq_ptr;
-       unsigned long flags;
-
-       if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
-               eq_pf_process(eq);
-               spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
-       } else {
-               schedule_work(&eq->pf_ctx.work);
-       }
+       if (cqn != -1)
+               tasklet_schedule(&eq_comp->tasklet_ctx.task);
 
        return IRQ_HANDLED;
 }
 
-/* mempool_refill() was proposed but unfortunately wasn't accepted
- * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
- * Chip workaround.
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them.  It is not recommended to use it, unless this is the last
+ * resort.
  */
-static void mempool_refill(mempool_t *pool)
-{
-       while (pool->curr_nr < pool->min_nr)
-               mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
-}
-
-static void eq_pf_action(struct work_struct *work)
-{
-       struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
-
-       mempool_refill(eq->pf_ctx.pool);
-
-       spin_lock_irq(&eq->pf_ctx.lock);
-       eq_pf_process(eq);
-       spin_unlock_irq(&eq->pf_ctx.lock);
-}
-
-static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
-{
-       spin_lock_init(&pf_ctx->lock);
-       INIT_WORK(&pf_ctx->work, eq_pf_action);
-
-       pf_ctx->wq = alloc_ordered_workqueue(name,
-                                            WQ_MEM_RECLAIM);
-       if (!pf_ctx->wq)
-               return -ENOMEM;
-
-       pf_ctx->pool = mempool_create_kmalloc_pool
-               (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
-       if (!pf_ctx->pool)
-               goto err_wq;
-
-       return 0;
-err_wq:
-       destroy_workqueue(pf_ctx->wq);
-       return -ENOMEM;
-}
-
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
-                               u32 wq_num, u8 type, int error)
-{
-       u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
-       u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)]   = {0};
-
-       MLX5_SET(page_fault_resume_in, in, opcode,
-                MLX5_CMD_OP_PAGE_FAULT_RESUME);
-       MLX5_SET(page_fault_resume_in, in, error, !!error);
-       MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
-       MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
-       MLX5_SET(page_fault_resume_in, in, token, token);
-
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
-#endif
-
-static void general_event_handler(struct mlx5_core_dev *dev,
-                                 struct mlx5_eqe *eqe)
-{
-       switch (eqe->sub_type) {
-       case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
-               if (dev->event)
-                       dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
-               break;
-       default:
-               mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
-                             eqe->sub_type);
-       }
-}
-
-static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
-                                   struct mlx5_eqe *eqe)
-{
-       u64 value_lsb;
-       u64 value_msb;
-
-       value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
-       value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
-
-       mlx5_core_warn(dev,
-                      "High temperature on sensors with bit set %llx %llx",
-                      value_msb, value_lsb);
-}
-
-/* caller must eventually call mlx5_cq_put on the returned cq */
-static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
-{
-       struct mlx5_cq_table *table = &eq->cq_table;
-       struct mlx5_core_cq *cq = NULL;
-
-       spin_lock(&table->lock);
-       cq = radix_tree_lookup(&table->tree, cqn);
-       if (likely(cq))
-               mlx5_cq_hold(cq);
-       spin_unlock(&table->lock);
-
-       return cq;
-}
-
-static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
-{
-       struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-       if (unlikely(!cq)) {
-               mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
-               return;
-       }
-
-       ++cq->arm_sn;
-
-       cq->comp(cq);
-
-       mlx5_cq_put(cq);
-}
-
-static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
 {
-       struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-       if (unlikely(!cq)) {
-               mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
-               return;
-       }
+       u32 count_eqe;
 
-       cq->event(cq, event_type);
+       disable_irq(eq->core.irqn);
+       count_eqe = eq->core.cons_index;
+       mlx5_eq_comp_int(eq->core.irqn, eq);
+       count_eqe = eq->core.cons_index - count_eqe;
+       enable_irq(eq->core.irqn);
 
-       mlx5_cq_put(cq);
+       return count_eqe;
 }
 
-static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
+static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
 {
        struct mlx5_eq *eq = eq_ptr;
-       struct mlx5_core_dev *dev = eq->dev;
+       struct mlx5_eq_table *eqt;
+       struct mlx5_core_dev *dev;
        struct mlx5_eqe *eqe;
        int set_ci = 0;
-       u32 cqn = -1;
-       u32 rsn;
-       u8 port;
+
+       dev = eq->dev;
+       eqt = dev->priv.eq_table;
 
        while ((eqe = next_eqe_sw(eq))) {
                /*
@@ -480,116 +208,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                 */
                dma_rmb();
 
-               mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
-                             eq->eqn, eqe_type_str(eqe->type));
-               switch (eqe->type) {
-               case MLX5_EVENT_TYPE_COMP:
-                       cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
-                       mlx5_eq_cq_completion(eq, cqn);
-                       break;
-               case MLX5_EVENT_TYPE_DCT_DRAINED:
-                       rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
-                       rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
-                       mlx5_rsc_event(dev, rsn, eqe->type);
-                       break;
-               case MLX5_EVENT_TYPE_PATH_MIG:
-               case MLX5_EVENT_TYPE_COMM_EST:
-               case MLX5_EVENT_TYPE_SQ_DRAINED:
-               case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-               case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-               case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-               case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-               case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-                       rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-                       rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
-                       mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
-                                     eqe_type_str(eqe->type), eqe->type, rsn);
-                       mlx5_rsc_event(dev, rsn, eqe->type);
-                       break;
-
-               case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
-               case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
-                       rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-                       mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
-                                     eqe_type_str(eqe->type), eqe->type, rsn);
-                       mlx5_srq_event(dev, rsn, eqe->type);
-                       break;
-
-               case MLX5_EVENT_TYPE_CMD:
-                       mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
-                       break;
+               if (likely(eqe->type < MLX5_EVENT_TYPE_MAX))
+                       atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
+               else
+                       mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type);
 
-               case MLX5_EVENT_TYPE_PORT_CHANGE:
-                       port = (eqe->data.port.port >> 4) & 0xf;
-                       switch (eqe->sub_type) {
-                       case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
-                       case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
-                       case MLX5_PORT_CHANGE_SUBTYPE_LID:
-                       case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
-                       case MLX5_PORT_CHANGE_SUBTYPE_GUID:
-                       case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
-                       case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
-                               if (dev->event)
-                                       dev->event(dev, port_subtype_event(eqe->sub_type),
-                                                  (unsigned long)port);
-                               break;
-                       default:
-                               mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
-                                              port, eqe->sub_type);
-                       }
-                       break;
-               case MLX5_EVENT_TYPE_CQ_ERROR:
-                       cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
-                                      cqn, eqe->data.cq_err.syndrome);
-                       mlx5_eq_cq_event(eq, cqn, eqe->type);
-                       break;
-
-               case MLX5_EVENT_TYPE_PAGE_REQUEST:
-                       {
-                               u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
-                               s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
-
-                               mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
-                                             func_id, npages);
-                               mlx5_core_req_pages_handler(dev, func_id, npages);
-                       }
-                       break;
-
-               case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
-                       mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
-                       mlx5_port_module_event(dev, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_PPS_EVENT:
-                       mlx5_pps_event(dev, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_FPGA_ERROR:
-               case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
-                       mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
-                       break;
-
-               case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
-                       mlx5_temp_warning_event(dev, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_GENERAL_EVENT:
-                       general_event_handler(dev, eqe);
-                       break;
-
-               case MLX5_EVENT_TYPE_DEVICE_TRACER:
-                       mlx5_fw_tracer_event(dev, eqe);
-                       break;
-
-               default:
-                       mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
-                                      eqe->type, eq->eqn);
-                       break;
-               }
+               atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
 
                ++eq->cons_index;
                ++set_ci;
@@ -608,30 +232,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 
        eq_update_ci(eq, 1);
 
-       if (cqn != -1)
-               tasklet_schedule(&eq->tasklet_ctx.task);
-
        return IRQ_HANDLED;
 }
 
-/* Some architectures don't latch interrupts when they are disabled, so using
- * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
- * avoid losing them.  It is not recommended to use it, unless this is the last
- * resort.
- */
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
-{
-       u32 count_eqe;
-
-       disable_irq(eq->irqn);
-       count_eqe = eq->cons_index;
-       mlx5_eq_int(eq->irqn, eq);
-       count_eqe = eq->cons_index - count_eqe;
-       enable_irq(eq->irqn);
-
-       return count_eqe;
-}
-
 static void init_eq_buf(struct mlx5_eq *eq)
 {
        struct mlx5_eqe *eqe;
@@ -643,39 +246,35 @@ static void init_eq_buf(struct mlx5_eq *eq)
        }
 }
 
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-                      int nent, u64 mask, const char *name,
-                      enum mlx5_eq_type type)
+static int
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+             struct mlx5_eq_param *param)
 {
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        struct mlx5_cq_table *cq_table = &eq->cq_table;
        u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
        struct mlx5_priv *priv = &dev->priv;
-       irq_handler_t handler;
+       u8 vecidx = param->index;
        __be64 *pas;
        void *eqc;
        int inlen;
        u32 *in;
        int err;
 
+       if (eq_table->irq_info[vecidx].context)
+               return -EEXIST;
+
        /* Init CQ table */
        memset(cq_table, 0, sizeof(*cq_table));
        spin_lock_init(&cq_table->lock);
        INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
 
-       eq->type = type;
-       eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+       eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
        eq->cons_index = 0;
        err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
        if (err)
                return err;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (type == MLX5_EQ_TYPE_PF)
-               handler = mlx5_eq_pf_int;
-       else
-#endif
-               handler = mlx5_eq_int;
-
        init_eq_buf(eq);
 
        inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
@@ -691,7 +290,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        mlx5_fill_page_array(&eq->buf, pas);
 
        MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
-       MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+       MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);
 
        eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
        MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -704,15 +303,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        if (err)
                goto err_in;
 
-       snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
+       snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
                 name, pci_name(dev->pdev));
+       eq_table->irq_info[vecidx].context = param->context;
 
+       eq->vecidx = vecidx;
        eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
        eq->irqn = pci_irq_vector(dev->pdev, vecidx);
        eq->dev = dev;
        eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
-       err = request_irq(eq->irqn, handler, 0,
-                         priv->irq_info[vecidx].name, eq);
+       err = request_irq(eq->irqn, param->handler, 0,
+                         eq_table->irq_info[vecidx].name, param->context);
        if (err)
                goto err_eq;
 
@@ -720,21 +321,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        if (err)
                goto err_irq;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (type == MLX5_EQ_TYPE_PF) {
-               err = init_pf_ctx(&eq->pf_ctx, name);
-               if (err)
-                       goto err_irq;
-       } else
-#endif
-       {
-               INIT_LIST_HEAD(&eq->tasklet_ctx.list);
-               INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
-               spin_lock_init(&eq->tasklet_ctx.lock);
-               tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
-                            (unsigned long)&eq->tasklet_ctx);
-       }
-
        /* EQs are created in ARMED state
         */
        eq_update_ci(eq, 1);
@@ -756,27 +342,25 @@ err_buf:
        return err;
 }
 
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+       struct mlx5_irq_info *irq_info;
        int err;
 
+       irq_info = &eq_table->irq_info[eq->vecidx];
+
        mlx5_debug_eq_remove(dev, eq);
-       free_irq(eq->irqn, eq);
+
+       free_irq(eq->irqn, irq_info->context);
+       irq_info->context = NULL;
+
        err = mlx5_cmd_destroy_eq(dev, eq->eqn);
        if (err)
                mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
                               eq->eqn);
        synchronize_irq(eq->irqn);
 
-       if (eq->type == MLX5_EQ_TYPE_COMP) {
-               tasklet_disable(&eq->tasklet_ctx.task);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       } else if (eq->type == MLX5_EQ_TYPE_PF) {
-               cancel_work_sync(&eq->pf_ctx.work);
-               destroy_workqueue(eq->pf_ctx.wq);
-               mempool_destroy(eq->pf_ctx.pool);
-#endif
-       }
        mlx5_buf_free(dev, &eq->buf);
 
        return err;
@@ -816,28 +400,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
        return 0;
 }
 
-int mlx5_eq_init(struct mlx5_core_dev *dev)
+int mlx5_eq_table_init(struct mlx5_core_dev *dev)
 {
-       int err;
+       struct mlx5_eq_table *eq_table;
+       int i, err;
 
-       spin_lock_init(&dev->priv.eq_table.lock);
+       eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
+       if (!eq_table)
+               return -ENOMEM;
+
+       dev->priv.eq_table = eq_table;
 
        err = mlx5_eq_debugfs_init(dev);
+       if (err)
+               goto kvfree_eq_table;
+
+       mutex_init(&eq_table->lock);
+       for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
+               ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
 
+       return 0;
+
+kvfree_eq_table:
+       kvfree(eq_table);
+       dev->priv.eq_table = NULL;
        return err;
 }
 
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
 {
        mlx5_eq_debugfs_cleanup(dev);
+       kvfree(dev->priv.eq_table);
 }
 
-int mlx5_start_eqs(struct mlx5_core_dev *dev)
+/* Async EQs */
+
+static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+                          struct mlx5_eq *eq, struct mlx5_eq_param *param)
 {
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+       int err;
+
+       mutex_lock(&eq_table->lock);
+       if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
+               err = -ENOSPC;
+               goto unlock;
+       }
+
+       err = create_map_eq(dev, eq, name, param);
+unlock:
+       mutex_unlock(&eq_table->lock);
+       return err;
+}
+
+static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        int err;
 
+       mutex_lock(&eq_table->lock);
+       err = destroy_unmap_eq(dev, eq);
+       mutex_unlock(&eq_table->lock);
+       return err;
+}
+
+static int cq_err_event_notifier(struct notifier_block *nb,
+                                unsigned long type, void *data)
+{
+       struct mlx5_eq_table *eqt;
+       struct mlx5_core_cq *cq;
+       struct mlx5_eqe *eqe;
+       struct mlx5_eq *eq;
+       u32 cqn;
+
+       /* type == MLX5_EVENT_TYPE_CQ_ERROR */
+
+       eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
+       eq  = &eqt->async_eq;
+       eqe = data;
+
+       cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+       mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+                      cqn, eqe->data.cq_err.syndrome);
+
+       cq = mlx5_eq_cq_get(eq, cqn);
+       if (unlikely(!cq)) {
+               mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+               return NOTIFY_OK;
+       }
+
+       cq->event(cq, type);
+
+       mlx5_cq_put(cq);
+
+       return NOTIFY_OK;
+}
+
+static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
+{
+       u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+
        if (MLX5_VPORT_MANAGER(dev))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
 
@@ -865,127 +527,521 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
        if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
 
-       err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
-                                MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
-                                "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+       if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
+               async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
+
+       return async_event_mask;
+}
+
+static int create_async_eqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_eq_param param = {};
+       int err;
+
+       MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+       mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+
+       param = (struct mlx5_eq_param) {
+               .index = MLX5_EQ_CMD_IDX,
+               .mask = 1ull << MLX5_EVENT_TYPE_CMD,
+               .nent = MLX5_NUM_CMD_EQE,
+               .context = &table->cmd_eq,
+               .handler = mlx5_eq_async_int,
+       };
+       err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
-               return err;
+               goto err0;
        }
 
        mlx5_cmd_use_events(dev);
 
-       err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-                                MLX5_NUM_ASYNC_EQE, async_event_mask,
-                                "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+       param = (struct mlx5_eq_param) {
+               .index = MLX5_EQ_ASYNC_IDX,
+               .mask = gather_async_events_mask(dev),
+               .nent = MLX5_NUM_ASYNC_EQE,
+               .context = &table->async_eq,
+               .handler = mlx5_eq_async_int,
+       };
+       err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
                goto err1;
        }
 
-       err = mlx5_create_map_eq(dev, &table->pages_eq,
-                                MLX5_EQ_VEC_PAGES,
-                                /* TODO: sriov max_vf + */ 1,
-                                1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
-                                MLX5_EQ_TYPE_ASYNC);
+       param = (struct mlx5_eq_param) {
+               .index = MLX5_EQ_PAGEREQ_IDX,
+               .mask =  1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
+               .nent = /* TODO: sriov max_vf + */ 1,
+               .context = &table->pages_eq,
+               .handler = mlx5_eq_async_int,
+       };
+       err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
                goto err2;
        }
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (MLX5_CAP_GEN(dev, pg)) {
-               err = mlx5_create_map_eq(dev, &table->pfault_eq,
-                                        MLX5_EQ_VEC_PFAULT,
-                                        MLX5_NUM_ASYNC_EQE,
-                                        1 << MLX5_EVENT_TYPE_PAGE_FAULT,
-                                        "mlx5_page_fault_eq",
-                                        MLX5_EQ_TYPE_PF);
-               if (err) {
-                       mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
-                                      err);
-                       goto err3;
-               }
-       }
-
-       return err;
-err3:
-       mlx5_destroy_unmap_eq(dev, &table->pages_eq);
-#else
        return err;
-#endif
 
 err2:
-       mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       destroy_async_eq(dev, &table->async_eq);
 
 err1:
        mlx5_cmd_use_polling(dev);
-       mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+       destroy_async_eq(dev, &table->cmd_eq);
+err0:
+       mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
        return err;
 }
 
-void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+static void destroy_async_eqs(struct mlx5_core_dev *dev)
 {
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
+       struct mlx5_eq_table *table = dev->priv.eq_table;
        int err;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (MLX5_CAP_GEN(dev, pg)) {
-               err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
-               if (err)
-                       mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
-                                     err);
-       }
-#endif
-
-       err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+       err = destroy_async_eq(dev, &table->pages_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
                              err);
 
-       err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       err = destroy_async_eq(dev, &table->async_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
                              err);
+
        mlx5_cmd_use_polling(dev);
 
-       err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+       err = destroy_async_eq(dev, &table->cmd_eq);
        if (err)
                mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
                              err);
+
+       mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 }
 
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-                      u32 *out, int outlen)
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
 {
-       u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+       return &dev->priv.eq_table->async_eq;
+}
 
-       MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
-       MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
+{
+       synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+}
+
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
+{
+       synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+}
+
+/* Generic EQ API for mlx5_core consumers
+ * Needed For RDMA ODP EQ for now
+ */
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+                      struct mlx5_eq_param *param)
+{
+       struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
+       int err;
+
+       if (!eq)
+               return ERR_PTR(-ENOMEM);
+
+       err = create_async_eq(dev, name, eq, param);
+       if (err) {
+               kvfree(eq);
+               eq = ERR_PTR(err);
+       }
+
+       return eq;
+}
+EXPORT_SYMBOL(mlx5_eq_create_generic);
+
+int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       int err;
+
+       if (IS_ERR(eq))
+               return -EINVAL;
+
+       err = destroy_async_eq(dev, eq);
+       if (err)
+               goto out;
+
+       kvfree(eq);
+out:
+       return err;
+}
+EXPORT_SYMBOL(mlx5_eq_destroy_generic);
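+
+/* Illustrative usage sketch of the generic EQ API above.  The index, mask,
+ * handler and context values are hypothetical placeholders, not part of this
+ * patch; a consumer fills an mlx5_eq_param and pairs create with destroy:
+ *
+ *     struct mlx5_eq_param param = {
+ *             .index   = MY_FREE_ASYNC_VECTOR,   hypothetical free async vector
+ *             .nent    = MLX5_NUM_ASYNC_EQE,
+ *             .mask    = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT,
+ *             .context = my_ctx,                 passed back to the handler
+ *             .handler = my_irq_handler,         consumer irq_handler_t
+ *     };
+ *     struct mlx5_eq *eq = mlx5_eq_create_generic(dev, "my_eq", &param);
+ *
+ *     if (IS_ERR(eq))
+ *             return PTR_ERR(eq);
+ *     ...
+ *     mlx5_eq_destroy_generic(dev, eq);
+ */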
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
+{
+       u32 ci = eq->cons_index + cc;
+       struct mlx5_eqe *eqe;
+
+       eqe = get_eqe(eq, ci & (eq->nent - 1));
+       eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+       /* Make sure we read EQ entry contents after we've
+        * checked the ownership bit.
+        */
+       if (eqe)
+               dma_rmb();
+
+       return eqe;
+}
+EXPORT_SYMBOL(mlx5_eq_get_eqe);
+
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
+{
+       __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+       u32 val;
+
+       eq->cons_index += cc;
+       val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+       __raw_writel((__force u32)cpu_to_be32(val), addr);
+       /* We still want ordering, just not swabbing, so add a barrier */
+       mb();
+}
+EXPORT_SYMBOL(mlx5_eq_update_ci);
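+
+/* Illustrative polling sketch: mlx5_eq_get_eqe() and mlx5_eq_update_ci() let
+ * a consumer drain its generic EQ outside of an interrupt handler.  handle()
+ * below is a hypothetical placeholder for consumer-specific processing:
+ *
+ *     struct mlx5_eqe *eqe;
+ *     int cc = 0;
+ *
+ *     while ((eqe = mlx5_eq_get_eqe(eq, cc))) {
+ *             handle(eqe);
+ *             cc++;
+ *     }
+ *     mlx5_eq_update_ci(eq, cc, true);   advance the CI and re-arm the EQ
+ */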
+
+/* Completion EQs */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+       int irq = pci_irq_vector(mdev->pdev, vecidx);
+       struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+       if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
+               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+               return -ENOMEM;
+       }
+
+       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+                       irq_info->mask);
+
+       if (IS_ENABLED(CONFIG_SMP) &&
+           irq_set_affinity_hint(irq, irq_info->mask))
+               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+       return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, vecidx);
+       struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
+
+       irq_set_affinity_hint(irq, NULL);
+       free_cpumask_var(irq_info->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int err;
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
+               err = set_comp_irq_affinity_hint(mdev, i);
+               if (err)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       for (i--; i >= 0; i--)
+               clear_comp_irq_affinity_hint(mdev, i);
+
+       return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
+               clear_comp_irq_affinity_hint(mdev, i);
+}
+
+static void destroy_comp_eqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_eq_comp *eq, *n;
+
+       clear_comp_irqs_affinity_hints(dev);
+
+#ifdef CONFIG_RFS_ACCEL
+       if (table->rmap) {
+               free_irq_cpu_rmap(table->rmap);
+               table->rmap = NULL;
+       }
+#endif
+       list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+               list_del(&eq->list);
+               if (destroy_unmap_eq(dev, &eq->core))
+                       mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
+                                      eq->core.eqn);
+               tasklet_disable(&eq->tasklet_ctx.task);
+               kfree(eq);
+       }
+}
+
+static int create_comp_eqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       char name[MLX5_MAX_IRQ_NAME];
+       struct mlx5_eq_comp *eq;
+       int ncomp_vec;
+       int nent;
+       int err;
+       int i;
+
+       INIT_LIST_HEAD(&table->comp_eqs_list);
+       ncomp_vec = table->num_comp_vectors;
+       nent = MLX5_COMP_EQ_SIZE;
+#ifdef CONFIG_RFS_ACCEL
+       table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
+       if (!table->rmap)
+               return -ENOMEM;
+#endif
+       for (i = 0; i < ncomp_vec; i++) {
+               int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+               struct mlx5_eq_param param = {};
+
+               eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+               if (!eq) {
+                       err = -ENOMEM;
+                       goto clean;
+               }
+
+               INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+               INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+               spin_lock_init(&eq->tasklet_ctx.lock);
+               tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+                            (unsigned long)&eq->tasklet_ctx);
+
+#ifdef CONFIG_RFS_ACCEL
+               irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
+#endif
+               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+               param = (struct mlx5_eq_param) {
+                       .index = vecidx,
+                       .mask = 0,
+                       .nent = nent,
+                       .context = &eq->core,
+                       .handler = mlx5_eq_comp_int
+               };
+               err = create_map_eq(dev, &eq->core, name, &param);
+               if (err) {
+                       kfree(eq);
+                       goto clean;
+               }
+               mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
+               /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
+               list_add_tail(&eq->list, &table->comp_eqs_list);
+       }
+
+       err = set_comp_irq_affinity_hints(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+               goto clean;
+       }
+
+       return 0;
+
+clean:
+       destroy_comp_eqs(dev);
+       return err;
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+                   unsigned int *irqn)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_eq_comp *eq, *n;
+       int err = -ENOENT;
+       int i = 0;
+
+       list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+               if (i++ == vector) {
+                       *eqn = eq->core.eqn;
+                       *irqn = eq->core.irqn;
+                       err = 0;
+                       break;
+               }
+       }
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
+{
+       return dev->priv.eq_table->num_comp_vectors;
+}
+EXPORT_SYMBOL(mlx5_comp_vectors_count);
+
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
+{
+       /* TODO: consider irq_get_affinity_mask(irq) */
+       return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+}
+EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
+
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+       return dev->priv.eq_table->rmap;
+#else
+       return NULL;
+#endif
+}
+
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
+{
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_eq_comp *eq;
+
+       list_for_each_entry(eq, &table->comp_eqs_list, list) {
+               if (eq->core.eqn == eqn)
+                       return eq;
+       }
+
+       return ERR_PTR(-ENOENT);
 }
 
 /* This function should only be called after mlx5_cmd_force_teardown_hca */
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       struct mlx5_eq *eq;
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+       int i, max_eqs;
+
+       clear_comp_irqs_affinity_hints(dev);
 
 #ifdef CONFIG_RFS_ACCEL
-       if (dev->rmap) {
-               free_irq_cpu_rmap(dev->rmap);
-               dev->rmap = NULL;
+       if (table->rmap) {
+               free_irq_cpu_rmap(table->rmap);
+               table->rmap = NULL;
        }
 #endif
-       list_for_each_entry(eq, &table->comp_eqs_list, list)
-               free_irq(eq->irqn, eq);
-
-       free_irq(table->pages_eq.irqn, &table->pages_eq);
-       free_irq(table->async_eq.irqn, &table->async_eq);
-       free_irq(table->cmd_eq.irqn, &table->cmd_eq);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       if (MLX5_CAP_GEN(dev, pg))
-               free_irq(table->pfault_eq.irqn, &table->pfault_eq);
-#endif
+
+       mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+       max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
+       for (i = max_eqs - 1; i >= 0; i--) {
+               if (!table->irq_info[i].context)
+                       continue;
+               free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
+               table->irq_info[i].context = NULL;
+       }
+       mutex_unlock(&table->lock);
+       pci_free_irq_vectors(dev->pdev);
+}
+
+static int alloc_irq_vectors(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_eq_table *table = priv->eq_table;
+       int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+                     MLX5_CAP_GEN(dev, max_num_eqs) :
+                     1 << MLX5_CAP_GEN(dev, log_max_eq);
+       int nvec;
+       int err;
+
+       nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+              MLX5_EQ_VEC_COMP_BASE;
+       nvec = min_t(int, nvec, num_eqs);
+       if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+               return -ENOMEM;
+
+       table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
+       if (!table->irq_info)
+               return -ENOMEM;
+
+       nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
+                                    nvec, PCI_IRQ_MSIX);
+       if (nvec < 0) {
+               err = nvec;
+               goto err_free_irq_info;
+       }
+
+       table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+
+       return 0;
+
+err_free_irq_info:
+       kfree(table->irq_info);
+       return err;
+}
+
+static void free_irq_vectors(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+
        pci_free_irq_vectors(dev->pdev);
+       kfree(priv->eq_table->irq_info);
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+       int err;
+
+       err = alloc_irq_vectors(dev);
+       if (err) {
+               mlx5_core_err(dev, "alloc irq vectors failed\n");
+               return err;
+       }
+
+       err = create_async_eqs(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to create async EQs\n");
+               goto err_async_eqs;
+       }
+
+       err = create_comp_eqs(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to create completion EQs\n");
+               goto err_comp_eqs;
+       }
+
+       return 0;
+err_comp_eqs:
+       destroy_async_eqs(dev);
+err_async_eqs:
+       free_irq_vectors(dev);
+       return err;
+}
+
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
+{
+       destroy_comp_eqs(dev);
+       destroy_async_eqs(dev);
+       free_irq_vectors(dev);
+}
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+       struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+       if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+               return -EINVAL;
+
+       return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
+}
+
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
+{
+       struct mlx5_eq_table *eqt = dev->priv.eq_table;
+
+       if (nb->event_type >= MLX5_EVENT_TYPE_MAX)
+               return -EINVAL;
+
+       return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
 }
index d004957328f9ca9daa6b46c375310b1f20d1af31..e6a9b19d86262a5b00129da748f6a25005e65549 100644 (file)
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "eswitch.h"
 #include "fs_core.h"
+#include "lib/eq.h"
 
 #define UPLINK_VPORT 0xFFFF
 
@@ -1567,7 +1569,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
        /* Mark this vport as disabled to discard new events */
        vport->enabled = false;
 
-       synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
        /* Wait for current already scheduled events to complete */
        flush_workqueue(esw->work_queue);
        /* Disable events from this vport */
@@ -1593,10 +1594,25 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
        mutex_unlock(&esw->state_lock);
 }
 
+static int eswitch_vport_event(struct notifier_block *nb,
+                              unsigned long type, void *data)
+{
+       struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb);
+       struct mlx5_eqe *eqe = data;
+       struct mlx5_vport *vport;
+       u16 vport_num;
+
+       vport_num = be16_to_cpu(eqe->data.vport_change.vport_num);
+       vport = &esw->vports[vport_num];
+       if (vport->enabled)
+               queue_work(esw->work_queue, &vport->vport_change_handler);
+
+       return NOTIFY_OK;
+}
+
 /* Public E-Switch API */
 #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
 
-
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
        int err;
@@ -1640,6 +1656,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        for (i = 0; i <= nvfs; i++)
                esw_enable_vport(esw, i, enabled_events);
 
+       if (mode == SRIOV_LEGACY) {
+               MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+               mlx5_eq_notifier_register(esw->dev, &esw->nb);
+       }
+
        esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
                 esw->enabled_vports);
        return 0;
@@ -1669,6 +1690,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
        mc_promisc = &esw->mc_promisc;
        nvports = esw->enabled_vports;
 
+       if (esw->mode == SRIOV_LEGACY)
+               mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+
        for (i = 0; i < esw->total_vports; i++)
                esw_disable_vport(esw, i);
 
@@ -1777,23 +1801,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
        kfree(esw);
 }
 
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
-{
-       struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
-       u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
-       struct mlx5_vport *vport;
-
-       if (!esw) {
-               pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
-                       vport_num);
-               return;
-       }
-
-       vport = &esw->vports[vport_num];
-       if (vport->enabled)
-               queue_work(esw->work_queue, &vport->vport_change_handler);
-}
-
 /* Vport Administration */
 #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
 
index aaafc9f171151db2f273f7eb12b3d2a5d12d93c6..480ffa294867db7c70b5d2f93d3fe7f1abf03dda 100644 (file)
@@ -181,6 +181,7 @@ struct esw_mc_addr { /* SRIOV only */
 
 struct mlx5_eswitch {
        struct mlx5_core_dev    *dev;
+       struct mlx5_nb          nb;
        struct mlx5_eswitch_fdb fdb_table;
        struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
        struct workqueue_struct *work_queue;
@@ -211,7 +212,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw);
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
@@ -352,7 +352,6 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
 static inline int  mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
 static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
 
index 9eac137790f5aa2222522344139395c5bc5aed11..4d7b65df32efcdb2a4eeff538f7a92405b2565c8 100644 (file)
@@ -125,8 +125,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                dest[i].vport.num = attr->out_rep[j]->vport;
                                dest[i].vport.vhca_id =
                                        MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
-                               dest[i].vport.vhca_id_valid =
-                                       !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+                               if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+                                       dest[i].vport.flags |=
+                                               MLX5_FLOW_DEST_VPORT_VHCA_ID;
                                i++;
                        }
                }
@@ -220,7 +221,8 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
                dest[i].vport.num = attr->out_rep[i]->vport;
                dest[i].vport.vhca_id =
                        MLX5_CAP_GEN(attr->out_mdev[i], vhca_id);
-               dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+               if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+                       dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
        }
        dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
        dest[i].ft = fwd_fdb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
new file mode 100644 (file)
index 0000000..fbc42b7
--- /dev/null
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
+
+struct mlx5_event_nb {
+       struct mlx5_nb  nb;
+       void           *ctx;
+};
+
+/* General event handlers for the low-level mlx5_core driver
+ *
+ * Other major, feature-specific events (clock, eswitch, fpga, FW tracer and
+ * many others) are handled elsewhere, by separate notifier callbacks
+ * registered by those mlx5 components.
+ */
+static int any_notifier(struct notifier_block *, unsigned long, void *);
+static int temp_warn(struct notifier_block *, unsigned long, void *);
+static int port_module(struct notifier_block *, unsigned long, void *);
+
+/* handler which forwards the event to events->nh, driver notifiers */
+static int forward_event(struct notifier_block *, unsigned long, void *);
+
+static struct mlx5_nb events_nbs_ref[] = {
+       /* Events to be processed by mlx5_core */
+       {.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
+       {.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
+       {.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+
+       /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
+       /* QP/WQ resource events to forward */
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
+       /* SRQ events */
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
+       {.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
+};
+
+struct mlx5_events {
+       struct mlx5_core_dev *dev;
+       struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
+       /* driver notifier chain */
+       struct atomic_notifier_head nh;
+       /* port module events stats */
+       struct mlx5_pme_stats pme_stats;
+};
+
+static const char *eqe_type_str(u8 type)
+{
+       switch (type) {
+       case MLX5_EVENT_TYPE_COMP:
+               return "MLX5_EVENT_TYPE_COMP";
+       case MLX5_EVENT_TYPE_PATH_MIG:
+               return "MLX5_EVENT_TYPE_PATH_MIG";
+       case MLX5_EVENT_TYPE_COMM_EST:
+               return "MLX5_EVENT_TYPE_COMM_EST";
+       case MLX5_EVENT_TYPE_SQ_DRAINED:
+               return "MLX5_EVENT_TYPE_SQ_DRAINED";
+       case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+               return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+       case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+               return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+       case MLX5_EVENT_TYPE_CQ_ERROR:
+               return "MLX5_EVENT_TYPE_CQ_ERROR";
+       case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+               return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+       case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+               return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+       case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+               return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+       case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+               return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+       case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+               return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+       case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+               return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+       case MLX5_EVENT_TYPE_PORT_CHANGE:
+               return "MLX5_EVENT_TYPE_PORT_CHANGE";
+       case MLX5_EVENT_TYPE_GPIO_EVENT:
+               return "MLX5_EVENT_TYPE_GPIO_EVENT";
+       case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+               return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+       case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+               return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+       case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+               return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+       case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+               return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+       case MLX5_EVENT_TYPE_STALL_EVENT:
+               return "MLX5_EVENT_TYPE_STALL_EVENT";
+       case MLX5_EVENT_TYPE_CMD:
+               return "MLX5_EVENT_TYPE_CMD";
+       case MLX5_EVENT_TYPE_PAGE_REQUEST:
+               return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+       case MLX5_EVENT_TYPE_PAGE_FAULT:
+               return "MLX5_EVENT_TYPE_PAGE_FAULT";
+       case MLX5_EVENT_TYPE_PPS_EVENT:
+               return "MLX5_EVENT_TYPE_PPS_EVENT";
+       case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+               return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+       case MLX5_EVENT_TYPE_FPGA_ERROR:
+               return "MLX5_EVENT_TYPE_FPGA_ERROR";
+       case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+               return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+       case MLX5_EVENT_TYPE_GENERAL_EVENT:
+               return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+       case MLX5_EVENT_TYPE_MONITOR_COUNTER:
+               return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
+       case MLX5_EVENT_TYPE_DEVICE_TRACER:
+               return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+       default:
+               return "Unrecognized event";
+       }
+}
+
+/* handles all FW events, type == eqe->type */
+static int any_notifier(struct notifier_block *nb,
+                       unsigned long type, void *data)
+{
+       struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+       struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_eqe      *eqe      = data;
+
+       mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
+                     eqe_type_str(eqe->type), eqe->sub_type);
+       return NOTIFY_OK;
+}
+
+/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
+static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
+{
+       struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+       struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_eqe      *eqe      = data;
+       u64 value_lsb;
+       u64 value_msb;
+
+       value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+       value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+       mlx5_core_warn(events->dev,
+                      "High temperature on sensors with bit set %llx %llx",
+                      value_msb, value_lsb);
+
+       return NOTIFY_OK;
+}
+
+/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
+{
+       switch (status) {
+       case MLX5_MODULE_STATUS_PLUGGED:
+               return "Cable plugged";
+       case MLX5_MODULE_STATUS_UNPLUGGED:
+               return "Cable unplugged";
+       case MLX5_MODULE_STATUS_ERROR:
+               return "Cable error";
+       case MLX5_MODULE_STATUS_DISABLED:
+               return "Cable disabled";
+       default:
+               return "Unknown status";
+       }
+}
+
+static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
+{
+       switch (error) {
+       case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
+               return "Power budget exceeded";
+       case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
+               return "Long Range for non MLNX cable";
+       case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
+               return "Bus stuck (I2C or data shorted)";
+       case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
+               return "No EEPROM/retry timeout";
+       case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
+               return "Enforce part number list";
+       case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
+               return "Unknown identifier";
+       case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
+               return "High Temperature";
+       case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
+               return "Bad or shorted cable/module";
+       case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
+               return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
+       default:
+               return "Unknown error";
+       }
+}
+
+/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
+static int port_module(struct notifier_block *nb, unsigned long type, void *data)
+{
+       struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+       struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_eqe      *eqe      = data;
+
+       enum port_module_event_status_type module_status;
+       enum port_module_event_error_type error_type;
+       struct mlx5_eqe_port_module *module_event_eqe;
+       const char *status_str, *error_str;
+       u8 module_num;
+
+       module_event_eqe = &eqe->data.port_module;
+       module_num = module_event_eqe->module;
+       module_status = module_event_eqe->module_status &
+                       PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+       error_type = module_event_eqe->error_type &
+                    PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+       if (module_status < MLX5_MODULE_STATUS_NUM)
+               events->pme_stats.status_counters[module_status]++;
+       status_str = mlx5_pme_status_to_string(module_status);
+
+       if (module_status == MLX5_MODULE_STATUS_ERROR) {
+               if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
+                       events->pme_stats.error_counters[error_type]++;
+               error_str = mlx5_pme_error_to_string(error_type);
+       }
+
+       if (!printk_ratelimit())
+               return NOTIFY_OK;
+
+       if (module_status == MLX5_MODULE_STATUS_ERROR)
+               mlx5_core_err(events->dev,
+                             "Port module event[error]: module %u, %s, %s\n",
+                             module_num, status_str, error_str);
+       else
+               mlx5_core_info(events->dev,
+                              "Port module event: module %u, %s\n",
+                              module_num, status_str);
+
+       return NOTIFY_OK;
+}
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
+{
+       *stats = dev->priv.events->pme_stats;
+}
+
+/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
+static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+       struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
+       struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_eqe      *eqe      = data;
+
+       mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
+                     eqe_type_str(eqe->type), eqe->sub_type);
+       atomic_notifier_call_chain(&events->nh, event, data);
+       return NOTIFY_OK;
+}
+
+int mlx5_events_init(struct mlx5_core_dev *dev)
+{
+       struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
+
+       if (!events)
+               return -ENOMEM;
+
+       ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
+       events->dev = dev;
+       dev->priv.events = events;
+       return 0;
+}
+
+void mlx5_events_cleanup(struct mlx5_core_dev *dev)
+{
+       kvfree(dev->priv.events);
+}
+
+void mlx5_events_start(struct mlx5_core_dev *dev)
+{
+       struct mlx5_events *events = dev->priv.events;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
+               events->notifiers[i].nb  = events_nbs_ref[i];
+               events->notifiers[i].ctx = events;
+               mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
+       }
+}
+
+void mlx5_events_stop(struct mlx5_core_dev *dev)
+{
+       struct mlx5_events *events = dev->priv.events;
+       int i;
+
+       for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
+               mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+}
+
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+       struct mlx5_events *events = dev->priv.events;
+
+       return atomic_notifier_chain_register(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_register);
+
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
+{
+       struct mlx5_events *events = dev->priv.events;
+
+       return atomic_notifier_chain_unregister(&events->nh, nb);
+}
+EXPORT_SYMBOL(mlx5_notifier_unregister);
+
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
+{
+       return atomic_notifier_call_chain(&events->nh, event, data);
+}
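
For context, a minimal sketch of how an upper-layer driver built on mlx5_core (mlx5e or mlx5_ib) might consume the events forwarded by forward_event() above, using the mlx5_notifier_register()/mlx5_notifier_unregister() entry points exported in this file. The struct and function names below are hypothetical; only the registration API and the "event == eqe->type, data == eqe" convention come from this patch.

#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>

struct my_driver_priv {
	struct mlx5_core_dev *mdev;
	struct notifier_block events_nb;
};

static int my_event_handler(struct notifier_block *nb,
			    unsigned long event, void *data)
{
	struct my_driver_priv *priv =
		container_of(nb, struct my_driver_priv, events_nb);
	struct mlx5_eqe *eqe = data;

	switch (event) {
	case MLX5_EVENT_TYPE_PORT_CHANGE:
		/* react to the port state change on priv->mdev */
		dev_dbg(&priv->mdev->pdev->dev, "port change, subtype %d\n",
			eqe->sub_type);
		return NOTIFY_OK;
	default:
		return NOTIFY_DONE;	/* not interested in this event */
	}
}

static int my_driver_attach(struct my_driver_priv *priv)
{
	priv->events_nb.notifier_call = my_event_handler;
	return mlx5_notifier_register(priv->mdev, &priv->events_nb);
}

static void my_driver_detach(struct my_driver_priv *priv)
{
	mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
}
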
index 8ca1d1949d930d46d0cf7c386383e060640ff792..873541ef4c1b754b15209f7516bcee07378f6763 100644 (file)
@@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
 {
        u8 opcode, status = 0;
 
-       opcode = cqe->op_own >> 4;
+       opcode = get_cqe_opcode(cqe);
 
        switch (opcode) {
        case MLX5_CQE_REQ_ERR:
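
The open-coded "cqe->op_own >> 4" above is replaced by the new shared CQE opcode helper; a sketch of the assumed shape of that helper (the in-tree definition lives in the shared mlx5 headers, this is only an illustration):

static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
{
	/* the opcode occupies the high nibble of op_own */
	return cqe->op_own >> 4;
}
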
index 436a8136f26ff5f8b879eb02313beabb717f779e..27c5f6c7d36a7c4c58c38e2a7bd3702a0ae9eed5 100644 (file)
@@ -36,6 +36,7 @@
 
 #include "mlx5_core.h"
 #include "lib/mlx5.h"
+#include "lib/eq.h"
 #include "fpga/core.h"
 #include "fpga/conn.h"
 
@@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
        return 0;
 }
 
+static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
+
+static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+       struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
+
+       return mlx5_fpga_event(fdev, event, eqe);
+}
+
+static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
+{
+       struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
+
+       return mlx5_fpga_event(fdev, event, eqe);
+}
+
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 {
        struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
        if (err)
                goto out;
 
+       MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
+       MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
+       mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
+       mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
+
        err = mlx5_fpga_conn_device_init(fdev);
        if (err)
                goto err_rsvd_gid;
@@ -201,6 +223,8 @@ err_conn_init:
        mlx5_fpga_conn_device_cleanup(fdev);
 
 err_rsvd_gid:
+       mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+       mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
        mlx5_core_unreserve_gids(mdev, max_num_qps);
 out:
        spin_lock_irqsave(&fdev->state_lock, flags);
@@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
        }
 
        mlx5_fpga_conn_device_cleanup(fdev);
+       mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
+       mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
+
        max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
        mlx5_core_unreserve_gids(mdev, max_num_qps);
 }
@@ -283,9 +310,10 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
        return "Unknown";
 }
 
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
+                          unsigned long event, void *eqe)
 {
-       struct mlx5_fpga_device *fdev = mdev->fpga;
+       void *data = ((struct mlx5_eqe *)eqe)->data.raw;
        const char *event_name;
        bool teardown = false;
        unsigned long flags;
@@ -303,9 +331,7 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
                fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
                break;
        default:
-               mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
-                                          event);
-               return;
+               return NOTIFY_DONE;
        }
 
        spin_lock_irqsave(&fdev->state_lock, flags);
@@ -326,4 +352,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
         */
        if (teardown)
                mlx5_trigger_health_work(fdev->mdev);
+
+       return NOTIFY_OK;
 }
index 3e2355c8df3ffd3e6b2b5da4cb734d976f011528..7e2e871dbf833b059d790bf161feb7264f2df093 100644 (file)
 
 #ifdef CONFIG_MLX5_FPGA
 
+#include <linux/mlx5/eq.h>
+
+#include "lib/eq.h"
 #include "fpga/cmd.h"
 
 /* Represents an Innova device */
 struct mlx5_fpga_device {
        struct mlx5_core_dev *mdev;
+       struct mlx5_nb fpga_err_nb;
+       struct mlx5_nb fpga_qp_err_nb;
        spinlock_t state_lock; /* Protects state transitions */
        enum mlx5_fpga_status state;
        enum mlx5_fpga_image last_admin_image;
@@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev);
 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
-void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
 
 #else
 
@@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
 {
 }
 
-static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
-                                  void *data)
-{
-}
-
 #endif
 
 #endif /* __MLX5_FPGA_CORE_H__ */
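
The same subscription pattern recurs throughout this series (FPGA, clock, page allocator, QP table): embed a struct mlx5_nb in the owning object, bind it to one EQE type with MLX5_NB_INIT(), register it on the device EQ, and recover the owner in the callback with the mlx5_nb_cof() container_of helper. A condensed sketch, with "my_obj" and "my_handler" as hypothetical names:

struct my_obj {
	struct mlx5_core_dev *mdev;
	struct mlx5_nb nb;
};

static int my_handler(struct notifier_block *nb, unsigned long type, void *data)
{
	struct my_obj *obj = mlx5_nb_cof(nb, struct my_obj, nb);
	struct mlx5_eqe *eqe = data;

	dev_dbg(&obj->mdev->pdev->dev, "eqe subtype %d\n", eqe->sub_type);
	return NOTIFY_OK;
}

/* on start:  MLX5_NB_INIT(&obj->nb, my_handler, PORT_CHANGE);
 *            mlx5_eq_notifier_register(obj->mdev, &obj->nb);
 * on stop:   mlx5_eq_notifier_unregister(obj->mdev, &obj->nb);
 */
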
index 08a891f9aadea2af6abbd61a8ca328e43d4d8c1b..c44ccb67c4a36777b3d87ca00ba1fc6027689c85 100644 (file)
@@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
+static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
+                                 struct fs_fte *fte, bool *extended_dest)
+{
+       int fw_log_max_fdb_encap_uplink =
+               MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
+       int num_fwd_destinations = 0;
+       struct mlx5_flow_rule *dst;
+       int num_encap = 0;
+
+       *extended_dest = false;
+       if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+               return 0;
+
+       list_for_each_entry(dst, &fte->node.children, node.list) {
+               if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+                       continue;
+               if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+                   dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+                       num_encap++;
+               num_fwd_destinations++;
+       }
+       if (num_fwd_destinations > 1 && num_encap > 0)
+               *extended_dest = true;
+
+       if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
+               mlx5_core_warn(dev, "FW does not support extended destination");
+               return -EOPNOTSUPP;
+       }
+       if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
+               mlx5_core_warn(dev, "FW does not support more than %d encaps",
+                              1 << fw_log_max_fdb_encap_uplink);
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
 static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
                            int opmod, int modify_mask,
                            struct mlx5_flow_table *ft,
                            unsigned group_id,
                            struct fs_fte *fte)
 {
-       unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
-               fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
        u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
+       bool extended_dest = false;
        struct mlx5_flow_rule *dst;
        void *in_flow_context, *vlan;
        void *in_match_value;
+       unsigned int inlen;
+       int dst_cnt_size;
        void *in_dests;
        u32 *in;
        int err;
 
+       if (mlx5_set_extended_dest(dev, fte, &extended_dest))
+               return -EOPNOTSUPP;
+
+       if (!extended_dest)
+               dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
+       else
+               dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
+
+       inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;
@@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
        MLX5_SET(flow_context, in_flow_context, group_id, group_id);
 
        MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
-       MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
-       MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
-                fte->action.reformat_id);
+       MLX5_SET(flow_context, in_flow_context, extended_destination,
+                extended_dest);
+       if (extended_dest) {
+               u32 action;
+
+               action = fte->action.action &
+                       ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+               MLX5_SET(flow_context, in_flow_context, action, action);
+       } else {
+               MLX5_SET(flow_context, in_flow_context, action,
+                        fte->action.action);
+               MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+                        fte->action.reformat_id);
+       }
        MLX5_SET(flow_context, in_flow_context, modify_header_id,
                 fte->action.modify_id);
 
@@ -387,10 +444,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
                                id = dst->dest_attr.vport.num;
                                MLX5_SET(dest_format_struct, in_dests,
                                         destination_eswitch_owner_vhca_id_valid,
-                                        dst->dest_attr.vport.vhca_id_valid);
+                                        !!(dst->dest_attr.vport.flags &
+                                           MLX5_FLOW_DEST_VPORT_VHCA_ID));
                                MLX5_SET(dest_format_struct, in_dests,
                                         destination_eswitch_owner_vhca_id,
                                         dst->dest_attr.vport.vhca_id);
+                               if (extended_dest) {
+                                       MLX5_SET(dest_format_struct, in_dests,
+                                                packet_reformat,
+                                                !!(dst->dest_attr.vport.flags &
+                                                   MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
+                                       MLX5_SET(extended_dest_format, in_dests,
+                                                packet_reformat_id,
+                                                dst->dest_attr.vport.reformat_id);
+                               }
                                break;
                        default:
                                id = dst->dest_attr.tir_num;
@@ -399,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
                        MLX5_SET(dest_format_struct, in_dests, destination_type,
                                 type);
                        MLX5_SET(dest_format_struct, in_dests, destination_id, id);
-                       in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+                       in_dests += dst_cnt_size;
                        list_size++;
                }
 
@@ -420,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
                        MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
                                 dst->dest_attr.counter_id);
-                       in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+                       in_dests += dst_cnt_size;
                        list_size++;
                }
                if (list_size > max_list_size) {
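
A worked example of the capability checks in mlx5_set_extended_dest() above (the numbers are illustrative):

/* log_max_fdb_encap_uplink = 2  ->  at most 1 << 2 = 4 encap destinations.
 * An FTE with 3 vport forwarding destinations, 2 of which carry
 * MLX5_FLOW_DEST_VPORT_REFORMAT_ID:
 *   num_fwd_destinations = 3, num_encap = 2  ->  *extended_dest = true
 *   2 <= 4, so the command is built with
 *   dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format), the
 *   per-destination packet_reformat_id is carried in each destination
 *   entry, and MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT is cleared from
 *   flow_context.action.
 */
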
index b51ad217da32d608a95a16909fd5577174a755d0..2dc86347af58716633476a3fc0362aca4a4f1c44 100644 (file)
@@ -145,29 +145,6 @@ struct mlx5_flow_table {
        struct rhltable                 fgs_hash;
 };
 
-struct mlx5_fc_cache {
-       u64 packets;
-       u64 bytes;
-       u64 lastuse;
-};
-
-struct mlx5_fc {
-       struct list_head list;
-       struct llist_node addlist;
-       struct llist_node dellist;
-
-       /* last{packets,bytes} members are used when calculating the delta since
-        * last reading
-        */
-       u64 lastpackets;
-       u64 lastbytes;
-
-       u32 id;
-       bool aging;
-
-       struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
-};
-
 struct mlx5_ft_underlay_qp {
        struct list_head list;
        u32 qpn;
index 32accd6b041b3b3d16402bcb03204be730794c3c..c6c28f56aa2942cee106ce298d1c3f59d8462914 100644 (file)
 /* Max number of counters to query in bulk read is 32K */
 #define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
 
+struct mlx5_fc_cache {
+       u64 packets;
+       u64 bytes;
+       u64 lastuse;
+};
+
+struct mlx5_fc {
+       struct list_head list;
+       struct llist_node addlist;
+       struct llist_node dellist;
+
+       /* last{packets,bytes} members are used when calculating the delta since
+        * last reading
+        */
+       u64 lastpackets;
+       u64 lastbytes;
+
+       u32 id;
+       bool aging;
+
+       struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
 /* locking scheme:
  *
  * It is the responsibility of the user to prevent concurrent calls or bad
index 43118de8ee99a19b29ed687c84b45b043f1cab7f..196c07383082f9fe479e930ea4f4a3229c561c43 100644 (file)
@@ -38,6 +38,8 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
+#include "lib/mlx5.h"
 
 enum {
        MLX5_HEALTH_POLL_INTERVAL       = 2 * HZ,
@@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
                    &dev->iseg->cmdq_addr_l_sz);
 }
 
-static void trigger_cmd_completions(struct mlx5_core_dev *dev)
-{
-       unsigned long flags;
-       u64 vector;
-
-       /* wait for pending handlers to complete */
-       synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
-       spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
-       vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
-       if (!vector)
-               goto no_trig;
-
-       vector |= MLX5_TRIGGERED_CMD_COMP;
-       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-
-       mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
-       mlx5_cmd_comp_handler(dev, vector, true);
-       return;
-
-no_trig:
-       spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
-}
-
 static int in_fatal(struct mlx5_core_dev *dev)
 {
        struct mlx5_core_health *health = &dev->priv.health;
@@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
        mlx5_core_err(dev, "start\n");
        if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
                dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
-               trigger_cmd_completions(dev);
+               mlx5_cmd_trigger_completions(dev);
        }
 
-       mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+       mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
        mlx5_core_err(dev, "end\n");
 
 unlock:
index 0d90b1b4a3d388c2793de0a8f688c605f3c3abfd..d27c239e7d6cc3402fca8f23757b55a0aaccdf4d 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/clocksource.h>
 #include <linux/highmem.h>
 #include <rdma/mlx5-abi.h>
+#include "lib/eq.h"
 #include "en.h"
 #include "clock.h"
 
@@ -439,16 +440,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
        clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
 }
 
-void mlx5_pps_event(struct mlx5_core_dev *mdev,
-                   struct mlx5_eqe *eqe)
+static int mlx5_pps_event(struct notifier_block *nb,
+                         unsigned long type, void *data)
 {
-       struct mlx5_clock *clock = &mdev->clock;
+       struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb);
+       struct mlx5_core_dev *mdev = clock->mdev;
        struct ptp_clock_event ptp_event;
-       struct timespec64 ts;
-       u64 nsec_now, nsec_delta;
        u64 cycles_now, cycles_delta;
+       u64 nsec_now, nsec_delta, ns;
+       struct mlx5_eqe *eqe = data;
        int pin = eqe->data.pps.pin;
-       s64 ns;
+       struct timespec64 ts;
        unsigned long flags;
 
        switch (clock->ptp_info.pin_config[pin].func) {
@@ -463,6 +465,7 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
                } else {
                        ptp_event.type = PTP_CLOCK_EXTTS;
                }
+               /* TODO: clock->ptp can be NULL if ptp_clock_register fails */
                ptp_clock_event(clock->ptp, &ptp_event);
                break;
        case PTP_PF_PEROUT:
@@ -481,8 +484,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
                write_sequnlock_irqrestore(&clock->lock, flags);
                break;
        default:
-               mlx5_core_err(mdev, " Unhandled event\n");
+               mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n",
+                             clock->ptp_info.pin_config[pin].func);
        }
+
+       return NOTIFY_OK;
 }
 
 void mlx5_init_clock(struct mlx5_core_dev *mdev)
@@ -567,6 +573,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
                               PTR_ERR(clock->ptp));
                clock->ptp = NULL;
        }
+
+       MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
+       mlx5_eq_notifier_register(mdev, &clock->pps_nb);
 }
 
 void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
@@ -576,6 +585,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
        if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
                return;
 
+       mlx5_eq_notifier_unregister(mdev, &clock->pps_nb);
        if (clock->ptp) {
                ptp_clock_unregister(clock->ptp);
                clock->ptp = NULL;
index 263cb6e2aeee52e5bbdbd698ab3556531529a14a..31600924bdc367824b2ed9ae199dbc05d17177d7 100644 (file)
@@ -36,7 +36,6 @@
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 void mlx5_init_clock(struct mlx5_core_dev *mdev);
 void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
-void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 
 static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
 {
@@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
 #else
 static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
 static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
-
 static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
 {
        return -1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
new file mode 100644 (file)
index 0000000..c0fb6d7
--- /dev/null
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies */
+
+#ifndef __LIB_MLX5_EQ_H__
+#define __LIB_MLX5_EQ_H__
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eq.h>
+#include <linux/mlx5/cq.h>
+
+#define MLX5_MAX_IRQ_NAME   (32)
+#define MLX5_EQE_SIZE       (sizeof(struct mlx5_eqe))
+
+struct mlx5_eq_tasklet {
+       struct list_head      list;
+       struct list_head      process_list;
+       struct tasklet_struct task;
+       spinlock_t            lock; /* lock completion tasklet list */
+};
+
+struct mlx5_cq_table {
+       spinlock_t              lock;   /* protect radix tree */
+       struct radix_tree_root  tree;
+};
+
+struct mlx5_eq {
+       struct mlx5_core_dev    *dev;
+       struct mlx5_cq_table    cq_table;
+       __be32 __iomem          *doorbell;
+       u32                     cons_index;
+       struct mlx5_frag_buf    buf;
+       int                     size;
+       unsigned int            vecidx;
+       unsigned int            irqn;
+       u8                      eqn;
+       int                     nent;
+       struct mlx5_rsc_debug   *dbg;
+};
+
+struct mlx5_eq_comp {
+       struct mlx5_eq          core; /* Must be first */
+       struct mlx5_eq_tasklet  tasklet_ctx;
+       struct list_head        list;
+};
+
+static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+       return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+       struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+       return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+       __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+       u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+       __raw_writel((__force u32)cpu_to_be32(val), addr);
+       /* We still want ordering, just not swabbing, so add a barrier */
+       mb();
+}
+
+int mlx5_eq_table_init(struct mlx5_core_dev *dev);
+void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_eq_table_create(struct mlx5_core_dev *dev);
+void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
+struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
+void mlx5_cq_tasklet_cb(unsigned long data);
+struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
+#endif
+
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+
+#endif
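
A minimal sketch of the consumer loop these inline helpers are written for (illustrative only; the real handlers live in eq.c and dispatch each EQE to the registered notifiers before re-arming):

static void poll_eq_sketch(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;

	while ((eqe = next_eqe_sw(eq))) {
		/* read the EQE contents only after the ownership bit
		 * says it is valid
		 */
		dma_rmb();

		/* ... dispatch on eqe->type here ... */

		++eq->cons_index;
	}

	eq_update_ci(eq, 1 /* re-arm the EQ */);
}
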
index 7550b1cc8c6aed8bcaf73f3b8ea8d332e093fe3f..397a2847867a8cc42e719ce4b90f9b1ef74dd3a1 100644 (file)
@@ -33,6 +33,8 @@
 #ifndef __LIB_MLX5_H__
 #define __LIB_MLX5_H__
 
+#include "mlx5_core.h"
+
 void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
 void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
 int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
@@ -40,4 +42,38 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
 int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
 void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
 
+/* TODO move to lib/events.h */
+
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK    0xF
+
+enum port_module_event_status_type {
+       MLX5_MODULE_STATUS_PLUGGED   = 0x1,
+       MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+       MLX5_MODULE_STATUS_ERROR     = 0x3,
+       MLX5_MODULE_STATUS_DISABLED  = 0x4,
+       MLX5_MODULE_STATUS_NUM,
+};
+
+enum port_module_event_error_type {
+       MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED    = 0x0,
+       MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX  = 0x1,
+       MLX5_MODULE_EVENT_ERROR_BUS_STUCK                = 0x2,
+       MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT  = 0x3,
+       MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4,
+       MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER       = 0x5,
+       MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE         = 0x6,
+       MLX5_MODULE_EVENT_ERROR_BAD_CABLE                = 0x7,
+       MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc,
+       MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_pme_stats {
+       u64 status_counters[MLX5_MODULE_STATUS_NUM];
+       u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
+void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
+int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+
 #endif
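
mlx5_get_pme_stats() simply snapshots the counters accumulated by the port_module() handler in events.c; a hedged sketch of a caller (the function below is hypothetical, the statistics code in mlx5e is presumably the in-tree user):

static u64 count_module_errors(struct mlx5_core_dev *dev)
{
	struct mlx5_pme_stats stats;
	u64 total = 0;
	int i;

	mlx5_get_pme_stats(dev, &stats);
	for (i = 0; i < MLX5_MODULE_EVENT_ERROR_NUM; i++)
		total += stats.error_counters[i];
	return total;
}
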
index 28132c7dc05f252c6287a3fa6a8a37415de4872c..7789955738127c3d3382dbe64fc540ea14996f8f 100644 (file)
@@ -43,7 +43,6 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
 #include <linux/debugfs.h>
 #include <linux/kmod.h>
 #include <linux/mlx5/mlx5_ifc.h>
@@ -53,6 +52,7 @@
 #endif
 #include <net/devlink.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 #include "fs_core.h"
 #include "lib/mpfs.h"
 #include "eswitch.h"
@@ -319,51 +319,6 @@ static void release_bar(struct pci_dev *pdev)
        pci_release_regions(pdev);
 }
 
-static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_eq_table *table = &priv->eq_table;
-       int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
-                     MLX5_CAP_GEN(dev, max_num_eqs) :
-                     1 << MLX5_CAP_GEN(dev, log_max_eq);
-       int nvec;
-       int err;
-
-       nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
-              MLX5_EQ_VEC_COMP_BASE;
-       nvec = min_t(int, nvec, num_eqs);
-       if (nvec <= MLX5_EQ_VEC_COMP_BASE)
-               return -ENOMEM;
-
-       priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
-       if (!priv->irq_info)
-               return -ENOMEM;
-
-       nvec = pci_alloc_irq_vectors(dev->pdev,
-                       MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-                       PCI_IRQ_MSIX);
-       if (nvec < 0) {
-               err = nvec;
-               goto err_free_irq_info;
-       }
-
-       table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
-       return 0;
-
-err_free_irq_info:
-       kfree(priv->irq_info);
-       return err;
-}
-
-static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-
-       pci_free_irq_vectors(dev->pdev);
-       kfree(priv->irq_info);
-}
-
 struct mlx5_reg_host_endianness {
        u8      he;
        u8      rsvd[15];
@@ -637,177 +592,6 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
        return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
-static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-       struct mlx5_priv *priv  = &mdev->priv;
-       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
-       if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
-               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
-               return -ENOMEM;
-       }
-
-       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
-                       priv->irq_info[i].mask);
-
-       if (IS_ENABLED(CONFIG_SMP) &&
-           irq_set_affinity_hint(irq, priv->irq_info[i].mask))
-               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
-       return 0;
-}
-
-static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-       struct mlx5_priv *priv  = &mdev->priv;
-       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
-
-       irq_set_affinity_hint(irq, NULL);
-       free_cpumask_var(priv->irq_info[i].mask);
-}
-
-static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
-{
-       int err;
-       int i;
-
-       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
-               err = mlx5_irq_set_affinity_hint(mdev, i);
-               if (err)
-                       goto err_out;
-       }
-
-       return 0;
-
-err_out:
-       for (i--; i >= 0; i--)
-               mlx5_irq_clear_affinity_hint(mdev, i);
-
-       return err;
-}
-
-static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
-{
-       int i;
-
-       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
-               mlx5_irq_clear_affinity_hint(mdev, i);
-}
-
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
-                   unsigned int *irqn)
-{
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       struct mlx5_eq *eq, *n;
-       int err = -ENOENT;
-
-       spin_lock(&table->lock);
-       list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
-               if (eq->index == vector) {
-                       *eqn = eq->eqn;
-                       *irqn = eq->irqn;
-                       err = 0;
-                       break;
-               }
-       }
-       spin_unlock(&table->lock);
-
-       return err;
-}
-EXPORT_SYMBOL(mlx5_vector2eqn);
-
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
-{
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       struct mlx5_eq *eq;
-
-       spin_lock(&table->lock);
-       list_for_each_entry(eq, &table->comp_eqs_list, list)
-               if (eq->eqn == eqn) {
-                       spin_unlock(&table->lock);
-                       return eq;
-               }
-
-       spin_unlock(&table->lock);
-
-       return ERR_PTR(-ENOENT);
-}
-
-static void free_comp_eqs(struct mlx5_core_dev *dev)
-{
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       struct mlx5_eq *eq, *n;
-
-#ifdef CONFIG_RFS_ACCEL
-       if (dev->rmap) {
-               free_irq_cpu_rmap(dev->rmap);
-               dev->rmap = NULL;
-       }
-#endif
-       spin_lock(&table->lock);
-       list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
-               list_del(&eq->list);
-               spin_unlock(&table->lock);
-               if (mlx5_destroy_unmap_eq(dev, eq))
-                       mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
-                                      eq->eqn);
-               kfree(eq);
-               spin_lock(&table->lock);
-       }
-       spin_unlock(&table->lock);
-}
-
-static int alloc_comp_eqs(struct mlx5_core_dev *dev)
-{
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       char name[MLX5_MAX_IRQ_NAME];
-       struct mlx5_eq *eq;
-       int ncomp_vec;
-       int nent;
-       int err;
-       int i;
-
-       INIT_LIST_HEAD(&table->comp_eqs_list);
-       ncomp_vec = table->num_comp_vectors;
-       nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
-       dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
-       if (!dev->rmap)
-               return -ENOMEM;
-#endif
-       for (i = 0; i < ncomp_vec; i++) {
-               eq = kzalloc(sizeof(*eq), GFP_KERNEL);
-               if (!eq) {
-                       err = -ENOMEM;
-                       goto clean;
-               }
-
-#ifdef CONFIG_RFS_ACCEL
-               irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
-                                MLX5_EQ_VEC_COMP_BASE + i));
-#endif
-               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
-               err = mlx5_create_map_eq(dev, eq,
-                                        i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
-                                        name, MLX5_EQ_TYPE_COMP);
-               if (err) {
-                       kfree(eq);
-                       goto clean;
-               }
-               mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
-               eq->index = i;
-               spin_lock(&table->lock);
-               list_add_tail(&eq->list, &table->comp_eqs_list);
-               spin_unlock(&table->lock);
-       }
-
-       return 0;
-
-clean:
-       free_comp_eqs(dev);
-       return err;
-}
-
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
        u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]   = {0};
@@ -944,22 +728,26 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto out;
        }
 
-       err = mlx5_eq_init(dev);
+       err = mlx5_eq_table_init(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize eq\n");
                goto out;
        }
 
+       err = mlx5_events_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "failed to initialize events\n");
+               goto err_eq_cleanup;
+       }
+
        err = mlx5_cq_debugfs_init(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
-               goto err_eq_cleanup;
+               goto err_events_cleanup;
        }
 
        mlx5_init_qp_table(dev);
 
-       mlx5_init_srq_table(dev);
-
        mlx5_init_mkey_table(dev);
 
        mlx5_init_reserved_gids(dev);
@@ -1013,12 +801,12 @@ err_rl_cleanup:
 err_tables_cleanup:
        mlx5_vxlan_destroy(dev->vxlan);
        mlx5_cleanup_mkey_table(dev);
-       mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cq_debugfs_cleanup(dev);
-
+err_events_cleanup:
+       mlx5_events_cleanup(dev);
 err_eq_cleanup:
-       mlx5_eq_cleanup(dev);
+       mlx5_eq_table_cleanup(dev);
 
 out:
        return err;
@@ -1036,10 +824,10 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
        mlx5_cleanup_clock(dev);
        mlx5_cleanup_reserved_gids(dev);
        mlx5_cleanup_mkey_table(dev);
-       mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cq_debugfs_cleanup(dev);
-       mlx5_eq_cleanup(dev);
+       mlx5_events_cleanup(dev);
+       mlx5_eq_table_cleanup(dev);
 }
 
 static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
@@ -1131,16 +919,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto reclaim_boot_pages;
        }
 
-       err = mlx5_pagealloc_start(dev);
-       if (err) {
-               dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
-               goto reclaim_boot_pages;
-       }
-
        err = mlx5_cmd_init_hca(dev, sw_owner_id);
        if (err) {
                dev_err(&pdev->dev, "init hca failed\n");
-               goto err_pagealloc_stop;
+               goto reclaim_boot_pages;
        }
 
        mlx5_set_driver_version(dev);
@@ -1161,23 +943,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                }
        }
 
-       err = mlx5_alloc_irq_vectors(dev);
-       if (err) {
-               dev_err(&pdev->dev, "alloc irq vectors failed\n");
-               goto err_cleanup_once;
-       }
-
        dev->priv.uar = mlx5_get_uars_page(dev);
        if (IS_ERR(dev->priv.uar)) {
                dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
                err = PTR_ERR(dev->priv.uar);
-               goto err_disable_msix;
+               goto err_get_uars;
        }
 
-       err = mlx5_start_eqs(dev);
+       mlx5_events_start(dev);
+       mlx5_pagealloc_start(dev);
+
+       err = mlx5_eq_table_create(dev);
        if (err) {
-               dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
-               goto err_put_uars;
+               dev_err(&pdev->dev, "Failed to create EQs\n");
+               goto err_eq_table;
        }
 
        err = mlx5_fw_tracer_init(dev->tracer);
@@ -1186,18 +965,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_fw_tracer;
        }
 
-       err = alloc_comp_eqs(dev);
-       if (err) {
-               dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
-               goto err_comp_eqs;
-       }
-
-       err = mlx5_irq_set_affinity_hints(dev);
-       if (err) {
-               dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
-               goto err_affinity_hints;
-       }
-
        err = mlx5_fpga_device_start(dev);
        if (err) {
                dev_err(&pdev->dev, "fpga device start failed %d\n", err);
@@ -1266,24 +1033,17 @@ err_ipsec_start:
        mlx5_fpga_device_stop(dev);
 
 err_fpga_start:
-       mlx5_irq_clear_affinity_hints(dev);
-
-err_affinity_hints:
-       free_comp_eqs(dev);
-
-err_comp_eqs:
        mlx5_fw_tracer_cleanup(dev->tracer);
 
 err_fw_tracer:
-       mlx5_stop_eqs(dev);
+       mlx5_eq_table_destroy(dev);
 
-err_put_uars:
+err_eq_table:
+       mlx5_pagealloc_stop(dev);
+       mlx5_events_stop(dev);
        mlx5_put_uars_page(dev, priv->uar);
 
-err_disable_msix:
-       mlx5_free_irq_vectors(dev);
-
-err_cleanup_once:
+err_get_uars:
        if (boot)
                mlx5_cleanup_once(dev);
 
@@ -1294,9 +1054,6 @@ err_stop_poll:
                goto out_err;
        }
 
-err_pagealloc_stop:
-       mlx5_pagealloc_stop(dev);
-
 reclaim_boot_pages:
        mlx5_reclaim_startup_pages(dev);
 
@@ -1340,21 +1097,20 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        mlx5_accel_ipsec_cleanup(dev);
        mlx5_accel_tls_cleanup(dev);
        mlx5_fpga_device_stop(dev);
-       mlx5_irq_clear_affinity_hints(dev);
-       free_comp_eqs(dev);
        mlx5_fw_tracer_cleanup(dev->tracer);
-       mlx5_stop_eqs(dev);
+       mlx5_eq_table_destroy(dev);
+       mlx5_pagealloc_stop(dev);
+       mlx5_events_stop(dev);
        mlx5_put_uars_page(dev, priv->uar);
-       mlx5_free_irq_vectors(dev);
        if (cleanup)
                mlx5_cleanup_once(dev);
        mlx5_stop_health_poll(dev, cleanup);
+
        err = mlx5_cmd_teardown_hca(dev);
        if (err) {
                dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
                goto out;
        }
-       mlx5_pagealloc_stop(dev);
        mlx5_reclaim_startup_pages(dev);
        mlx5_core_disable_hca(dev, 0);
        mlx5_cmd_cleanup(dev);
@@ -1364,12 +1120,6 @@ out:
        return err;
 }
 
-struct mlx5_core_event_handler {
-       void (*event)(struct mlx5_core_dev *dev,
-                     enum mlx5_dev_event event,
-                     void *data);
-};
-
 static const struct devlink_ops mlx5_devlink_ops = {
 #ifdef CONFIG_MLX5_ESWITCH
        .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
@@ -1403,7 +1153,6 @@ static int init_one(struct pci_dev *pdev,
        pci_set_drvdata(pdev, dev);
 
        dev->pdev = pdev;
-       dev->event = mlx5_core_event;
        dev->profile = &profile[prof_sel];
 
        INIT_LIST_HEAD(&priv->ctx_list);
@@ -1411,17 +1160,6 @@ static int init_one(struct pci_dev *pdev,
        mutex_init(&dev->pci_status_mutex);
        mutex_init(&dev->intf_state_mutex);
 
-       INIT_LIST_HEAD(&priv->waiting_events_list);
-       priv->is_accum_events = false;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       err = init_srcu_struct(&priv->pfault_srcu);
-       if (err) {
-               dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
-                       err);
-               goto clean_dev;
-       }
-#endif
        mutex_init(&priv->bfregs.reg_head.lock);
        mutex_init(&priv->bfregs.wc_head.lock);
        INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
@@ -1430,7 +1168,7 @@ static int init_one(struct pci_dev *pdev,
        err = mlx5_pci_init(dev, priv);
        if (err) {
                dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
-               goto clean_srcu;
+               goto clean_dev;
        }
 
        err = mlx5_health_init(dev);
@@ -1439,12 +1177,14 @@ static int init_one(struct pci_dev *pdev,
                goto close_pci;
        }
 
-       mlx5_pagealloc_init(dev);
+       err = mlx5_pagealloc_init(dev);
+       if (err)
+               goto err_pagealloc_init;
 
        err = mlx5_load_one(dev, priv, true);
        if (err) {
                dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
-               goto clean_health;
+               goto err_load_one;
        }
 
        request_module_nowait(MLX5_IB_MOD);
@@ -1458,16 +1198,13 @@ static int init_one(struct pci_dev *pdev,
 
 clean_load:
        mlx5_unload_one(dev, priv, true);
-clean_health:
+err_load_one:
        mlx5_pagealloc_cleanup(dev);
+err_pagealloc_init:
        mlx5_health_cleanup(dev);
 close_pci:
        mlx5_pci_close(dev, priv);
-clean_srcu:
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       cleanup_srcu_struct(&priv->pfault_srcu);
 clean_dev:
-#endif
        devlink_free(devlink);
 
        return err;
@@ -1491,9 +1228,6 @@ static void remove_one(struct pci_dev *pdev)
        mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
        mlx5_pci_close(dev, priv);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       cleanup_srcu_struct(&priv->pfault_srcu);
-#endif
        devlink_free(devlink);
 }
 
@@ -1637,7 +1371,6 @@ succeed:
         * kexec. There is no need to cleanup the mlx5_core software
         * contexts.
         */
-       mlx5_irq_clear_affinity_hints(dev);
        mlx5_core_eq_free_irqs(dev);
 
        return 0;
index 0594d0961cb3fa6ba03b00e68ecc0fc3300a1ec7..fd3141a4b3f1a47835fb0f4b46b9ba1c9b7a3502 100644 (file)
@@ -78,6 +78,11 @@ do {                                                                 \
                 __func__, __LINE__, current->pid,                      \
                ##__VA_ARGS__)
 
+#define mlx5_core_warn_once(__dev, format, ...)                                \
+       dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,   \
+                     __func__, __LINE__, current->pid,                 \
+                     ##__VA_ARGS__)
+
 #define mlx5_core_info(__dev, format, ...)                             \
        dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
 
@@ -97,12 +102,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
-
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-                    unsigned long param);
-void mlx5_core_page_fault(struct mlx5_core_dev *dev,
-                         struct mlx5_pagefault *pfault);
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
 void mlx5_recover_device(struct mlx5_core_dev *dev);
@@ -124,28 +123,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-                      int nent, u64 mask, const char *name,
-                      enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-                      u32 *out, int outlen);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
-/* This function should only be called after mlx5_cmd_force_teardown_hca */
-void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
-struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
-u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
-void mlx5_cq_tasklet_cb(unsigned long data);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
 int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 
@@ -159,6 +137,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
 void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove(struct mlx5_core_dev *dev);
 
+int mlx5_events_init(struct mlx5_core_dev *dev);
+void mlx5_events_cleanup(struct mlx5_core_dev *dev);
+void mlx5_events_start(struct mlx5_core_dev *dev);
+void mlx5_events_stop(struct mlx5_core_dev *dev);
+
 void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
 void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
 void mlx5_attach_device(struct mlx5_core_dev *dev);
index e36d3e3675f963c44ff76f6c69a7ac6c72155554..a83b517b07143e68e1aaa8172d1924b7f91888b5 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
 enum {
        MLX5_PAGES_CANT_GIVE    = 0,
@@ -433,15 +434,28 @@ static void pages_work_handler(struct work_struct *work)
        kfree(req);
 }
 
-void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-                                s32 npages)
+static int req_pages_handler(struct notifier_block *nb,
+                            unsigned long type, void *data)
 {
        struct mlx5_pages_req *req;
-
+       struct mlx5_core_dev *dev;
+       struct mlx5_priv *priv;
+       struct mlx5_eqe *eqe;
+       u16 func_id;
+       s32 npages;
+
+       priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb);
+       dev  = container_of(priv, struct mlx5_core_dev, priv);
+       eqe  = data;
+
+       func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+       npages  = be32_to_cpu(eqe->data.req_pages.num_pages);
+       mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+                     func_id, npages);
        req = kzalloc(sizeof(*req), GFP_ATOMIC);
        if (!req) {
                mlx5_core_warn(dev, "failed to allocate pages request\n");
-               return;
+               return NOTIFY_DONE;
        }
 
        req->dev = dev;
@@ -449,6 +463,7 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
        req->npages = npages;
        INIT_WORK(&req->work, pages_work_handler);
        queue_work(dev->priv.pg_wq, &req->work);
+       return NOTIFY_OK;
 }
 
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
@@ -524,29 +539,32 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
        return 0;
 }
 
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
        dev->priv.page_root = RB_ROOT;
        INIT_LIST_HEAD(&dev->priv.free_list);
+       dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+       if (!dev->priv.pg_wq)
+               return -ENOMEM;
+
+       return 0;
 }
 
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
 {
-       /* nothing */
+       destroy_workqueue(dev->priv.pg_wq);
 }
 
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev)
 {
-       dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
-       if (!dev->priv.pg_wq)
-               return -ENOMEM;
-
-       return 0;
+       MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST);
+       mlx5_eq_notifier_register(dev, &dev->priv.pg_nb);
 }
 
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
 {
-       destroy_workqueue(dev->priv.pg_wq);
+       mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb);
+       flush_workqueue(dev->priv.pg_wq);
 }
 
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
index 31a9cbd85689b01fc0bfe9e6c221d73cc7c5fe13..2b82f35f4c35153080cb0716abd68caa6f12d3c7 100644 (file)
@@ -915,63 +915,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
        *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
 }
 
-static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
-       "Cable plugged",   /* MLX5_MODULE_STATUS_PLUGGED    = 0x1 */
-       "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED  = 0x2 */
-       "Cable error",     /* MLX5_MODULE_STATUS_ERROR      = 0x3 */
-};
-
-static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
-       "Power budget exceeded",
-       "Long Range for non MLNX cable",
-       "Bus stuck(I2C or data shorted)",
-       "No EEPROM/retry timeout",
-       "Enforce part number list",
-       "Unknown identifier",
-       "High Temperature",
-       "Bad or shorted cable/module",
-       "Unknown status",
-};
-
-void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
-{
-       enum port_module_event_status_type module_status;
-       enum port_module_event_error_type error_type;
-       struct mlx5_eqe_port_module *module_event_eqe;
-       struct mlx5_priv *priv = &dev->priv;
-       u8 module_num;
-
-       module_event_eqe = &eqe->data.port_module;
-       module_num = module_event_eqe->module;
-       module_status = module_event_eqe->module_status &
-                       PORT_MODULE_EVENT_MODULE_STATUS_MASK;
-       error_type = module_event_eqe->error_type &
-                    PORT_MODULE_EVENT_ERROR_TYPE_MASK;
-
-       if (module_status < MLX5_MODULE_STATUS_ERROR) {
-               priv->pme_stats.status_counters[module_status - 1]++;
-       } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
-               if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
-                       /* Unknown error type */
-                       error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
-               priv->pme_stats.error_counters[error_type]++;
-       }
-
-       if (!printk_ratelimit())
-               return;
-
-       if (module_status < MLX5_MODULE_STATUS_ERROR)
-               mlx5_core_info(dev,
-                              "Port module event: module %u, %s\n",
-                              module_num, mlx5_pme_status[module_status - 1]);
-
-       else if (module_status == MLX5_MODULE_STATUS_ERROR)
-               mlx5_core_info(dev,
-                              "Port module event[error]: module %u, %s, %s\n",
-                              module_num, mlx5_pme_status[module_status - 1],
-                              mlx5_pme_error[error_type]);
-}
-
 int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
 {
        u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
index 91b8139a388d2ece13a3fa5c296bd63ed7ab57c3..388f205a497f0ba703c26b73ea05be1b591ad7b7 100644 (file)
 #include <linux/mlx5/transobj.h>
 
 #include "mlx5_core.h"
+#include "lib/eq.h"
 
-static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
-                                                u32 rsn)
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
 {
-       struct mlx5_qp_table *table = &dev->priv.qp_table;
        struct mlx5_core_rsc_common *common;
 
        spin_lock(&table->lock);
@@ -53,11 +53,6 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
 
        spin_unlock(&table->lock);
 
-       if (!common) {
-               mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
-                              rsn);
-               return NULL;
-       }
        return common;
 }
 
@@ -120,19 +115,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
        }
 }
 
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+static int rsc_event_notifier(struct notifier_block *nb,
+                             unsigned long type, void *data)
 {
-       struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+       struct mlx5_core_rsc_common *common;
+       struct mlx5_qp_table *table;
+       struct mlx5_core_dev *dev;
        struct mlx5_core_dct *dct;
+       u8 event_type = (u8)type;
        struct mlx5_core_qp *qp;
+       struct mlx5_priv *priv;
+       struct mlx5_eqe *eqe;
+       u32 rsn;
+
+       switch (event_type) {
+       case MLX5_EVENT_TYPE_DCT_DRAINED:
+               eqe = data;
+               rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+               rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+               break;
+       case MLX5_EVENT_TYPE_PATH_MIG:
+       case MLX5_EVENT_TYPE_COMM_EST:
+       case MLX5_EVENT_TYPE_SQ_DRAINED:
+       case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+       case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+       case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+       case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+       case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+               eqe = data;
+               rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+               rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+               break;
+       default:
+               return NOTIFY_DONE;
+       }
+
+       table = container_of(nb, struct mlx5_qp_table, nb);
+       priv  = container_of(table, struct mlx5_priv, qp_table);
+       dev   = container_of(priv, struct mlx5_core_dev, priv);
+
+       mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
 
-       if (!common)
-               return;
+       common = mlx5_get_rsc(table, rsn);
+       if (!common) {
+               mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn);
+               return NOTIFY_OK;
+       }
 
        if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
                mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
                               event_type, rsn);
-               return;
+               goto out;
        }
 
        switch (common->res) {
@@ -150,8 +183,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
        default:
                mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
        }
-
+out:
        mlx5_core_put_rsc(common);
+
+       return NOTIFY_OK;
 }
 
 static int create_resource_common(struct mlx5_core_dev *dev,
@@ -487,10 +522,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev)
        spin_lock_init(&table->lock);
        INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
        mlx5_qp_debugfs_init(dev);
+
+       table->nb.notifier_call = rsc_event_notifier;
+       mlx5_notifier_register(dev, &table->nb);
 }
 
 void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
 {
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+       mlx5_notifier_unregister(dev, &table->nb);
        mlx5_qp_debugfs_cleanup(dev);
 }
 
@@ -670,3 +711,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
        return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+                                               int res_num,
+                                               enum mlx5_res_type res_type)
+{
+       u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+       struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+       return mlx5_get_rsc(table, rsn);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+       mlx5_core_put_rsc(res);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_res_put);
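A short usage sketch for the two exports added here (the caller and QP number are hypothetical): mlx5_core_res_hold() returns the common resource header with a reference taken, or NULL if the number does not map to a live resource, and mlx5_core_res_put() drops that reference, completing ->free on the last one.

static void example_inspect_qp(struct mlx5_core_dev *dev, u32 qpn)
{
	struct mlx5_core_rsc_common *res;

	res = mlx5_core_res_hold(dev, qpn, MLX5_RES_QP);
	if (!res)
		return;		/* no such QP */

	/* ... the resource cannot be freed while the reference is held ... */

	mlx5_core_res_put(res);
}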
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
deleted file mode 100644 (file)
index 6a6fc9b..0000000
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include <linux/mlx5/srq.h>
-#include <rdma/ib_verbs.h>
-#include "mlx5_core.h"
-#include <linux/mlx5/transobj.h>
-
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
-{
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
-       struct mlx5_core_srq *srq;
-
-       spin_lock(&table->lock);
-
-       srq = radix_tree_lookup(&table->tree, srqn);
-       if (srq)
-               atomic_inc(&srq->refcount);
-
-       spin_unlock(&table->lock);
-
-       if (!srq) {
-               mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
-               return;
-       }
-
-       srq->event(srq, event_type);
-
-       if (atomic_dec_and_test(&srq->refcount))
-               complete(&srq->free);
-}
-
-static int get_pas_size(struct mlx5_srq_attr *in)
-{
-       u32 log_page_size = in->log_page_size + 12;
-       u32 log_srq_size  = in->log_size;
-       u32 log_rq_stride = in->wqe_shift;
-       u32 page_offset   = in->page_offset;
-       u32 po_quanta     = 1 << (log_page_size - 6);
-       u32 rq_sz         = 1 << (log_srq_size + 4 + log_rq_stride);
-       u32 page_size     = 1 << log_page_size;
-       u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
-       u32 rq_num_pas    = DIV_ROUND_UP(rq_sz_po, page_size);
-
-       return rq_num_pas * sizeof(u64);
-}
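(Worked once for concreteness: with log_page_size = 0, i.e. 4 KB pages after the +12 bias, log_size = 8 and wqe_shift = 2 for 64-byte strides, rq_sz is 1 << (8 + 4 + 2) = 16 KB; with page_offset = 0 that is DIV_ROUND_UP(16384, 4096) = 4 pages, so a 4 * 8 = 32 byte PAS array.)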
-
-static void set_wq(void *wq, struct mlx5_srq_attr *in)
-{
-       MLX5_SET(wq,   wq, wq_signature,  !!(in->flags
-                & MLX5_SRQ_FLAG_WQ_SIG));
-       MLX5_SET(wq,   wq, log_wq_pg_sz,  in->log_page_size);
-       MLX5_SET(wq,   wq, log_wq_stride, in->wqe_shift + 4);
-       MLX5_SET(wq,   wq, log_wq_sz,     in->log_size);
-       MLX5_SET(wq,   wq, page_offset,   in->page_offset);
-       MLX5_SET(wq,   wq, lwm,           in->lwm);
-       MLX5_SET(wq,   wq, pd,            in->pd);
-       MLX5_SET64(wq, wq, dbr_addr,      in->db_record);
-}
-
-static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
-       MLX5_SET(srqc,   srqc, wq_signature,  !!(in->flags
-                & MLX5_SRQ_FLAG_WQ_SIG));
-       MLX5_SET(srqc,   srqc, log_page_size, in->log_page_size);
-       MLX5_SET(srqc,   srqc, log_rq_stride, in->wqe_shift);
-       MLX5_SET(srqc,   srqc, log_srq_size,  in->log_size);
-       MLX5_SET(srqc,   srqc, page_offset,   in->page_offset);
-       MLX5_SET(srqc,   srqc, lwm,           in->lwm);
-       MLX5_SET(srqc,   srqc, pd,            in->pd);
-       MLX5_SET64(srqc, srqc, dbr_addr,      in->db_record);
-       MLX5_SET(srqc,   srqc, xrcd,          in->xrcd);
-       MLX5_SET(srqc,   srqc, cqn,           in->cqn);
-}
-
-static void get_wq(void *wq, struct mlx5_srq_attr *in)
-{
-       if (MLX5_GET(wq, wq, wq_signature))
-               in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
-       in->log_page_size = MLX5_GET(wq,   wq, log_wq_pg_sz);
-       in->wqe_shift     = MLX5_GET(wq,   wq, log_wq_stride) - 4;
-       in->log_size      = MLX5_GET(wq,   wq, log_wq_sz);
-       in->page_offset   = MLX5_GET(wq,   wq, page_offset);
-       in->lwm           = MLX5_GET(wq,   wq, lwm);
-       in->pd            = MLX5_GET(wq,   wq, pd);
-       in->db_record     = MLX5_GET64(wq, wq, dbr_addr);
-}
-
-static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
-{
-       if (MLX5_GET(srqc, srqc, wq_signature))
-               in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
-       in->log_page_size = MLX5_GET(srqc,   srqc, log_page_size);
-       in->wqe_shift     = MLX5_GET(srqc,   srqc, log_rq_stride);
-       in->log_size      = MLX5_GET(srqc,   srqc, log_srq_size);
-       in->page_offset   = MLX5_GET(srqc,   srqc, page_offset);
-       in->lwm           = MLX5_GET(srqc,   srqc, lwm);
-       in->pd            = MLX5_GET(srqc,   srqc, pd);
-       in->db_record     = MLX5_GET64(srqc, srqc, dbr_addr);
-}
-
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
-{
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
-       struct mlx5_core_srq *srq;
-
-       spin_lock(&table->lock);
-
-       srq = radix_tree_lookup(&table->tree, srqn);
-       if (srq)
-               atomic_inc(&srq->refcount);
-
-       spin_unlock(&table->lock);
-
-       return srq;
-}
-EXPORT_SYMBOL(mlx5_core_get_srq);
-
-static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                         struct mlx5_srq_attr *in)
-{
-       u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
-       void *create_in;
-       void *srqc;
-       void *pas;
-       int pas_size;
-       int inlen;
-       int err;
-
-       pas_size  = get_pas_size(in);
-       inlen     = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
-       create_in = kvzalloc(inlen, GFP_KERNEL);
-       if (!create_in)
-               return -ENOMEM;
-
-       MLX5_SET(create_srq_in, create_in, uid, in->uid);
-       srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
-       pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
-
-       set_srqc(srqc, in);
-       memcpy(pas, in->pas, pas_size);
-
-       MLX5_SET(create_srq_in, create_in, opcode,
-                MLX5_CMD_OP_CREATE_SRQ);
-
-       err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
-                           sizeof(create_out));
-       kvfree(create_in);
-       if (!err) {
-               srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
-               srq->uid = in->uid;
-       }
-
-       return err;
-}
-
-static int destroy_srq_cmd(struct mlx5_core_dev *dev,
-                          struct mlx5_core_srq *srq)
-{
-       u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
-       u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
-
-       MLX5_SET(destroy_srq_in, srq_in, opcode,
-                MLX5_CMD_OP_DESTROY_SRQ);
-       MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
-       MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
-
-       return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-                            srq_out, sizeof(srq_out));
-}
-
-static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                      u16 lwm, int is_srq)
-{
-       u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
-       u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
-
-       MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
-       MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
-       MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
-       MLX5_SET(arm_rq_in, srq_in, lwm,      lwm);
-       MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
-
-       return  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-                             srq_out, sizeof(srq_out));
-}
-
-static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                        struct mlx5_srq_attr *out)
-{
-       u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
-       u32 *srq_out;
-       void *srqc;
-       int err;
-
-       srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
-       if (!srq_out)
-               return -ENOMEM;
-
-       MLX5_SET(query_srq_in, srq_in, opcode,
-                MLX5_CMD_OP_QUERY_SRQ);
-       MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
-       err =  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
-                            srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
-       if (err)
-               goto out;
-
-       srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
-       get_srqc(srqc, out);
-       if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
-               out->flags |= MLX5_SRQ_FLAG_ERR;
-out:
-       kvfree(srq_out);
-       return err;
-}
-
-static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
-                             struct mlx5_core_srq *srq,
-                             struct mlx5_srq_attr *in)
-{
-       u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
-       void *create_in;
-       void *xrc_srqc;
-       void *pas;
-       int pas_size;
-       int inlen;
-       int err;
-
-       pas_size  = get_pas_size(in);
-       inlen     = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
-       create_in = kvzalloc(inlen, GFP_KERNEL);
-       if (!create_in)
-               return -ENOMEM;
-
-       MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
-       xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
-                               xrc_srq_context_entry);
-       pas      = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
-
-       set_srqc(xrc_srqc, in);
-       MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
-       memcpy(pas, in->pas, pas_size);
-       MLX5_SET(create_xrc_srq_in, create_in, opcode,
-                MLX5_CMD_OP_CREATE_XRC_SRQ);
-
-       memset(create_out, 0, sizeof(create_out));
-       err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
-                           sizeof(create_out));
-       if (err)
-               goto out;
-
-       srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
-       srq->uid = in->uid;
-out:
-       kvfree(create_in);
-       return err;
-}
-
-static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
-                              struct mlx5_core_srq *srq)
-{
-       u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
-       u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
-       MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
-                MLX5_CMD_OP_DESTROY_XRC_SRQ);
-       MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-       MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
-
-       return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
-                            xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
-                          struct mlx5_core_srq *srq, u16 lwm)
-{
-       u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
-       u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
-       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
-       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod,   MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
-       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm,      lwm);
-       MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
-
-       return  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
-                             xrcsrq_out, sizeof(xrcsrq_out));
-}
-
-static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
-                            struct mlx5_core_srq *srq,
-                            struct mlx5_srq_attr *out)
-{
-       u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
-       u32 *xrcsrq_out;
-       void *xrc_srqc;
-       int err;
-
-       xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
-       if (!xrcsrq_out)
-               return -ENOMEM;
-       memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-
-       MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
-                MLX5_CMD_OP_QUERY_XRC_SRQ);
-       MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-
-       err =  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
-                            MLX5_ST_SZ_BYTES(query_xrc_srq_out));
-       if (err)
-               goto out;
-
-       xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
-                               xrc_srq_context_entry);
-       get_srqc(xrc_srqc, out);
-       if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
-               out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
-       kvfree(xrcsrq_out);
-       return err;
-}
-
-static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                         struct mlx5_srq_attr *in)
-{
-       void *create_in;
-       void *rmpc;
-       void *wq;
-       int pas_size;
-       int inlen;
-       int err;
-
-       pas_size = get_pas_size(in);
-       inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
-       create_in = kvzalloc(inlen, GFP_KERNEL);
-       if (!create_in)
-               return -ENOMEM;
-
-       rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
-       wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
-
-       MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-       MLX5_SET(create_rmp_in, create_in, uid, in->uid);
-       set_wq(wq, in);
-       memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
-
-       err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
-       if (!err)
-               srq->uid = in->uid;
-
-       kvfree(create_in);
-       return err;
-}
-
-static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
-                          struct mlx5_core_srq *srq)
-{
-       u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {};
-       u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
-
-       MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
-       MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
-       MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_rmp_cmd(struct mlx5_core_dev *dev,
-                      struct mlx5_core_srq *srq,
-                      u16 lwm)
-{
-       void *in;
-       void *rmpc;
-       void *wq;
-       void *bitmask;
-       int err;
-
-       in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       rmpc =    MLX5_ADDR_OF(modify_rmp_in,   in,   ctx);
-       bitmask = MLX5_ADDR_OF(modify_rmp_in,   in,   bitmask);
-       wq   =    MLX5_ADDR_OF(rmpc,            rmpc, wq);
-
-       MLX5_SET(modify_rmp_in, in,      rmp_state, MLX5_RMPC_STATE_RDY);
-       MLX5_SET(modify_rmp_in, in,      rmpn,      srq->srqn);
-       MLX5_SET(modify_rmp_in, in, uid, srq->uid);
-       MLX5_SET(wq,            wq,      lwm,       lwm);
-       MLX5_SET(rmp_bitmask,   bitmask, lwm,       1);
-       MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
-
-       err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
-       kvfree(in);
-       return err;
-}
-
-static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                        struct mlx5_srq_attr *out)
-{
-       u32 *rmp_out;
-       void *rmpc;
-       int err;
-
-       rmp_out =  kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
-       if (!rmp_out)
-               return -ENOMEM;
-
-       err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
-       if (err)
-               goto out;
-
-       rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
-       get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
-       if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
-               out->flags |= MLX5_SRQ_FLAG_ERR;
-
-out:
-       kvfree(rmp_out);
-       return err;
-}
-
-static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                         struct mlx5_srq_attr *in)
-{
-       u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
-       void *create_in;
-       void *xrqc;
-       void *wq;
-       int pas_size;
-       int inlen;
-       int err;
-
-       pas_size = get_pas_size(in);
-       inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
-       create_in = kvzalloc(inlen, GFP_KERNEL);
-       if (!create_in)
-               return -ENOMEM;
-
-       xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
-       wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
-
-       set_wq(wq, in);
-       memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
-
-       if (in->type == IB_SRQT_TM) {
-               MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
-               if (in->flags & MLX5_SRQ_FLAG_RNDV)
-                       MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
-               MLX5_SET(xrqc, xrqc,
-                        tag_matching_topology_context.log_matching_list_sz,
-                        in->tm_log_list_size);
-       }
-       MLX5_SET(xrqc, xrqc, user_index, in->user_index);
-       MLX5_SET(xrqc, xrqc, cqn, in->cqn);
-       MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
-       MLX5_SET(create_xrq_in, create_in, uid, in->uid);
-       err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
-                           sizeof(create_out));
-       kvfree(create_in);
-       if (!err) {
-               srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
-               srq->uid = in->uid;
-       }
-
-       return err;
-}
-
-static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
-       u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
-       u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
-
-       MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
-       MLX5_SET(destroy_xrq_in, in, xrqn,   srq->srqn);
-       MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
-
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int arm_xrq_cmd(struct mlx5_core_dev *dev,
-                      struct mlx5_core_srq *srq,
-                      u16 lwm)
-{
-       u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
-       u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
-
-       MLX5_SET(arm_rq_in, in, opcode,     MLX5_CMD_OP_ARM_RQ);
-       MLX5_SET(arm_rq_in, in, op_mod,     MLX5_ARM_RQ_IN_OP_MOD_XRQ);
-       MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
-       MLX5_SET(arm_rq_in, in, lwm,        lwm);
-       MLX5_SET(arm_rq_in, in, uid, srq->uid);
-
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                        struct mlx5_srq_attr *out)
-{
-       u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
-       u32 *xrq_out;
-       int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
-       void *xrqc;
-       int err;
-
-       xrq_out = kvzalloc(outlen, GFP_KERNEL);
-       if (!xrq_out)
-               return -ENOMEM;
-
-       MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
-       MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
-
-       err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
-       if (err)
-               goto out;
-
-       xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
-       get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
-       if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
-               out->flags |= MLX5_SRQ_FLAG_ERR;
-       out->tm_next_tag =
-               MLX5_GET(xrqc, xrqc,
-                        tag_matching_topology_context.append_next_index);
-       out->tm_hw_phase_cnt =
-               MLX5_GET(xrqc, xrqc,
-                        tag_matching_topology_context.hw_phase_cnt);
-       out->tm_sw_phase_cnt =
-               MLX5_GET(xrqc, xrqc,
-                        tag_matching_topology_context.sw_phase_cnt);
-
-out:
-       kvfree(xrq_out);
-       return err;
-}
-
-static int create_srq_split(struct mlx5_core_dev *dev,
-                           struct mlx5_core_srq *srq,
-                           struct mlx5_srq_attr *in)
-{
-       if (!dev->issi)
-               return create_srq_cmd(dev, srq, in);
-       switch (srq->common.res) {
-       case MLX5_RES_XSRQ:
-               return create_xrc_srq_cmd(dev, srq, in);
-       case MLX5_RES_XRQ:
-               return create_xrq_cmd(dev, srq, in);
-       default:
-               return create_rmp_cmd(dev, srq, in);
-       }
-}
-
-static int destroy_srq_split(struct mlx5_core_dev *dev,
-                            struct mlx5_core_srq *srq)
-{
-       if (!dev->issi)
-               return destroy_srq_cmd(dev, srq);
-       switch (srq->common.res) {
-       case MLX5_RES_XSRQ:
-               return destroy_xrc_srq_cmd(dev, srq);
-       case MLX5_RES_XRQ:
-               return destroy_xrq_cmd(dev, srq);
-       default:
-               return destroy_rmp_cmd(dev, srq);
-       }
-}
-
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                        struct mlx5_srq_attr *in)
-{
-       int err;
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
-
-       switch (in->type) {
-       case IB_SRQT_XRC:
-               srq->common.res = MLX5_RES_XSRQ;
-               break;
-       case IB_SRQT_TM:
-               srq->common.res = MLX5_RES_XRQ;
-               break;
-       default:
-               srq->common.res = MLX5_RES_SRQ;
-       }
-
-       err = create_srq_split(dev, srq, in);
-       if (err)
-               return err;
-
-       atomic_set(&srq->refcount, 1);
-       init_completion(&srq->free);
-
-       spin_lock_irq(&table->lock);
-       err = radix_tree_insert(&table->tree, srq->srqn, srq);
-       spin_unlock_irq(&table->lock);
-       if (err) {
-               mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
-               goto err_destroy_srq_split;
-       }
-
-       return 0;
-
-err_destroy_srq_split:
-       destroy_srq_split(dev, srq);
-
-       return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_srq);
-
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
-{
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
-       struct mlx5_core_srq *tmp;
-       int err;
-
-       spin_lock_irq(&table->lock);
-       tmp = radix_tree_delete(&table->tree, srq->srqn);
-       spin_unlock_irq(&table->lock);
-       if (!tmp) {
-               mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
-               return -EINVAL;
-       }
-       if (tmp != srq) {
-               mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
-               return -EINVAL;
-       }
-
-       err = destroy_srq_split(dev, srq);
-       if (err)
-               return err;
-
-       if (atomic_dec_and_test(&srq->refcount))
-               complete(&srq->free);
-       wait_for_completion(&srq->free);
-
-       return 0;
-}
-EXPORT_SYMBOL(mlx5_core_destroy_srq);
-
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                       struct mlx5_srq_attr *out)
-{
-       if (!dev->issi)
-               return query_srq_cmd(dev, srq, out);
-       switch (srq->common.res) {
-       case MLX5_RES_XSRQ:
-               return query_xrc_srq_cmd(dev, srq, out);
-       case MLX5_RES_XRQ:
-               return query_xrq_cmd(dev, srq, out);
-       default:
-               return query_rmp_cmd(dev, srq, out);
-       }
-}
-EXPORT_SYMBOL(mlx5_core_query_srq);
-
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                     u16 lwm, int is_srq)
-{
-       if (!dev->issi)
-               return arm_srq_cmd(dev, srq, lwm, is_srq);
-       switch (srq->common.res) {
-       case MLX5_RES_XSRQ:
-               return arm_xrc_srq_cmd(dev, srq, lwm);
-       case MLX5_RES_XRQ:
-               return arm_xrq_cmd(dev, srq, lwm);
-       default:
-               return arm_rmp_cmd(dev, srq, lwm);
-       }
-}
-EXPORT_SYMBOL(mlx5_core_arm_srq);
-
-void mlx5_init_srq_table(struct mlx5_core_dev *dev)
-{
-       struct mlx5_srq_table *table = &dev->priv.srq_table;
-
-       memset(table, 0, sizeof(*table));
-       spin_lock_init(&table->lock);
-       INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-}
-
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
-{
-       /* nothing */
-}
index a1ee9a8a769e8a96e2c25f84454772159bb4bd16..c4d4b76096dc2e5884965966e9e1e63913b5abd4 100644 (file)
@@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tis);
 
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rmpn)
-{
-       u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
-       int err;
-
-       MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
-       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-       if (!err)
-               *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
-
-       return err;
-}
-
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
-{
-       u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
-
-       MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
-       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
-{
-       u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
-
-       MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
-       MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out,
-                                         sizeof(out));
-}
-
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
-{
-       u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
-       int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
-
-       MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
-       MLX5_SET(query_rmp_in, in, rmpn,   rmpn);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
-{
-       void *in;
-       void *rmpc;
-       void *wq;
-       void *bitmask;
-       int  err;
-
-       in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       rmpc    = MLX5_ADDR_OF(modify_rmp_in,   in,   ctx);
-       bitmask = MLX5_ADDR_OF(modify_rmp_in,   in,   bitmask);
-       wq      = MLX5_ADDR_OF(rmpc,            rmpc, wq);
-
-       MLX5_SET(modify_rmp_in, in,      rmp_state, MLX5_RMPC_STATE_RDY);
-       MLX5_SET(modify_rmp_in, in,      rmpn,      rmpn);
-       MLX5_SET(wq,            wq,      lwm,       lwm);
-       MLX5_SET(rmp_bitmask,   bitmask, lwm,       1);
-       MLX5_SET(rmpc,          rmpc,    state,     MLX5_RMPC_STATE_RDY);
-
-       err =  mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
-
-       kvfree(in);
-
-       return err;
-}
-
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                         u32 *xsrqn)
-{
-       u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
-       int err;
-
-       MLX5_SET(create_xrc_srq_in, in, opcode,     MLX5_CMD_OP_CREATE_XRC_SRQ);
-       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-       if (!err)
-               *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
-
-       return err;
-}
-
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
-{
-       u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
-
-       MLX5_SET(destroy_xrc_srq_in, in, opcode,   MLX5_CMD_OP_DESTROY_XRC_SRQ);
-       MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
-{
-       u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
-
-       MLX5_SET(arm_xrc_srq_in, in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
-       MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
-       MLX5_SET(arm_xrc_srq_in, in, lwm,      lwm);
-       MLX5_SET(arm_xrc_srq_in, in, op_mod,
-                MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rqtn)
 {
index b1293d153a587e7cd1c99820d1293d6c396136b0..9bc2184a46bc82dcbf989246d363087178a25e57 100644 (file)
@@ -177,7 +177,7 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
        return mlx5_cqwq_ctr2ix(wq, wq->cc);
 }
 
-static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
        return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
index 31a750570c3889c5396ccac46000b07f8aeb2db7..28b757a64029915f3f6754af9b8535481334fec4 100644 (file)
@@ -60,7 +60,7 @@ struct mlx5_core_cq {
        } tasklet_ctx;
        int                     reset_notify_added;
        struct list_head        reset_notify;
-       struct mlx5_eq          *eq;
+       struct mlx5_eq_comp     *eq;
        u16 uid;
 };
 
index b4c0457fbebd9b92ec74181dbd371a7e20833749..4674b9e99f45f2ba6c220d0ecc98ee5301251e2f 100644 (file)
@@ -212,6 +212,13 @@ enum {
        MLX5_PFAULT_SUBTYPE_RDMA = 1,
 };
 
+enum wqe_page_fault_type {
+       MLX5_WQE_PF_TYPE_RMP = 0,
+       MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE = 1,
+       MLX5_WQE_PF_TYPE_RESP = 2,
+       MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC = 3,
+};
+
 enum {
        MLX5_PERM_LOCAL_READ    = 1 << 2,
        MLX5_PERM_LOCAL_WRITE   = 1 << 3,
@@ -294,9 +301,15 @@ enum {
        MLX5_EVENT_QUEUE_TYPE_DCT = 6,
 };
 
+/* mlx5 components can subscribe to any one of these events via
+ * mlx5_eq_notifier_register API.
+ */
 enum mlx5_event {
+       /* Special value to subscribe to any event */
+       MLX5_EVENT_TYPE_NOTIFY_ANY         = 0x0,
+       /* HW events enum start: comp events are not subscribable */
        MLX5_EVENT_TYPE_COMP               = 0x0,
-
+       /* HW Async events enum start: subscribable events */
        MLX5_EVENT_TYPE_PATH_MIG           = 0x01,
        MLX5_EVENT_TYPE_COMM_EST           = 0x02,
        MLX5_EVENT_TYPE_SQ_DRAINED         = 0x03,
@@ -317,6 +330,7 @@ enum mlx5_event {
        MLX5_EVENT_TYPE_TEMP_WARN_EVENT    = 0x17,
        MLX5_EVENT_TYPE_REMOTE_CONFIG      = 0x19,
        MLX5_EVENT_TYPE_GENERAL_EVENT      = 0x22,
+       MLX5_EVENT_TYPE_MONITOR_COUNTER    = 0x24,
        MLX5_EVENT_TYPE_PPS_EVENT          = 0x25,
 
        MLX5_EVENT_TYPE_DB_BF_CONGESTION   = 0x1a,
@@ -334,6 +348,8 @@ enum mlx5_event {
        MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
 
        MLX5_EVENT_TYPE_DEVICE_TRACER      = 0x26,
+
+       MLX5_EVENT_TYPE_MAX                = MLX5_EVENT_TYPE_DEVICE_TRACER + 1,
 };
 
 enum {
@@ -766,6 +782,11 @@ static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
        return (cqe->op_own >> 2) & 0x3;
 }
 
+static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
+{
+       return cqe->op_own >> 4;
+}
+
 static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
 {
        return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
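Together with the typed mlx5_cqwq_get_wqe() shown earlier in this diff, the new helper lets completion handlers switch on the CQE opcode directly. A minimal sketch, assuming the caller has already checked CQE ownership for the current consumer index (the MLX5_CQE_* opcodes are the existing values from this header):

static void example_handle_cqe(struct mlx5_cqwq *wq)
{
	u32 ci = mlx5_cqwq_get_ci(wq);
	struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); /* now typed */

	switch (get_cqe_opcode(cqe)) {
	case MLX5_CQE_RESP_SEND:
		/* ... receive completion ... */
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		/* ... error completion ... */
		break;
	default:
		break;
	}
}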
index aa5963b5d38e192d4e4bb455938bc6200a8ee48e..cc29e880c7336268373730bf62164430f4cb32c2 100644 (file)
 #include <linux/mempool.h>
 #include <linux/interrupt.h>
 #include <linux/idr.h>
+#include <linux/notifier.h>
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
-#include <linux/mlx5/srq.h>
+#include <linux/mlx5/eq.h>
 #include <linux/timecounter.h>
 #include <linux/ptp_clock_kernel.h>
 
@@ -84,18 +85,6 @@ enum {
        MLX5_MAX_PORTS  = 2,
 };
 
-enum {
-       MLX5_EQ_VEC_PAGES        = 0,
-       MLX5_EQ_VEC_CMD          = 1,
-       MLX5_EQ_VEC_ASYNC        = 2,
-       MLX5_EQ_VEC_PFAULT       = 3,
-       MLX5_EQ_VEC_COMP_BASE,
-};
-
-enum {
-       MLX5_MAX_IRQ_NAME       = 32
-};
-
 enum {
        MLX5_ATOMIC_MODE_OFFSET = 16,
        MLX5_ATOMIC_MODE_IB_COMP = 1,
@@ -205,16 +194,7 @@ struct mlx5_rsc_debug {
 };
 
 enum mlx5_dev_event {
-       MLX5_DEV_EVENT_SYS_ERROR,
-       MLX5_DEV_EVENT_PORT_UP,
-       MLX5_DEV_EVENT_PORT_DOWN,
-       MLX5_DEV_EVENT_PORT_INITIALIZED,
-       MLX5_DEV_EVENT_LID_CHANGE,
-       MLX5_DEV_EVENT_PKEY_CHANGE,
-       MLX5_DEV_EVENT_GUID_CHANGE,
-       MLX5_DEV_EVENT_CLIENT_REREG,
-       MLX5_DEV_EVENT_PPS,
-       MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT,
+       MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */
 };
 
 enum mlx5_port_status {
@@ -222,14 +202,6 @@ enum mlx5_port_status {
        MLX5_PORT_DOWN      = 2,
 };
 
-enum mlx5_eq_type {
-       MLX5_EQ_TYPE_COMP,
-       MLX5_EQ_TYPE_ASYNC,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       MLX5_EQ_TYPE_PF,
-#endif
-};
-
 struct mlx5_bfreg_info {
        u32                    *sys_pages;
        int                     num_low_latency_bfregs;
@@ -297,6 +269,8 @@ struct mlx5_cmd_stats {
 };
 
 struct mlx5_cmd {
+       struct mlx5_nb    nb;
+
        void           *cmd_alloc_buf;
        dma_addr_t      alloc_dma;
        int             alloc_size;
@@ -366,51 +340,6 @@ struct mlx5_frag_buf_ctrl {
        u8                      log_frag_strides;
 };
 
-struct mlx5_eq_tasklet {
-       struct list_head list;
-       struct list_head process_list;
-       struct tasklet_struct task;
-       /* lock on completion tasklet list */
-       spinlock_t lock;
-};
-
-struct mlx5_eq_pagefault {
-       struct work_struct       work;
-       /* Pagefaults lock */
-       spinlock_t               lock;
-       struct workqueue_struct *wq;
-       mempool_t               *pool;
-};
-
-struct mlx5_cq_table {
-       /* protect radix tree */
-       spinlock_t              lock;
-       struct radix_tree_root  tree;
-};
-
-struct mlx5_eq {
-       struct mlx5_core_dev   *dev;
-       struct mlx5_cq_table    cq_table;
-       __be32 __iomem         *doorbell;
-       u32                     cons_index;
-       struct mlx5_frag_buf    buf;
-       int                     size;
-       unsigned int            irqn;
-       u8                      eqn;
-       int                     nent;
-       u64                     mask;
-       struct list_head        list;
-       int                     index;
-       struct mlx5_rsc_debug   *dbg;
-       enum mlx5_eq_type       type;
-       union {
-               struct mlx5_eq_tasklet   tasklet_ctx;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-               struct mlx5_eq_pagefault pf_ctx;
-#endif
-       };
-};
-
 struct mlx5_core_psv {
        u32     psv_idx;
        struct psv_layout {
@@ -463,36 +392,6 @@ struct mlx5_core_rsc_common {
        struct completion       free;
 };
 
-struct mlx5_core_srq {
-       struct mlx5_core_rsc_common     common; /* must be first */
-       u32             srqn;
-       int             max;
-       size_t          max_gs;
-       size_t          max_avail_gather;
-       int             wqe_shift;
-       void (*event)   (struct mlx5_core_srq *, enum mlx5_event);
-
-       atomic_t                refcount;
-       struct completion       free;
-       u16             uid;
-};
-
-struct mlx5_eq_table {
-       void __iomem           *update_ci;
-       void __iomem           *update_arm_ci;
-       struct list_head        comp_eqs_list;
-       struct mlx5_eq          pages_eq;
-       struct mlx5_eq          async_eq;
-       struct mlx5_eq          cmd_eq;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       struct mlx5_eq          pfault_eq;
-#endif
-       int                     num_comp_vectors;
-       /* protect EQs list
-        */
-       spinlock_t              lock;
-};
-
 struct mlx5_uars_page {
        void __iomem           *map;
        bool                    wc;
@@ -542,13 +441,8 @@ struct mlx5_core_health {
 };
 
 struct mlx5_qp_table {
-       /* protect radix tree
-        */
-       spinlock_t              lock;
-       struct radix_tree_root  tree;
-};
+       struct notifier_block   nb;
 
-struct mlx5_srq_table {
        /* protect radix tree
         */
        spinlock_t              lock;
@@ -575,11 +469,6 @@ struct mlx5_core_sriov {
        int                     enabled_vfs;
 };
 
-struct mlx5_irq_info {
-       cpumask_var_t mask;
-       char name[MLX5_MAX_IRQ_NAME];
-};
-
 struct mlx5_fc_stats {
        spinlock_t counters_idr_lock; /* protects counters_idr */
        struct idr counters_idr;
@@ -593,10 +482,11 @@ struct mlx5_fc_stats {
        unsigned long sampling_interval; /* jiffies */
 };
 
+struct mlx5_events;
 struct mlx5_mpfs;
 struct mlx5_eswitch;
 struct mlx5_lag;
-struct mlx5_pagefault;
+struct mlx5_eq_table;
 
 struct mlx5_rate_limit {
        u32                     rate;
@@ -619,37 +509,12 @@ struct mlx5_rl_table {
        struct mlx5_rl_entry   *rl_entry;
 };
 
-enum port_module_event_status_type {
-       MLX5_MODULE_STATUS_PLUGGED   = 0x1,
-       MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
-       MLX5_MODULE_STATUS_ERROR     = 0x3,
-       MLX5_MODULE_STATUS_NUM       = 0x3,
-};
-
-enum  port_module_event_error_type {
-       MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED,
-       MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE,
-       MLX5_MODULE_EVENT_ERROR_BUS_STUCK,
-       MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT,
-       MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST,
-       MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER,
-       MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE,
-       MLX5_MODULE_EVENT_ERROR_BAD_CABLE,
-       MLX5_MODULE_EVENT_ERROR_UNKNOWN,
-       MLX5_MODULE_EVENT_ERROR_NUM,
-};
-
-struct mlx5_port_module_event_stats {
-       u64 status_counters[MLX5_MODULE_STATUS_NUM];
-       u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
-};
-
 struct mlx5_priv {
        char                    name[MLX5_MAX_NAME_LEN];
-       struct mlx5_eq_table    eq_table;
-       struct mlx5_irq_info    *irq_info;
+       struct mlx5_eq_table    *eq_table;
 
        /* pages stuff */
+       struct mlx5_nb          pg_nb;
        struct workqueue_struct *pg_wq;
        struct rb_root          page_root;
        int                     fw_pages;
@@ -659,8 +524,6 @@ struct mlx5_priv {
 
        struct mlx5_core_health health;
 
-       struct mlx5_srq_table   srq_table;
-
        /* start: qp staff */
        struct mlx5_qp_table    qp_table;
        struct dentry          *qp_debugfs;
@@ -690,9 +553,7 @@ struct mlx5_priv {
        struct list_head        dev_list;
        struct list_head        ctx_list;
        spinlock_t              ctx_lock;
-
-       struct list_head        waiting_events_list;
-       bool                    is_accum_events;
+       struct mlx5_events      *events;
 
        struct mlx5_flow_steering *steering;
        struct mlx5_mpfs        *mpfs;
@@ -703,15 +564,6 @@ struct mlx5_priv {
        struct mlx5_fc_stats            fc_stats;
        struct mlx5_rl_table            rl_table;
 
-       struct mlx5_port_module_event_stats  pme_stats;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-       void                  (*pfault)(struct mlx5_core_dev *dev,
-                                       void *context,
-                                       struct mlx5_pagefault *pfault);
-       void                   *pfault_ctx;
-       struct srcu_struct      pfault_srcu;
-#endif
        struct mlx5_bfreg_data          bfregs;
        struct mlx5_uars_page          *uar;
 };
@@ -736,44 +588,6 @@ enum mlx5_pagefault_type_flags {
        MLX5_PFAULT_RDMA      = 1 << 2,
 };
 
-/* Contains the details of a pagefault. */
-struct mlx5_pagefault {
-       u32                     bytes_committed;
-       u32                     token;
-       u8                      event_subtype;
-       u8                      type;
-       union {
-               /* Initiator or send message responder pagefault details. */
-               struct {
-                       /* Received packet size, only valid for responders. */
-                       u32     packet_size;
-                       /*
-                        * Number of resource holding WQE, depends on type.
-                        */
-                       u32     wq_num;
-                       /*
-                        * WQE index. Refers to either the send queue or
-                        * receive queue, according to event_subtype.
-                        */
-                       u16     wqe_index;
-               } wqe;
-               /* RDMA responder pagefault details */
-               struct {
-                       u32     r_key;
-                       /*
-                        * Received packet size, minimal size page fault
-                        * resolution required for forward progress.
-                        */
-                       u32     packet_size;
-                       u32     rdma_op_len;
-                       u64     rdma_va;
-               } rdma;
-       };
-
-       struct mlx5_eq         *eq;
-       struct work_struct      work;
-};
-
 struct mlx5_td {
        struct list_head tirs_list;
        u32              tdn;
@@ -803,6 +617,8 @@ struct mlx5_pps {
 };
 
 struct mlx5_clock {
+       struct mlx5_core_dev      *mdev;
+       struct mlx5_nb             pps_nb;
        seqlock_t                  lock;
        struct cyclecounter        cycles;
        struct timecounter         tc;
@@ -810,7 +626,6 @@ struct mlx5_clock {
        u32                        nominal_c_mult;
        unsigned long              overflow_period;
        struct delayed_work        overflow_work;
-       struct mlx5_core_dev      *mdev;
        struct ptp_clock          *ptp;
        struct ptp_clock_info      ptp_info;
        struct mlx5_pps            pps_info;
@@ -843,9 +658,6 @@ struct mlx5_core_dev {
        /* sync interface state */
        struct mutex            intf_state_mutex;
        unsigned long           intf_state;
-       void                    (*event) (struct mlx5_core_dev *dev,
-                                         enum mlx5_dev_event event,
-                                         unsigned long param);
        struct mlx5_priv        priv;
        struct mlx5_profile     *profile;
        atomic_t                num_qps;
@@ -858,9 +670,6 @@ struct mlx5_core_dev {
        } roce;
 #ifdef CONFIG_MLX5_FPGA
        struct mlx5_fpga_device *fpga;
-#endif
-#ifdef CONFIG_RFS_ACCEL
-       struct cpu_rmap         *rmap;
 #endif
        struct mlx5_clock        clock;
        struct mlx5_ib_clock_info  *clock_info;
@@ -1070,13 +879,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
                                                      gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
                                 struct mlx5_cmd_mailbox *head);
-int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                        struct mlx5_srq_attr *in);
-int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
-int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                       struct mlx5_srq_attr *out);
-int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
-                     u16 lwm, int is_srq);
 void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
 int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
@@ -1095,9 +897,9 @@ int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
                      u16 opmod, u8 port);
-void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
+int mlx5_pagealloc_init(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
-int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
+void mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
                                 s32 npages);
@@ -1108,9 +910,6 @@ void mlx5_unregister_debugfs(void);
 
 void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
-void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
-struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1155,6 +954,9 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
                     bool map_wc, bool fast_path);
 void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
 
+unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev);
+struct cpumask *
+mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector);
 unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
 int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
                           u8 roce_version, u8 roce_l3_type, const u8 *gid,
@@ -1202,19 +1004,15 @@ struct mlx5_interface {
        void                    (*remove)(struct mlx5_core_dev *dev, void *context);
        int                     (*attach)(struct mlx5_core_dev *dev, void *context);
        void                    (*detach)(struct mlx5_core_dev *dev, void *context);
-       void                    (*event)(struct mlx5_core_dev *dev, void *context,
-                                        enum mlx5_dev_event event, unsigned long param);
-       void                    (*pfault)(struct mlx5_core_dev *dev,
-                                         void *context,
-                                         struct mlx5_pagefault *pfault);
-       void *                  (*get_dev)(void *context);
        int                     protocol;
        struct list_head        list;
 };
 
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol);
 int mlx5_register_interface(struct mlx5_interface *intf);
 void mlx5_unregister_interface(struct mlx5_interface *intf);
+int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
+int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb);
+
 int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
 
 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
@@ -1306,10 +1104,4 @@ enum {
        MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
 };
 
-static inline const struct cpumask *
-mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
-{
-       return dev->priv.irq_info[vector].mask;
-}
-
 #endif /* MLX5_DRIVER_H */
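A minimal sketch of the interface-level subscription that replaces the removed ->event() callback: an upper layer registers a plain notifier_block and filters on the event codes it cares about. The subscriber below is hypothetical.

#include <linux/notifier.h>
#include <linux/mlx5/driver.h>

static int example_dev_event(struct notifier_block *nb, unsigned long event,
			     void *data)
{
	if (event != MLX5_DEV_EVENT_SYS_ERROR)
		return NOTIFY_DONE;	/* not interested */

	/* ... quiesce the upper layer after a fatal device error ... */
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_dev_event,
};

static void example_attach(struct mlx5_core_dev *dev)
{
	mlx5_notifier_register(dev, &example_nb);
}

static void example_detach(struct mlx5_core_dev *dev)
{
	mlx5_notifier_unregister(dev, &example_nb);
}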
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
new file mode 100644 (file)
index 0000000..00045cc
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef MLX5_CORE_EQ_H
+#define MLX5_CORE_EQ_H
+
+enum {
+       MLX5_EQ_PAGEREQ_IDX        = 0,
+       MLX5_EQ_CMD_IDX            = 1,
+       MLX5_EQ_ASYNC_IDX          = 2,
+       /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */
+       MLX5_EQ_PFAULT_IDX         = 3,
+       MLX5_EQ_MAX_ASYNC_EQS,
+       /* completion eqs vector indices start here */
+       MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS,
+};
+
+#define MLX5_NUM_CMD_EQE   (32)
+#define MLX5_NUM_ASYNC_EQE (0x1000)
+#define MLX5_NUM_SPARE_EQE (0x80)
+
+struct mlx5_eq;
+struct mlx5_core_dev;
+
+struct mlx5_eq_param {
+       u8             index;
+       int            nent;
+       u64            mask;
+       void          *context;
+       irq_handler_t  handler;
+};
+
+struct mlx5_eq *
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+                      struct mlx5_eq_param *param);
+int
+mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+
+struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc);
+void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm);
+
+/* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events.  We
+ * create EQs with MLX5_NUM_SPARE_EQE extra entries,
+ * so we must update our consumer index at
+ * least that often.
+ *
+ * mlx5_eq_update_cc must be called on every EQE @EQ irq handler
+ */
+static inline u32 mlx5_eq_update_cc(struct mlx5_eq *eq, u32 cc)
+{
+       if (unlikely(cc >= MLX5_NUM_SPARE_EQE)) {
+               mlx5_eq_update_ci(eq, cc, 0);
+               cc = 0;
+       }
+       return cc;
+}
+
+struct mlx5_nb {
+       struct notifier_block nb;
+       u8 event_type;
+};
+
+#define mlx5_nb_cof(ptr, type, member) \
+       (container_of(container_of(ptr, struct mlx5_nb, nb), type, member))
+
+#define MLX5_NB_INIT(name, handler, event) do {              \
+       (name)->nb.notifier_call = handler;                  \
+       (name)->event_type = MLX5_EVENT_TYPE_##event;        \
+} while (0)
+
+#endif /* MLX5_CORE_EQ_H */
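For illustration, a sketch of a generic-EQ user that honors the spare-EQE rule documented above. The wrapper struct, EQ name, event mask and IRQ handler are examples only, and it is assumed that the .context pointer from mlx5_eq_param is what the IRQ handler receives.

#include <linux/interrupt.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/eq.h>

/* Hypothetical wrapper passed as .context; holds the core EQ handle */
struct example_eq {
	struct mlx5_eq *core;
};

static irqreturn_t example_eq_irq(int irq, void *ctx)
{
	struct example_eq *eq = ctx;	/* .context from mlx5_eq_param */
	struct mlx5_eqe *eqe;
	u32 cc = 0;

	while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) {
		/* ... dispatch on eqe->type / eqe->data ... */
		cc++;
		/* keep the consumer index fresh; see the comment above */
		cc = mlx5_eq_update_cc(eq->core, cc);
	}
	mlx5_eq_update_ci(eq->core, cc, true);	/* final update and re-arm */

	return IRQ_HANDLED;
}

static struct mlx5_eq *example_create_eq(struct mlx5_core_dev *dev,
					 struct example_eq *eq)
{
	struct mlx5_eq_param param = {
		.index   = MLX5_EQ_PFAULT_IDX,	/* vector reserved for ULPs */
		.nent    = MLX5_NUM_ASYNC_EQE,
		.mask    = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT,
		.context = eq,			/* handed back to the handler */
		.handler = example_eq_irq,
	};

	eq->core = mlx5_eq_create_generic(dev, "example_eq", &param);
	return eq->core;	/* caller checks for error */
}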
index 5660f07d3be00082c58cb1df64ba8a273a675d3d..9df51da04621bc51cea8bc0c4495869bd73c47e6 100644 (file)
@@ -86,6 +86,11 @@ struct mlx5_flow_spec {
        u32  match_value[MLX5_ST_SZ_DW(fte_match_param)];
 };
 
+enum {
+       MLX5_FLOW_DEST_VPORT_VHCA_ID      = BIT(0),
+       MLX5_FLOW_DEST_VPORT_REFORMAT_ID  = BIT(1),
+};
+
 struct mlx5_flow_destination {
        enum mlx5_flow_destination_type type;
        union {
@@ -96,7 +101,8 @@ struct mlx5_flow_destination {
                struct {
                        u16             num;
                        u16             vhca_id;
-                       bool            vhca_id_valid;
+                       u32             reformat_id;
+                       u8              flags;
                } vport;
        };
 };
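A sketch of filling the reworked vport destination, under the assumption that the steering code consumes the new flags field as advertised: the valid bool is replaced by per-attribute flag bits, which also lets a packet-reformat id be attached to an individual destination. The helper below is hypothetical.

static void example_set_vport_dest(struct mlx5_flow_destination *dest,
				   u16 vport_num, u16 vhca_id, u32 reformat_id)
{
	dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest->vport.num = vport_num;
	dest->vport.vhca_id = vhca_id;
	dest->vport.reformat_id = reformat_id;
	dest->vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID |
			    MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
}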
index 34e17e6f894290f161d5e734e6fb2f9dfb0c148e..60c1d49eb40c0e38fb3c049efd2587ed8e912500 100644 (file)
@@ -161,6 +161,8 @@ enum {
        MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
        MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
        MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
+       MLX5_CMD_OP_SET_MONITOR_COUNTER           = 0x774,
+       MLX5_CMD_OP_ARM_MONITOR_COUNTER           = 0x775,
        MLX5_CMD_OP_SET_PP_RATE_LIMIT             = 0x780,
        MLX5_CMD_OP_QUERY_RATE_LIMIT              = 0x781,
        MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT      = 0x782,
@@ -349,7 +351,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
        u8         reformat_l3_tunnel_to_l2[0x1];
        u8         reformat_l2_to_l3_tunnel[0x1];
        u8         reformat_and_modify_action[0x1];
-       u8         reserved_at_14[0xb];
+       u8         reserved_at_15[0xb];
        u8         reserved_at_20[0x2];
        u8         log_max_ft_size[0x6];
        u8         log_max_modify_header_context[0x8];
@@ -421,6 +423,16 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
        union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
 };
 
+struct mlx5_ifc_nvgre_key_bits {
+       u8 hi[0x18];
+       u8 lo[0x8];
+};
+
+union mlx5_ifc_gre_key_bits {
+       struct mlx5_ifc_nvgre_key_bits nvgre;
+       u8 key[0x20];
+};
+
 struct mlx5_ifc_fte_match_set_misc_bits {
        u8         reserved_at_0[0x8];
        u8         source_sqn[0x18];
@@ -442,8 +454,7 @@ struct mlx5_ifc_fte_match_set_misc_bits {
        u8         reserved_at_64[0xc];
        u8         gre_protocol[0x10];
 
-       u8         gre_key_h[0x18];
-       u8         gre_key_l[0x8];
+       union mlx5_ifc_gre_key_bits gre_key;
 
        u8         vxlan_vni[0x18];
        u8         reserved_at_b8[0x8];
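A minimal sketch of matching on a full 32-bit GRE key through the new union layout (the helper name is hypothetical, and the caller is assumed to enable misc-parameter matching on the spec); the nvgre member offers the 24-bit hi / 8-bit lo view instead:

	#include <linux/mlx5/device.h>
	#include <linux/mlx5/fs.h>

	static void my_match_gre_key(struct mlx5_flow_spec *spec, u32 key)
	{
		void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
					    misc_parameters);
		void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
					    misc_parameters);

		/* caller also sets MLX5_MATCH_MISC_PARAMETERS in
		 * spec->match_criteria_enable
		 */
		MLX5_SET(fte_match_set_misc, misc_c, gre_key.key, U32_MAX);
		MLX5_SET(fte_match_set_misc, misc_v, gre_key.key, key);
	}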
@@ -586,7 +597,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
        u8      fdb_multi_path_to_table[0x1];
        u8      reserved_at_1d[0x1];
        u8      multi_fdb_encap[0x1];
-       u8      reserved_at_1e[0x1e1];
+       u8      reserved_at_1f[0x1e1];
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
 
@@ -610,7 +621,9 @@ struct mlx5_ifc_e_switch_cap_bits {
 
        u8         vxlan_encap_decap[0x1];
        u8         nvgre_encap_decap[0x1];
-       u8         reserved_at_22[0x9];
+       u8         reserved_at_22[0x1];
+       u8         log_max_fdb_encap_uplink[0x5];
+       u8         reserved_at_28[0x3];
        u8         log_max_packet_reformat_context[0x5];
        u8         reserved_2b[0x6];
        u8         max_encap_header_size[0xa];
@@ -829,7 +842,7 @@ struct mlx5_ifc_vector_calc_cap_bits {
        struct mlx5_ifc_calc_op calc2;
        struct mlx5_ifc_calc_op calc3;
 
-       u8         reserved_at_e0[0x720];
+       u8         reserved_at_c0[0x720];
 };
 
 enum {
@@ -883,6 +896,10 @@ enum {
        MLX5_CAP_UMR_FENCE_NONE         = 0x2,
 };
 
+enum {
+       MLX5_UCTX_CAP_RAW_TX = 1UL << 0,
+};
+
 struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_at_0[0x30];
        u8         vhca_id[0x10];
@@ -1043,7 +1060,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         vector_calc[0x1];
        u8         umr_ptr_rlky[0x1];
        u8         imaicl[0x1];
-       u8         reserved_at_232[0x4];
+       u8         qp_packet_based[0x1];
+       u8         reserved_at_233[0x3];
        u8         qkv[0x1];
        u8         pkv[0x1];
        u8         set_deth_sqpn[0x1];
@@ -1193,7 +1211,19 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         num_vhca_ports[0x8];
        u8         reserved_at_618[0x6];
        u8         sw_owner_id[0x1];
-       u8         reserved_at_61f[0x1e1];
+       u8         reserved_at_61f[0x1];
+
+       u8         max_num_of_monitor_counters[0x10];
+       u8         num_ppcnt_monitor_counters[0x10];
+
+       u8         reserved_at_640[0x10];
+       u8         num_q_monitor_counters[0x10];
+
+       u8         reserved_at_660[0x40];
+
+       u8         uctx_cap[0x20];
+
+       u8         reserved_at_6c0[0x140];
 };
 
 enum mlx5_flow_destination_type {
@@ -1209,8 +1239,10 @@ enum mlx5_flow_destination_type {
 struct mlx5_ifc_dest_format_struct_bits {
        u8         destination_type[0x8];
        u8         destination_id[0x18];
+
        u8         destination_eswitch_owner_vhca_id_valid[0x1];
-       u8         reserved_at_21[0xf];
+       u8         packet_reformat[0x1];
+       u8         reserved_at_22[0xe];
        u8         destination_eswitch_owner_vhca_id[0x10];
 };
 
@@ -1220,6 +1252,14 @@ struct mlx5_ifc_flow_counter_list_bits {
        u8         reserved_at_20[0x20];
 };
 
+struct mlx5_ifc_extended_dest_format_bits {
+       struct mlx5_ifc_dest_format_struct_bits destination_entry;
+
+       u8         packet_reformat_id[0x20];
+
+       u8         reserved_at_60[0x20];
+};
+
 union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits {
        struct mlx5_ifc_dest_format_struct_bits dest_format_struct;
        struct mlx5_ifc_flow_counter_list_bits flow_counter_list;
@@ -2249,7 +2289,8 @@ struct mlx5_ifc_qpc_bits {
        u8         st[0x8];
        u8         reserved_at_10[0x3];
        u8         pm_state[0x2];
-       u8         reserved_at_15[0x3];
+       u8         reserved_at_15[0x1];
+       u8         req_e2e_credit_mode[0x2];
        u8         offload_type[0x4];
        u8         end_padding_mode[0x2];
        u8         reserved_at_1e[0x2];
@@ -2440,7 +2481,8 @@ struct mlx5_ifc_flow_context_bits {
        u8         reserved_at_60[0x10];
        u8         action[0x10];
 
-       u8         reserved_at_80[0x8];
+       u8         extended_destination[0x1];
+       u8         reserved_at_81[0x7];
        u8         destination_list_size[0x18];
 
        u8         reserved_at_a0[0x8];
@@ -3796,6 +3838,83 @@ enum {
        MLX5_VPORT_STATE_OP_MOD_ESW_VPORT   = 0x1,
 };
 
+struct mlx5_ifc_arm_monitor_counter_in_bits {
+       u8         opcode[0x10];
+       u8         uid[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x20];
+
+       u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_arm_monitor_counter_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+enum {
+       MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT     = 0x0,
+       MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER = 0x1,
+};
+
+enum mlx5_monitor_counter_ppcnt {
+       MLX5_QUERY_MONITOR_PPCNT_IN_RANGE_LENGTH_ERRORS      = 0x0,
+       MLX5_QUERY_MONITOR_PPCNT_OUT_OF_RANGE_LENGTH_FIELD   = 0x1,
+       MLX5_QUERY_MONITOR_PPCNT_FRAME_TOO_LONG_ERRORS       = 0x2,
+       MLX5_QUERY_MONITOR_PPCNT_FRAME_CHECK_SEQUENCE_ERRORS = 0x3,
+       MLX5_QUERY_MONITOR_PPCNT_ALIGNMENT_ERRORS            = 0x4,
+       MLX5_QUERY_MONITOR_PPCNT_IF_OUT_DISCARDS             = 0x5,
+};
+
+enum {
+       MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER     = 0x4,
+};
+
+struct mlx5_ifc_monitor_counter_output_bits {
+       u8         reserved_at_0[0x4];
+       u8         type[0x4];
+       u8         reserved_at_8[0x8];
+       u8         counter[0x10];
+
+       u8         counter_group_id[0x20];
+};
+
+#define MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 (6)
+#define MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1    (1)
+#define MLX5_CMD_SET_MONITOR_NUM_COUNTER (MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 +\
+                                         MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1)
+
+struct mlx5_ifc_set_monitor_counter_in_bits {
+       u8         opcode[0x10];
+       u8         uid[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x10];
+       u8         num_of_counters[0x10];
+
+       u8         reserved_at_60[0x20];
+
+       struct mlx5_ifc_monitor_counter_output_bits monitor_counter[MLX5_CMD_SET_MONITOR_NUM_COUNTER];
+};
+
+struct mlx5_ifc_set_monitor_counter_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
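For context, a hedged sketch of how these new layouts would typically be driven through the standard MLX5_SET/mlx5_cmd_exec helpers (the wrapper name is made up; the driver's real set/arm logic also selects which counters to monitor):

	#include <linux/mlx5/driver.h>

	static int my_arm_monitor_counter(struct mlx5_core_dev *dev)
	{
		u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)]   = {};
		u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {};

		MLX5_SET(arm_monitor_counter_in, in, opcode,
			 MLX5_CMD_OP_ARM_MONITOR_COUNTER);

		return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	}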
 struct mlx5_ifc_query_vport_state_in_bits {
        u8         opcode[0x10];
        u8         reserved_at_10[0x10];
@@ -5567,7 +5686,7 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits {
 struct mlx5_ifc_modify_nic_vport_field_select_bits {
        u8         reserved_at_0[0x12];
        u8         affiliation[0x1];
-       u8         reserved_at_e[0x1];
+       u8         reserved_at_13[0x1];
        u8         disable_uc_local_lb[0x1];
        u8         disable_mc_local_lb[0x1];
        u8         node_guid[0x1];
@@ -9028,7 +9147,7 @@ struct mlx5_ifc_dcbx_param_bits {
        u8         dcbx_cee_cap[0x1];
        u8         dcbx_ieee_cap[0x1];
        u8         dcbx_standby_cap[0x1];
-       u8         reserved_at_0[0x5];
+       u8         reserved_at_3[0x5];
        u8         port_number[0x8];
        u8         reserved_at_10[0xa];
        u8         max_application_table_size[6];
@@ -9276,7 +9395,9 @@ struct mlx5_ifc_umem_bits {
 struct mlx5_ifc_uctx_bits {
        u8         modify_field_select[0x40];
 
-       u8         reserved_at_40[0x1c0];
+       u8         cap[0x20];
+
+       u8         reserved_at_60[0x1a0];
 };
 
 struct mlx5_ifc_create_umem_in_bits {
index 34aed6032f868317f3140b40ad2d94668d0bd9d9..bf4bc01ffb0c68fb46417e8939aad45d4cc0f538 100644 (file)
@@ -107,9 +107,6 @@ enum mlx5e_connector_type {
 
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
 
-#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
-#define PORT_MODULE_EVENT_ERROR_TYPE_MASK         0xF
-
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
                         int ptys_size, int proto_mask, u8 local_port);
index fbe322c966bc775916f37ab4addf9609eb53f4b0..b26ea90773840eb9a6b127abc56b9334c67fb9a6 100644 (file)
@@ -596,6 +596,11 @@ int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id);
 int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
                              int reset, void *out, int out_size);
 
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
+                                               int res_num,
+                                               enum mlx5_res_type res_type);
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
+
 static inline const char *mlx5_qp_type_str(int type)
 {
        switch (type) {
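A rough usage sketch for the new hold/put pair (qpn is a hypothetical QP number, and the NULL-on-miss failure convention is assumed here rather than stated by this header):

	#include <linux/mlx5/driver.h>
	#include <linux/mlx5/qp.h>

	static int my_peek_qp(struct mlx5_core_dev *dev, u32 qpn)
	{
		struct mlx5_core_rsc_common *res;

		res = mlx5_core_res_hold(dev, qpn, MLX5_RES_QP);
		if (!res)
			return -EINVAL;

		/* ... the QP cannot be torn down while the reference is held ... */

		mlx5_core_res_put(res);
		return 0;
	}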
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
deleted file mode 100644 (file)
index 1b1f3c2..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef MLX5_SRQ_H
-#define MLX5_SRQ_H
-
-#include <linux/mlx5/driver.h>
-
-enum {
-       MLX5_SRQ_FLAG_ERR    = (1 << 0),
-       MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
-       MLX5_SRQ_FLAG_RNDV   = (1 << 2),
-};
-
-struct mlx5_srq_attr {
-       u32 type;
-       u32 flags;
-       u32 log_size;
-       u32 wqe_shift;
-       u32 log_page_size;
-       u32 wqe_cnt;
-       u32 srqn;
-       u32 xrcd;
-       u32 page_offset;
-       u32 cqn;
-       u32 pd;
-       u32 lwm;
-       u32 user_index;
-       u64 db_record;
-       __be64 *pas;
-       u32 tm_log_list_size;
-       u32 tm_next_tag;
-       u32 tm_hw_phase_cnt;
-       u32 tm_sw_phase_cnt;
-       u16 uid;
-};
-
-struct mlx5_core_dev;
-
-void mlx5_init_srq_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);
-
-#endif /* MLX5_SRQ_H */
index 7f5ca2cd3a32f7438f3f1ab39ad47422a701b53b..a261d5528ff7e5b5382576d746d731175ac42e2c 100644 (file)
@@ -58,17 +58,6 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
 int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
                         int inlen);
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                        u32 *rmpn);
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
-                         u32 *rmpn);
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rqtn);
 int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,