IB/core: Add an unbound WQ type to the new CQ API
author	Jack Morgenstein <jackm@dev.mellanox.co.il>
Mon, 27 Aug 2018 05:35:55 +0000 (08:35 +0300)
committer	Jason Gunthorpe <jgg@mellanox.com>
Wed, 5 Sep 2018 21:38:09 +0000 (15:38 -0600)
The upstream kernel commit cited in the Fixes tag below modified the
workqueue used by the new CQ API to be bound to a specific CPU (instead
of being unbound). As a result, ALL users of the new CQ API share the
same bound WQ.

Specifically, MAD handling was severely delayed when the CPU bound
to the WQ was busy handling (higher priority) interrupts.

This caused a delay in the MAD "heartbeat" response handling,
which resulted in ports being incorrectly classified as "down".

To fix this, add a new "unbound" WQ type to the new CQ API, so that users
have the option to choose either a bound WQ or an unbound WQ.

For MADs, choose the new "unbound" WQ.
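
To illustrate, a ULP that wants its completions handled on the unbound
WQ simply passes the new poll context when allocating its CQ, exactly as
the MAD change below does. A minimal sketch (the "priv" and "cq_size"
names are placeholders, not part of this patch):

	cq = ib_alloc_cq(device, priv, cq_size, 0 /* comp_vector */,
			 IB_POLL_UNBOUND_WORKQUEUE);
	if (IS_ERR(cq))
		return PTR_ERR(cq);
	...
	/* ib_free_cq() cancels the CQ's pending work for WQ poll contexts */
	ib_free_cq(cq);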

Fixes: b7363e67b23e ("IB/device: Convert ib-comp-wq to be CPU-bound")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/core/cq.c
drivers/infiniband/core/device.c
drivers/infiniband/core/mad.c
include/rdma/ib_verbs.h

drivers/infiniband/core/cq.c
index af5ad6a56ae404d1cd2aae64f95e59d7ccded0ac..9271f72900052aa7007727364c4746f7e94675f3 100644
@@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work)
                                    IB_POLL_BATCH);
        if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
            ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
-               queue_work(ib_comp_wq, &cq->work);
+               queue_work(cq->comp_wq, &cq->work);
 }
 
 static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 {
-       queue_work(ib_comp_wq, &cq->work);
+       queue_work(cq->comp_wq, &cq->work);
 }
 
 /**
@@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
                ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
                break;
        case IB_POLL_WORKQUEUE:
+       case IB_POLL_UNBOUND_WORKQUEUE:
                cq->comp_handler = ib_cq_completion_workqueue;
                INIT_WORK(&cq->work, ib_cq_poll_work);
                ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+               cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
+                               ib_comp_wq : ib_comp_unbound_wq;
                break;
        default:
                ret = -EINVAL;
@@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq)
                irq_poll_disable(&cq->iop);
                break;
        case IB_POLL_WORKQUEUE:
+       case IB_POLL_UNBOUND_WORKQUEUE:
                cancel_work_sync(&cq->work);
                break;
        default:
drivers/infiniband/core/device.c
index db3b6271f09d5949a88c5f5a3ffab2d46fecb7f5..6d8ac51a39cc05600e6ea97b50ed62b13319e778 100644
@@ -61,6 +61,7 @@ struct ib_client_data {
 };
 
 struct workqueue_struct *ib_comp_wq;
+struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
@@ -1166,10 +1167,19 @@ static int __init ib_core_init(void)
                goto err;
        }
 
+       ib_comp_unbound_wq =
+               alloc_workqueue("ib-comp-unb-wq",
+                               WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
+                               WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
+       if (!ib_comp_unbound_wq) {
+               ret = -ENOMEM;
+               goto err_comp;
+       }
+
        ret = class_register(&ib_class);
        if (ret) {
                pr_warn("Couldn't create InfiniBand device class\n");
-               goto err_comp;
+               goto err_comp_unbound;
        }
 
        ret = rdma_nl_init();
@@ -1218,6 +1228,8 @@ err_ibnl:
        rdma_nl_exit();
 err_sysfs:
        class_unregister(&ib_class);
+err_comp_unbound:
+       destroy_workqueue(ib_comp_unbound_wq);
 err_comp:
        destroy_workqueue(ib_comp_wq);
 err:
@@ -1236,6 +1248,7 @@ static void __exit ib_core_cleanup(void)
        addr_cleanup();
        rdma_nl_exit();
        class_unregister(&ib_class);
+       destroy_workqueue(ib_comp_unbound_wq);
        destroy_workqueue(ib_comp_wq);
        /* Make sure that any pending umem accounting work is done. */
        destroy_workqueue(ib_wq);
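
For reference, the init/cleanup changes above follow the standard
workqueue allocation idiom. A self-contained sketch of the same pattern,
using a hypothetical module and queue name (not part of this patch):

	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *example_unbound_wq;

	static int __init example_init(void)
	{
		/* WQ_UNBOUND lets work items run on any CPU, avoiding the
		 * single-CPU bottleneck described in the commit message.
		 */
		example_unbound_wq = alloc_workqueue("example-unb-wq",
				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
		return example_unbound_wq ? 0 : -ENOMEM;
	}

	static void __exit example_exit(void)
	{
		/* destroy_workqueue() drains remaining work before freeing */
		destroy_workqueue(example_unbound_wq);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");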
drivers/infiniband/core/mad.c
index ef459f2f2eeb859c5a7c7a4b4501e00adedd7ae1..b8977c3db5f392ad93f3254bf16bc0610a579d77 100644
@@ -3183,7 +3183,7 @@ static int ib_mad_port_open(struct ib_device *device,
                cq_size *= 2;
 
        port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
-                       IB_POLL_WORKQUEUE);
+                       IB_POLL_UNBOUND_WORKQUEUE);
        if (IS_ERR(port_priv->cq)) {
                dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
                ret = PTR_ERR(port_priv->cq);
include/rdma/ib_verbs.h
index e950c2a68f0601c98706b0b597acfe0d4dc30b34..df8d234a2b561ee1c8f5a9f04b1816bd5a94dd1e 100644
@@ -71,6 +71,7 @@
 
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
 
 union ib_gid {
        u8      raw[16];
@@ -1570,9 +1571,10 @@ struct ib_ah {
 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
 
 enum ib_poll_context {
-       IB_POLL_DIRECT,         /* caller context, no hw completions */
-       IB_POLL_SOFTIRQ,        /* poll from softirq context */
-       IB_POLL_WORKQUEUE,      /* poll from workqueue */
+       IB_POLL_DIRECT,            /* caller context, no hw completions */
+       IB_POLL_SOFTIRQ,           /* poll from softirq context */
+       IB_POLL_WORKQUEUE,         /* poll from workqueue */
+       IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
 };
 
 struct ib_cq {
@@ -1589,6 +1591,7 @@ struct ib_cq {
                struct irq_poll         iop;
                struct work_struct      work;
        };
+       struct workqueue_struct *comp_wq;
        /*
         * Implementation details of the RDMA core, don't use in drivers:
         */