Merge branch 'linus/master' into rdma.git for-next
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 72e8e8e7d2d7cd515fc0113b5a8b48057e9cbfa5..dc042017c293adc77e0517efb80fd13157b8ac23 100644
 
 #define NVME_RDMA_MAX_SEGMENTS         256
 
-#define NVME_RDMA_MAX_INLINE_SEGMENTS  1
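+/* max inline data SGEs; each device is clamped to its max_send_sge - 1 */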
+#define NVME_RDMA_MAX_INLINE_SEGMENTS  4
 
 struct nvme_rdma_device {
        struct ib_device        *dev;
        struct ib_pd            *pd;
        struct kref             ref;
        struct list_head        entry;
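+       /* inline data SGEs usable with this device */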
+       unsigned int            num_inline_segments;
 };
 
 struct nvme_rdma_qe {
@@ -117,6 +118,7 @@ struct nvme_rdma_ctrl {
        struct sockaddr_storage src_addr;
 
        struct nvme_ctrl        ctrl;
+       bool                    use_inline_data;
 };
 
 static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
        /* +1 for drain */
        init_attr.cap.max_recv_wr = queue->queue_size + 1;
        init_attr.cap.max_recv_sge = 1;
-       init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS;
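+       /* SGE 0 carries the command, the rest carry inline data */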
+       init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
        init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        init_attr.qp_type = IB_QPT_RC;
        init_attr.send_cq = queue->ib_cq;
@@ -286,6 +288,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
        struct ib_device *ibdev = dev->dev;
        int ret;
 
+       nvme_req(rq)->ctrl = &ctrl->ctrl;
        ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command),
                        DMA_TO_DEVICE);
        if (ret)
@@ -374,6 +377,8 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
                goto out_free_pd;
        }
 
+       ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
+                                       ndev->dev->attrs.max_send_sge - 1);
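+       /* one send SGE is always reserved for the command, hence the -1 */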
        list_add(&ndev->entry, &device_list);
 out_unlock:
        mutex_unlock(&device_list_mutex);
@@ -868,6 +873,31 @@ out_free_io_queues:
        return ret;
 }
 
+static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
+               bool remove)
+{
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
+       blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request,
+                       &ctrl->ctrl);
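+       /* admin queue is not live anymore, unquiesce so new requests fail fast */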
+       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_destroy_admin_queue(ctrl, remove);
+}
+
+static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
+               bool remove)
+{
+       if (ctrl->ctrl.queue_count > 1) {
+               nvme_stop_queues(&ctrl->ctrl);
+               nvme_rdma_stop_io_queues(ctrl);
+               blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request,
+                               &ctrl->ctrl);
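+               /* non-remove callers (e.g. error recovery) restart the queues themselves */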
+               if (remove)
+                       nvme_start_queues(&ctrl->ctrl);
+               nvme_rdma_destroy_io_queues(ctrl, remove);
+       }
+}
+
 static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
 {
        struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -912,21 +942,44 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
        }
 }
 
-static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
+static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 {
-       struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
-                       struct nvme_rdma_ctrl, reconnect_work);
+       int ret = -EINVAL;
        bool changed;
-       int ret;
 
-       ++ctrl->ctrl.nr_reconnects;
-
-       ret = nvme_rdma_configure_admin_queue(ctrl, false);
+       ret = nvme_rdma_configure_admin_queue(ctrl, new);
        if (ret)
-               goto requeue;
+               return ret;
+
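+       /* in-capsule data must begin at offset 0, nonzero ICDOFF is unsupported */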
+       if (ctrl->ctrl.icdoff) {
+               dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+               goto destroy_admin;
+       }
+
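+       /* SGLS bit 2: keyed SGL Data Block descriptor, mandatory for RDMA */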
+       if (!(ctrl->ctrl.sgls & (1 << 2))) {
+               dev_err(ctrl->ctrl.device,
+                       "Mandatory keyed sgls are not supported!\n");
+               goto destroy_admin;
+       }
+
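+       /* only warn if queue_size is too large here, will clamp later */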
+       if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) {
+               dev_warn(ctrl->ctrl.device,
+                       "queue_size %zu > ctrl sqsize %u, clamping down\n",
+                       ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
+       }
+
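+       /* warn if maxcmd is lower than sqsize+1 */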
+       if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
+               dev_warn(ctrl->ctrl.device,
+                       "sqsize %u > ctrl maxcmd %u, clamping down\n",
+                       ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
+               ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
+       }
+
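+       /* SGLS bit 20: offset-based SGL addressing, needed to send inline data */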
+       if (ctrl->ctrl.sgls & (1 << 20))
+               ctrl->use_inline_data = true;
 
        if (ctrl->ctrl.queue_count > 1) {
-               ret = nvme_rdma_configure_io_queues(ctrl, false);
+               ret = nvme_rdma_configure_io_queues(ctrl, new);
                if (ret)
                        goto destroy_admin;
        }
@@ -935,10 +988,31 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        if (!changed) {
                /* state change failure is ok if we're in DELETING state */
                WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
-               return;
+               ret = -EINVAL;
+               goto destroy_io;
        }
 
        nvme_start_ctrl(&ctrl->ctrl);
+       return 0;
+
+destroy_io:
+       if (ctrl->ctrl.queue_count > 1)
+               nvme_rdma_destroy_io_queues(ctrl, new);
+destroy_admin:
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
+       nvme_rdma_destroy_admin_queue(ctrl, new);
+       return ret;
+}
+
+static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
+{
+       struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
+                       struct nvme_rdma_ctrl, reconnect_work);
+
+       ++ctrl->ctrl.nr_reconnects;
+
+       if (nvme_rdma_setup_ctrl(ctrl, false))
+               goto requeue;
 
        dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
                        ctrl->ctrl.nr_reconnects);
@@ -947,9 +1021,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
        return;
 
-destroy_admin:
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
-       nvme_rdma_destroy_admin_queue(ctrl, false);
 requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
                        ctrl->ctrl.nr_reconnects);
@@ -962,27 +1033,9 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
                        struct nvme_rdma_ctrl, err_work);
 
        nvme_stop_keep_alive(&ctrl->ctrl);
-
-       if (ctrl->ctrl.queue_count > 1) {
-               nvme_stop_queues(&ctrl->ctrl);
-               nvme_rdma_stop_io_queues(ctrl);
-               blk_mq_tagset_busy_iter(&ctrl->tag_set,
-                                       nvme_cancel_request, &ctrl->ctrl);
-               nvme_rdma_destroy_io_queues(ctrl, false);
-       }
-
-       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
-       blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-                               nvme_cancel_request, &ctrl->ctrl);
-       nvme_rdma_destroy_admin_queue(ctrl, false);
-
-       /*
-        * queues are not a live anymore, so restart the queues to fail fast
-        * new IO
-        */
-       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_teardown_io_queues(ctrl, false);
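+       /* queues are not live anymore, so restart them to fail fast new IO */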
        nvme_start_queues(&ctrl->ctrl);
+       nvme_rdma_teardown_admin_queue(ctrl, false);
 
        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
                /* state change failure is ok if we're in DELETING state */
@@ -1089,19 +1142,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
 }
 
 static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
-               struct nvme_rdma_request *req, struct nvme_command *c)
+               struct nvme_rdma_request *req, struct nvme_command *c,
+               int count)
 {
        struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+       struct scatterlist *sgl = req->sg_table.sgl;
+       struct ib_sge *sge = &req->sge[1];
+       u32 len = 0;
+       int i;
 
-       req->sge[1].addr = sg_dma_address(req->sg_table.sgl);
-       req->sge[1].length = sg_dma_len(req->sg_table.sgl);
-       req->sge[1].lkey = queue->device->pd->local_dma_lkey;
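+       /* build one send SGE per mapped segment, sge[0] already holds the command */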
+       for (i = 0; i < count; i++, sgl++, sge++) {
+               sge->addr = sg_dma_address(sgl);
+               sge->length = sg_dma_len(sgl);
+               sge->lkey = queue->device->pd->local_dma_lkey;
+               len += sge->length;
+       }
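+       /* a single offset SGL descriptor covers the whole in-capsule payload */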
 
        sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
-       sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
+       sg->length = cpu_to_le32(len);
        sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
 
-       req->num_sge++;
+       req->num_sge += count;
        return 0;
 }
 
@@ -1194,15 +1255,16 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
                goto out_free_table;
        }
 
-       if (count == 1) {
+       if (count <= dev->num_inline_segments) {
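+               /* inline only for writes on IO queues whose payload fits in-capsule */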
                if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
+                   queue->ctrl->use_inline_data &&
                    blk_rq_payload_bytes(rq) <=
                                nvme_rdma_inline_data_size(queue)) {
-                       ret = nvme_rdma_map_sg_inline(queue, req, c);
+                       ret = nvme_rdma_map_sg_inline(queue, req, c, count);
                        goto out;
                }
 
-               if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
+               if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
                        ret = nvme_rdma_map_sg_single(queue, req, c);
                        goto out;
                }
@@ -1573,6 +1635,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
        case RDMA_CM_EVENT_CONNECT_ERROR:
        case RDMA_CM_EVENT_UNREACHABLE:
                nvme_rdma_destroy_queue_ib(queue);
+               /* fall through */
        case RDMA_CM_EVENT_ADDR_ERROR:
                dev_dbg(queue->ctrl->ctrl.device,
                        "CM error event %d\n", ev->event);
@@ -1735,25 +1798,12 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-       if (ctrl->ctrl.queue_count > 1) {
-               nvme_stop_queues(&ctrl->ctrl);
-               nvme_rdma_stop_io_queues(ctrl);
-               blk_mq_tagset_busy_iter(&ctrl->tag_set,
-                                       nvme_cancel_request, &ctrl->ctrl);
-               nvme_rdma_destroy_io_queues(ctrl, shutdown);
-       }
-
+       nvme_rdma_teardown_io_queues(ctrl, shutdown);
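+       /* orderly shutdown for delete, plain disable for reset */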
        if (shutdown)
                nvme_shutdown_ctrl(&ctrl->ctrl);
        else
                nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
-
-       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
-       blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-                               nvme_cancel_request, &ctrl->ctrl);
-       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
-       nvme_rdma_destroy_admin_queue(ctrl, shutdown);
+       nvme_rdma_teardown_admin_queue(ctrl, shutdown);
 }
 
 static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
@@ -1765,8 +1815,6 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
        struct nvme_rdma_ctrl *ctrl =
                container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
-       int ret;
-       bool changed;
 
        nvme_stop_ctrl(&ctrl->ctrl);
        nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -1777,25 +1825,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
                return;
        }
 
-       ret = nvme_rdma_configure_admin_queue(ctrl, false);
-       if (ret)
+       if (nvme_rdma_setup_ctrl(ctrl, false))
                goto out_fail;
 
-       if (ctrl->ctrl.queue_count > 1) {
-               ret = nvme_rdma_configure_io_queues(ctrl, false);
-               if (ret)
-                       goto out_fail;
-       }
-
-       changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-       if (!changed) {
-               /* state change failure is ok if we're in DELETING state */
-               WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
-               return;
-       }
-
-       nvme_start_ctrl(&ctrl->ctrl);
-
        return;
 
 out_fail:
@@ -1958,49 +1990,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
        WARN_ON_ONCE(!changed);
 
-       ret = nvme_rdma_configure_admin_queue(ctrl, true);
+       ret = nvme_rdma_setup_ctrl(ctrl, true);
        if (ret)
                goto out_uninit_ctrl;
 
-       /* sanity check icdoff */
-       if (ctrl->ctrl.icdoff) {
-               dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
-               ret = -EINVAL;
-               goto out_remove_admin_queue;
-       }
-
-       /* sanity check keyed sgls */
-       if (!(ctrl->ctrl.sgls & (1 << 2))) {
-               dev_err(ctrl->ctrl.device,
-                       "Mandatory keyed sgls are not supported!\n");
-               ret = -EINVAL;
-               goto out_remove_admin_queue;
-       }
-
-       /* only warn if argument is too large here, will clamp later */
-       if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
-               dev_warn(ctrl->ctrl.device,
-                       "queue_size %zu > ctrl sqsize %u, clamping down\n",
-                       opts->queue_size, ctrl->ctrl.sqsize + 1);
-       }
-
-       /* warn if maxcmd is lower than sqsize+1 */
-       if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
-               dev_warn(ctrl->ctrl.device,
-                       "sqsize %u > ctrl maxcmd %u, clamping down\n",
-                       ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
-               ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
-       }
-
-       if (opts->nr_io_queues) {
-               ret = nvme_rdma_configure_io_queues(ctrl, true);
-               if (ret)
-                       goto out_remove_admin_queue;
-       }
-
-       changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-       WARN_ON_ONCE(!changed);
-
        dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
                ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
 
@@ -2010,13 +2003,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
        mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-       nvme_start_ctrl(&ctrl->ctrl);
-
        return &ctrl->ctrl;
 
-out_remove_admin_queue:
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
-       nvme_rdma_destroy_admin_queue(ctrl, true);
 out_uninit_ctrl:
        nvme_uninit_ctrl(&ctrl->ctrl);
        nvme_put_ctrl(&ctrl->ctrl);