Merge tag 'for-linus-20180616' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 16 Jun 2018 20:37:55 +0000 (05:37 +0900)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 16 Jun 2018 20:37:55 +0000 (05:37 +0900)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Jun 2018 20:37:55 +0000 (05:37 +0900)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 Jun 2018 20:37:55 +0000 (05:37 +0900)
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt

index 86927029a52db81ed81547fac93b09a6ffdd08da..207eca58efaaa2c652d63412b115acd0ec36a32f 100644 (file)
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -752,18 +752,6 @@ completion of the request to the block layer. This means ending tag
  operations before calling end_that_request_last()! For an example of a user
  of these helpers, see the IDE tagged command queueing support.
  
-Certain hardware conditions may dictate a need to invalidate the block tag
-queue. For instance, on IDE any tagged request error needs to clear both
-the hardware and software block queue and enable the driver to sanely restart
-all the outstanding requests. There's a third helper to do that:
-
-       blk_queue_invalidate_tags(struct request_queue *q)
-
-       Clear the internal block tag queue and re-add all the pending requests
-       to the request queue. The driver will receive them again on the
-       next request_fn run, just like it did the first time it encountered
-       them.
-
  3.2.5.2 Tag info
  
  Some block functions exist to query current tag status or to go from a
@@ -805,8 +793,7 @@ Internally, block manages tags in the blk_queue_tag structure:
  Most of the above is simple and straight forward, however busy_list may need
  a bit of explaining. Normally we don't care too much about request ordering,
  but in the event of any barrier requests in the tag queue we need to ensure
-that requests are restarted in the order they were queue. This may happen
-if the driver needs to use blk_queue_invalidate_tags().
+that requests are restarted in the order they were queued.
  
  3.3 I/O Submission
  
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c

index 70356a2a11ab12a059654bdf8af0a3f2c0eb3b41..09b2ee6694fb16858a104b7021b986aff603a91a 100644 (file)
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -311,35 +311,6 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
  }
  EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
  
-int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
-                        int (fn)(void *, struct request *))
-{
-       int i, j, ret = 0;
-
-       if (WARN_ON_ONCE(!fn))
-               goto out;
-
-       for (i = 0; i < set->nr_hw_queues; i++) {
-               struct blk_mq_tags *tags = set->tags[i];
-
-               if (!tags)
-                       continue;
-
-               for (j = 0; j < tags->nr_tags; j++) {
-                       if (!tags->static_rqs[j])
-                               continue;
-
-                       ret = fn(data, tags->static_rqs[j]);
-                       if (ret)
-                               goto out;
-               }
-       }
-
-out:
-       return ret;
-}
-EXPORT_SYMBOL_GPL(blk_mq_tagset_iter);
-
  void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
                 void *priv)
  {
diff --git a/block/blk-mq.c b/block/blk-mq.c

index e9da5e6a8526f38bb6b3e581d7661eda4c520a38..70c65bb6c0131c84130fae44808acb51cf427ace 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -671,6 +671,7 @@ static void __blk_mq_requeue_request(struct request *rq)
  
         if (blk_mq_request_started(rq)) {
                 WRITE_ONCE(rq->state, MQ_RQ_IDLE);
+               rq->rq_flags &= ~RQF_TIMED_OUT;
                 if (q->dma_drain_size && blk_rq_bytes(rq))
                         rq->nr_phys_segments--;
         }
@@ -770,6 +771,7 @@ EXPORT_SYMBOL(blk_mq_tag_to_rq);
  
  static void blk_mq_rq_timed_out(struct request *req, bool reserved)
  {
+       req->rq_flags |= RQF_TIMED_OUT;
         if (req->q->mq_ops->timeout) {
                 enum blk_eh_timer_return ret;
  
@@ -779,6 +781,7 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
                 WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
         }
  
+       req->rq_flags &= ~RQF_TIMED_OUT;
         blk_add_timer(req);
  }
  
@@ -788,6 +791,8 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
  
         if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
                 return false;
+       if (rq->rq_flags & RQF_TIMED_OUT)
+               return false;
  
         deadline = blk_rq_deadline(rq);
         if (time_after_eq(jiffies, deadline))
@@ -2349,7 +2354,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
  
         mutex_lock(&set->tag_list_lock);
         list_del_rcu(&q->tag_set_list);
-       INIT_LIST_HEAD(&q->tag_set_list);
         if (list_is_singular(&set->tag_list)) {
                 /* just transitioned to unshared */
                 set->flags &= ~BLK_MQ_F_TAG_SHARED;
@@ -2357,8 +2361,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
                 blk_mq_update_tag_set_depth(set, false);
         }
         mutex_unlock(&set->tag_list_lock);
-
         synchronize_rcu();
+       INIT_LIST_HEAD(&q->tag_set_list);
  }
  
  static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
diff --git a/block/blk-tag.c b/block/blk-tag.c

index 24b20d86bcbcb3f160498fdf3eb5409f9a8730f4..fbc153aef166d7faa27b65f41d0870ca844d9aad 100644 (file)
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -188,7 +188,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
          */
         q->queue_tags = tags;
         queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
-       INIT_LIST_HEAD(&q->tag_busy_list);
         return 0;
  }
  EXPORT_SYMBOL(blk_queue_init_tags);
@@ -374,27 +373,6 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
         rq->tag = tag;
         bqt->tag_index[tag] = rq;
         blk_start_request(rq);
-       list_add(&rq->queuelist, &q->tag_busy_list);
         return 0;
  }
  EXPORT_SYMBOL(blk_queue_start_tag);
-
-/**
- * blk_queue_invalidate_tags - invalidate all pending tags
- * @q:  the request queue for the device
- *
- *  Description:
- *   Hardware conditions may dictate a need to stop all pending requests.
- *   In this case, we will safely clear the block side of the tag queue and
- *   readd all requests to the request queue in the right order.
- **/
-void blk_queue_invalidate_tags(struct request_queue *q)
-{
-       struct list_head *tmp, *n;
-
-       lockdep_assert_held(q->queue_lock);
-
-       list_for_each_safe(tmp, n, &q->tag_busy_list)
-               blk_requeue_request(q, list_entry_rq(tmp));
-}
-EXPORT_SYMBOL(blk_queue_invalidate_tags);
diff --git a/block/bsg.c b/block/bsg.c

index 132e657e2d913ca3fcff87c594de0957d8b6bd97..66602c48995643dcff921e6f10bba8cd203d3c5c 100644 (file)
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -693,6 +693,8 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
         struct bsg_device *bd;
         unsigned char buf[32];
  
+       lockdep_assert_held(&bsg_mutex);
+
         if (!blk_get_queue(rq))
                 return ERR_PTR(-ENXIO);
  
@@ -707,14 +709,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
         bsg_set_block(bd, file);
  
         atomic_set(&bd->ref_count, 1);
-       mutex_lock(&bsg_mutex);
         hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode)));
  
         strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1);
         bsg_dbg(bd, "bound to <%s>, max queue %d\n",
                 format_dev_t(buf, inode->i_rdev), bd->max_queue);
  
-       mutex_unlock(&bsg_mutex);
         return bd;
  }
  
@@ -722,7 +722,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
  {
         struct bsg_device *bd;
  
-       mutex_lock(&bsg_mutex);
+       lockdep_assert_held(&bsg_mutex);
  
         hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
                 if (bd->queue == q) {
@@ -732,7 +732,6 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
         }
         bd = NULL;
  found:
-       mutex_unlock(&bsg_mutex);
         return bd;
  }
  
@@ -746,17 +745,18 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
          */
         mutex_lock(&bsg_mutex);
         bcd = idr_find(&bsg_minor_idr, iminor(inode));
-       mutex_unlock(&bsg_mutex);
  
-       if (!bcd)
-               return ERR_PTR(-ENODEV);
+       if (!bcd) {
+               bd = ERR_PTR(-ENODEV);
+               goto out_unlock;
+       }
  
         bd = __bsg_get_device(iminor(inode), bcd->queue);
-       if (bd)
-               return bd;
-
-       bd = bsg_add_device(inode, bcd->queue, file);
+       if (!bd)
+               bd = bsg_add_device(inode, bcd->queue, file);
  
+out_unlock:
+       mutex_unlock(&bsg_mutex);
         return bd;
  }
  
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c

index effb1309682eb2564d77a5550526510cb891b7bd..21710a7460c823bbc4f84134d7ecce70d3f993ba 100644 (file)
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2208,7 +2208,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
                  * Verify that the subsystem actually supports multiple
                  * controllers, else bail out.
                  */
-               if (!ctrl->opts->discovery_nqn &&
+               if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
                     nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
                         dev_err(ctrl->device,
                                 "ignoring ctrl due to duplicate subnqn (%s).\n",
@@ -3197,40 +3197,28 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
         nvme_remove_invalid_namespaces(ctrl, nn);
  }
  
-static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
+static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
  {
         size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
         __le32 *log;
-       int error, i;
-       bool ret = false;
+       int error;
  
         log = kzalloc(log_size, GFP_KERNEL);
         if (!log)
-               return false;
+               return;
  
+       /*
+        * We need to read the log to clear the AEN, but we don't want to rely
+        * on it for the changed namespace information as userspace could have
+        * raced with us in reading the log page, which could cause us to miss
+        * updates.
+        */
         error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
-       if (error) {
+       if (error)
                 dev_warn(ctrl->device,
                         "reading changed ns log failed: %d\n", error);
-               goto out_free_log;
-       }
-
-       if (log[0] == cpu_to_le32(0xffffffff))
-               goto out_free_log;
-
-       for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
-               u32 nsid = le32_to_cpu(log[i]);
  
-               if (nsid == 0)
-                       break;
-               dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
-               nvme_validate_ns(ctrl, nsid);
-       }
-       ret = true;
-
-out_free_log:
         kfree(log);
-       return ret;
  }
  
  static void nvme_scan_work(struct work_struct *work)
@@ -3246,9 +3234,8 @@ static void nvme_scan_work(struct work_struct *work)
         WARN_ON_ONCE(!ctrl->tagset);
  
         if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
-               if (nvme_scan_changed_ns_log(ctrl))
-                       goto out_sort_namespaces;
                 dev_info(ctrl->device, "rescanning namespaces.\n");
+               nvme_clear_changed_ns_log(ctrl);
         }
  
         if (nvme_identify_ctrl(ctrl, &id))
@@ -3263,7 +3250,6 @@ static void nvme_scan_work(struct work_struct *work)
         nvme_scan_ns_sequential(ctrl, nn);
  out_free_id:
         kfree(id);
-out_sort_namespaces:
         down_write(&ctrl->namespaces_rwsem);
         list_sort(NULL, &ctrl->namespaces, ns_cmp);
         up_write(&ctrl->namespaces_rwsem);
@@ -3641,16 +3627,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
  }
  EXPORT_SYMBOL_GPL(nvme_start_queues);
  
-int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
-{
-       if (!ctrl->ops->reinit_request)
-               return 0;
-
-       return blk_mq_tagset_iter(set, set->driver_data,
-                       ctrl->ops->reinit_request);
-}
-EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
-
  int __init nvme_core_init(void)
  {
         int result = -ENOMEM;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c

index fa32c1216409a349ed502902317e443471bbd747..903eb4545e2699bc1b62365e5ca4490e824a8c5c 100644 (file)
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -536,67 +536,55 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
         return NULL;
  }
  
-blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
-               bool queue_live, bool is_connected)
+/*
+ * For something we're not in a state to send to the device the default action
+ * is to busy it and retry it after the controller state is recovered.  However,
+ * anything marked for failfast or nvme multipath is immediately failed.
+ *
+ * Note: commands used to initialize the controller will be marked for failfast.
+ * Note: nvme cli/ioctl commands are marked for failfast.
+ */
+blk_status_t nvmf_fail_nonready_command(struct request *rq)
  {
-       struct nvme_command *cmd = nvme_req(rq)->cmd;
+       if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+               return BLK_STS_RESOURCE;
+       nvme_req(rq)->status = NVME_SC_ABORT_REQ;
+       return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
  
-       if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected))
-               return BLK_STS_OK;
+bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+               bool queue_live)
+{
+       struct nvme_request *req = nvme_req(rq);
+
+       /*
+        * If we are in some state of setup or teardown only allow
+        * internally generated commands.
+        */
+       if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD))
+               return false;
  
+       /*
+        * Only allow commands on a live queue, except for the connect command,
+        * which is required to set the queue live in the appropriate states.
+        */
         switch (ctrl->state) {
         case NVME_CTRL_NEW:
         case NVME_CTRL_CONNECTING:
-       case NVME_CTRL_DELETING:
-               /*
-                * This is the case of starting a new or deleting an association
-                * but connectivity was lost before it was fully created or torn
-                * down. We need to error the commands used to initialize the
-                * controller so the reconnect can go into a retry attempt.  The
-                * commands should all be marked REQ_FAILFAST_DRIVER, which will
-                * hit the reject path below. Anything else will be queued while
-                * the state settles.
-                */
-               if (!is_connected)
-                       break;
-
-               /*
-                * If queue is live, allow only commands that are internally
-                * generated pass through.  These are commands on the admin
-                * queue to initialize the controller. This will reject any
-                * ioctl admin cmds received while initializing.
-                */
-               if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
-                       return BLK_STS_OK;
-
-               /*
-                * If the queue is not live, allow only a connect command.  This
-                * will reject any ioctl admin cmd as well as initialization
-                * commands if the controller reverted the queue to non-live.
-                */
-               if (!queue_live && blk_rq_is_passthrough(rq) &&
-                    cmd->common.opcode == nvme_fabrics_command &&
-                    cmd->fabrics.fctype == nvme_fabrics_type_connect)
-                       return BLK_STS_OK;
+               if (req->cmd->common.opcode == nvme_fabrics_command &&
+                   req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
+                       return true;
                 break;
         default:
                 break;
+       case NVME_CTRL_DEAD:
+               return false;
         }
  
-       /*
-        * Any other new io is something we're not in a state to send to the
-        * device.  Default action is to busy it and retry it after the
-        * controller state is recovered. However, anything marked for failfast
-        * or nvme multipath is immediately failed.  Note: commands used to
-        * initialize the controller will be marked for failfast.
-        * Note: nvme cli/ioctl commands are marked for failfast.
-        */
-       if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
-               return BLK_STS_RESOURCE;
-       nvme_req(rq)->status = NVME_SC_ABORT_REQ;
-       return BLK_STS_IOERR;
+       return queue_live;
  }
-EXPORT_SYMBOL_GPL(nvmf_check_if_ready);
+EXPORT_SYMBOL_GPL(__nvmf_check_ready);
  
  static const match_table_t opt_tokens = {
         { NVMF_OPT_TRANSPORT,           "transport=%s"          },
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h

index 7491a0bbf711d6eb3321a7bdc4d3ff22699d3416..e1818a27aa2d7bcf75ff0e2c4522a61e294d8d9b 100644 (file)
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -162,7 +162,17 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
  void nvmf_free_options(struct nvmf_ctrl_options *opts);
  int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
  bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
-blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl,
-       struct request *rq, bool queue_live, bool is_connected);
+blk_status_t nvmf_fail_nonready_command(struct request *rq);
+bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+               bool queue_live);
+
+static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+               bool queue_live)
+{
+       if (likely(ctrl->state == NVME_CTRL_LIVE ||
+                  ctrl->state == NVME_CTRL_ADMIN_ONLY))
+               return true;
+       return __nvmf_check_ready(ctrl, rq, queue_live);
+}
  
  #endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c

index 0bad65803271ff68bc883e0dd16c78b8386fabf8..b528a2f5826cbfe19b22aadd7e09e1ceff512cb6 100644 (file)
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -142,6 +142,7 @@ struct nvme_fc_ctrl {
         struct nvme_fc_rport    *rport;
         u32                     cnum;
  
+       bool                    ioq_live;
         bool                    assoc_active;
         u64                     association_id;
  
@@ -1470,21 +1471,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
  
  static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
  
-static int
-nvme_fc_reinit_request(void *data, struct request *rq)
-{
-       struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-       struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
-
-       memset(cmdiu, 0, sizeof(*cmdiu));
-       cmdiu->scsi_id = NVME_CMD_SCSI_ID;
-       cmdiu->fc_id = NVME_CMD_FC_ID;
-       cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
-       memset(&op->rsp_iu, 0, sizeof(op->rsp_iu));
-
-       return 0;
-}
-
  static void
  __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
                 struct nvme_fc_fcp_op *op)
@@ -1893,6 +1879,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
          */
  
         queue->connection_id = 0;
+       atomic_set(&queue->csn, 1);
  }
  
  static void
@@ -2279,14 +2266,13 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
         struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
         struct nvme_command *sqe = &cmdiu->sqe;
         enum nvmefc_fcp_datadir io_dir;
+       bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
         u32 data_len;
         blk_status_t ret;
  
-       ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
-               test_bit(NVME_FC_Q_LIVE, &queue->flags),
-               ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE);
-       if (unlikely(ret))
-               return ret;
+       if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
+           !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+               return nvmf_fail_nonready_command(rq);
  
         ret = nvme_setup_cmd(ns, rq, sqe);
         if (ret)
@@ -2463,6 +2449,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
         if (ret)
                 goto out_delete_hw_queues;
  
+       ctrl->ioq_live = true;
+
         return 0;
  
  out_delete_hw_queues:
@@ -2480,7 +2468,7 @@ out_free_tag_set:
  }
  
  static int
-nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
+nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
  {
         struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
         unsigned int nr_io_queues;
@@ -2500,12 +2488,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
         if (ctrl->ctrl.queue_count == 1)
                 return 0;
  
-       nvme_fc_init_io_queues(ctrl);
-
-       ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
-       if (ret)
-               goto out_free_io_queues;
-
         ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
         if (ret)
                 goto out_free_io_queues;
@@ -2603,8 +2585,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
          * Create the admin queue
          */
  
-       nvme_fc_init_queue(ctrl, 0);
-
         ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
                                 NVME_AQ_DEPTH);
         if (ret)
@@ -2615,8 +2595,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
         if (ret)
                 goto out_delete_hw_queue;
  
-       if (ctrl->ctrl.state != NVME_CTRL_NEW)
-               blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
  
         ret = nvmf_connect_admin_queue(&ctrl->ctrl);
         if (ret)
@@ -2689,10 +2668,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
          */
  
         if (ctrl->ctrl.queue_count > 1) {
-               if (ctrl->ctrl.state == NVME_CTRL_NEW)
+               if (!ctrl->ioq_live)
                         ret = nvme_fc_create_io_queues(ctrl);
                 else
-                       ret = nvme_fc_reinit_io_queues(ctrl);
+                       ret = nvme_fc_recreate_io_queues(ctrl);
                 if (ret)
                         goto out_term_aen_ops;
         }
@@ -2776,8 +2755,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
          * use blk_mq_tagset_busy_itr() and the transport routine to
          * terminate the exchanges.
          */
-       if (ctrl->ctrl.state != NVME_CTRL_NEW)
-               blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
         blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                 nvme_fc_terminate_exchange, &ctrl->ctrl);
  
@@ -2917,7 +2895,6 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
         .submit_async_event     = nvme_fc_submit_async_event,
         .delete_ctrl            = nvme_fc_delete_ctrl,
         .get_address            = nvmf_get_address,
-       .reinit_request         = nvme_fc_reinit_request,
  };
  
  static void
@@ -2934,7 +2911,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
                 nvme_fc_reconnect_or_delete(ctrl, ret);
         else
                 dev_info(ctrl->ctrl.device,
-                       "NVME-FC{%d}: controller reconnect complete\n",
+                       "NVME-FC{%d}: controller connect complete\n",
                         ctrl->cnum);
  }
  
@@ -2982,7 +2959,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
  {
         struct nvme_fc_ctrl *ctrl;
         unsigned long flags;
-       int ret, idx, retry;
+       int ret, idx;
  
         if (!(rport->remoteport.port_role &
             (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3009,11 +2986,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         }
  
         ctrl->ctrl.opts = opts;
+       ctrl->ctrl.nr_reconnects = 0;
         INIT_LIST_HEAD(&ctrl->ctrl_list);
         ctrl->lport = lport;
         ctrl->rport = rport;
         ctrl->dev = lport->dev;
         ctrl->cnum = idx;
+       ctrl->ioq_live = false;
         ctrl->assoc_active = false;
         init_waitqueue_head(&ctrl->ioabort_wait);
  
@@ -3032,6 +3011,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
  
         ctrl->ctrl.sqsize = opts->queue_size - 1;
         ctrl->ctrl.kato = opts->kato;
+       ctrl->ctrl.cntlid = 0xffff;
  
         ret = -ENOMEM;
         ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
@@ -3039,6 +3019,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         if (!ctrl->queues)
                 goto out_free_ida;
  
+       nvme_fc_init_queue(ctrl, 0);
+
         memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
         ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
         ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
@@ -3081,62 +3063,24 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
         spin_unlock_irqrestore(&rport->lock, flags);
  
-       /*
-        * It's possible that transactions used to create the association
-        * may fail. Examples: CreateAssociation LS or CreateIOConnection
-        * LS gets dropped/corrupted/fails; or a frame gets dropped or a
-        * command times out for one of the actions to init the controller
-        * (Connect, Get/Set_Property, Set_Features, etc). Many of these
-        * transport errors (frame drop, LS failure) inherently must kill
-        * the association. The transport is coded so that any command used
-        * to create the association (prior to a LIVE state transition
-        * while NEW or CONNECTING) will fail if it completes in error or
-        * times out.
-        *
-        * As such: as the connect request was mostly likely due to a
-        * udev event that discovered the remote port, meaning there is
-        * not an admin or script there to restart if the connect
-        * request fails, retry the initial connection creation up to
-        * three times before giving up and declaring failure.
-        */
-       for (retry = 0; retry < 3; retry++) {
-               ret = nvme_fc_create_association(ctrl);
-               if (!ret)
-                       break;
-       }
-
-       if (ret) {
-               nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
-               cancel_work_sync(&ctrl->ctrl.reset_work);
-               cancel_delayed_work_sync(&ctrl->connect_work);
-
-               /* couldn't schedule retry - fail out */
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
+           !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
                 dev_err(ctrl->ctrl.device,
-                       "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
-
-               ctrl->ctrl.opts = NULL;
+                       "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
+               goto fail_ctrl;
+       }
  
-               /* initiate nvme ctrl ref counting teardown */
-               nvme_uninit_ctrl(&ctrl->ctrl);
+       nvme_get_ctrl(&ctrl->ctrl);
  
-               /* Remove core ctrl ref. */
+       if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
                 nvme_put_ctrl(&ctrl->ctrl);
-
-               /* as we're past the point where we transition to the ref
-                * counting teardown path, if we return a bad pointer here,
-                * the calling routine, thinking it's prior to the
-                * transition, will do an rport put. Since the teardown
-                * path also does a rport put, we do an extra get here to
-                * so proper order/teardown happens.
-                */
-               nvme_fc_rport_get(rport);
-
-               if (ret > 0)
-                       ret = -EIO;
-               return ERR_PTR(ret);
+               dev_err(ctrl->ctrl.device,
+                       "NVME-FC{%d}: failed to schedule initial connect\n",
+                       ctrl->cnum);
+               goto fail_ctrl;
         }
  
-       nvme_get_ctrl(&ctrl->ctrl);
+       flush_delayed_work(&ctrl->connect_work);
  
         dev_info(ctrl->ctrl.device,
                 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
@@ -3144,6 +3088,30 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
  
         return &ctrl->ctrl;
  
+fail_ctrl:
+       nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
+       cancel_work_sync(&ctrl->ctrl.reset_work);
+       cancel_delayed_work_sync(&ctrl->connect_work);
+
+       ctrl->ctrl.opts = NULL;
+
+       /* initiate nvme ctrl ref counting teardown */
+       nvme_uninit_ctrl(&ctrl->ctrl);
+
+       /* Remove core ctrl ref. */
+       nvme_put_ctrl(&ctrl->ctrl);
+
+       /* as we're past the point where we transition to the ref
+        * counting teardown path, if we return a bad pointer here,
+        * the calling routine, thinking it's prior to the
+        * transition, will do an rport put. Since the teardown
+        * path also does a rport put, we do an extra get here
+        * so proper order/teardown happens.
+        */
+       nvme_fc_rport_get(rport);
+
+       return ERR_PTR(-EIO);
+
  out_cleanup_admin_q:
         blk_cleanup_queue(ctrl->ctrl.admin_q);
  out_free_admin_tag_set:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c

index d7b664ae5923e1217a493d68f1dac96c8a3cce4c..1ffd3e8b13a18dccf887beac8d1d053e64ce9c9b 100644 (file)
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,6 +12,7 @@
   */
  
  #include <linux/moduleparam.h>
+#include <trace/events/block.h>
  #include "nvme.h"
  
  static bool multipath = true;
@@ -111,6 +112,9 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
         if (likely(ns)) {
                 bio->bi_disk = ns->disk;
                 bio->bi_opf |= REQ_NVME_MPATH;
+               trace_block_bio_remap(bio->bi_disk->queue, bio,
+                                     disk_devt(ns->head->disk),
+                                     bio->bi_iter.bi_sector);
                 ret = direct_make_request(bio);
         } else if (!list_empty_careful(&head->list)) {
                 dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h

index 34df07d44f8071e51f0c27bde7ade5c15bf2c00a..231807cbc849869afcbc16fce2e3389539ce2684 100644 (file)
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -321,7 +321,6 @@ struct nvme_ctrl_ops {
         void (*submit_async_event)(struct nvme_ctrl *ctrl);
         void (*delete_ctrl)(struct nvme_ctrl *ctrl);
         int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
-       int (*reinit_request)(void *data, struct request *rq);
         void (*stop_ctrl)(struct nvme_ctrl *ctrl);
  };
  
@@ -416,7 +415,6 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl);
  void nvme_wait_freeze(struct nvme_ctrl *ctrl);
  void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
  void nvme_start_freeze(struct nvme_ctrl *ctrl);
-int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);
  
  #define NVME_QID_ANY -1
  struct request *nvme_alloc_request(struct request_queue *q,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c

index 2aba03876d846a4bc47a3216dab864ec5c0df2d0..c9424da0d23e3cbbdd0e2b5209d9eddca9f1591f 100644 (file)
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1189,21 +1189,38 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
         count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
                     rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
         if (unlikely(count <= 0)) {
-               sg_free_table_chained(&req->sg_table, true);
-               return -EIO;
+               ret = -EIO;
+               goto out_free_table;
         }
  
         if (count == 1) {
                 if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
                     blk_rq_payload_bytes(rq) <=
-                               nvme_rdma_inline_data_size(queue))
-                       return nvme_rdma_map_sg_inline(queue, req, c);
+                               nvme_rdma_inline_data_size(queue)) {
+                       ret = nvme_rdma_map_sg_inline(queue, req, c);
+                       goto out;
+               }
  
-               if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
-                       return nvme_rdma_map_sg_single(queue, req, c);
+               if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
+                       ret = nvme_rdma_map_sg_single(queue, req, c);
+                       goto out;
+               }
         }
  
-       return nvme_rdma_map_sg_fr(queue, req, c, count);
+       ret = nvme_rdma_map_sg_fr(queue, req, c, count);
+out:
+       if (unlikely(ret))
+               goto out_unmap_sg;
+
+       return 0;
+
+out_unmap_sg:
+       ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
+                       req->nents, rq_data_dir(rq) ==
+                       WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+out_free_table:
+       sg_free_table_chained(&req->sg_table, true);
+       return ret;
  }
  
  static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1613,15 +1630,14 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
         struct nvme_rdma_qe *sqe = &req->sqe;
         struct nvme_command *c = sqe->data;
         struct ib_device *dev;
+       bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
         blk_status_t ret;
         int err;
  
         WARN_ON_ONCE(rq->tag < 0);
  
-       ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
-               test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true);
-       if (unlikely(ret))
-               return ret;
+       if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+               return nvmf_fail_nonready_command(rq);
  
         dev = queue->device->dev;
         ib_dma_sync_single_for_cpu(dev, sqe->dma,
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c

index 9625328427690345b86f73c4a80e0c1035635e4b..38803576d5e122396ae25f1a675d48fd268f67ac 100644 (file)
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -119,9 +119,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
         else
                 status = nvmet_get_smart_log_nsid(req, log);
         if (status)
-               goto out;
+               goto out_free_log;
  
         status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+out_free_log:
+       kfree(log);
  out:
         nvmet_req_complete(req, status);
  }
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c

index 1304ec3a7edeaadd2daf26ff74f55cb75e784696..d8d91f04bd7eedae3e183c3a89dc3d42bd33a3ff 100644 (file)
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -158,12 +158,11 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
         struct nvme_loop_queue *queue = hctx->driver_data;
         struct request *req = bd->rq;
         struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
+       bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
         blk_status_t ret;
  
-       ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req,
-               test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true);
-       if (unlikely(ret))
-               return ret;
+       if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
+               return nvmf_fail_nonready_command(req);
  
         ret = nvme_setup_cmd(ns, req, &iod->cmd);
         if (ret)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h

index fb355173f3c73a6e15a211a8d6f20a879a07d300..e3147eb74222b868a014f498f1186a7a6c661804 100644 (file)
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -281,8 +281,6 @@ void blk_freeze_queue_start(struct request_queue *q);
  void blk_mq_freeze_queue_wait(struct request_queue *q);
  int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
                                      unsigned long timeout);
-int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
-               int (reinit_request)(void *, struct request *));
  
  int blk_mq_map_queues(struct blk_mq_tag_set *set);
  void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index bca3a92eb55f5bc88c47c65343d595479cebb9c4..9154570edf2963628f873d7404930450735ff41a 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -127,6 +127,8 @@ typedef __u32 __bitwise req_flags_t;
  #define RQF_ZONE_WRITE_LOCKED  ((__force req_flags_t)(1 << 19))
  /* already slept for hybrid poll */
  #define RQF_MQ_POLL_SLEPT      ((__force req_flags_t)(1 << 20))
+/* ->timeout has been called, don't expire again */
+#define RQF_TIMED_OUT          ((__force req_flags_t)(1 << 21))
  
  /* flags that prevent us from merging requests: */
  #define RQF_NOMERGE_FLAGS \
@@ -560,7 +562,6 @@ struct request_queue {
         unsigned int            dma_alignment;
  
         struct blk_queue_tag    *queue_tags;
-       struct list_head        tag_busy_list;
  
         unsigned int            nr_sorted;
         unsigned int            in_flight[2];
@@ -1373,7 +1374,6 @@ extern void blk_queue_end_tag(struct request_queue *, struct request *);
  extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
  extern void blk_queue_free_tags(struct request_queue *);
  extern int blk_queue_resize_tags(struct request_queue *, int);
-extern void blk_queue_invalidate_tags(struct request_queue *);
  extern struct blk_queue_tag *blk_init_tags(int, int);
  extern void blk_free_tags(struct blk_queue_tag *);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 16 Jun 2018 20:37:55 +0000 (05:37 +0900)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 16 Jun 2018 20:37:55 +0000 (05:37 +0900)
Documentation/block/biodoc.txt		patch \| blob \| blame \| history
block/blk-mq-tag.c		patch \| blob \| blame \| history
block/blk-mq.c		patch \| blob \| blame \| history
block/blk-tag.c		patch \| blob \| blame \| history
block/bsg.c		patch \| blob \| blame \| history
drivers/nvme/host/core.c		patch \| blob \| blame \| history
drivers/nvme/host/fabrics.c		patch \| blob \| blame \| history
drivers/nvme/host/fabrics.h		patch \| blob \| blame \| history
drivers/nvme/host/fc.c		patch \| blob \| blame \| history
drivers/nvme/host/multipath.c		patch \| blob \| blame \| history
drivers/nvme/host/nvme.h		patch \| blob \| blame \| history
drivers/nvme/host/rdma.c		patch \| blob \| blame \| history
drivers/nvme/target/admin-cmd.c		patch \| blob \| blame \| history
drivers/nvme/target/loop.c		patch \| blob \| blame \| history
include/linux/blk-mq.h		patch \| blob \| blame \| history
include/linux/blkdev.h		patch \| blob \| blame \| history