nvme: move namespace scanning to core

[linux-2.6-block.git] / drivers / nvme / host / pci.c
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c

index 0bf7f61a0a89edbf67a53ccea50de19be9cf869c..15bc337553248e6c08fa6a733218edeccf884bf2 100644 (file)
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -92,7 +92,6 @@ struct nvme_dev {
         struct msix_entry *entry;
         void __iomem *bar;
         struct work_struct reset_work;
-       struct work_struct scan_work;
         struct work_struct remove_work;
         struct work_struct async_work;
         struct timer_list watchdog_timer;
@@ -102,11 +101,6 @@ struct nvme_dev {
         dma_addr_t cmb_dma_addr;
         u64 cmb_size;
         u32 cmbsz;
-       unsigned long flags;
-
-#define NVME_CTRL_RESETTING    0
-#define NVME_CTRL_REMOVING     1
-
         struct nvme_ctrl ctrl;
         struct completion ioq_wait;
  };
@@ -271,17 +265,6 @@ static int nvme_init_request(void *data, struct request *req,
         return 0;
  }
  
-static void nvme_queue_scan(struct nvme_dev *dev)
-{
-       /*
-        * Do not queue new scan work when a controller is reset during
-        * removal.
-        */
-       if (test_bit(NVME_CTRL_REMOVING, &dev->flags))
-               return;
-       queue_work(nvme_workq, &dev->scan_work);
-}
-
  static void nvme_complete_async_event(struct nvme_dev *dev,
                 struct nvme_completion *cqe)
  {
@@ -299,7 +282,7 @@ static void nvme_complete_async_event(struct nvme_dev *dev,
         switch (result & 0xff07) {
         case NVME_AER_NOTICE_NS_CHANGED:
                 dev_info(dev->ctrl.device, "rescanning\n");
-               nvme_queue_scan(dev);
+               nvme_queue_scan(&dev->ctrl);
         default:
                 dev_warn(dev->ctrl.device, "async event result %08x\n", result);
         }
@@ -593,43 +576,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
         nvme_free_iod(dev, req);
  }
  
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
-               struct nvme_command *cmnd)
-{
-       struct nvme_dsm_range *range;
-       struct page *page;
-       int offset;
-       unsigned int nr_bytes = blk_rq_bytes(req);
-
-       range = kmalloc(sizeof(*range), GFP_ATOMIC);
-       if (!range)
-               return BLK_MQ_RQ_QUEUE_BUSY;
-
-       range->cattr = cpu_to_le32(0);
-       range->nlb = cpu_to_le32(nr_bytes >> ns->lba_shift);
-       range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
-
-       memset(cmnd, 0, sizeof(*cmnd));
-       cmnd->dsm.opcode = nvme_cmd_dsm;
-       cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
-       cmnd->dsm.nr = 0;
-       cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-
-       req->completion_data = range;
-       page = virt_to_page(range);
-       offset = offset_in_page(range);
-       blk_add_request_payload(req, page, offset, sizeof(*range));
-
-       /*
-        * we set __data_len back to the size of the area to be discarded
-        * on disk. This allows us to report completion on the full amount
-        * of blocks described by the request.
-        */
-       req->__data_len = nr_bytes;
-
-       return 0;
-}
-
  /*
   * NOTE: ns is NULL when called on the admin queue.
   */
@@ -662,15 +608,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
         if (ret)
                 return ret;
  
-       if (req->cmd_type == REQ_TYPE_DRV_PRIV)
-               memcpy(&cmnd, req->cmd, sizeof(cmnd));
-       else if (req->cmd_flags & REQ_FLUSH)
-               nvme_setup_flush(ns, &cmnd);
-       else if (req->cmd_flags & REQ_DISCARD)
-               ret = nvme_setup_discard(ns, req, &cmnd);
-       else
-               nvme_setup_rw(ns, req, &cmnd);
-
+       ret = nvme_setup_cmd(ns, req, &cmnd);
         if (ret)
                 goto out;
  
@@ -946,7 +884,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
          * cancellation error. All outstanding requests are completed on
          * shutdown, so we return BLK_EH_HANDLED.
          */
-       if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
+       if (dev->ctrl.state == NVME_CTRL_RESETTING) {
                 dev_warn(dev->ctrl.device,
                          "I/O %d QID %d timeout, disable controller\n",
                          req->tag, nvmeq->qid);
@@ -1010,16 +948,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         return BLK_EH_RESET_TIMER;
  }
  
-static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
+static void nvme_cancel_io(struct request *req, void *data, bool reserved)
  {
-       struct nvme_queue *nvmeq = data;
         int status;
  
         if (!blk_mq_request_started(req))
                 return;
  
-       dev_dbg_ratelimited(nvmeq->dev->ctrl.device,
-                "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
+       dev_dbg_ratelimited(((struct nvme_dev *) data)->ctrl.device,
+                               "Cancelling I/O %d", req->tag);
  
         status = NVME_SC_ABORT_REQ;
         if (blk_queue_dying(req->q))
@@ -1076,14 +1013,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
         return 0;
  }
  
-static void nvme_clear_queue(struct nvme_queue *nvmeq)
-{
-       spin_lock_irq(&nvmeq->q_lock);
-       if (nvmeq->tags && *nvmeq->tags)
-               blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq);
-       spin_unlock_irq(&nvmeq->q_lock);
-}
-
  static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
  {
         struct nvme_queue *nvmeq = dev->queues[0];
@@ -1357,22 +1286,44 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
         return result;
  }
  
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+       /* Returns true when the caller should queue a controller reset. */
+       /* If true, indicates loss of adapter communication, possibly caused
+        * by an NVMe Subsystem reset.
+        */
+       bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+       /* If a reset is already ongoing, we shouldn't reset again. */
+       if (work_busy(&dev->reset_work))
+               return false;
+
+       /* We shouldn't reset unless the controller is in a fatal error state
+        * _or_ we have lost communication with it.
+        */
+       if (!(csts & NVME_CSTS_CFS) && !nssro)
+               return false;
+
+       /* If PCI error recovery is in progress, we cannot reset or the
+        * recovery mechanism will surely fail.
+        */
+       if (pci_channel_offline(to_pci_dev(dev->dev)))
+               return false;
+
+       return true;
+}
+
  static void nvme_watchdog_timer(unsigned long data)
  {
         struct nvme_dev *dev = (struct nvme_dev *)data;
         u32 csts = readl(dev->bar + NVME_REG_CSTS);
  
-       /*
-        * Skip controllers currently under reset.
-        */
-       if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) &&
-           ((csts & NVME_CSTS_CFS) ||
-            (dev->subsystem && (csts & NVME_CSTS_NSSRO)))) {
-               if (queue_work(nvme_workq, &dev->reset_work)) {
+       /* Reset the controller only when nvme_should_reset() allows it. */
+       if (nvme_should_reset(dev, csts)) {
+               if (queue_work(nvme_workq, &dev->reset_work))
                         dev_warn(dev->dev,
                                 "Failed status: 0x%x, reset controller.\n",
                                 csts);
-               }
                 return;
         }
  
@@ -1558,8 +1509,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
         return result;
  }
  
-static void nvme_set_irq_hints(struct nvme_dev *dev)
+static void nvme_pci_post_scan(struct nvme_ctrl *ctrl)
  {
+       struct nvme_dev *dev = to_nvme_dev(ctrl);
         struct nvme_queue *nvmeq;
         int i;
  
@@ -1574,16 +1526,6 @@ static void nvme_set_irq_hints(struct nvme_dev *dev)
         }
  }
  
-static void nvme_dev_scan(struct work_struct *work)
-{
-       struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
-
-       if (!dev->tagset.tags)
-               return;
-       nvme_scan_namespaces(&dev->ctrl);
-       nvme_set_irq_hints(dev);
-}
-
  static void nvme_del_queue_end(struct request *req, int error)
  {
         struct nvme_queue *nvmeq = req->end_io_data;
@@ -1697,7 +1639,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
                 nvme_free_queues(dev, dev->online_queues);
         }
  
-       nvme_queue_scan(dev);
         return 0;
  }
  
@@ -1810,8 +1751,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
         }
         nvme_pci_disable(dev);
  
-       for (i = dev->queue_count - 1; i >= 0; i--)
-               nvme_clear_queue(dev->queues[i]);
+       blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_io, dev);
+       blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_io, dev);
         mutex_unlock(&dev->shutdown_lock);
  }
  
@@ -1867,7 +1808,7 @@ static void nvme_reset_work(struct work_struct *work)
         struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
         int result = -ENODEV;
  
-       if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
+       if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
                 goto out;
  
         /*
@@ -1877,7 +1818,8 @@ static void nvme_reset_work(struct work_struct *work)
         if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
                 nvme_dev_disable(dev, false);
  
-       set_bit(NVME_CTRL_RESETTING, &dev->flags);
+       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+               goto out;
  
         result = nvme_pci_enable(dev);
         if (result)
@@ -1900,8 +1842,16 @@ static void nvme_reset_work(struct work_struct *work)
         if (result)
                 goto out;
  
-       dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
-       queue_work(nvme_workq, &dev->async_work);
+       /*
+        * A controller that cannot execute IO typically requires user
+        * intervention to correct. For such degraded controllers, the driver
+        * should not submit commands the user did not request, so skip
+        * registering for asynchronous event notification on this condition.
+        */
+       if (dev->online_queues > 1) {
+               dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
+               queue_work(nvme_workq, &dev->async_work);
+       }
  
         mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
  
@@ -1911,13 +1861,20 @@ static void nvme_reset_work(struct work_struct *work)
          */
         if (dev->online_queues < 2) {
                 dev_warn(dev->ctrl.device, "IO queues not created\n");
+               nvme_kill_queues(&dev->ctrl);
                 nvme_remove_namespaces(&dev->ctrl);
         } else {
                 nvme_start_queues(&dev->ctrl);
                 nvme_dev_add(dev);
         }
  
-       clear_bit(NVME_CTRL_RESETTING, &dev->flags);
+       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
+               dev_warn(dev->ctrl.device, "failed to mark controller live\n");
+               goto out;
+       }
+
+       if (dev->online_queues > 1)
+               nvme_queue_scan(&dev->ctrl);
         return;
  
   out:
@@ -1965,13 +1922,6 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
         return 0;
  }
  
-static bool nvme_pci_io_incapable(struct nvme_ctrl *ctrl)
-{
-       struct nvme_dev *dev = to_nvme_dev(ctrl);
-
-       return !dev->bar || dev->online_queues < 2;
-}
-
  static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
  {
         return nvme_reset(to_nvme_dev(ctrl));
@@ -1982,9 +1932,9 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
         .reg_read32             = nvme_pci_reg_read32,
         .reg_write32            = nvme_pci_reg_write32,
         .reg_read64             = nvme_pci_reg_read64,
-       .io_incapable           = nvme_pci_io_incapable,
         .reset_ctrl             = nvme_pci_reset_ctrl,
         .free_ctrl              = nvme_pci_free_ctrl,
+       .post_scan              = nvme_pci_post_scan,
  };
  
  static int nvme_dev_map(struct nvme_dev *dev)
@@ -2036,7 +1986,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
         if (result)
                 goto free;
  
-       INIT_WORK(&dev->scan_work, nvme_dev_scan);
         INIT_WORK(&dev->reset_work, nvme_reset_work);
         INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
         INIT_WORK(&dev->async_work, nvme_async_event_work);
@@ -2098,11 +2047,10 @@ static void nvme_remove(struct pci_dev *pdev)
  
         del_timer_sync(&dev->watchdog_timer);
  
-       set_bit(NVME_CTRL_REMOVING, &dev->flags);
+       nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+
         pci_set_drvdata(pdev, NULL);
         flush_work(&dev->async_work);
-       flush_work(&dev->scan_work);
-       nvme_remove_namespaces(&dev->ctrl);
         nvme_uninit_ctrl(&dev->ctrl);
         nvme_dev_disable(dev, true);
         flush_work(&dev->reset_work);