Merge branch 'for-4.6/drivers' of git://git.kernel.dk/linux-block

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 680f5780750cffa45efc93101fb60eb744fdd04c..f8db70ae172d9f558bd14916ca9f05d461232a17 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -27,7 +27,6 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kdev_t.h>
-#include <linux/kthread.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -39,6 +38,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/t10-pi.h>
+#include <linux/timer.h>
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <asm/unaligned.h>
@@ -56,18 +56,6 @@
 #define NVME_NR_AEN_COMMANDS   1
 #define NVME_AQ_BLKMQ_DEPTH    (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
 
-unsigned char admin_timeout = 60;
-module_param(admin_timeout, byte, 0644);
-MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
-
-unsigned char nvme_io_timeout = 30;
-module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
-MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-
-unsigned char shutdown_timeout = 5;
-module_param(shutdown_timeout, byte, 0644);
-MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
-
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -76,10 +64,7 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
-static LIST_HEAD(dev_list);
-static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
-static wait_queue_head_t nvme_kthread_wait;
 
 struct nvme_dev;
 struct nvme_queue;
@@ -92,7 +77,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
  */
 struct nvme_dev {
-       struct list_head node;
        struct nvme_queue **queues;
        struct blk_mq_tag_set tagset;
        struct blk_mq_tag_set admin_tagset;
@@ -110,6 +94,8 @@ struct nvme_dev {
        struct work_struct reset_work;
        struct work_struct scan_work;
        struct work_struct remove_work;
+       struct work_struct async_work;
+       struct timer_list watchdog_timer;
        struct mutex shutdown_lock;
        bool subsystem;
        void __iomem *cmb;
@@ -148,7 +134,6 @@ struct nvme_queue {
        u32 __iomem *q_db;
        u16 q_depth;
        s16 cq_vector;
-       u16 sq_head;
        u16 sq_tail;
        u16 cq_head;
        u16 qid;
@@ -303,17 +288,20 @@ static void nvme_complete_async_event(struct nvme_dev *dev,
        u16 status = le16_to_cpu(cqe->status) >> 1;
        u32 result = le32_to_cpu(cqe->result);
 
-       if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
+       if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) {
                ++dev->ctrl.event_limit;
+               queue_work(nvme_workq, &dev->async_work);
+       }
+
        if (status != NVME_SC_SUCCESS)
                return;
 
        switch (result & 0xff07) {
        case NVME_AER_NOTICE_NS_CHANGED:
-               dev_info(dev->dev, "rescanning\n");
+               dev_info(dev->ctrl.device, "rescanning\n");
                nvme_queue_scan(dev);
        default:
-               dev_warn(dev->dev, "async event result %08x\n", result);
+               dev_warn(dev->ctrl.device, "async event result %08x\n", result);
        }
 }
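/*
 * Annotation, not part of this commit: per the NVMe spec, the AEN result
 * dword packs the event type into bits 2:0 and the event information into
 * bits 15:8, which is why the switch above masks with 0xff07 (and note it
 * deliberately falls through, so every event is also logged).  Hypothetical
 * helpers, named here only to make the layout explicit:
 */
static inline u8 nvme_aer_type(u32 result)
{
	return result & 0x7;		/* 0 = error, 1 = SMART, 2 = notice */
}

static inline u8 nvme_aer_info(u32 result)
{
	return (result >> 8) & 0xff;	/* e.g. namespace attribute changed */
}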
 
@@ -727,7 +715,7 @@ static void nvme_complete_rq(struct request *req)
        }
 
        if (unlikely(iod->aborted)) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                        "completing aborted command with status: %04x\n",
                        req->errors);
        }
@@ -749,7 +737,6 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 
                if ((status & 1) != phase)
                        break;
-               nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
                if (++head == nvmeq->q_depth) {
                        head = 0;
                        phase = !phase;
@@ -759,7 +746,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
                        *tag = -1;
 
                if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
-                       dev_warn(nvmeq->q_dmadev,
+                       dev_warn(nvmeq->dev->ctrl.device,
                                "invalid id %d completed on queue %d\n",
                                cqe.command_id, le16_to_cpu(cqe.sq_id));
                        continue;
@@ -778,10 +765,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
                }
 
                req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-               if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
-                       u32 result = le32_to_cpu(cqe.result);
-                       req->special = (void *)(uintptr_t)result;
-               }
+               if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
+                       memcpy(req->special, &cqe, sizeof(cqe));
                blk_mq_complete_request(req, status >> 1);
 
        }
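/*
 * Annotation, not part of this commit: req->special now carries the whole
 * CQE rather than just the 32-bit result.  A synchronous submitter that
 * wants the completion entry points req->special at caller-owned storage
 * before dispatch -- a sketch, assuming the core's sync-command path:
 *
 *	struct nvme_completion cqe;
 *
 *	req->special = &cqe;
 *	blk_execute_rq(req->q, NULL, req, 0);
 *	result = le32_to_cpu(cqe.result);
 */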
@@ -846,15 +831,22 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
        return 0;
 }
 
-static void nvme_submit_async_event(struct nvme_dev *dev)
+static void nvme_async_event_work(struct work_struct *work)
 {
+       struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work);
+       struct nvme_queue *nvmeq = dev->queues[0];
        struct nvme_command c;
 
        memset(&c, 0, sizeof(c));
        c.common.opcode = nvme_admin_async_event;
-       c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit;
 
-       __nvme_submit_cmd(dev->queues[0], &c);
+       spin_lock_irq(&nvmeq->q_lock);
+       while (dev->ctrl.event_limit > 0) {
+               c.common.command_id = NVME_AQ_BLKMQ_DEPTH +
+                       --dev->ctrl.event_limit;
+               __nvme_submit_cmd(nvmeq, &c);
+       }
+       spin_unlock_irq(&nvmeq->q_lock);
 }
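/*
 * Annotation, not part of this commit: event_limit counts the AEN slots
 * the controller still owes us.  Each completed AEN increments it and
 * requeues this work item, which then drains the counter back to zero
 * under q_lock; command IDs at or above NVME_AQ_BLKMQ_DEPTH stay reserved
 * for AENs and can never collide with blk-mq allocated tags.
 */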
 
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -924,12 +916,10 @@ static void abort_endio(struct request *req, int error)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = iod->nvmeq;
-       u32 result = (u32)(uintptr_t)req->special;
        u16 status = req->errors;
 
-       dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
+       dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
        atomic_inc(&nvmeq->dev->ctrl.abort_limit);
-
        blk_mq_free_request(req);
 }
 
@@ -948,7 +938,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         * shutdown, so we return BLK_EH_HANDLED.
         */
        if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                         "I/O %d QID %d timeout, disable controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
@@ -962,7 +952,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         * returned to the driver, or if this is the admin queue.
         */
        if (!nvmeq->qid || iod->aborted) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                         "I/O %d QID %d timeout, reset controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
@@ -988,8 +978,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
        cmd.abort.cid = req->tag;
        cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
 
-       dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n",
-                                req->tag, nvmeq->qid);
+       dev_warn(nvmeq->dev->ctrl.device,
+               "I/O %d QID %d timeout, aborting\n",
+               req->tag, nvmeq->qid);
 
        abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
                        BLK_MQ_REQ_NOWAIT);
@@ -1018,7 +1009,7 @@ static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved
        if (!blk_mq_request_started(req))
                return;
 
-       dev_dbg_ratelimited(nvmeq->q_dmadev,
+       dev_dbg_ratelimited(nvmeq->dev->ctrl.device,
                 "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
 
        status = NVME_SC_ABORT_REQ;
@@ -1173,9 +1164,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
        nvmeq->qid = qid;
        nvmeq->cq_vector = -1;
        dev->queues[qid] = nvmeq;
-
-       /* make sure queue descriptor is set before queue count, for kthread */
-       mb();
        dev->queue_count++;
 
        return nvmeq;
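/*
 * Annotation, not part of this commit: the mb() was only there so the
 * polling kthread, which walked dev->queues[] up to queue_count without
 * locking, could not observe the incremented count before the queue
 * pointer.  With the kthread gone, no other context scans the array and
 * the barrier can be dropped.
 */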
@@ -1360,53 +1348,31 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        return result;
 }
 
-static int nvme_kthread(void *data)
-{
-       struct nvme_dev *dev, *next;
-
-       while (!kthread_should_stop()) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               spin_lock(&dev_list_lock);
-               list_for_each_entry_safe(dev, next, &dev_list, node) {
-                       int i;
-                       u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
-                       /*
-                        * Skip controllers currently under reset.
-                        */
-                       if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work))
-                               continue;
-
-                       if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
-                                                       csts & NVME_CSTS_CFS) {
-                               if (queue_work(nvme_workq, &dev->reset_work)) {
-                                       dev_warn(dev->dev,
-                                               "Failed status: %x, reset controller\n",
-                                               readl(dev->bar + NVME_REG_CSTS));
-                               }
-                               continue;
-                       }
-                       for (i = 0; i < dev->queue_count; i++) {
-                               struct nvme_queue *nvmeq = dev->queues[i];
-                               if (!nvmeq)
-                                       continue;
-                               spin_lock_irq(&nvmeq->q_lock);
-                               nvme_process_cq(nvmeq);
-
-                               while (i == 0 && dev->ctrl.event_limit > 0)
-                                       nvme_submit_async_event(dev);
-                               spin_unlock_irq(&nvmeq->q_lock);
-                       }
+static void nvme_watchdog_timer(unsigned long data)
+{
+       struct nvme_dev *dev = (struct nvme_dev *)data;
+       u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+       /*
+        * Skip controllers currently under reset.
+        */
+       if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) &&
+           ((csts & NVME_CSTS_CFS) ||
+            (dev->subsystem && (csts & NVME_CSTS_NSSRO)))) {
+               if (queue_work(nvme_workq, &dev->reset_work)) {
+                       dev_warn(dev->dev,
+                               "Failed status: 0x%x, reset controller.\n",
+                               csts);
                }
-               spin_unlock(&dev_list_lock);
-               schedule_timeout(round_jiffies_relative(HZ));
+               return;
        }
-       return 0;
+
+       mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
 }
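/*
 * Annotation, not part of this commit: the per-device timer that replaces
 * the global kthread follows the usual timer lifecycle -- set up in probe,
 * armed once reset succeeds, quiesced before teardown (all three calls
 * appear elsewhere in this diff):
 *
 *	setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
 *		    (unsigned long)dev);		// nvme_probe()
 *	mod_timer(&dev->watchdog_timer,
 *		  round_jiffies(jiffies + HZ));		// nvme_reset_work()
 *	del_timer_sync(&dev->watchdog_timer);		// disable/remove
 *
 * The handler re-arms itself only on the healthy path, so a controller
 * handed off to reset_work is not polled again until reset re-arms it.
 */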
 
 static int nvme_create_io_queues(struct nvme_dev *dev)
 {
-       unsigned i;
+       unsigned i, max;
        int ret = 0;
 
        for (i = dev->queue_count; i <= dev->max_qid; i++) {
@@ -1416,7 +1382,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
                }
        }
 
-       for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
+       max = min(dev->max_qid, dev->queue_count - 1);
+       for (i = dev->online_queues; i <= max; i++) {
                ret = nvme_create_queue(dev->queues[i], i);
                if (ret) {
                        nvme_free_queues(dev, i);
@@ -1507,7 +1474,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
         * access to the admin queue, as that might be the only way to fix them up.
         */
        if (result > 0) {
-               dev_err(dev->dev, "Could not set queue count (%d)\n", result);
+               dev_err(dev->ctrl.device,
+                       "Could not set queue count (%d)\n", result);
                nr_io_queues = 0;
                result = 0;
        }
@@ -1573,9 +1541,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
                adminq->cq_vector = -1;
                goto free_queues;
        }
-
-       /* Free previously allocated queues that are no longer usable */
-       nvme_free_queues(dev, nr_io_queues + 1);
        return nvme_create_io_queues(dev);
 
  free_queues:
@@ -1709,7 +1674,13 @@ static int nvme_dev_add(struct nvme_dev *dev)
                if (blk_mq_alloc_tag_set(&dev->tagset))
                        return 0;
                dev->ctrl.tagset = &dev->tagset;
+       } else {
+               blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
+
+               /* Free previously allocated queues that are no longer usable */
+               nvme_free_queues(dev, dev->online_queues);
        }
+
        nvme_queue_scan(dev);
        return 0;
 }
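/*
 * Annotation, not part of this commit: on a repeat pass (i.e. after a
 * reset), blk_mq_update_nr_hw_queues() tells blk-mq about the renegotiated
 * queue count before the stale nvme_queue structures are released; this is
 * also why the nvme_free_queues() call moved here from
 * nvme_setup_io_queues().
 */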
@@ -1796,56 +1767,12 @@ static void nvme_pci_disable(struct nvme_dev *dev)
        }
 }
 
-static int nvme_dev_list_add(struct nvme_dev *dev)
-{
-       bool start_thread = false;
-
-       spin_lock(&dev_list_lock);
-       if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
-               start_thread = true;
-               nvme_thread = NULL;
-       }
-       list_add(&dev->node, &dev_list);
-       spin_unlock(&dev_list_lock);
-
-       if (start_thread) {
-               nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
-               wake_up_all(&nvme_kthread_wait);
-       } else
-               wait_event_killable(nvme_kthread_wait, nvme_thread);
-
-       if (IS_ERR_OR_NULL(nvme_thread))
-               return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
-
-       return 0;
-}
-
-/*
-* Remove the node from the device list and check
-* for whether or not we need to stop the nvme_thread.
-*/
-static void nvme_dev_list_remove(struct nvme_dev *dev)
-{
-       struct task_struct *tmp = NULL;
-
-       spin_lock(&dev_list_lock);
-       list_del_init(&dev->node);
-       if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) {
-               tmp = nvme_thread;
-               nvme_thread = NULL;
-       }
-       spin_unlock(&dev_list_lock);
-
-       if (tmp)
-               kthread_stop(tmp);
-}
-
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
        int i;
        u32 csts = -1;
 
-       nvme_dev_list_remove(dev);
+       del_timer_sync(&dev->watchdog_timer);
 
        mutex_lock(&dev->shutdown_lock);
        if (pci_is_enabled(to_pci_dev(dev->dev))) {
@@ -1907,7 +1834,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
 {
-       dev_warn(dev->dev, "Removing after probe failure status: %d\n", status);
+       dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
 
        kref_get(&dev->ctrl.kref);
        nvme_dev_disable(dev, false);
@@ -1954,17 +1881,16 @@ static void nvme_reset_work(struct work_struct *work)
                goto out;
 
        dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
+       queue_work(nvme_workq, &dev->async_work);
 
-       result = nvme_dev_list_add(dev);
-       if (result)
-               goto out;
+       mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
 
        /*
         * Keep the controller around but remove all namespaces if we don't have
         * any working I/O queue.
         */
        if (dev->online_queues < 2) {
-               dev_warn(dev->dev, "IO queues not created\n");
+               dev_warn(dev->ctrl.device, "IO queues not created\n");
                nvme_remove_namespaces(&dev->ctrl);
        } else {
                nvme_start_queues(&dev->ctrl);
@@ -2032,6 +1958,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
 }
 
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+       .module                 = THIS_MODULE,
        .reg_read32             = nvme_pci_reg_read32,
        .reg_write32            = nvme_pci_reg_write32,
        .reg_read64             = nvme_pci_reg_read64,
@@ -2089,10 +2016,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (result)
                goto free;
 
-       INIT_LIST_HEAD(&dev->node);
        INIT_WORK(&dev->scan_work, nvme_dev_scan);
        INIT_WORK(&dev->reset_work, nvme_reset_work);
        INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
+       INIT_WORK(&dev->async_work, nvme_async_event_work);
+       setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
+               (unsigned long)dev);
        mutex_init(&dev->shutdown_lock);
        init_completion(&dev->ioq_wait);
 
@@ -2105,6 +2034,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (result)
                goto release_pools;
 
+       dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
+
        queue_work(nvme_workq, &dev->reset_work);
        return 0;
 
@@ -2145,8 +2076,11 @@ static void nvme_remove(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
 
+       del_timer_sync(&dev->watchdog_timer);
+
        set_bit(NVME_CTRL_REMOVING, &dev->flags);
        pci_set_drvdata(pdev, NULL);
+       flush_work(&dev->async_work);
        flush_work(&dev->scan_work);
        nvme_remove_namespaces(&dev->ctrl);
        nvme_uninit_ctrl(&dev->ctrl);
@@ -2192,7 +2126,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
         * shutdown the controller to quiesce. The controller will be restarted
         * after the slot reset through driver's slot_reset callback.
         */
-       dev_warn(&pdev->dev, "error detected: state:%d\n", state);
+       dev_warn(dev->ctrl.device, "error detected: state:%d\n", state);
        switch (state) {
        case pci_channel_io_normal:
                return PCI_ERS_RESULT_CAN_RECOVER;
@@ -2209,7 +2143,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
 
-       dev_info(&pdev->dev, "restart after slot reset\n");
+       dev_info(dev->ctrl.device, "restart after slot reset\n");
        pci_restore_state(pdev);
        queue_work(nvme_workq, &dev->reset_work);
        return PCI_ERS_RESULT_RECOVERED;
@@ -2232,7 +2166,8 @@ static const struct pci_error_handlers nvme_err_handler = {
 
 static const struct pci_device_id nvme_id_table[] = {
        { PCI_VDEVICE(INTEL, 0x0953),
-               .driver_data = NVME_QUIRK_STRIPE_SIZE, },
+               .driver_data = NVME_QUIRK_STRIPE_SIZE |
+                               NVME_QUIRK_DISCARD_ZEROES, },
        { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
                .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
@@ -2257,34 +2192,20 @@ static int __init nvme_init(void)
 {
        int result;
 
-       init_waitqueue_head(&nvme_kthread_wait);
-
        nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
        if (!nvme_workq)
                return -ENOMEM;
 
-       result = nvme_core_init();
-       if (result < 0)
-               goto kill_workq;
-
        result = pci_register_driver(&nvme_driver);
        if (result)
-               goto core_exit;
-       return 0;
-
- core_exit:
-       nvme_core_exit();
- kill_workq:
-       destroy_workqueue(nvme_workq);
+               destroy_workqueue(nvme_workq);
        return result;
 }
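/*
 * Annotation, not part of this commit: nvme_core_init()/nvme_core_exit()
 * disappear from this file because the core now builds as its own module
 * (hence the new .module = THIS_MODULE in the ctrl ops above), leaving the
 * workqueue as the only thing to unwind when pci_register_driver() fails.
 */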
 
 static void __exit nvme_exit(void)
 {
        pci_unregister_driver(&nvme_driver);
-       nvme_core_exit();
        destroy_workqueue(nvme_workq);
-       BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
        _nvme_check_size();
 }