NVMe: IOCTL path RCU protect queue access
author Keith Busch <keith.busch@intel.com>
Mon, 3 Mar 2014 23:39:13 +0000 (16:39 -0700)
committer Matthew Wilcox <matthew.r.wilcox@intel.com>
Mon, 24 Mar 2014 12:54:40 +0000 (08:54 -0400)
This adds RCU-protected access to a queue in the NVMe IOCTL path to fix
potential races between a surprise removal and queue usage in
nvme_submit_sync_cmd. The fix holds rcu_read_lock() to prevent the
nvme_queue from being freed while this path is executing. Because the
read-side critical section cannot sleep, this path no longer waits for
an available command id should they all be in use at the time a
passthrough IOCTL request is received.
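
For reference, the reader/updater pairing that the new lock_nvmeq()/
unlock_nvmeq() helpers rely on looks roughly like the sketch below. It
is not part of this patch; the demo_* names are simplified stand-ins
for struct nvme_dev and the device removal path.

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct demo_queue { int qid; };
	struct demo_dev   { struct demo_queue __rcu *queues[8]; };

	/*
	 * Reader side (mirrors lock_nvmeq()/unlock_nvmeq()): no sleeping is
	 * allowed between rcu_read_lock() and rcu_read_unlock(), which is
	 * why the sync command path switches to the non-blocking
	 * alloc_cmdid() instead of alloc_cmdid_killable().
	 */
	static int demo_reader(struct demo_dev *dev, int q_idx)
	{
		struct demo_queue *q;
		int qid = -1;

		rcu_read_lock();
		q = rcu_dereference(dev->queues[q_idx]);
		if (q)
			qid = q->qid;	/* q cannot be freed while the lock is held */
		rcu_read_unlock();
		return qid;
	}

	/* Updater side: what a surprise removal does before freeing a queue. */
	static void demo_remove(struct demo_dev *dev, int q_idx)
	{
		struct demo_queue *q;

		q = rcu_dereference_protected(dev->queues[q_idx], true);
		RCU_INIT_POINTER(dev->queues[q_idx], NULL);
		synchronize_rcu();	/* wait for in-flight readers to drain */
		kfree(q);
	}

With this pairing, a reader either sees a valid queue pointer that stays
live for the duration of its critical section, or it sees NULL and bails
out with -ENODEV.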

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
drivers/block/nvme-core.c
drivers/block/nvme-scsi.c
include/linux/nvme.h

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index b66ab1db46292e37eaac2a82619a457107046bf5..04664cadadfa3657d627be9dea48c3a4da7ffcc4 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -268,18 +268,30 @@ static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid)
        return rcu_dereference_raw(dev->queues[qid]);
 }
 
-struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
+static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
 {
        rcu_read_lock();
        return rcu_dereference(dev->queues[get_cpu() + 1]);
 }
 
-void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
+static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
 {
        put_cpu();
        rcu_read_unlock();
 }
 
+static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx)
+                                                       __acquires(RCU)
+{
+       rcu_read_lock();
+       return rcu_dereference(dev->queues[q_idx]);
+}
+
+static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
+{
+       rcu_read_unlock();
+}
+
 /**
  * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
@@ -292,6 +304,10 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
        unsigned long flags;
        u16 tail;
        spin_lock_irqsave(&nvmeq->q_lock, flags);
+       if (nvmeq->q_suspended) {
+               spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+               return -EBUSY;
+       }
        tail = nvmeq->sq_tail;
        memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
        if (++tail == nvmeq->q_depth)
@@ -812,27 +828,46 @@ static void sync_completion(struct nvme_dev *dev, void *ctx,
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
  */
-int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
+static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx,
+                                               struct nvme_command *cmd,
                                                u32 *result, unsigned timeout)
 {
-       int cmdid;
+       int cmdid, ret;
        struct sync_cmd_info cmdinfo;
+       struct nvme_queue *nvmeq;
+
+       nvmeq = lock_nvmeq(dev, q_idx);
+       if (!nvmeq) {
+               unlock_nvmeq(nvmeq);
+               return -ENODEV;
+       }
 
        cmdinfo.task = current;
        cmdinfo.status = -EINTR;
 
-       cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion,
-                                                               timeout);
-       if (cmdid < 0)
+       cmdid = alloc_cmdid(nvmeq, &cmdinfo, sync_completion, timeout);
+       if (cmdid < 0) {
+               unlock_nvmeq(nvmeq);
                return cmdid;
+       }
        cmd->common.command_id = cmdid;
 
        set_current_state(TASK_KILLABLE);
-       nvme_submit_cmd(nvmeq, cmd);
+       ret = nvme_submit_cmd(nvmeq, cmd);
+       if (ret) {
+               free_cmdid(nvmeq, cmdid, NULL);
+               unlock_nvmeq(nvmeq);
+               set_current_state(TASK_RUNNING);
+               return ret;
+       }
+       unlock_nvmeq(nvmeq);
        schedule_timeout(timeout);
 
        if (cmdinfo.status == -EINTR) {
-               nvme_abort_command(nvmeq, cmdid);
+               nvmeq = lock_nvmeq(dev, q_idx);
+               if (nvmeq)
+                       nvme_abort_command(nvmeq, cmdid);
+               unlock_nvmeq(nvmeq);
                return -EINTR;
        }
 
@@ -853,15 +888,20 @@ static int nvme_submit_async_cmd(struct nvme_queue *nvmeq,
                return cmdid;
        cmdinfo->status = -EINTR;
        cmd->common.command_id = cmdid;
-       nvme_submit_cmd(nvmeq, cmd);
-       return 0;
+       return nvme_submit_cmd(nvmeq, cmd);
 }
 
 int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
                                                                u32 *result)
 {
-       return nvme_submit_sync_cmd(raw_nvmeq(dev, 0), cmd, result,
-                                                               ADMIN_TIMEOUT);
+       return nvme_submit_sync_cmd(dev, 0, cmd, result, ADMIN_TIMEOUT);
+}
+
+int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
+                                                               u32 *result)
+{
+       return nvme_submit_sync_cmd(dev, smp_processor_id() + 1, cmd, result,
+                                                       NVME_IO_TIMEOUT);
 }
 
 static int nvme_submit_admin_cmd_async(struct nvme_dev *dev,
@@ -1434,7 +1474,6 @@ void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
        struct nvme_dev *dev = ns->dev;
-       struct nvme_queue *nvmeq;
        struct nvme_user_io io;
        struct nvme_command c;
        unsigned length, meta_len;
@@ -1510,20 +1549,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
        length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL);
 
-       nvmeq = get_nvmeq(dev);
-       /*
-        * Since nvme_submit_sync_cmd sleeps, we can't keep preemption
-        * disabled.  We may be preempted at any point, and be rescheduled
-        * to a different CPU.  That will cause cacheline bouncing, but no
-        * additional races since q_lock already protects against other CPUs.
-        */
-       put_nvmeq(nvmeq);
        if (length != (io.nblocks + 1) << ns->lba_shift)
                status = -ENOMEM;
-       else if (!nvmeq || nvmeq->q_suspended)
-               status = -EBUSY;
        else
-               status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
+               status = nvme_submit_io_cmd(dev, &c, NULL);
 
        if (meta_len) {
                if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
@@ -1597,8 +1626,7 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev,
        if (length != cmd.data_len)
                status = -ENOMEM;
        else
-               status = nvme_submit_sync_cmd(raw_nvmeq(dev, 0), &c,
-                                                       &cmd.result, timeout);
+               status = nvme_submit_sync_cmd(dev, 0, &c, &cmd.result, timeout);
 
        if (cmd.data_len) {
                nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 4a0ceb64e26924b0b777d19d1580850b1e12901d..e157e85bb5d79975b0e3c8eba4921bf232864ba8 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -2033,7 +2033,6 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        int res = SNTI_TRANSLATION_SUCCESS;
        int nvme_sc;
        struct nvme_dev *dev = ns->dev;
-       struct nvme_queue *nvmeq;
        u32 num_cmds;
        struct nvme_iod *iod;
        u64 unit_len;
@@ -2106,18 +2105,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
                nvme_offset += unit_num_blocks;
 
-               nvmeq = get_nvmeq(dev);
-               /*
-                * Since nvme_submit_sync_cmd sleeps, we can't keep
-                * preemption disabled.  We may be preempted at any
-                * point, and be rescheduled to a different CPU.  That
-                * will cause cacheline bouncing, but no additional
-                * races since q_lock already protects against other
-                * CPUs.
-                */
-               put_nvmeq(nvmeq);
-               nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL,
-                                               NVME_IO_TIMEOUT);
+               nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
                if (nvme_sc != NVME_SC_SUCCESS) {
                        nvme_unmap_user_pages(dev,
                                (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@ -2644,7 +2632,6 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
        int res = SNTI_TRANSLATION_SUCCESS;
        int nvme_sc;
-       struct nvme_queue *nvmeq;
        struct nvme_command c;
        u8 immed, pcmod, pc, no_flush, start;
 
@@ -2671,10 +2658,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
                        c.common.opcode = nvme_cmd_flush;
                        c.common.nsid = cpu_to_le32(ns->ns_id);
 
-                       nvmeq = get_nvmeq(ns->dev);
-                       put_nvmeq(nvmeq);
-                       nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
-
+                       nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
                        res = nvme_trans_status_code(hdr, nvme_sc);
                        if (res)
                                goto out;
@@ -2697,15 +2681,12 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
        int res = SNTI_TRANSLATION_SUCCESS;
        int nvme_sc;
        struct nvme_command c;
-       struct nvme_queue *nvmeq;
 
        memset(&c, 0, sizeof(c));
        c.common.opcode = nvme_cmd_flush;
        c.common.nsid = cpu_to_le32(ns->ns_id);
 
-       nvmeq = get_nvmeq(ns->dev);
-       put_nvmeq(nvmeq);
-       nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
+       nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
 
        res = nvme_trans_status_code(hdr, nvme_sc);
        if (res)
@@ -2872,7 +2853,6 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        struct nvme_dev *dev = ns->dev;
        struct scsi_unmap_parm_list *plist;
        struct nvme_dsm_range *range;
-       struct nvme_queue *nvmeq;
        struct nvme_command c;
        int i, nvme_sc, res = -ENOMEM;
        u16 ndesc, list_len;
@@ -2914,10 +2894,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        c.dsm.nr = cpu_to_le32(ndesc - 1);
        c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
-       nvmeq = get_nvmeq(dev);
-       put_nvmeq(nvmeq);
-
-       nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
+       nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
        res = nvme_trans_status_code(hdr, nvme_sc);
 
        dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 98d367b06f9c1deddce769fa93de7fcc1e58d729..7c3f85bc10f19d9c5a3db0e232cf32384a0507b3 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -151,10 +151,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
                                unsigned long addr, unsigned length);
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
                        struct nvme_iod *iod);
-struct nvme_queue *get_nvmeq(struct nvme_dev *dev);
-void put_nvmeq(struct nvme_queue *nvmeq);
-int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
-                                               u32 *result, unsigned timeout);
+int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
 int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
 int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
                                                        u32 *result);