NVMe: default to 4k device page size
[linux-2.6-block.git] / drivers / nvme / host / pci.c
index 97b6640a3745922fe030bbf04525a4b6de6566d6..d9d6229e9f3f011b18b914c8ae6eb60bcacce266 100644 (file)
@@ -90,7 +90,7 @@ static struct class *nvme_class;
 
 static int __nvme_reset(struct nvme_dev *dev);
 static int nvme_reset(struct nvme_dev *dev);
-static int nvme_process_cq(struct nvme_queue *nvmeq);
+static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_dead_ctrl(struct nvme_dev *dev);
 
 struct async_cmd_info {
@@ -935,7 +935,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
        return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
-static int nvme_process_cq(struct nvme_queue *nvmeq)
+static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 {
        u16 head, phase;
 
@@ -953,6 +953,8 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
                        head = 0;
                        phase = !phase;
                }
+               if (tag && *tag == cqe.command_id)
+                       *tag = -1;
                ctx = nvme_finish_cmd(nvmeq, cqe.command_id, &fn);
                fn(nvmeq, ctx, &cqe);
        }
@@ -964,14 +966,19 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
         * a big problem.
         */
        if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
-               return 0;
+               return;
 
-       writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+       if (likely(nvmeq->cq_vector >= 0))
+               writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
        nvmeq->cq_head = head;
        nvmeq->cq_phase = phase;
 
        nvmeq->cqe_seen = 1;
-       return 1;
+}
+
+static void nvme_process_cq(struct nvme_queue *nvmeq)
+{
+       __nvme_process_cq(nvmeq, NULL);
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
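
The tag pointer threaded through __nvme_process_cq() is what lets a polling caller learn that its own command was reaped: the helper writes -1 through *tag when it sees a CQE whose command_id matches. New entries are told apart from stale ones by the CQ phase bit. A minimal sketch of that test, using this driver's struct nvme_queue fields (the helper name is illustrative, not part of the patch):

/*
 * A CQE is new when its phase bit matches the queue's current phase:
 * the controller flips the bit on each pass around the ring, so
 * entries left over from the previous pass fail the comparison.
 */
static inline bool nvme_cqe_is_new(struct nvme_queue *nvmeq, u16 head, u16 phase)
{
        return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
}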
@@ -995,6 +1002,23 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
        return IRQ_WAKE_THREAD;
 }
 
+static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+{
+       struct nvme_queue *nvmeq = hctx->driver_data;
+
+       if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
+           nvmeq->cq_phase) {
+               spin_lock_irq(&nvmeq->q_lock);
+               __nvme_process_cq(nvmeq, &tag);
+               spin_unlock_irq(&nvmeq->q_lock);
+
+               if (tag == -1)
+                       return 1;
+       }
+
+       return 0;
+}
+
 /*
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
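
nvme_poll() first peeks at the next CQE without taking any lock, and only grabs q_lock to reap completions when one is actually pending; it returns 1 once the caller's tag has completed. The caller side lives in blk-mq core once .poll is wired up in nvme_mq_ops below; roughly, and with illustrative names only, the spin looks like this:

/* Illustrative sketch of the blk-mq caller side, not the real loop. */
static int poll_for_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag)
{
        while (!need_resched()) {
                if (hctx->queue->mq_ops->poll(hctx, tag) > 0)
                        return 1;       /* our command completed */
                cpu_relax();
        }
        return 0;
}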
@@ -1656,6 +1680,7 @@ static struct blk_mq_ops nvme_mq_ops = {
        .init_hctx      = nvme_init_hctx,
        .init_request   = nvme_init_request,
        .timeout        = nvme_timeout,
+       .poll           = nvme_poll,
 };
 
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
@@ -1701,11 +1726,15 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
        int result;
        u32 aqa;
-       u64 cap = readq(&dev->bar->cap);
+       u64 cap = lo_hi_readq(&dev->bar->cap);
        struct nvme_queue *nvmeq;
-       unsigned page_shift = PAGE_SHIFT;
+       /*
+        * default to a 4K page size, with the intention to update this
+        * path in the future to accommodate architectures with differing
+        * kernel and IO page sizes.
+        */
+       unsigned page_shift = 12;
        unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
-       unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
 
        if (page_shift < dev_page_min) {
                dev_err(dev->dev,
@@ -1714,13 +1743,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
                                1 << page_shift);
                return -ENODEV;
        }
-       if (page_shift > dev_page_max) {
-               dev_info(dev->dev,
-                               "Device maximum page size (%u) smaller than "
-                               "host (%u); enabling work-around\n",
-                               1 << dev_page_max, 1 << page_shift);
-               page_shift = dev_page_max;
-       }
 
        dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
                                                NVME_CAP_NSSRC(cap) : 0;
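
CAP.MPSMIN encodes the controller's minimum supported page size as a power-of-two exponent relative to 4K, hence dev_page_min = MPSMIN + 12. With the host side now pinned to page_shift = 12, any controller reporting MPSMIN > 0 is rejected with -ENODEV. A worked decode (mask per the driver's NVME_CAP_MPSMIN definition):

/* CAP bits 51:48 hold MPSMIN; min page size = 2^(12 + MPSMIN) bytes. */
#define NVME_CAP_MPSMIN(cap)    (((cap) >> 48) & 0xf)

u64 cap = lo_hi_readq(&dev->bar->cap);
unsigned int dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
/* MPSMIN = 0 -> 4K page, accepted; MPSMIN = 1 -> 8K minimum, rejected. */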
@@ -1750,8 +1772,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
        writel(aqa, &dev->bar->aqa);
-       writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
-       writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
+       lo_hi_writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
+       lo_hi_writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
 
        result = nvme_enable_ctrl(dev, cap);
        if (result)
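
readq/writeq don't exist on 32-bit hosts, so the BAR accesses switch to the lo_hi helpers from linux/io-64-nonatomic-lo-hi.h: on 64-bit they are plain readq/writeq, and otherwise they fall back to two 32-bit MMIO accesses, low dword first. Roughly what the fallback expands to (sketched under local names):

static inline void sketch_lo_hi_writeq(u64 val, volatile void __iomem *addr)
{
        writel((u32)val, addr);            /* low dword first */
        writel((u32)(val >> 32), addr + 4);
}

static inline u64 sketch_lo_hi_readq(const volatile void __iomem *addr)
{
        u64 lo = readl(addr);
        u64 hi = readl(addr + 4);

        return lo | (hi << 32);
}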
@@ -2244,7 +2266,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
        if (dev->max_hw_sectors) {
                blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
                blk_queue_max_segments(ns->queue,
-                       ((dev->max_hw_sectors << 9) / dev->page_size) + 1);
+                       (dev->max_hw_sectors / (dev->page_size >> 9)) + 1);
        }
        if (dev->stripe_size)
                blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
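
Both expressions count how many device pages a maximal transfer can span (plus one for a misaligned start), but the rewrite divides before scaling so that a huge max_hw_sectors, such as the UINT_MAX default introduced below, no longer overflows the 32-bit shift. For example, with max_hw_sectors = 2048 (a 1 MiB transfer) and page_size = 4096:

        old: (2048 << 9) / 4096 + 1 = 1048576 / 4096 + 1 = 257
        new: 2048 / (4096 >> 9) + 1 = 2048 / 8 + 1       = 257

whereas UINT_MAX << 9 wraps in 32-bit arithmetic and would yield a bogus segment count.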
@@ -2582,7 +2604,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
        struct pci_dev *pdev = to_pci_dev(dev->dev);
        int res;
        struct nvme_id_ctrl *ctrl;
-       int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
+       int shift = NVME_CAP_MPSMIN(lo_hi_readq(&dev->bar->cap)) + 12;
 
        res = nvme_identify_ctrl(dev, &ctrl);
        if (res) {
@@ -2598,6 +2620,8 @@ static int nvme_dev_add(struct nvme_dev *dev)
        memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
        if (ctrl->mdts)
                dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
+       else
+               dev->max_hw_sectors = UINT_MAX;
        if ((pdev->vendor == PCI_VENDOR_ID_INTEL) &&
                        (pdev->device == 0x0953) && ctrl->vs[3]) {
                unsigned int max_hw_sectors;
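
MDTS expresses the maximum transfer size as a power-of-two multiple of the controller's minimum page size, so shift = MPSMIN + 12 converts it to a byte exponent and the further -9 converts bytes to 512-byte sectors. For instance, with mdts = 5 and MPSMIN = 0:

        max_hw_sectors = 1 << (5 + 12 - 9) = 1 << 8 = 256 sectors = 128 KiB

An mdts of 0 means the controller reports no transfer limit, which the driver now represents explicitly as UINT_MAX instead of leaving max_hw_sectors unset.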
@@ -2671,7 +2695,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
                        goto unmap;
        }
 
-       cap = readq(&dev->bar->cap);
+       cap = lo_hi_readq(&dev->bar->cap);
        dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
        dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
        dev->dbs = ((void __iomem *)dev->bar) + 4096;
@@ -2734,7 +2758,7 @@ static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
                         * queues than admin tags.
                         */
                        set_current_state(TASK_RUNNING);
-                       nvme_disable_ctrl(dev, readq(&dev->bar->cap));
+                       nvme_disable_ctrl(dev, lo_hi_readq(&dev->bar->cap));
                        nvme_clear_queue(dev->queues[0]);
                        flush_kthread_worker(dq->worker);
                        nvme_disable_queue(dev, 0);
@@ -2761,6 +2785,10 @@ static void nvme_del_queue_end(struct nvme_queue *nvmeq)
 {
        struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
        nvme_put_dq(dq);
+
+       spin_lock_irq(&nvmeq->q_lock);
+       nvme_process_cq(nvmeq);
+       spin_unlock_irq(&nvmeq->q_lock);
 }
 
 static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
@@ -3377,6 +3405,7 @@ static const struct pci_error_handlers nvme_err_handler = {
 
 static const struct pci_device_id nvme_id_table[] = {
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
+       { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
        { 0, }
 };
 MODULE_DEVICE_TABLE(pci, nvme_id_table);