iod->npages = -1;
iod->length = nbytes;
iod->nents = 0;
+ iod->start_time = jiffies;
}
return iod;
kfree(iod);
}
+static void nvme_start_io_acct(struct bio *bio)
+{
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+ const int rw = bio_data_dir(bio);
+ int cpu = part_stat_lock();
+ part_round_stats(cpu, &disk->part0);
+ part_stat_inc(cpu, &disk->part0, ios[rw]);
+ part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
+ part_inc_in_flight(&disk->part0, rw);
+ part_stat_unlock();
+}
+
+static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
+{
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+ const int rw = bio_data_dir(bio);
+ unsigned long duration = jiffies - start_time;
+ int cpu = part_stat_lock();
+ part_stat_add(cpu, &disk->part0, ticks[rw], duration);
+ part_round_stats(cpu, &disk->part0);
+ part_dec_in_flight(&disk->part0, rw);
+ part_stat_unlock();
+}
+
static void bio_completion(struct nvme_dev *dev, void *ctx,
struct nvme_completion *cqe)
{
if (iod->nents)
dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+
+ nvme_end_io_acct(bio, iod->start_time);
nvme_free_iod(dev, iod);
if (status)
bio_endio(bio, -EIO);
struct nvme_command *cmnd;
struct nvme_iod *iod;
enum dma_data_direction dma_dir;
- int cmdid, length, result = -ENOMEM;
+ int cmdid, length, result;
u16 control;
u32 dsmgmt;
int psegs = bio_phys_segments(ns->queue, bio);
return result;
}
+ result = -ENOMEM;
iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC);
if (!iod)
goto nomem;
cmnd->rw.control = cpu_to_le16(control);
cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+ nvme_start_io_acct(bio);
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
writel(nvmeq->sq_tail, nvmeq->q_db);
if (timeout && !time_after(now, info[cmdid].timeout))
continue;
+ if (info[cmdid].ctx == CMD_CTX_CANCELLED)
+ continue;
dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid);
ctx = cancel_cmdid(nvmeq, cmdid, &fn);
fn(nvmeq->dev, ctx, &cqe);
if (addr & 3)
return ERR_PTR(-EINVAL);
- if (!length)
+ if (!length || length > INT_MAX - PAGE_SIZE)
return ERR_PTR(-EINVAL);
offset = offset_in_page(addr);
sg_init_table(sg, count);
for (i = 0; i < count; i++) {
sg_set_page(&sg[i], pages[i],
- min_t(int, length, PAGE_SIZE - offset), offset);
+ min_t(unsigned, length, PAGE_SIZE - offset),
+ offset);
length -= (PAGE_SIZE - offset);
offset = 0;
}
nvme_free_iod(dev, iod);
}
- if (!status && copy_to_user(&ucmd->result, &cmd.result,
+ if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result,
sizeof(cmd.result)))
status = -EFAULT;
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
- int result, cpu, i, nr_io_queues, db_bar_size, q_depth;
+ struct pci_dev *pdev = dev->pci_dev;
+ int result, cpu, i, vecs, nr_io_queues, db_bar_size, q_depth;
nr_io_queues = num_online_cpus();
result = set_queue_count(dev, nr_io_queues);
db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
if (db_bar_size > 8192) {
iounmap(dev->bar);
- dev->bar = ioremap(pci_resource_start(dev->pci_dev, 0),
- db_bar_size);
+ dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size);
dev->dbs = ((void __iomem *)dev->bar) + 4096;
dev->queues[0]->q_db = dev->dbs;
}
- for (i = 0; i < nr_io_queues; i++)
+ vecs = nr_io_queues;
+ for (i = 0; i < vecs; i++)
dev->entry[i].entry = i;
for (;;) {
- result = pci_enable_msix(dev->pci_dev, dev->entry,
- nr_io_queues);
- if (result == 0) {
- break;
- } else if (result > 0) {
- nr_io_queues = result;
- continue;
- } else {
- nr_io_queues = 1;
+ result = pci_enable_msix(pdev, dev->entry, vecs);
+ if (result <= 0)
break;
+ vecs = result;
+ }
+
+ if (result < 0) {
+ vecs = nr_io_queues;
+ if (vecs > 32)
+ vecs = 32;
+ for (;;) {
+ result = pci_enable_msi_block(pdev, vecs);
+ if (result == 0) {
+ for (i = 0; i < vecs; i++)
+ dev->entry[i].vector = i + pdev->irq;
+ break;
+ } else if (result < 0) {
+ vecs = 1;
+ break;
+ }
+ vecs = result;
}
}
+ /*
+ * Should investigate if there's a performance win from allocating
+ * more queues than interrupt vectors; it might allow the submission
+ * path to scale better, even if the receive path is limited by the
+ * number of interrupts.
+ */
+ nr_io_queues = vecs;
+
result = queue_request_irq(dev, dev->queues[0], "nvme admin");
/* XXX: handle failure here */
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
nvme_dev_remove(dev);
- pci_disable_msix(dev->pci_dev);
+ if (dev->pci_dev->msi_enabled)
+ pci_disable_msi(dev->pci_dev);
+ else if (dev->pci_dev->msix_enabled)
+ pci_disable_msix(dev->pci_dev);
iounmap(dev->bar);
nvme_release_instance(dev);
nvme_release_prp_pools(dev);
INIT_LIST_HEAD(&dev->namespaces);
dev->pci_dev = pdev;
pci_set_drvdata(pdev, dev);
- dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
- dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+
+ if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
+ dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+ else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
+ dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+ else
+ goto disable;
+
result = nvme_set_instance(dev);
if (result)
goto disable;
unmap:
iounmap(dev->bar);
disable_msix:
- pci_disable_msix(pdev);
+ if (dev->pci_dev->msi_enabled)
+ pci_disable_msi(dev->pci_dev);
+ else if (dev->pci_dev->msix_enabled)
+ pci_disable_msix(dev->pci_dev);
nvme_release_instance(dev);
nvme_release_prp_pools(dev);
disable: