static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
+static bool use_cmb_sqes = true;
+module_param(use_cmb_sqes, bool, 0644);
+MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
+
static DEFINE_SPINLOCK(dev_list_lock);
static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
static struct class *nvme_class;
-static void nvme_reset_failed_dev(struct work_struct *ws);
+static int __nvme_reset(struct nvme_dev *dev);
static int nvme_reset(struct nvme_dev *dev);
static int nvme_process_cq(struct nvme_queue *nvmeq);
+static void nvme_dead_ctrl(struct nvme_dev *dev);
struct async_cmd_info {
struct kthread_work work;
char irqname[24]; /* nvme4294967295-65535\0 */
spinlock_t q_lock;
struct nvme_command *sq_cmds;
+ struct nvme_command __iomem *sq_cmds_io;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
*
* Safe to use from interrupt context
*/
-static int __nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
+ struct nvme_command *cmd)
{
u16 tail = nvmeq->sq_tail;
- memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+ if (nvmeq->sq_cmds_io)
+ memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd));
+ else
+ memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+
if (++tail == nvmeq->q_depth)
tail = 0;
writel(tail, nvmeq->q_db);
nvmeq->sq_tail = tail;
-
- return 0;
}
-static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
unsigned long flags;
- int ret;
spin_lock_irqsave(&nvmeq->q_lock, flags);
- ret = __nvme_submit_cmd(nvmeq, cmd);
+ __nvme_submit_cmd(nvmeq, cmd);
spin_unlock_irqrestore(&nvmeq->q_lock, flags);
- return ret;
}
static __le64 **iod_list(struct nvme_iod *iod)
spin_unlock_irqrestore(req->q->queue_lock, flags);
return;
}
+
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
if (cmd_rq->ctx == CMD_CTX_CANCELLED)
- req->errors = -EINTR;
- else
- req->errors = status;
+ status = -EINTR;
} else {
- req->errors = nvme_error_status(status);
+ status = nvme_error_status(status);
}
- } else
- req->errors = 0;
+ }
+
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
u32 result = le32_to_cpup(&cqe->result);
req->special = (void *)(uintptr_t)result;
}
nvme_free_iod(nvmeq->dev, iod);
- blk_mq_complete_request(req);
+ blk_mq_complete_request(req, status);
}
/* length is in bytes. gfp flags indicates whether we may sleep. */
static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
struct nvme_iod *iod)
{
- struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+ struct nvme_command cmnd;
- memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
- cmnd->rw.command_id = req->tag;
+ memcpy(&cmnd, req->cmd, sizeof(cmnd));
+ cmnd.rw.command_id = req->tag;
if (req->nr_phys_segments) {
- cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+ cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+ cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
}
- if (++nvmeq->sq_tail == nvmeq->q_depth)
- nvmeq->sq_tail = 0;
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ __nvme_submit_cmd(nvmeq, &cmnd);
}
/*
{
struct nvme_dsm_range *range =
(struct nvme_dsm_range *)iod_list(iod)[0];
- struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+ struct nvme_command cmnd;
range->cattr = cpu_to_le32(0);
range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
- memset(cmnd, 0, sizeof(*cmnd));
- cmnd->dsm.opcode = nvme_cmd_dsm;
- cmnd->dsm.command_id = req->tag;
- cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
- cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
- cmnd->dsm.nr = 0;
- cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+ memset(&cmnd, 0, sizeof(cmnd));
+ cmnd.dsm.opcode = nvme_cmd_dsm;
+ cmnd.dsm.command_id = req->tag;
+ cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
+ cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma);
+ cmnd.dsm.nr = 0;
+ cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
- if (++nvmeq->sq_tail == nvmeq->q_depth)
- nvmeq->sq_tail = 0;
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ __nvme_submit_cmd(nvmeq, &cmnd);
}
static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
int cmdid)
{
- struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+ struct nvme_command cmnd;
- memset(cmnd, 0, sizeof(*cmnd));
- cmnd->common.opcode = nvme_cmd_flush;
- cmnd->common.command_id = cmdid;
- cmnd->common.nsid = cpu_to_le32(ns->ns_id);
+ memset(&cmnd, 0, sizeof(cmnd));
+ cmnd.common.opcode = nvme_cmd_flush;
+ cmnd.common.command_id = cmdid;
+ cmnd.common.nsid = cpu_to_le32(ns->ns_id);
- if (++nvmeq->sq_tail == nvmeq->q_depth)
- nvmeq->sq_tail = 0;
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ __nvme_submit_cmd(nvmeq, &cmnd);
}
static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
struct nvme_ns *ns)
{
struct request *req = iod_get_private(iod);
- struct nvme_command *cmnd;
+ struct nvme_command cmnd;
u16 control = 0;
u32 dsmgmt = 0;
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
- cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
- memset(cmnd, 0, sizeof(*cmnd));
+ memset(&cmnd, 0, sizeof(cmnd));
+ cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+ cmnd.rw.command_id = req->tag;
+ cmnd.rw.nsid = cpu_to_le32(ns->ns_id);
+ cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+ cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
+ cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
- cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
- cmnd->rw.command_id = req->tag;
- cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
- cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
- cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
- cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-
- if (blk_integrity_rq(req)) {
- cmnd->rw.metadata = cpu_to_le64(sg_dma_address(iod->meta_sg));
+ if (ns->ms) {
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
control |= NVME_RW_PRINFO_PRCHK_GUARD;
case NVME_NS_DPS_PI_TYPE2:
control |= NVME_RW_PRINFO_PRCHK_GUARD |
NVME_RW_PRINFO_PRCHK_REF;
- cmnd->rw.reftag = cpu_to_le32(
+ cmnd.rw.reftag = cpu_to_le32(
nvme_block_nr(ns, blk_rq_pos(req)));
break;
}
- } else if (ns->ms)
- control |= NVME_RW_PRINFO_PRACT;
+ if (blk_integrity_rq(req))
+ cmnd.rw.metadata =
+ cpu_to_le64(sg_dma_address(iod->meta_sg));
+ else
+ control |= NVME_RW_PRINFO_PRACT;
+ }
- cmnd->rw.control = cpu_to_le16(control);
- cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+ cmnd.rw.control = cpu_to_le16(control);
+ cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt);
- if (++nvmeq->sq_tail == nvmeq->q_depth)
- nvmeq->sq_tail = 0;
- writel(nvmeq->sq_tail, nvmeq->q_db);
+ __nvme_submit_cmd(nvmeq, &cmnd);
return 0;
}
if (ns && ns->ms && !blk_integrity_rq(req)) {
if (!(ns->pi_type && ns->ms == 8) &&
req->cmd_type != REQ_TYPE_DRV_PRIV) {
- req->errors = -EFAULT;
- blk_mq_complete_request(req);
+ blk_mq_complete_request(req, -EFAULT);
return BLK_MQ_RQ_QUEUE_OK;
}
}
c.common.command_id = req->tag;
blk_mq_free_request(req);
- return __nvme_submit_cmd(nvmeq, &c);
+ __nvme_submit_cmd(nvmeq, &c);
+ return 0;
}
static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
cmd->common.command_id = req->tag;
- return nvme_submit_cmd(nvmeq, cmd);
+ nvme_submit_cmd(nvmeq, cmd);
+ return 0;
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
struct nvme_command cmd;
if (!nvmeq->qid || cmd_rq->aborted) {
- unsigned long flags;
-
- spin_lock_irqsave(&dev_list_lock, flags);
- if (work_busy(&dev->reset_work))
- goto out;
- list_del_init(&dev->node);
- dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n",
- req->tag, nvmeq->qid);
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
- out:
- spin_unlock_irqrestore(&dev_list_lock, flags);
+ spin_lock(&dev_list_lock);
+ if (!__nvme_reset(dev)) {
+ dev_warn(dev->dev,
+ "I/O %d QID %d timeout, reset controller\n",
+ req->tag, nvmeq->qid);
+ }
+ spin_unlock(&dev_list_lock);
return;
}
dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
nvmeq->qid);
- if (nvme_submit_cmd(dev->queues[0], &cmd) < 0) {
- dev_warn(nvmeq->q_dmadev,
- "Could not abort I/O %d QID %d",
- req->tag, nvmeq->qid);
- blk_mq_free_request(abort_req);
- }
+ nvme_submit_cmd(dev->queues[0], &cmd);
}
static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
{
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+ if (nvmeq->sq_cmds)
+ dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
kfree(nvmeq);
}
spin_unlock_irq(&nvmeq->q_lock);
}
+static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
+ int entry_size)
+{
+ int q_depth = dev->q_depth;
+ unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size);
+
+ if (q_size_aligned * nr_io_queues > dev->cmb_size) {
+ u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
+ mem_per_q = round_down(mem_per_q, dev->page_size);
+ q_depth = div_u64(mem_per_q, entry_size);
+
+ /*
+ * Ensure the reduced q_depth is above some threshold where it
+ * would be better to map queues in system memory with the
+ * original depth
+ */
+ if (q_depth < 64)
+ return -ENOMEM;
+ }
+
+ return q_depth;
+}
+
+static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
+ int qid, int depth)
+{
+ if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
+ unsigned offset = (qid - 1) *
+ roundup(SQ_SIZE(depth), dev->page_size);
+ nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+ nvmeq->sq_cmds_io = dev->cmb + offset;
+ } else {
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ if (!nvmeq->sq_cmds)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth)
{
if (!nvmeq->cqes)
goto free_nvmeq;
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
- if (!nvmeq->sq_cmds)
+ if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
goto free_cqdma;
nvmeq->q_dmadev = dev->dev;
page_shift = dev_page_max;
}
+ dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
+ NVME_CAP_NSSRC(cap) : 0;
+
+ if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO))
+ writel(NVME_CSTS_NSSRO, &dev->bar->csts);
+
result = nvme_disable_ctrl(dev, cap);
if (result < 0)
return result;
return status;
}
+static int nvme_subsys_reset(struct nvme_dev *dev)
+{
+ if (!dev->subsystem)
+ return -ENOTTY;
+
+ writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */
+ return 0;
+}
+
static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
unsigned long arg)
{
#define nvme_compat_ioctl NULL
#endif
+static void nvme_free_dev(struct kref *kref);
+static void nvme_free_ns(struct kref *kref)
+{
+ struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+
+ spin_lock(&dev_list_lock);
+ ns->disk->private_data = NULL;
+ spin_unlock(&dev_list_lock);
+
+ kref_put(&ns->dev->kref, nvme_free_dev);
+ put_disk(ns->disk);
+ kfree(ns);
+}
+
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
int ret = 0;
ns = bdev->bd_disk->private_data;
if (!ns)
ret = -ENXIO;
- else if (!kref_get_unless_zero(&ns->dev->kref))
+ else if (!kref_get_unless_zero(&ns->kref))
ret = -ENXIO;
spin_unlock(&dev_list_lock);
return ret;
}
-static void nvme_free_dev(struct kref *kref);
-
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
struct nvme_ns *ns = disk->private_data;
- struct nvme_dev *dev = ns->dev;
-
- kref_put(&dev->kref, nvme_free_dev);
+ kref_put(&ns->kref, nvme_free_ns);
}
static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
ns->queue->limits.discard_zeroes_data = 0;
ns->queue->limits.discard_alignment = logical_block_size;
ns->queue->limits.discard_granularity = logical_block_size;
- ns->queue->limits.max_discard_sectors = 0xffffffff;
+ blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
}
!ns->ext)
nvme_init_integrity(ns);
- if (ns->ms && !blk_get_integrity(disk))
+ if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
set_capacity(disk, 0);
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
spin_lock(&dev_list_lock);
list_for_each_entry_safe(dev, next, &dev_list, node) {
int i;
- if (readl(&dev->bar->csts) & NVME_CSTS_CFS) {
- if (work_busy(&dev->reset_work))
- continue;
- list_del_init(&dev->node);
- dev_warn(dev->dev,
- "Failed status: %x, reset controller\n",
- readl(&dev->bar->csts));
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
+ u32 csts = readl(&dev->bar->csts);
+
+ if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
+ csts & NVME_CSTS_CFS) {
+ if (!__nvme_reset(dev)) {
+ dev_warn(dev->dev,
+ "Failed status: %x, reset controller\n",
+ readl(&dev->bar->csts));
+ }
continue;
}
for (i = 0; i < dev->queue_count; i++) {
goto out_free_ns;
queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
- queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, ns->queue);
ns->dev = dev;
ns->queue->queuedata = ns;
if (!disk)
goto out_free_queue;
+ kref_init(&ns->kref);
ns->ns_id = nsid;
ns->disk = disk;
ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
list_add_tail(&ns->list, &dev->namespaces);
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
- if (dev->max_hw_sectors)
+ if (dev->max_hw_sectors) {
blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+ blk_queue_max_segments(ns->queue,
+ ((dev->max_hw_sectors << 9) / dev->page_size) + 1);
+ }
if (dev->stripe_size)
blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
if (dev->vwc & NVME_CTRL_VWC_PRESENT)
blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
+ blk_queue_virt_boundary(ns->queue, dev->page_size - 1);
disk->major = nvme_major;
disk->first_minor = 0;
if (nvme_revalidate_disk(ns->disk))
goto out_free_disk;
+ kref_get(&dev->kref);
add_disk(ns->disk);
if (ns->ms) {
struct block_device *bd = bdget_disk(ns->disk, 0);
kfree(ns);
}
+/*
+ * Create I/O queues. Failing to create an I/O queue is not an issue,
+ * we can continue with less than the desired amount of queues, and
+ * even a controller without I/O queues an still be used to issue
+ * admin commands. This might be useful to upgrade a buggy firmware
+ * for example.
+ */
static void nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i;
break;
for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
- if (nvme_create_queue(dev->queues[i], i))
+ if (nvme_create_queue(dev->queues[i], i)) {
+ nvme_free_queues(dev, i);
break;
+ }
}
static int set_queue_count(struct nvme_dev *dev, int count)
return min(result & 0xffff, result >> 16) + 1;
}
+static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
+{
+ u64 szu, size, offset;
+ u32 cmbloc;
+ resource_size_t bar_size;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ void __iomem *cmb;
+ dma_addr_t dma_addr;
+
+ if (!use_cmb_sqes)
+ return NULL;
+
+ dev->cmbsz = readl(&dev->bar->cmbsz);
+ if (!(NVME_CMB_SZ(dev->cmbsz)))
+ return NULL;
+
+ cmbloc = readl(&dev->bar->cmbloc);
+
+ szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
+ size = szu * NVME_CMB_SZ(dev->cmbsz);
+ offset = szu * NVME_CMB_OFST(cmbloc);
+ bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
+
+ if (offset > bar_size)
+ return NULL;
+
+ /*
+ * Controllers may support a CMB size larger than their BAR,
+ * for example, due to being behind a bridge. Reduce the CMB to
+ * the reported size of the BAR
+ */
+ if (size > bar_size - offset)
+ size = bar_size - offset;
+
+ dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
+ cmb = ioremap_wc(dma_addr, size);
+ if (!cmb)
+ return NULL;
+
+ dev->cmb_dma_addr = dma_addr;
+ dev->cmb_size = size;
+ return cmb;
+}
+
+static inline void nvme_release_cmb(struct nvme_dev *dev)
+{
+ if (dev->cmb) {
+ iounmap(dev->cmb);
+ dev->cmb = NULL;
+ }
+}
+
static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
if (result < nr_io_queues)
nr_io_queues = result;
+ if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
+ result = nvme_cmb_qdepth(dev, nr_io_queues,
+ sizeof(struct nvme_command));
+ if (result > 0)
+ dev->q_depth = result;
+ else
+ nvme_release_cmb(dev);
+ }
+
size = db_bar_size(dev, nr_io_queues);
if (size > 8192) {
iounmap(dev->bar);
return result;
}
-static void nvme_free_namespace(struct nvme_ns *ns)
-{
- list_del(&ns->list);
-
- spin_lock(&dev_list_lock);
- ns->disk->private_data = NULL;
- spin_unlock(&dev_list_lock);
-
- put_disk(ns->disk);
- kfree(ns);
-}
-
static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
if (kill || !blk_queue_dying(ns->queue)) {
blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue);
- }
+ }
+ list_del_init(&ns->list);
+ kref_put(&ns->kref, nvme_free_ns);
}
static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
for (i = 1; i <= nn; i++) {
ns = nvme_find_ns(dev, i);
if (ns) {
- if (revalidate_disk(ns->disk)) {
+ if (revalidate_disk(ns->disk))
nvme_ns_remove(ns);
- nvme_free_namespace(ns);
- }
} else
nvme_alloc_ns(dev, i);
}
list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
- if (ns->ns_id > nn) {
+ if (ns->ns_id > nn)
nvme_ns_remove(ns);
- nvme_free_namespace(ns);
- }
}
list_sort(NULL, &dev->namespaces, ns_cmp);
}
+static void nvme_set_irq_hints(struct nvme_dev *dev)
+{
+ struct nvme_queue *nvmeq;
+ int i;
+
+ for (i = 0; i < dev->online_queues; i++) {
+ nvmeq = dev->queues[i];
+
+ if (!nvmeq->tags || !(*nvmeq->tags))
+ continue;
+
+ irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
+ blk_mq_tags_cpumask(*nvmeq->tags));
+ }
+}
+
static void nvme_dev_scan(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
return;
nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
kfree(ctrl);
+ nvme_set_irq_hints(dev);
}
/*
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
int res;
- unsigned nn;
struct nvme_id_ctrl *ctrl;
int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
return -EIO;
}
- nn = le32_to_cpup(&ctrl->nn);
dev->oncs = le16_to_cpup(&ctrl->oncs);
dev->abort_limit = ctrl->acl + 1;
dev->vwc = ctrl->vwc;
dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
dev->dbs = ((void __iomem *)dev->bar) + 4096;
+ if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+ dev->cmb = nvme_map_cmb(dev);
return 0;
static void nvme_dev_remove(struct nvme_dev *dev)
{
- struct nvme_ns *ns;
+ struct nvme_ns *ns, *next;
- list_for_each_entry(ns, &dev->namespaces, list)
+ list_for_each_entry_safe(ns, next, &dev->namespaces, list)
nvme_ns_remove(ns);
}
spin_unlock(&dev_list_lock);
}
-static void nvme_free_namespaces(struct nvme_dev *dev)
-{
- struct nvme_ns *ns, *next;
-
- list_for_each_entry_safe(ns, next, &dev->namespaces, list)
- nvme_free_namespace(ns);
-}
-
static void nvme_free_dev(struct kref *kref)
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
put_device(dev->dev);
put_device(dev->device);
- nvme_free_namespaces(dev);
nvme_release_instance(dev);
if (dev->tagset.tags)
blk_mq_free_tag_set(&dev->tagset);
case NVME_IOCTL_RESET:
dev_warn(dev->dev, "resetting controller\n");
return nvme_reset(dev);
+ case NVME_IOCTL_SUBSYS_RESET:
+ return nvme_subsys_reset(dev);
default:
return -ENOTTY;
}
.compat_ioctl = nvme_dev_ioctl,
};
-static void nvme_set_irq_hints(struct nvme_dev *dev)
-{
- struct nvme_queue *nvmeq;
- int i;
-
- for (i = 0; i < dev->online_queues; i++) {
- nvmeq = dev->queues[i];
-
- if (!nvmeq->tags || !(*nvmeq->tags))
- continue;
-
- irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- blk_mq_tags_cpumask(*nvmeq->tags));
- }
-}
-
-static int nvme_dev_start(struct nvme_dev *dev)
+static void nvme_probe_work(struct work_struct *work)
{
- int result;
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
bool start_thread = false;
+ int result;
result = nvme_dev_map(dev);
if (result)
- return result;
+ goto out;
result = nvme_configure_admin_queue(dev);
if (result)
if (result)
goto free_tags;
- nvme_set_irq_hints(dev);
-
dev->event_limit = 1;
- return result;
+
+ /*
+ * Keep the controller around but remove all namespaces if we don't have
+ * any working I/O queue.
+ */
+ if (dev->online_queues < 2) {
+ dev_warn(dev->dev, "IO queues not created\n");
+ nvme_dev_remove(dev);
+ } else {
+ nvme_unfreeze_queues(dev);
+ nvme_dev_add(dev);
+ }
+
+ return;
free_tags:
nvme_dev_remove_admin(dev);
nvme_dev_list_remove(dev);
unmap:
nvme_dev_unmap(dev);
- return result;
+ out:
+ if (!work_busy(&dev->reset_work))
+ nvme_dead_ctrl(dev);
}
static int nvme_remove_dead_ctrl(void *arg)
return 0;
}
-static void nvme_remove_disks(struct work_struct *ws)
-{
- struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
-
- nvme_free_queues(dev, 1);
- nvme_dev_remove(dev);
-}
-
-static int nvme_dev_resume(struct nvme_dev *dev)
-{
- int ret;
-
- ret = nvme_dev_start(dev);
- if (ret)
- return ret;
- if (dev->online_queues < 2) {
- spin_lock(&dev_list_lock);
- dev->reset_workfn = nvme_remove_disks;
- queue_work(nvme_workq, &dev->reset_work);
- spin_unlock(&dev_list_lock);
- } else {
- nvme_unfreeze_queues(dev);
- nvme_dev_add(dev);
- nvme_set_irq_hints(dev);
- }
- return 0;
-}
-
static void nvme_dead_ctrl(struct nvme_dev *dev)
{
dev_warn(dev->dev, "Device failed to resume\n");
}
}
-static void nvme_dev_reset(struct nvme_dev *dev)
+static void nvme_reset_work(struct work_struct *ws)
{
+ struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
bool in_probe = work_busy(&dev->probe_work);
nvme_dev_shutdown(dev);
schedule_work(&dev->probe_work);
}
-static void nvme_reset_failed_dev(struct work_struct *ws)
-{
- struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
- nvme_dev_reset(dev);
-}
-
-static void nvme_reset_workfn(struct work_struct *work)
+static int __nvme_reset(struct nvme_dev *dev)
{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
- dev->reset_workfn(work);
+ if (work_pending(&dev->reset_work))
+ return -EBUSY;
+ list_del_init(&dev->node);
+ queue_work(nvme_workq, &dev->reset_work);
+ return 0;
}
static int nvme_reset(struct nvme_dev *dev)
{
- int ret = -EBUSY;
+ int ret;
if (!dev->admin_q || blk_queue_dying(dev->admin_q))
return -ENODEV;
spin_lock(&dev_list_lock);
- if (!work_pending(&dev->reset_work)) {
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
- ret = 0;
- }
+ ret = __nvme_reset(dev);
spin_unlock(&dev_list_lock);
if (!ret) {
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
-static void nvme_async_probe(struct work_struct *work);
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
goto free;
INIT_LIST_HEAD(&dev->namespaces);
- dev->reset_workfn = nvme_reset_failed_dev;
- INIT_WORK(&dev->reset_work, nvme_reset_workfn);
+ INIT_WORK(&dev->reset_work, nvme_reset_work);
dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
result = nvme_set_instance(dev);
INIT_LIST_HEAD(&dev->node);
INIT_WORK(&dev->scan_work, nvme_dev_scan);
- INIT_WORK(&dev->probe_work, nvme_async_probe);
+ INIT_WORK(&dev->probe_work, nvme_probe_work);
schedule_work(&dev->probe_work);
return 0;
return result;
}
-static void nvme_async_probe(struct work_struct *work)
-{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
-
- if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
- nvme_dead_ctrl(dev);
-}
-
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
if (prepare)
nvme_dev_shutdown(dev);
else
- nvme_dev_resume(dev);
+ schedule_work(&dev->probe_work);
}
static void nvme_shutdown(struct pci_dev *pdev)
nvme_dev_remove_admin(dev);
device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
+ nvme_release_cmb(dev);
nvme_release_prp_pools(dev);
kref_put(&dev->kref, nvme_free_dev);
}
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) {
- ndev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &ndev->reset_work);
- }
+ schedule_work(&ndev->probe_work);
return 0;
}
#endif