Merge branch 'for-4.5/nvme' of git://git.kernel.dk/linux-block
[linux-2.6-block.git] / drivers / nvme / host / core.c
index 1b8498434b496a511e8d76e681490dcc2bf4016c..c5bf001af55954e2c9275d34c65cbb5dfa2a037d 100644 (file)
 
 #include "nvme.h"
 
+/* Number of minors covered by the "nvme" character device region. */
+#define NVME_MINORS            (1U << MINORBITS)
+
 static int nvme_major;
 module_param(nvme_major, int, 0);
 
+/*
+ * Major number of the controller character devices.  nvme_core_init()
+ * stores a positive __register_chrdev() return value here.
+ */
+static int nvme_char_major;
+module_param(nvme_char_major, int, 0);
+
+/* Controllers added by nvme_init_ctrl(); accessed under dev_list_lock. */
+static LIST_HEAD(nvme_ctrl_list);
 DEFINE_SPINLOCK(dev_list_lock);
 
+/* Device class created as "nvme" in nvme_core_init(). */
+static struct class *nvme_class;
+
 static void nvme_free_ns(struct kref *kref)
 {
        struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
@@ -70,6 +78,17 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
        return ns;
 }
 
+/*
+ * Requeue @req through blk_mq_requeue_request() and then, unless its queue
+ * is marked stopped, kick the queue's requeue list.  The stopped check and
+ * the kick are performed with the queue lock held and interrupts disabled.
+ */
+void nvme_requeue_req(struct request *req)
+{
+       struct request_queue *q = req->q;
+       unsigned long irq_flags;
+
+       blk_mq_requeue_request(req);
+
+       spin_lock_irqsave(q->queue_lock, irq_flags);
+       if (!blk_queue_stopped(q))
+               blk_mq_kick_requeue_list(q);
+       spin_unlock_irqrestore(q->queue_lock, irq_flags);
+}
+
 struct request *nvme_alloc_request(struct request_queue *q,
                struct nvme_command *cmd, unsigned int flags)
 {
@@ -182,8 +201,8 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                        }
 
                        bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
-                       if (!bip) {
-                               ret = -ENOMEM;
+                       if (IS_ERR(bip)) {
+                               ret = PTR_ERR(bip);
                                goto out_free_meta;
                        }
 
@@ -248,6 +267,16 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
        return error;
 }
 
+/*
+ * Send an Identify admin command with CNS set to 2 and @nsid as the
+ * namespace ID, reading 0x1000 bytes of response into @ns_list.
+ *
+ * Returns the result of nvme_submit_sync_cmd().
+ */
+static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
+{
+       struct nvme_command cmd = { };
+       int status;
+
+       cmd.identify.nsid = cpu_to_le32(nsid);
+       cmd.identify.cns = cpu_to_le32(2);
+       cmd.identify.opcode = nvme_admin_identify;
+
+       status = nvme_submit_sync_cmd(dev->admin_q, &cmd, ns_list, 0x1000);
+       return status;
+}
+
 int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
                struct nvme_id_ns **id)
 {
@@ -319,6 +348,22 @@ int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
        return error;
 }
 
+/*
+ * Ask the controller for *@count queues via the NVME_FEAT_NUM_QUEUES feature.
+ *
+ * The request carries (*@count - 1) in both 16-bit halves of the feature
+ * value.  On success the smaller of the two 16-bit halves of the result,
+ * plus one, is taken as the number granted and *@count is lowered to it if
+ * it is smaller than what was asked for.
+ *
+ * Returns 0 on success, otherwise the non-zero status reported by
+ * nvme_set_features() (in which case *@count is left untouched).
+ */
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
+{
+       int requested = *count - 1;
+       u32 encoded = requested | (requested << 16);
+       u32 result;
+       int err, granted;
+
+       err = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, encoded, 0,
+                       &result);
+       if (err)
+               return err;
+
+       granted = min(result & 0xffff, result >> 16) + 1;
+       if (granted < *count)
+               *count = granted;
+       return 0;
+}
+
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
        struct nvme_user_io io;
@@ -367,7 +412,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
                        metadata, meta_len, io.slba, NULL, 0);
 }
 
-int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                        struct nvme_passthru_cmd __user *ucmd)
 {
        struct nvme_passthru_cmd cmd;
@@ -397,7 +442,7 @@ int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                timeout = msecs_to_jiffies(cmd.timeout_ms);
 
        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-                       (void __user *)cmd.addr, cmd.data_len,
+                       (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
                        &cmd.result, timeout);
        if (status >= 0) {
                if (put_user(cmd.result, &ucmd->result))
@@ -422,10 +467,12 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
                return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, (void __user *)arg);
+#ifdef CONFIG_BLK_DEV_NVME_SCSI
        case SG_GET_VERSION_NUM:
                return nvme_sg_get_version_num((void __user *)arg);
        case SG_IO:
                return nvme_sg_io(ns, (void __user *)arg);
+#endif
        default:
                return -ENOTTY;
        }
@@ -529,6 +576,11 @@ static int nvme_revalidate_disk(struct gendisk *disk)
                ns->type = NVME_NS_LIGHTNVM;
        }
 
+       if (ns->ctrl->vs >= NVME_VS(1, 1))
+               memcpy(ns->eui, id->eui64, sizeof(ns->eui));
+       if (ns->ctrl->vs >= NVME_VS(1, 2))
+               memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));
+
        old_ms = ns->ms;
        lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
        ns->lba_shift = id->lbaf[lbaf].ds;
@@ -542,7 +594,6 @@ static int nvme_revalidate_disk(struct gendisk *disk)
        if (ns->lba_shift == 0)
                ns->lba_shift = 9;
        bs = 1 << ns->lba_shift;
-
        /* XXX: PI implementation requires metadata equal t10 pi tuple size */
        pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
                                        id->dps & NVME_NS_DPS_PI_MASK : 0;
@@ -557,9 +608,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
        ns->pi_type = pi_type;
        blk_queue_logical_block_size(ns->queue, bs);
 
-       if (ns->ms && !ns->ext)
+       if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
                nvme_init_integrity(ns);
-
        if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
                set_capacity(disk, 0);
        else
@@ -647,7 +697,7 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
 
+/*
+ * Persistent-reservation "clear" handler: builds cdw10 from @key and hands
+ * it to nvme_pr_command() together with the nvme_cmd_resv_register opcode.
+ */
 static int nvme_pr_clear(struct block_device *bdev, u64 key)
 {
-       u32 cdw10 = 1 | key ? 1 << 3 : 0;
+       /*
+        * The parentheses matter: "|" binds tighter than "?:", so the old
+        * expression was parsed as (1 | key) ? 1 << 3 : 0 and always
+        * evaluated to 1 << 3, losing bit 0.
+        *
+        * NOTE(review): confirm against the NVMe reservation command
+        * definitions that a clear is meant to use the register opcode and
+        * that bit 3 should be set when a key IS supplied.
+        */
+       u32 cdw10 = 1 | (key ? 1 << 3 : 0);
        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
 }
 
@@ -792,6 +842,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        u64 cap;
        int ret, page_shift;
 
+       ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
+       if (ret) {
+               dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+               return ret;
+       }
+
        ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
        if (ret) {
                dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
@@ -799,6 +855,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        }
        page_shift = NVME_CAP_MPSMIN(cap) + 12;
 
+       if (ctrl->vs >= NVME_VS(1, 1))
+               ctrl->subsystem = NVME_CAP_NSSRC(cap);
+
        ret = nvme_identify_ctrl(ctrl, &id);
        if (ret) {
                dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
@@ -806,7 +865,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        }
 
        ctrl->oncs = le16_to_cpup(&id->oncs);
-       ctrl->abort_limit = id->acl + 1;
+       atomic_set(&ctrl->abort_limit, id->acl + 1);
        ctrl->vwc = id->vwc;
        memcpy(ctrl->serial, id->sn, sizeof(id->sn));
        memcpy(ctrl->model, id->mn, sizeof(id->mn));
@@ -833,18 +892,197 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        return 0;
 }
 
+/*
+ * open() handler of the controller character device.
+ *
+ * Looks up the controller whose instance number equals the inode's minor
+ * while holding dev_list_lock.  On a match a controller reference is taken
+ * and the controller is stored in file->private_data.
+ *
+ * Returns 0 on success, -EWOULDBLOCK if the matching controller has no admin
+ * queue, and -ENODEV if no controller matches or its reference count has
+ * already dropped to zero.
+ */
-static void nvme_free_ctrl(struct kref *kref)
+static int nvme_dev_open(struct inode *inode, struct file *file)
 {
-       struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+       int minor = iminor(inode);
+       int err = -ENODEV;
+       struct nvme_ctrl *pos;
 
-       ctrl->ops->free_ctrl(ctrl);
+       spin_lock(&dev_list_lock);
+       list_for_each_entry(pos, &nvme_ctrl_list, node) {
+               if (pos->instance != minor)
+                       continue;
+
+               /* The first controller with this instance decides the result. */
+               if (!pos->admin_q) {
+                       err = -EWOULDBLOCK;
+               } else if (kref_get_unless_zero(&pos->kref)) {
+                       file->private_data = pos;
+                       err = 0;
+               }
+               break;
+       }
+       spin_unlock(&dev_list_lock);
+
+       return err;
 }
 
+/*
+ * release() handler of the controller character device: drops the
+ * controller reference held in file->private_data.  Always returns 0.
+ */
-void nvme_put_ctrl(struct nvme_ctrl *ctrl)
+static int nvme_dev_release(struct inode *inode, struct file *file)
 {
-       kref_put(&ctrl->kref, nvme_free_ctrl);
+       struct nvme_ctrl *ctrl = file->private_data;
+
+       nvme_put_ctrl(ctrl);
+       return 0;
 }
 
+/*
+ * Handle NVME_IOCTL_IO_CMD issued on the controller character device.
+ *
+ * The command is only forwarded when the controller has exactly one
+ * namespace; that namespace is pinned with a reference while the command
+ * runs outside of namespaces_mutex.
+ *
+ * Returns -ENOTTY if the controller has no namespaces, -EINVAL if it has
+ * more than one, otherwise the result of nvme_user_cmd().
+ */
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+{
+       struct nvme_ns *only_ns;
+       int status;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+
+       if (list_empty(&ctrl->namespaces)) {
+               mutex_unlock(&ctrl->namespaces_mutex);
+               return -ENOTTY;
+       }
+
+       only_ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+       if (list_last_entry(&ctrl->namespaces, struct nvme_ns, list) != only_ns) {
+               dev_warn(ctrl->dev,
+                       "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
+               mutex_unlock(&ctrl->namespaces_mutex);
+               return -EINVAL;
+       }
+
+       dev_warn(ctrl->dev,
+               "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
+       /* Keep the namespace alive once the mutex is dropped. */
+       kref_get(&only_ns->kref);
+       mutex_unlock(&ctrl->namespaces_mutex);
+
+       status = nvme_user_cmd(ctrl, only_ns, argp);
+       nvme_put_ns(only_ns);
+       return status;
+}
+
+/*
+ * ioctl handler of the controller character device.
+ *
+ * Dispatches admin passthrough, the single-namespace I/O passthrough,
+ * controller reset and subsystem reset.  Any other command yields -ENOTTY.
+ */
+static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
+               unsigned long arg)
+{
+       void __user *uarg = (void __user *)arg;
+       struct nvme_ctrl *ctrl = file->private_data;
+       long ret;
+
+       switch (cmd) {
+       case NVME_IOCTL_ADMIN_CMD:
+               ret = nvme_user_cmd(ctrl, NULL, uarg);
+               break;
+       case NVME_IOCTL_IO_CMD:
+               ret = nvme_dev_user_cmd(ctrl, uarg);
+               break;
+       case NVME_IOCTL_RESET:
+               dev_warn(ctrl->dev, "resetting controller\n");
+               ret = ctrl->ops->reset_ctrl(ctrl);
+               break;
+       case NVME_IOCTL_SUBSYS_RESET:
+               ret = nvme_reset_subsystem(ctrl);
+               break;
+       default:
+               ret = -ENOTTY;
+               break;
+       }
+
+       return ret;
+}
+
+/*
+ * File operations of the controller character device, registered with
+ * __register_chrdev() in nvme_core_init().  Native and compat ioctls share
+ * one handler.
+ */
+static const struct file_operations nvme_dev_fops = {
+       .owner = THIS_MODULE,
+       .unlocked_ioctl = nvme_dev_ioctl,
+       .compat_ioctl = nvme_dev_ioctl,
+       .open = nvme_dev_open,
+       .release = nvme_dev_release,
+};
+
+/*
+ * Store handler of the write-only "reset_controller" attribute: any write
+ * invokes the controller's reset_ctrl operation.  The written data itself is
+ * not examined.
+ *
+ * Returns @count on success or the negative value returned by reset_ctrl.
+ */
+static ssize_t nvme_sysfs_reset(struct device *dev,
+                               struct device_attribute *attr, const char *buf,
+                               size_t count)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       int err = ctrl->ops->reset_ctrl(ctrl);
+
+       return err < 0 ? err : (ssize_t)count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+/*
+ * Show handler of the read-only "uuid" namespace attribute: prints ns->uuid
+ * with the %pU specifier followed by a newline.
+ */
+static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+               char *buf)
+{
+       struct gendisk *disk = dev_to_disk(dev);
+       struct nvme_ns *ns = disk->private_data;
+
+       return sprintf(buf, "%pU\n", ns->uuid);
+}
+static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
+
+/*
+ * Show handler of the read-only "eui" namespace attribute: hex-dumps the
+ * 8 bytes of ns->eui followed by a newline.
+ * NOTE(review): confirm which byte separator the trailing 'd' in "%8phd"
+ * selects in the kernel's pointer-format handling.
+ */
+static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
+               char *buf)
+{
+       struct gendisk *disk = dev_to_disk(dev);
+       struct nvme_ns *ns = disk->private_data;
+
+       return sprintf(buf, "%8phd\n", ns->eui);
+}
+static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
+
+/*
+ * Show handler of the read-only "nsid" namespace attribute: prints the
+ * namespace ID in decimal followed by a newline.
+ *
+ * Namespace IDs are handled as unsigned values throughout this file (for
+ * example nvme_find_ns() takes an unsigned nsid), so the value is printed
+ * with %u; %d would show IDs of 2^31 and above as negative numbers.
+ */
+static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
+                                                               char *buf)
+{
+       struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+       return sprintf(buf, "%u\n", ns->ns_id);
+}
+static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
+
+/* NULL-terminated list of the per-namespace sysfs attributes. */
+static struct attribute *nvme_ns_attrs[] = {
+       &dev_attr_uuid.attr,
+       &dev_attr_eui.attr,
+       &dev_attr_nsid.attr,
+       NULL,
+};
+
+/*
+ * Visibility callback of the namespace attribute group.
+ *
+ * The "uuid" and "eui" attributes are hidden (mode 0) when the corresponding
+ * identifier consists only of zero bytes; every other case returns the
+ * attribute's own mode.
+ */
+static umode_t nvme_attrs_are_visible(struct kobject *kobj,
+               struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+       bool hidden = false;
+
+       /* memchr_inv() returns NULL when every byte equals the given value. */
+       if (a == &dev_attr_uuid.attr)
+               hidden = !memchr_inv(ns->uuid, 0, sizeof(ns->uuid));
+       else if (a == &dev_attr_eui.attr)
+               hidden = !memchr_inv(ns->eui, 0, sizeof(ns->eui));
+
+       return hidden ? 0 : a->mode;
+}
+
+/*
+ * Attribute group for namespace disks; nvme_attrs_are_visible() decides
+ * per attribute whether it is exposed.
+ */
+static const struct attribute_group nvme_ns_attr_group = {
+       .attrs          = nvme_ns_attrs,
+       .is_visible     = nvme_attrs_are_visible,
+};
+
+/*
+ * Generate a read-only sysfs attribute named after a struct nvme_ctrl field.
+ * The show routine prints at most sizeof(ctrl->field) characters of the field
+ * (the "%.*s" precision bounds the read) followed by a newline.
+ */
+#define nvme_show_function(field)                                              \
+static ssize_t  field##_show(struct device *dev,                               \
+                           struct device_attribute *attr, char *buf)           \
+{                                                                              \
+        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);                         \
+        return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field);  \
+}                                                                              \
+static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
+
+nvme_show_function(model);
+nvme_show_function(serial);
+nvme_show_function(firmware_rev);
+
+/* NULL-terminated list of the controller device's sysfs attributes. */
+static struct attribute *nvme_dev_attrs[] = {
+       &dev_attr_reset_controller.attr,
+       &dev_attr_model.attr,
+       &dev_attr_serial.attr,
+       &dev_attr_firmware_rev.attr,
+       NULL
+};
+
+static struct attribute_group nvme_dev_attrs_group = {
+       .attrs = nvme_dev_attrs,
+};
+
+/* Group list handed to device_create_with_groups() in nvme_init_ctrl(). */
+static const struct attribute_group *nvme_dev_attr_groups[] = {
+       &nvme_dev_attrs_group,
+       NULL,
+};
+
 static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
        struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
@@ -857,6 +1095,8 @@ static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
        struct nvme_ns *ns;
 
+       lockdep_assert_held(&ctrl->namespaces_mutex);
+
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                if (ns->ns_id == nsid)
                        return ns;
@@ -872,6 +1112,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        struct gendisk *disk;
        int node = dev_to_node(ctrl->dev);
 
+       lockdep_assert_held(&ctrl->namespaces_mutex);
+
        ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
        if (!ns)
                return;
@@ -892,7 +1134,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        ns->ns_id = nsid;
        ns->disk = disk;
        ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
-       list_add_tail(&ns->list, &ctrl->namespaces);
 
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        if (ctrl->max_hw_sectors) {
@@ -915,36 +1156,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        disk->flags = GENHD_FL_EXT_DEVT;
        sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
 
-       /*
-        * Initialize capacity to 0 until we establish the namespace format and
-        * setup integrity extentions if necessary. The revalidate_disk after
-        * add_disk allows the driver to register with integrity if the format
-        * requires it.
-        */
-       set_capacity(disk, 0);
        if (nvme_revalidate_disk(ns->disk))
                goto out_free_disk;
 
+       list_add_tail(&ns->list, &ctrl->namespaces);
        kref_get(&ctrl->kref);
-       if (ns->type != NVME_NS_LIGHTNVM) {
-               add_disk(ns->disk);
-               if (ns->ms) {
-                       struct block_device *bd = bdget_disk(ns->disk, 0);
-                       if (!bd)
-                               return;
-                       if (blkdev_get(bd, FMODE_READ, NULL)) {
-                               bdput(bd);
-                               return;
-                       }
-                       blkdev_reread_part(bd);
-                       blkdev_put(bd, FMODE_READ);
-               }
-       }
+       if (ns->type == NVME_NS_LIGHTNVM)
+               return;
 
+       add_disk(ns->disk);
+       if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+                                       &nvme_ns_attr_group))
+               pr_warn("%s: failed to create sysfs group for identification\n",
+                       ns->disk->disk_name);
        return;
  out_free_disk:
        kfree(disk);
-       list_del(&ns->list);
  out_free_queue:
        blk_cleanup_queue(ns->queue);
  out_free_ns:
@@ -956,11 +1183,24 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        bool kill = nvme_io_incapable(ns->ctrl) &&
                        !blk_queue_dying(ns->queue);
 
-       if (kill)
+       lockdep_assert_held(&ns->ctrl->namespaces_mutex);
+
+       if (kill) {
                blk_set_queue_dying(ns->queue);
+
+               /*
+                * The controller was shutdown first if we got here through
+                * device removal. The shutdown may requeue outstanding
+                * requests. These need to be aborted immediately so
+                * del_gendisk doesn't block indefinitely for their completion.
+                */
+               blk_mq_abort_requeue_list(ns->queue);
+       }
        if (ns->disk->flags & GENHD_FL_UP) {
                if (blk_get_integrity(ns->disk))
                        blk_integrity_unregister(ns->disk);
+               sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+                                       &nvme_ns_attr_group);
                del_gendisk(ns->disk);
        }
        if (kill || !blk_queue_dying(ns->queue)) {
@@ -971,33 +1211,89 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        nvme_put_ns(ns);
 }
 
+/*
+ * Bring the driver's view of namespace @nsid up to date: a namespace that is
+ * not known yet is allocated, a known one is revalidated and removed again
+ * if revalidation fails.
+ */
+static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+       struct nvme_ns *existing = nvme_find_ns(ctrl, nsid);
+
+       if (!existing) {
+               nvme_alloc_ns(ctrl, nsid);
+               return;
+       }
+
+       if (revalidate_disk(existing->disk))
+               nvme_ns_remove(existing);
+}
+
+/*
+ * Scan namespaces using the namespace ID lists returned by
+ * nvme_identify_ns_list() instead of probing every ID up to @nn.
+ *
+ * Each list holds up to 1024 little-endian IDs and ends early at the first
+ * zero entry.  Every reported ID is validated, and namespaces the driver
+ * knows about whose IDs fall in a gap between reported IDs are removed.
+ * Once the scan has completed, namespaces with an ID above the last reported
+ * one are removed as well; without this they would linger after a
+ * successful list scan, unlike in the sequential scan, which prunes
+ * everything above @nn.
+ *
+ * Returns 0 on success, -ENOMEM if the list buffer cannot be allocated, or
+ * the status of a failed identify command.  On failure nothing beyond the
+ * already processed entries is pruned, so the caller can fall back to the
+ * sequential scan.
+ */
+static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
+{
+       struct nvme_ns *ns, *next;
+       __le32 *ns_list;
+       unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+       int ret = 0;
+
+       ns_list = kzalloc(0x1000, GFP_KERNEL);
+       if (!ns_list)
+               return -ENOMEM;
+
+       for (i = 0; i < num_lists; i++) {
+               /* Ask for the IDs that follow the last one handled so far. */
+               ret = nvme_identify_ns_list(ctrl, prev, ns_list);
+               if (ret)
+                       goto free;
+
+               for (j = 0; j < min(nn, 1024U); j++) {
+                       nsid = le32_to_cpu(ns_list[j]);
+                       if (!nsid)
+                               goto out;
+
+                       nvme_validate_ns(ctrl, nsid);
+
+                       /* Drop known namespaces in the gap before nsid. */
+                       while (++prev < nsid) {
+                               ns = nvme_find_ns(ctrl, prev);
+                               if (ns)
+                                       nvme_ns_remove(ns);
+                       }
+               }
+               nn -= j;
+       }
+ out:
+       /* Anything above the last reported ID was not reported: remove it. */
+       list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
+               if (ns->ns_id > prev)
+                       nvme_ns_remove(ns);
+       }
+ free:
+       kfree(ns_list);
+       return ret;
+}
+
+/*
+ * Sequential namespace scan: validate every namespace ID from 1 to @nn and
+ * then remove all known namespaces whose ID is above @nn.  The caller must
+ * hold ctrl->namespaces_mutex and is responsible for sorting the list
+ * afterwards.
+ */
 static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
 {
        struct nvme_ns *ns, *next;
        unsigned i;
 
-       for (i = 1; i <= nn; i++) {
-               ns = nvme_find_ns(ctrl, i);
-               if (ns) {
-                       if (revalidate_disk(ns->disk))
-                               nvme_ns_remove(ns);
-               } else
-                       nvme_alloc_ns(ctrl, i);
-       }
+       lockdep_assert_held(&ctrl->namespaces_mutex);
+
+       for (i = 1; i <= nn; i++)
+               nvme_validate_ns(ctrl, i);
+
+       /* The _safe iterator is needed because entries are removed here. */
        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
                if (ns->ns_id > nn)
                        nvme_ns_remove(ns);
        }
-       list_sort(NULL, &ctrl->namespaces, ns_cmp);
 }
 
+/*
+ * Rescan the controller's namespaces.
+ *
+ * Reads the namespace count from the controller identify data and, with
+ * namespaces_mutex held, first tries the list-based scan when the controller
+ * version is at least 1.1 and the NVME_QUIRK_IDENTIFY_CNS quirk is not set.
+ * If that scan is not attempted or fails, the sequential scan is used.  The
+ * namespace list is sorted in either case.
+ *
+ * Returns silently without scanning if the controller cannot be identified.
+ */
 void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
 {
        struct nvme_id_ctrl *id;
+       unsigned nn;
 
        if (nvme_identify_ctrl(ctrl, &id))
                return;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       nn = le32_to_cpu(id->nn);
+       if (ctrl->vs >= NVME_VS(1, 1) &&
+           !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
+               if (!nvme_scan_ns_list(ctrl, nn))
+                       goto done;
+       }
-       __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
+       /* Reuse the count decoded above instead of converting id->nn again. */
+       __nvme_scan_namespaces(ctrl, nn);
+ done:
+       list_sort(NULL, &ctrl->namespaces, ns_cmp);
+       mutex_unlock(&ctrl->namespaces_mutex);
        kfree(id);
 }
 
@@ -1005,8 +1301,135 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns, *next;
 
+       mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
                nvme_ns_remove(ns);
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+/* Allocator for controller instance numbers; used under dev_list_lock. */
+static DEFINE_IDA(nvme_instance_ida);
+
+/*
+ * Allocate an instance number for @ctrl and store it in ctrl->instance.
+ * The allocation is retried for as long as ida_get_new() reports -EAGAIN.
+ *
+ * Returns 0 on success or -ENODEV if preallocation or the allocation itself
+ * fails.
+ */
+static int nvme_set_instance(struct nvme_ctrl *ctrl)
+{
+       int id, err;
+
+       for (;;) {
+               if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+                       return -ENODEV;
+
+               spin_lock(&dev_list_lock);
+               err = ida_get_new(&nvme_instance_ida, &id);
+               spin_unlock(&dev_list_lock);
+
+               if (err != -EAGAIN)
+                       break;
+       }
+
+       if (err)
+               return -ENODEV;
+
+       ctrl->instance = id;
+       return 0;
+}
+
+/* Return @ctrl's instance number to the allocator, under dev_list_lock. */
+static void nvme_release_instance(struct nvme_ctrl *ctrl)
+{
+       const int id = ctrl->instance;
+
+       spin_lock(&dev_list_lock);
+       ida_remove(&nvme_instance_ida, id);
+       spin_unlock(&dev_list_lock);
+}
+
+/*
+ * Undo the externally visible parts of nvme_init_ctrl(): destroy the
+ * controller's class device and unlink the controller from nvme_ctrl_list.
+ */
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
+       dev_t devt = MKDEV(nvme_char_major, ctrl->instance);
+
+       device_destroy(nvme_class, devt);
+
+       spin_lock(&dev_list_lock);
+       list_del(&ctrl->node);
+       spin_unlock(&dev_list_lock);
+}
+
+/*
+ * kref release callback passed to kref_put() by nvme_put_ctrl().
+ *
+ * Drops the class device reference taken with get_device() in
+ * nvme_init_ctrl(), releases the instance number and finally calls the
+ * controller's free_ctrl operation.
+ */
+static void nvme_free_ctrl(struct kref *kref)
+{
+       struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+
+       put_device(ctrl->device);
+       nvme_release_instance(ctrl);
+
+       /* Must be last: ctrl is not touched after this call. */
+       ctrl->ops->free_ctrl(ctrl);
+}
+
+/* Drop a controller reference; nvme_free_ctrl() is the release callback. */
+void nvme_put_ctrl(struct nvme_ctrl *ctrl)
+{
+       kref_put(&ctrl->kref, nvme_free_ctrl);
+}
+
+/*
+ * Initialize the generic parts of an NVMe controller structure.  This needs
+ * to be called early during initialization so that the initialized structure
+ * is available during probing.
+ *
+ * Sets up the namespace list, its mutex and the reference count, allocates
+ * an instance number, creates the "nvme%d" class device (holding an extra
+ * reference on it that nvme_free_ctrl() drops) and adds the controller to
+ * nvme_ctrl_list.
+ *
+ * Returns 0 on success or a negative errno; on failure the instance number
+ * has been released again.
+ */
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
+               const struct nvme_ctrl_ops *ops, unsigned long quirks)
+{
+       int ret;
+
+       INIT_LIST_HEAD(&ctrl->namespaces);
+       mutex_init(&ctrl->namespaces_mutex);
+       kref_init(&ctrl->kref);
+       ctrl->dev = dev;
+       ctrl->ops = ops;
+       ctrl->quirks = quirks;
+
+       ret = nvme_set_instance(ctrl);
+       if (ret)
+               goto out;
+
+       /*
+        * Pass ctrl as the driver data at creation time.  The attribute
+        * handlers of nvme_dev_attr_groups treat dev_get_drvdata() as a
+        * struct nvme_ctrl pointer, so the driver data must already point at
+        * ctrl when the device is created rather than being set afterwards.
+        */
+       ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
+                               MKDEV(nvme_char_major, ctrl->instance),
+                               ctrl, nvme_dev_attr_groups,
+                               "nvme%d", ctrl->instance);
+       if (IS_ERR(ctrl->device)) {
+               ret = PTR_ERR(ctrl->device);
+               goto out_release_instance;
+       }
+       get_device(ctrl->device);
+
+       spin_lock(&dev_list_lock);
+       list_add_tail(&ctrl->node, &nvme_ctrl_list);
+       spin_unlock(&dev_list_lock);
+
+       return 0;
+out_release_instance:
+       nvme_release_instance(ctrl);
+out:
+       return ret;
+}
+
+/*
+ * Stop the request queue of every namespace of @ctrl: mark the queue
+ * stopped under its queue lock, cancel pending requeue work and stop the
+ * hardware queues.  Runs with namespaces_mutex held.
+ */
+void nvme_stop_queues(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               struct request_queue *q = ns->queue;
+
+               spin_lock_irq(q->queue_lock);
+               queue_flag_set(QUEUE_FLAG_STOPPED, q);
+               spin_unlock_irq(q->queue_lock);
+
+               blk_mq_cancel_requeue_work(q);
+               blk_mq_stop_hw_queues(q);
+       }
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+/*
+ * Restart the request queue of every namespace of @ctrl: clear the stopped
+ * flag, start the stopped hardware queues and kick the requeue list.  Runs
+ * with namespaces_mutex held.
+ */
+void nvme_start_queues(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               struct request_queue *q = ns->queue;
+
+               queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q);
+               blk_mq_start_stopped_hw_queues(q, true);
+               blk_mq_kick_requeue_list(q);
+       }
+       mutex_unlock(&ctrl->namespaces_mutex);
 }
 
 int __init nvme_core_init(void)
@@ -1019,10 +1442,31 @@ int __init nvme_core_init(void)
        else if (result > 0)
                nvme_major = result;
 
+       result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
+                                                       &nvme_dev_fops);
+       if (result < 0)
+               goto unregister_blkdev;
+       else if (result > 0)
+               nvme_char_major = result;
+
+       nvme_class = class_create(THIS_MODULE, "nvme");
+       if (IS_ERR(nvme_class)) {
+               result = PTR_ERR(nvme_class);
+               goto unregister_chrdev;
+       }
+
        return 0;
+
+ unregister_chrdev:
+       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ unregister_blkdev:
+       unregister_blkdev(nvme_major, "nvme");
+       return result;
 }
 
+/*
+ * Module teardown counterpart of nvme_core_init(): unregisters the block
+ * major, destroys the "nvme" class and unregisters the character device
+ * region.
+ */
 void nvme_core_exit(void)
 {
        unregister_blkdev(nvme_major, "nvme");
+       class_destroy(nvme_class);
+       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
 }