Merge tag 'for-4.20/block-20181021' of git://git.kernel.dk/linux-block
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 46df030b2c3f74f33621dc7d4512b17fe40fd079..9e4a30b05bd203883353662777a42eb72a151822 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -100,6 +100,22 @@ static struct class *nvme_subsys_class;
 static void nvme_ns_remove(struct nvme_ns *ns);
 static int nvme_revalidate_disk(struct gendisk *disk);
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
+static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
+                                          unsigned nsid);
+
+static void nvme_set_queue_dying(struct nvme_ns *ns)
+{
+       /*
+        * Revalidating a dead namespace sets capacity to 0. This will stop
+        * buffered writers from dirtying pages that can't be synced.
+        */
+       if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+               return;
+       revalidate_disk(ns->disk);
+       blk_set_queue_dying(ns->queue);
+       /* Forcibly unquiesce queues to avoid blocking dispatch */
+       blk_mq_unquiesce_queue(ns->queue);
+}
 
 static void nvme_queue_scan(struct nvme_ctrl *ctrl)
 {
@@ -236,7 +252,8 @@ void nvme_complete_rq(struct request *req)
        trace_nvme_complete_rq(req);
 
        if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
-               if (nvme_req_needs_failover(req, status)) {
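+               /* path errors on a multipath request fail over instead of retrying */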
+               if ((req->cmd_flags & REQ_NVME_MPATH) &&
+                   blk_path_error(status)) {
                        nvme_failover_req(req);
                        return;
                }
@@ -601,6 +618,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
                        if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
                                return BLK_STS_NOTSUPP;
                        control |= NVME_RW_PRINFO_PRACT;
+               } else if (req_op(req) == REQ_OP_WRITE) {
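+                       /* remap PI ref tags to match the physical LBAs */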
+                       t10_pi_prepare(req, ns->pi_type);
                }
 
                switch (ns->pi_type) {
@@ -611,8 +630,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
                case NVME_NS_DPS_PI_TYPE2:
                        control |= NVME_RW_PRINFO_PRCHK_GUARD |
                                        NVME_RW_PRINFO_PRCHK_REF;
-                       cmnd->rw.reftag = cpu_to_le32(
-                                       nvme_block_nr(ns, blk_rq_pos(req)));
+                       cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
                        break;
                }
        }
@@ -622,6 +640,22 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
        return 0;
 }
 
+void nvme_cleanup_cmd(struct request *req)
+{
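+       /* for a successful PI read, remap the ref tags back to the virtual LBAs */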
+       if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+           nvme_req(req)->status == 0) {
+               struct nvme_ns *ns = req->rq_disk->private_data;
+
+               t10_pi_complete(req, ns->pi_type,
+                               blk_rq_bytes(req) >> ns->lba_shift);
+       }
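+       /* free the DSM range buffer allocated for discard requests */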
+       if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+               kfree(page_address(req->special_vec.bv_page) +
+                     req->special_vec.bv_offset);
+       }
+}
+EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
+
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
                struct nvme_command *cmd)
 {
@@ -652,10 +686,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
        }
 
        cmd->common.command_id = req->tag;
-       if (ns)
-               trace_nvme_setup_nvm_cmd(req->q->id, cmd);
-       else
-               trace_nvme_setup_admin_cmd(cmd);
+       trace_nvme_setup_cmd(req, cmd);
        return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_setup_cmd);
@@ -848,9 +879,6 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
        if (unlikely(ctrl->kato == 0))
                return;
 
-       INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
-       memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
-       ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
        schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 }
 
@@ -943,7 +971,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
                        uuid_copy(&ids->uuid, data + pos + sizeof(*cur));
                        break;
                default:
-                       /* Skip unnkown types */
+                       /* Skip unknown types */
                        len = cur->nidl;
                        break;
                }
@@ -1040,18 +1068,21 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 EXPORT_SYMBOL_GPL(nvme_set_queue_count);
 
 #define NVME_AEN_SUPPORTED \
-       (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
+       (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
 
 static void nvme_enable_aen(struct nvme_ctrl *ctrl)
 {
-       u32 result;
+       u32 result, supported_aens = ctrl->oaes & NVME_AEN_SUPPORTED;
        int status;
 
-       status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT,
-                       ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result);
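+       /* nothing to do if the controller supports none of the AENs we handle */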
+       if (!supported_aens)
+               return;
+
+       status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, supported_aens,
+                       NULL, 0, &result);
        if (status)
                dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
-                        ctrl->oaes & NVME_AEN_SUPPORTED);
+                        supported_aens);
 }
 
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
@@ -1101,7 +1132,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
        return nvme_submit_user_cmd(ns->queue, &c,
                        (void __user *)(uintptr_t)io.addr, length,
-                       metadata, meta_len, io.slba, NULL, 0);
+                       metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
 }
 
 static u32 nvme_known_admin_effects(u8 opcode)
@@ -1151,19 +1182,15 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 
 static void nvme_update_formats(struct nvme_ctrl *ctrl)
 {
-       struct nvme_ns *ns, *next;
-       LIST_HEAD(rm_list);
+       struct nvme_ns *ns;
 
-       down_write(&ctrl->namespaces_rwsem);
-       list_for_each_entry(ns, &ctrl->namespaces, list) {
-               if (ns->disk && nvme_revalidate_disk(ns->disk)) {
-                       list_move_tail(&ns->list, &rm_list);
-               }
-       }
-       up_write(&ctrl->namespaces_rwsem);
+       down_read(&ctrl->namespaces_rwsem);
+       list_for_each_entry(ns, &ctrl->namespaces, list)
+               if (ns->disk && nvme_revalidate_disk(ns->disk))
+                       nvme_set_queue_dying(ns);
+       up_read(&ctrl->namespaces_rwsem);
 
-       list_for_each_entry_safe(ns, next, &rm_list, list)
-               nvme_ns_remove(ns);
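+       /* reap any namespaces marked dead above */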
+       nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
 }
 
 static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -1218,7 +1245,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
-                       (void __user *)(uintptr_t)cmd.metadata, cmd.metadata,
+                       (void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len,
                        0, &cmd.result, timeout);
        nvme_passthru_end(ctrl, effects);
 
@@ -1457,6 +1484,12 @@ static void nvme_update_disk_info(struct gendisk *disk,
 
        set_capacity(disk, capacity);
        nvme_config_discard(ns);
+
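+       /* NSATTR bit 0 set means the namespace is write protected */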
+       if (id->nsattr & (1 << 0))
+               set_disk_ro(disk, true);
+       else
+               set_disk_ro(disk, false);
+
        blk_mq_unfreeze_queue(disk->queue);
 }
 
@@ -2043,7 +2076,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct
 
        nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
        if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
-               strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
+               strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
                return;
        }
 
@@ -2255,21 +2288,16 @@ out_unlock:
        return ret;
 }
 
-int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-                    u8 log_page, void *log,
-                    size_t size, u64 offset)
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+               void *log, size_t size, u64 offset)
 {
        struct nvme_command c = { };
        unsigned long dwlen = size / 4 - 1;
 
        c.get_log_page.opcode = nvme_admin_get_log_page;
-
-       if (ns)
-               c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
-       else
-               c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
-
+       c.get_log_page.nsid = cpu_to_le32(nsid);
        c.get_log_page.lid = log_page;
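+       /* log specific field, e.g. the RGO bit for the ANA log page */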
+       c.get_log_page.lsp = lsp;
        c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
        c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
        c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
@@ -2278,12 +2306,6 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
 }
 
-static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
-                       size_t size)
-{
-       return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
-}
-
 static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
 {
        int ret;
@@ -2294,8 +2316,8 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
        if (!ctrl->effects)
                return 0;
 
-       ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
-                                       sizeof(*ctrl->effects));
+       ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
+                       ctrl->effects, sizeof(*ctrl->effects), 0);
        if (ret) {
                kfree(ctrl->effects);
                ctrl->effects = NULL;
@@ -2386,6 +2408,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        nvme_set_queue_limits(ctrl, ctrl->admin_q);
        ctrl->sgls = le32_to_cpu(id->sgls);
        ctrl->kas = le16_to_cpu(id->kas);
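+       /* MNAN, used when sizing the ANA log buffer for multipath */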
+       ctrl->max_namespaces = le32_to_cpu(id->mnan);
 
        if (id->rtd3e) {
                /* us -> s */
@@ -2445,8 +2468,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
        }
 
+       ret = nvme_mpath_init(ctrl, id);
        kfree(id);
 
+       if (ret < 0)
+               return ret;
+
        if (ctrl->apst_enabled && !prev_apst_enabled)
                dev_pm_qos_expose_latency_tolerance(ctrl->device);
        else if (!ctrl->apst_enabled && prev_apst_enabled)
@@ -2665,6 +2692,10 @@ static struct attribute *nvme_ns_id_attrs[] = {
        &dev_attr_nguid.attr,
        &dev_attr_eui.attr,
        &dev_attr_nsid.attr,
+#ifdef CONFIG_NVME_MULTIPATH
+       &dev_attr_ana_grpid.attr,
+       &dev_attr_ana_state.attr,
+#endif
        NULL,
 };
 
@@ -2687,14 +2718,30 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
                if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
                        return 0;
        }
+#ifdef CONFIG_NVME_MULTIPATH
+       if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
+               if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
+                       return 0;
+               if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
+                       return 0;
+       }
+#endif
        return a->mode;
 }
 
-const struct attribute_group nvme_ns_id_attr_group = {
+static const struct attribute_group nvme_ns_id_attr_group = {
        .attrs          = nvme_ns_id_attrs,
        .is_visible     = nvme_ns_id_attrs_are_visible,
 };
 
+const struct attribute_group *nvme_ns_id_attr_groups[] = {
+       &nvme_ns_id_attr_group,
+#ifdef CONFIG_NVM
+       &nvme_nvm_attr_group,
+#endif
+       NULL,
+};
+
 #define nvme_show_str_function(field)                                          \
 static ssize_t  field##_show(struct device *dev,                               \
                            struct device_attribute *attr, char *buf)           \
@@ -2861,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
                unsigned nsid, struct nvme_id_ns *id)
 {
        struct nvme_ns_head *head;
+       size_t size = sizeof(*head);
        int ret = -ENOMEM;
 
-       head = kzalloc(sizeof(*head), GFP_KERNEL);
+#ifdef CONFIG_NVME_MULTIPATH
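+       /* extra space for the per-node current path array used by multipath */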
+       size += num_possible_nodes() * sizeof(struct nvme_ns *);
+#endif
+
+       head = kzalloc(size, GFP_KERNEL);
        if (!head)
                goto out;
        ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
@@ -3060,19 +3112,12 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        nvme_get_ctrl(ctrl);
 
-       kfree(id);
-
-       device_add_disk(ctrl->device, ns->disk);
-       if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
-                                       &nvme_ns_id_attr_group))
-               pr_warn("%s: failed to create sysfs group for identification\n",
-                       ns->disk->disk_name);
-       if (ns->ndev && nvme_nvm_register_sysfs(ns))
-               pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
-                       ns->disk->disk_name);
+       device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
 
-       nvme_mpath_add_disk(ns->head);
+       nvme_mpath_add_disk(ns, id);
        nvme_fault_inject_init(ns);
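+       /* id was needed until nvme_mpath_add_disk() read the ANA group id */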
+       kfree(id);
+
        return;
  out_unlink_ns:
        mutex_lock(&ctrl->subsys->lock);
@@ -3093,10 +3138,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
        nvme_fault_inject_fini(ns);
        if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-               sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
-                                       &nvme_ns_id_attr_group);
-               if (ns->ndev)
-                       nvme_nvm_unregister_sysfs(ns);
                del_gendisk(ns->disk);
                blk_cleanup_queue(ns->queue);
                if (blk_get_integrity(ns->disk))
@@ -3104,8 +3145,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        }
 
        mutex_lock(&ns->ctrl->subsys->lock);
-       nvme_mpath_clear_current_path(ns);
        list_del_rcu(&ns->siblings);
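+       /* clear the cached path only after the ns is off the siblings list */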
+       nvme_mpath_clear_current_path(ns);
        mutex_unlock(&ns->ctrl->subsys->lock);
 
        down_write(&ns->ctrl->namespaces_rwsem);
@@ -3138,7 +3179,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 
        down_write(&ctrl->namespaces_rwsem);
        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
-               if (ns->head->ns_id > nsid)
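+               /* also reap namespaces that nvme_set_queue_dying() marked dead */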
+               if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
                        list_move_tail(&ns->list, &rm_list);
        }
        up_write(&ctrl->namespaces_rwsem);
@@ -3214,7 +3255,8 @@ static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
         * raced with us in reading the log page, which could cause us to miss
         * updates.
         */
-       error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+       error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
+                       log_size, 0);
        if (error)
                dev_warn(ctrl->device,
                        "reading changed ns log failed: %d\n", error);
@@ -3331,9 +3373,9 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
        if (!log)
                return;
 
-       if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
-               dev_warn(ctrl->device,
-                               "Get FW SLOT INFO log error\n");
+       if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
+                       sizeof(*log), 0))
+               dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
        kfree(log);
 }
 
@@ -3371,14 +3413,26 @@ static void nvme_fw_act_work(struct work_struct *work)
 
 static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 {
-       switch ((result & 0xff00) >> 8) {
+       u32 aer_notice_type = (result & 0xff00) >> 8;
+
+       switch (aer_notice_type) {
        case NVME_AER_NOTICE_NS_CHANGED:
+               trace_nvme_async_event(ctrl, aer_notice_type);
                set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
                nvme_queue_scan(ctrl);
                break;
        case NVME_AER_NOTICE_FW_ACT_STARTING:
+               trace_nvme_async_event(ctrl, aer_notice_type);
                queue_work(nvme_wq, &ctrl->fw_act_work);
                break;
+#ifdef CONFIG_NVME_MULTIPATH
+       case NVME_AER_NOTICE_ANA:
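+               /* kick off a read of the ANA log to refresh the path states */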
+               trace_nvme_async_event(ctrl, aer_notice_type);
+               if (!ctrl->ana_log_buf)
+                       break;
+               queue_work(nvme_wq, &ctrl->ana_work);
+               break;
+#endif
        default:
                dev_warn(ctrl->device, "async event result %08x\n", result);
        }
@@ -3388,11 +3442,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
                volatile union nvme_result *res)
 {
        u32 result = le32_to_cpu(res->u32);
+       u32 aer_type = result & 0x07;
 
        if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
                return;
 
-       switch (result & 0x7) {
+       switch (aer_type) {
        case NVME_AER_NOTICE:
                nvme_handle_aen_notice(ctrl, result);
                break;
@@ -3400,6 +3455,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
        case NVME_AER_SMART:
        case NVME_AER_CSS:
        case NVME_AER_VS:
+               trace_nvme_async_event(ctrl, aer_type);
                ctrl->aen_result = result;
                break;
        default:
@@ -3411,6 +3467,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 
 void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
+       nvme_mpath_stop(ctrl);
        nvme_stop_keep_alive(ctrl);
        flush_work(&ctrl->async_event_work);
        flush_work(&ctrl->scan_work);
@@ -3448,6 +3505,7 @@ static void nvme_free_ctrl(struct device *dev)
 
        ida_simple_remove(&nvme_instance_ida, ctrl->instance);
        kfree(ctrl->effects);
+       nvme_mpath_uninit(ctrl);
 
        if (subsys) {
                mutex_lock(&subsys->lock);
@@ -3484,6 +3542,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
        INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
 
+       INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+       memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
+       ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
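+       /* nvme_start_keep_alive() only needs to schedule ka_work from now on */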
+
        ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
        if (ret < 0)
                goto out;
@@ -3542,19 +3604,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
        if (ctrl->admin_q)
                blk_mq_unquiesce_queue(ctrl->admin_q);
 
-       list_for_each_entry(ns, &ctrl->namespaces, list) {
-               /*
-                * Revalidating a dead namespace sets capacity to 0. This will
-                * end buffered writers dirtying pages that can't be synced.
-                */
-               if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
-                       continue;
-               revalidate_disk(ns->disk);
-               blk_set_queue_dying(ns->queue);
+       list_for_each_entry(ns, &ctrl->namespaces, list)
+               nvme_set_queue_dying(ns);
 
-               /* Forcibly unquiesce queues to avoid blocking dispatch */
-               blk_mq_unquiesce_queue(ns->queue);
-       }
        up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_kill_queues);