static void nvme_ns_remove(struct nvme_ns *ns);
static int nvme_revalidate_disk(struct gendisk *disk);
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
+static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
+ unsigned nsid);
+
+static void nvme_set_queue_dying(struct nvme_ns *ns)
+{
+ /*
+ * Revalidating a dead namespace sets capacity to 0. This will end
+ * buffered writers dirtying pages that can't be synced.
+ */
+ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+ return;
+ revalidate_disk(ns->disk);
+ blk_set_queue_dying(ns->queue);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ns->queue);
+}
static void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
trace_nvme_complete_rq(req);
if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
- if (nvme_req_needs_failover(req, status)) {
+ if ((req->cmd_flags & REQ_NVME_MPATH) &&
+ blk_path_error(status)) {
nvme_failover_req(req);
return;
}
if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
return BLK_STS_NOTSUPP;
control |= NVME_RW_PRINFO_PRACT;
+ } else if (req_op(req) == REQ_OP_WRITE) {
+ t10_pi_prepare(req, ns->pi_type);
}
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE2:
control |= NVME_RW_PRINFO_PRCHK_GUARD |
NVME_RW_PRINFO_PRCHK_REF;
- cmnd->rw.reftag = cpu_to_le32(
- nvme_block_nr(ns, blk_rq_pos(req)));
+ cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
break;
}
}
return 0;
}
+void nvme_cleanup_cmd(struct request *req)
+{
+ if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+ nvme_req(req)->status == 0) {
+ struct nvme_ns *ns = req->rq_disk->private_data;
+
+ t10_pi_complete(req, ns->pi_type,
+ blk_rq_bytes(req) >> ns->lba_shift);
+ }
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ kfree(page_address(req->special_vec.bv_page) +
+ req->special_vec.bv_offset);
+ }
+}
+EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
+
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd)
{
}
cmd->common.command_id = req->tag;
- if (ns)
- trace_nvme_setup_nvm_cmd(req->q->id, cmd);
- else
- trace_nvme_setup_admin_cmd(cmd);
+ trace_nvme_setup_cmd(req, cmd);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_setup_cmd);
if (unlikely(ctrl->kato == 0))
return;
- INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
- memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
- ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
uuid_copy(&ids->uuid, data + pos + sizeof(*cur));
break;
default:
- /* Skip unnkown types */
+ /* Skip unknown types */
len = cur->nidl;
break;
}
EXPORT_SYMBOL_GPL(nvme_set_queue_count);
#define NVME_AEN_SUPPORTED \
- (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
+ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
static void nvme_enable_aen(struct nvme_ctrl *ctrl)
{
- u32 result;
+ u32 result, supported_aens = ctrl->oaes & NVME_AEN_SUPPORTED;
int status;
- status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT,
- ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result);
+ if (!supported_aens)
+ return;
+
+ status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, supported_aens,
+ NULL, 0, &result);
if (status)
dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
- ctrl->oaes & NVME_AEN_SUPPORTED);
+ supported_aens);
}
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
return nvme_submit_user_cmd(ns->queue, &c,
(void __user *)(uintptr_t)io.addr, length,
- metadata, meta_len, io.slba, NULL, 0);
+ metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
}
static u32 nvme_known_admin_effects(u8 opcode)
static void nvme_update_formats(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns, *next;
- LIST_HEAD(rm_list);
+ struct nvme_ns *ns;
- down_write(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list) {
- if (ns->disk && nvme_revalidate_disk(ns->disk)) {
- list_move_tail(&ns->list, &rm_list);
- }
- }
- up_write(&ctrl->namespaces_rwsem);
+ down_read(&ctrl->namespaces_rwsem);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ if (ns->disk && nvme_revalidate_disk(ns->disk))
+ nvme_set_queue_dying(ns);
+ up_read(&ctrl->namespaces_rwsem);
- list_for_each_entry_safe(ns, next, &rm_list, list)
- nvme_ns_remove(ns);
+ nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
- (void __user *)(uintptr_t)cmd.metadata, cmd.metadata,
+ (void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len,
0, &cmd.result, timeout);
nvme_passthru_end(ctrl, effects);
set_capacity(disk, capacity);
nvme_config_discard(ns);
+
+ if (id->nsattr & (1 << 0))
+ set_disk_ro(disk, true);
+ else
+ set_disk_ro(disk, false);
+
blk_mq_unfreeze_queue(disk->queue);
}
nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
- strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
+ strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
return;
}
return ret;
}
-int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- u8 log_page, void *log,
- size_t size, u64 offset)
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+ void *log, size_t size, u64 offset)
{
struct nvme_command c = { };
unsigned long dwlen = size / 4 - 1;
c.get_log_page.opcode = nvme_admin_get_log_page;
-
- if (ns)
- c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
- else
- c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
-
+ c.get_log_page.nsid = cpu_to_le32(nsid);
c.get_log_page.lid = log_page;
+ c.get_log_page.lsp = lsp;
c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
-static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
- size_t size)
-{
- return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
-}
-
static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
{
int ret;
if (!ctrl->effects)
return 0;
- ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
- sizeof(*ctrl->effects));
+ ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
+ ctrl->effects, sizeof(*ctrl->effects), 0);
if (ret) {
kfree(ctrl->effects);
ctrl->effects = NULL;
nvme_set_queue_limits(ctrl, ctrl->admin_q);
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
+ ctrl->max_namespaces = le32_to_cpu(id->mnan);
if (id->rtd3e) {
/* us -> s */
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}
+ ret = nvme_mpath_init(ctrl, id);
kfree(id);
+ if (ret < 0)
+ return ret;
+
if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
&dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
+#ifdef CONFIG_NVME_MULTIPATH
+ &dev_attr_ana_grpid.attr,
+ &dev_attr_ana_state.attr,
+#endif
NULL,
};
if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
return 0;
}
+#ifdef CONFIG_NVME_MULTIPATH
+ if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
+ if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
+ return 0;
+ if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
+ return 0;
+ }
+#endif
return a->mode;
}
-const struct attribute_group nvme_ns_id_attr_group = {
+static const struct attribute_group nvme_ns_id_attr_group = {
.attrs = nvme_ns_id_attrs,
.is_visible = nvme_ns_id_attrs_are_visible,
};
+const struct attribute_group *nvme_ns_id_attr_groups[] = {
+ &nvme_ns_id_attr_group,
+#ifdef CONFIG_NVM
+ &nvme_nvm_attr_group,
+#endif
+ NULL,
+};
+
#define nvme_show_str_function(field) \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
unsigned nsid, struct nvme_id_ns *id)
{
struct nvme_ns_head *head;
+ size_t size = sizeof(*head);
int ret = -ENOMEM;
- head = kzalloc(sizeof(*head), GFP_KERNEL);
+#ifdef CONFIG_NVME_MULTIPATH
+ size += num_possible_nodes() * sizeof(struct nvme_ns *);
+#endif
+
+ head = kzalloc(size, GFP_KERNEL);
if (!head)
goto out;
ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
nvme_get_ctrl(ctrl);
- kfree(id);
-
- device_add_disk(ctrl->device, ns->disk);
- if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
- &nvme_ns_id_attr_group))
- pr_warn("%s: failed to create sysfs group for identification\n",
- ns->disk->disk_name);
- if (ns->ndev && nvme_nvm_register_sysfs(ns))
- pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
- ns->disk->disk_name);
+ device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
- nvme_mpath_add_disk(ns->head);
+ nvme_mpath_add_disk(ns, id);
nvme_fault_inject_init(ns);
+ kfree(id);
+
return;
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
nvme_fault_inject_fini(ns);
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
- sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
- &nvme_ns_id_attr_group);
- if (ns->ndev)
- nvme_nvm_unregister_sysfs(ns);
del_gendisk(ns->disk);
blk_cleanup_queue(ns->queue);
if (blk_get_integrity(ns->disk))
}
mutex_lock(&ns->ctrl->subsys->lock);
- nvme_mpath_clear_current_path(ns);
list_del_rcu(&ns->siblings);
+ nvme_mpath_clear_current_path(ns);
mutex_unlock(&ns->ctrl->subsys->lock);
down_write(&ns->ctrl->namespaces_rwsem);
down_write(&ctrl->namespaces_rwsem);
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
- if (ns->head->ns_id > nsid)
+ if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
list_move_tail(&ns->list, &rm_list);
}
up_write(&ctrl->namespaces_rwsem);
* raced with us in reading the log page, which could cause us to miss
* updates.
*/
- error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+ error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
+ log_size, 0);
if (error)
dev_warn(ctrl->device,
"reading changed ns log failed: %d\n", error);
if (!log)
return;
- if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
- dev_warn(ctrl->device,
- "Get FW SLOT INFO log error\n");
+ if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log,
+ sizeof(*log), 0))
+ dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);
}
static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
{
- switch ((result & 0xff00) >> 8) {
+ u32 aer_notice_type = (result & 0xff00) >> 8;
+
+ switch (aer_notice_type) {
case NVME_AER_NOTICE_NS_CHANGED:
+ trace_nvme_async_event(ctrl, aer_notice_type);
set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
+ trace_nvme_async_event(ctrl, aer_notice_type);
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
+#ifdef CONFIG_NVME_MULTIPATH
+ case NVME_AER_NOTICE_ANA:
+ trace_nvme_async_event(ctrl, aer_notice_type);
+ if (!ctrl->ana_log_buf)
+ break;
+ queue_work(nvme_wq, &ctrl->ana_work);
+ break;
+#endif
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
volatile union nvme_result *res)
{
u32 result = le32_to_cpu(res->u32);
+ u32 aer_type = result & 0x07;
if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
return;
- switch (result & 0x7) {
+ switch (aer_type) {
case NVME_AER_NOTICE:
nvme_handle_aen_notice(ctrl, result);
break;
case NVME_AER_SMART:
case NVME_AER_CSS:
case NVME_AER_VS:
+ trace_nvme_async_event(ctrl, aer_type);
ctrl->aen_result = result;
break;
default:
void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
+ nvme_mpath_stop(ctrl);
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
kfree(ctrl->effects);
+ nvme_mpath_uninit(ctrl);
if (subsys) {
mutex_lock(&subsys->lock);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
+ INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+ memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
+ ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
+
ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
if (ret < 0)
goto out;
if (ctrl->admin_q)
blk_mq_unquiesce_queue(ctrl->admin_q);
- list_for_each_entry(ns, &ctrl->namespaces, list) {
- /*
- * Revalidating a dead namespace sets capacity to 0. This will
- * end buffered writers dirtying pages that can't be synced.
- */
- if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- continue;
- revalidate_disk(ns->disk);
- blk_set_queue_dying(ns->queue);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ nvme_set_queue_dying(ns);
- /* Forcibly unquiesce queues to avoid blocking dispatch */
- blk_mq_unquiesce_queue(ns->queue);
- }
up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_kill_queues);