summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Kelley <mikelley@microsoft.com>2022-06-08 11:52:21 -0700
committerChristoph Hellwig <hch@lst.de>2022-07-06 18:14:04 +0200
commit48e1bc03b7983b3ad2f920ca70805bbc6b55609d (patch)
tree4f3a6bf4848a48a57ca45b23d876c9532a36f907
parent12c6870bf7efbbc275972edaab86071e21cfc2f1 (diff)
nvme: handle the persistent internal error AER
In the NVM Express Revision 1.4 spec, Figure 145 describes possible values for an AER with event type "Error" (value 000b). For a Persistent Internal Error (value 03h), the host should perform a controller reset. Add support for this error using code that already exists for doing a controller reset. As part of this support, introduce two utility functions for parsing the AER type and subtype. This new support was tested in a lab environment where we can generate the persistent internal error on demand, and observe both the Linux side and NVMe controller side to see that the controller reset has been done. Signed-off-by: Michael Kelley <mikelley@microsoft.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--drivers/nvme/host/core.c31
-rw-r--r--include/linux/nvme.h4
2 files changed, 33 insertions, 2 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b3d9c29aba1e..6eb42f696fc9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4526,9 +4526,19 @@ static void nvme_fw_act_work(struct work_struct *work)
nvme_get_fw_slot_info(ctrl);
}
+static u32 nvme_aer_type(u32 result)
+{
+ return result & 0x7;
+}
+
+static u32 nvme_aer_subtype(u32 result)
+{
+ return (result & 0xff00) >> 8;
+}
+
static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
{
- u32 aer_notice_type = (result & 0xff00) >> 8;
+ u32 aer_notice_type = nvme_aer_subtype(result);
trace_nvme_async_event(ctrl, aer_notice_type);
@@ -4561,11 +4571,19 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
}
}
+static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
+{
+ trace_nvme_async_event(ctrl, NVME_AER_ERROR);
+ dev_warn(ctrl->device, "resetting controller due to AER\n");
+ nvme_reset_ctrl(ctrl);
+}
+
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
volatile union nvme_result *res)
{
u32 result = le32_to_cpu(res->u32);
- u32 aer_type = result & 0x07;
+ u32 aer_type = nvme_aer_type(result);
+ u32 aer_subtype = nvme_aer_subtype(result);
if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
return;
@@ -4575,6 +4593,15 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
nvme_handle_aen_notice(ctrl, result);
break;
case NVME_AER_ERROR:
+ /*
+ * For a persistent internal error, don't run async_event_work
+ * to submit a new AER. The controller reset will do it.
+ */
+ if (aer_subtype == NVME_AER_ERROR_PERSIST_INT_ERR) {
+ nvme_handle_aer_persistent_error(ctrl);
+ return;
+ }
+ fallthrough;
case NVME_AER_SMART:
case NVME_AER_CSS:
case NVME_AER_VS:
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index e3934003f239..e167cdad7844 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -712,6 +712,10 @@ enum {
};
enum {
+ NVME_AER_ERROR_PERSIST_INT_ERR = 0x03,
+};
+
+enum {
NVME_AER_NOTICE_NS_CHANGED = 0x00,
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
NVME_AER_NOTICE_ANA = 0x03,