crypto: qat - add auto reset on error
author Damian Muszynski <damian.muszynski@intel.com>
Fri, 2 Feb 2024 10:53:22 +0000 (18:53 +0800)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 9 Feb 2024 04:57:18 +0000 (12:57 +0800)
Expose the `auto_reset` sysfs attribute to configure the driver to reset
the device when a fatal error is detected.

When auto reset is enabled, the driver resets the device when it detects
either a heartbeat failure or a fatal error through an interrupt.

This patch is based on earlier work done by Shashank Gupta.

Signed-off-by: Damian Muszynski <damian.muszynski@intel.com>
Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
Reviewed-by: Markas Rapoportas <markas.rapoportas@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Mun Chun Yep <mun.chun.yep@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Documentation/ABI/testing/sysfs-driver-qat
drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
drivers/crypto/intel/qat/qat_common/adf_aer.c
drivers/crypto/intel/qat/qat_common/adf_common_drv.h
drivers/crypto/intel/qat/qat_common/adf_sysfs.c

index bbf329cf0d67bc10c5e3b74fdf7a1e34317688ef..6778f1fea8740d31afaf65d1b363605a6ddb2801 100644 (file)
@@ -141,3 +141,23 @@ Description:
                        64
 
                This attribute is only available for qat_4xxx devices.
+
+What:          /sys/bus/pci/devices/<BDF>/qat/auto_reset
+Date:          March 2024
+KernelVersion: 6.8
+Contact:       qat-linux@intel.com
+Description:   (RW) Reports the current state of the auto reset feature
+               for a QAT device.
+
+               Write to the attribute to enable or disable device auto reset.
+
+               Device auto reset is disabled by default.
+
+               The values are::
+
+               * 1/Yy/on: auto reset enabled. If the device encounters an
+                 unrecoverable error, it will be reset automatically.
+               * 0/Nn/off: auto reset disabled. If the device encounters an
+                 unrecoverable error, it will not be reset.
+
+               This attribute is only available for qat_4xxx devices.
index 4a3c36aaa7caf912a88cdd5c8e903e9ced6bd15b..0f26aa976c8ca320f333c6443fe87ed4af9a94ac 100644 (file)
@@ -402,6 +402,7 @@ struct adf_accel_dev {
        struct adf_error_counters ras_errors;
        struct mutex state_lock; /* protect state of the device */
        bool is_vf;
+       bool autoreset_on_error;
        u32 accel_id;
 };
 #endif
index cd273b31db0eea151cd4ecefedbc8d3be3b15ea9..b3d4b6b99c65a144df431f4e4ed6b5686ec74007 100644 (file)
@@ -204,6 +204,14 @@ const struct pci_error_handlers adf_err_handler = {
 };
 EXPORT_SYMBOL_GPL(adf_err_handler);
 
+int adf_dev_autoreset(struct adf_accel_dev *accel_dev)
+{
+       /* Auto reset is opt-in: report success without resetting when off */
+       if (!accel_dev->autoreset_on_error)
+               return 0;
+       return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC);
+}
+
 static void adf_notify_fatal_error_worker(struct work_struct *work)
 {
        struct adf_fatal_error_data *wq_data =
@@ -215,10 +223,11 @@ static void adf_notify_fatal_error_worker(struct work_struct *work)
 
        if (!accel_dev->is_vf) {
                /* Disable arbitration to stop processing of new requests */
-               if (hw_device->exit_arb)
+               if (accel_dev->autoreset_on_error && hw_device->exit_arb)
                        hw_device->exit_arb(accel_dev);
                if (accel_dev->pf.vf_info)
                        adf_pf2vf_notify_fatal_error(accel_dev);
+               adf_dev_autoreset(accel_dev);
        }
 
        kfree(wq_data);
index 10891c9da6e7e94aa0ce784a17b65af5fef72db4..57328249c89e7a6f4ae7c7754bf2bbb218c6f651 100644 (file)
@@ -87,6 +87,7 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev);
 extern const struct pci_error_handlers adf_err_handler;
 void adf_reset_sbr(struct adf_accel_dev *accel_dev);
 void adf_reset_flr(struct adf_accel_dev *accel_dev);
+int adf_dev_autoreset(struct adf_accel_dev *accel_dev);
 void adf_dev_restore(struct adf_accel_dev *accel_dev);
 int adf_init_aer(void);
 void adf_exit_aer(void);
index d450dad32c9e4e9be3ad635855ee2c58ab4819f6..4e7f70d4049d354bd1776611dbae079fe61f3511 100644 (file)
@@ -204,6 +204,42 @@ static ssize_t pm_idle_enabled_store(struct device *dev, struct device_attribute
 }
 static DEVICE_ATTR_RW(pm_idle_enabled);
 
+/* Show whether auto reset on error is enabled for this device */
+static ssize_t auto_reset_show(struct device *dev, struct device_attribute *attr,
+                              char *buf)
+{
+       struct adf_accel_dev *accel_dev =
+               adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
+
+       if (!accel_dev)
+               return -EINVAL;
+
+       /* The flag is reported as the string "on" or "off" */
+       return sysfs_emit(buf, "%s\n",
+                         accel_dev->autoreset_on_error ? "on" : "off");
+}
+
+/* Parse a boolean string from the write and store it as the auto reset flag */
+static ssize_t auto_reset_store(struct device *dev, struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct adf_accel_dev *accel_dev;
+       bool new_state = false;
+       int err = kstrtobool(buf, &new_state);
+
+       /* Input that fails to parse is rejected before the device lookup */
+       if (err)
+               return err;
+
+       accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
+       if (!accel_dev)
+               return -EINVAL;
+
+       accel_dev->autoreset_on_error = new_state;
+       return count;
+}
+static DEVICE_ATTR_RW(auto_reset);
+
 static DEVICE_ATTR_RW(state);
 static DEVICE_ATTR_RW(cfg_services);
 
@@ -291,6 +327,7 @@ static struct attribute *qat_attrs[] = {
        &dev_attr_pm_idle_enabled.attr,
        &dev_attr_rp2srv.attr,
        &dev_attr_num_rps.attr,
+       &dev_attr_auto_reset.attr,
        NULL,
 };