If PF (Physical Function) has SFs (Sub-Functions), since the SFs are not
taking part in the synchronization flow, sync reset can lead to fatal
error on the SFs, as the function will be closed unexpectedly from the
SF point of view.
Add a check to prevent sync reset when there are SFs on a PF device
which is not ECPF, as ECPF is teardowned gracefully before reset.
Fixes:
92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20250825143435.598584-8-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
#include "fw_reset.h"
#include "diag/fw_tracer.h"
#include "lib/tout.h"
+#include "sf/sf.h"
enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
return false;
}
+ if (!mlx5_core_is_ecpf(dev) && !mlx5_sf_table_empty(dev)) {
+ mlx5_core_warn(dev, "SFs should be removed before reset\n");
+ return false;
+ }
+
#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)
if (reset_method != MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET) {
err = mlx5_check_hotplug_interrupt(dev, bridge);
WARN_ON(!xa_empty(&table->function_ids));
kfree(table);
}
+
+bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_sf_table *table = dev->priv.sf_table;
+
+ if (!table)
+ return true;
+
+ return xa_empty(&table->function_ids);
+}
int mlx5_sf_table_init(struct mlx5_core_dev *dev);
void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev);
+bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev);
int mlx5_devlink_sf_port_new(struct devlink *devlink,
const struct devlink_port_new_attrs *add_attr,
{
}
+static inline bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev)
+{
+ return true;
+}
+
#endif
#endif