net/mlx5: Register a unique thermal zone per device
authorSaeed Mahameed <saeedm@nvidia.com>
Tue, 27 Jun 2023 03:36:41 +0000 (20:36 -0700)
committerSaeed Mahameed <saeedm@nvidia.com>
Wed, 5 Jul 2023 17:57:03 +0000 (10:57 -0700)
Prior to this patch only one "mlx5" thermal zone could have been
registered regardless of the number of individual mlx5 devices in the
system.

To fix this setup a unique name per device to register its own thermal
zone.

In order to not register a thermal zone for a virtual device (VF/SF) add
a check for PF device type.

The new name is a concatenation between "mlx5_" and "<PCI_DEV_BDF>", which
will also help associating a thermal zone with its PCI device.

$ lspci | grep ConnectX
00:04.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx]
00:05.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx]

$ cat /sys/devices/virtual/thermal/thermal_zone0/type
mlx5_0000:00:04.0
$ cat /sys/devices/virtual/thermal/thermal_zone1/type
mlx5_0000:00:05.0

Fixes: c1fef618d611 ("net/mlx5: Implement thermal zone")
CC: Sandipan Patra <spatra@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/thermal.c

index 20bb5eb266c1f11797d9c8ee63269400366ddb49..52199d39657ed2caf0e363baf70a574d0994c63d 100644 (file)
@@ -68,14 +68,19 @@ static struct thermal_zone_device_ops mlx5_thermal_ops = {
 
 int mlx5_thermal_init(struct mlx5_core_dev *mdev)
 {
+       char data[THERMAL_NAME_LENGTH];
        struct mlx5_thermal *thermal;
-       struct thermal_zone_device *tzd;
-       const char *data = "mlx5";
+       int err;
 
-       tzd = thermal_zone_get_zone_by_name(data);
-       if (!IS_ERR(tzd))
+       if (!mlx5_core_is_pf(mdev) && !mlx5_core_is_ecpf(mdev))
                return 0;
 
+       err = snprintf(data, sizeof(data), "mlx5_%s", dev_name(mdev->device));
+       if (err < 0 || err >= sizeof(data)) {
+               mlx5_core_err(mdev, "Failed to setup thermal zone name, %d\n", err);
+               return -EINVAL;
+       }
+
        thermal = kzalloc(sizeof(*thermal), GFP_KERNEL);
        if (!thermal)
                return -ENOMEM;
@@ -89,10 +94,10 @@ int mlx5_thermal_init(struct mlx5_core_dev *mdev)
                                                                 &mlx5_thermal_ops,
                                                                 NULL, 0, MLX5_THERMAL_POLL_INT_MSEC);
        if (IS_ERR(thermal->tzdev)) {
-               dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n",
-                       data, PTR_ERR(thermal->tzdev));
+               err = PTR_ERR(thermal->tzdev);
+               mlx5_core_err(mdev, "Failed to register thermal zone device (%s) %d\n", data, err);
                kfree(thermal);
-               return -EINVAL;
+               return err;
        }
 
        mdev->thermal = thermal;