drm/amdkfd: Update SMI throttle event bitmask
author Graham Sider <Graham.Sider@amd.com>
Tue, 6 Jul 2021 21:46:37 +0000 (17:46 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 23 Jul 2021 14:08:00 +0000 (10:08 -0400)
Update Arcturus/Aldebaran thermal throttle SMI event path to use
ASIC-independent throttler bits when logging.

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c

index a8b05c6ddce5fc35970b306307491c59d08b3cb8..0462d4aceab7bcf44b905b792a33110db891fbda 100644 (file)
@@ -332,7 +332,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
 #else
 static inline int kgd2kfd_init(void)
 {
@@ -391,7 +391,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
 }
 
 static inline
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
 {
 }
 #endif
index 7fc7fe9d2b45c0e7d8781e9f5e7542796236300e..fd1fd20cd70c17d01ff3a4db446c74bf1505a4bb 100644 (file)
@@ -1369,7 +1369,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
        WARN_ONCE(count < 0, "Compute profile ref. count error");
 }
 
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
 {
        if (kfd && kfd->init_complete)
                kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
index 2465224235593ce8660f78375941148541ab9565..ed4bc5f844ce7675f17937515fbb24d085482b23 100644 (file)
@@ -205,23 +205,23 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
 }
 
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
-                                            uint32_t throttle_bitmask)
+                                            uint64_t throttle_bitmask)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
        /*
         * ThermalThrottle msg = throttle_bitmask(8):
         *                       thermal_interrupt_count(16):
-        * 1 byte event + 1 byte space + 8 byte throttle_bitmask +
+        * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
         * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
-        * 1 byte \0 = 29
+        * 1 byte \0 = 37
         */
-       char fifo_in[29];
+       char fifo_in[37];
        int len;
 
        if (list_empty(&dev->smi_clients))
                return;
 
-       len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
+       len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
                       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
                       atomic64_read(&adev->smu.throttle_int_counter));
 
index b9b0438202e21cf92ac6a71e11c3e0da9f2d02b3..bffd0c32b0603c14ee6be0169bc44749600206ac 100644 (file)
@@ -26,7 +26,7 @@
 int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
-                                            uint32_t throttle_bitmask);
+                                            uint64_t throttle_bitmask);
 void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
 
 #endif
index 6b3e0ea10163a1c3a3d2d516bed0f4a05c0da2ec..6ec8492f71f593ff82109dbb5ab2218257bbbc8e 100644 (file)
@@ -2178,7 +2178,9 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu)
 
        dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
                        log_buf);
-       kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
+       kgd2kfd_smi_event_throttle(smu->adev->kfd.dev,
+               smu_cmn_get_indep_throttler_status(throttler_status,
+                                                  arcturus_throttler_map));
 }
 
 static uint16_t arcturus_get_current_pcie_link_speed(struct smu_context *smu)
index 4b12c3b807e9d15d905bc6cb493277f73422cb33..856eeaf293b895c759a244dde5d11cf7b6b793a8 100644 (file)
@@ -1653,7 +1653,9 @@ static void aldebaran_log_thermal_throttling_event(struct smu_context *smu)
 
        dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
                 log_buf);
-       kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
+       kgd2kfd_smi_event_throttle(smu->adev->kfd.dev,
+               smu_cmn_get_indep_throttler_status(throttler_status,
+                                                  aldebaran_throttler_map));
 }
 
 static int aldebaran_get_current_pcie_link_speed(struct smu_context *smu)