drm/i915/selftests: Implement frequency logging for energy reading validation
author Sk Anirban <sk.anirban@intel.com>
Fri, 29 Nov 2024 15:47:16 +0000 (21:17 +0530)
committer Andi Shyti <andi.shyti@linux.intel.com>
Thu, 19 Dec 2024 10:06:03 +0000 (11:06 +0100)
Log the GPU frequency read in RC0 and in RC6 alongside the energy
readings, so that the readings can be validated when debugging GPU
energy leak and power measurement failures.
Also raise the RC6 sleep time from 100ms to 1000ms to match RC0.

v2:
  - Improved commit message.
v3:
  - Used pr_err log to display frequency (Anshuman)
  - Sorted headers alphabetically (Sai Teja)
v4:
  - Improved commit message.
  - Fix pr_err log (Sai Teja)
v5:
  - Add error & debug logging for RC0 power and frequency checks (Anshuman)
v6:
  - Modify debug logging for RC0 power and frequency checks (Sai Teja)
v7:
  - Use pr_debug if RC0 power isn't measured but frequency is (Anshuman)
  - Improved commit message (Badal)
  - Change API to read actual frequency without applying forcewake (Badal)
  - Update sleep time for RC6 mode (Anshuman)

Signed-off-by: Sk Anirban <sk.anirban@intel.com>
Reviewed-by: Sai Teja Pottumuttu <sai.teja.pottumuttu@intel.com>
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241129154716.2764974-1-sk.anirban@intel.com
drivers/gpu/drm/i915/gt/selftest_rc6.c

index 1aa1446c8fb09b2c5a21b3bcd3573fa58dad620e..27b6d51ef1454f38713718042f99088f30391f24 100644 (file)
@@ -8,6 +8,7 @@
 #include "intel_gpu_commands.h"
 #include "intel_gt_requests.h"
 #include "intel_ring.h"
+#include "intel_rps.h"
 #include "selftest_rc6.h"
 
 #include "selftests/i915_random.h"
@@ -38,6 +39,9 @@ int live_rc6_manual(void *arg)
        ktime_t dt;
        u64 res[2];
        int err = 0;
+       u32 rc0_freq = 0;
+       u32 rc6_freq = 0;
+       struct intel_rps *rps = &gt->rps;
 
        /*
         * Our claim is that we can "encourage" the GPU to enter rc6 at will.
@@ -66,6 +70,7 @@ int live_rc6_manual(void *arg)
        rc0_power = librapl_energy_uJ() - rc0_power;
        dt = ktime_sub(ktime_get(), dt);
        res[1] = rc6_residency(rc6);
+       rc0_freq = intel_rps_read_actual_frequency_fw(rps);
        if ((res[1] - res[0]) >> 10) {
                pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n",
                       (res[1] - res[0]) >> 10);
@@ -77,7 +82,11 @@ int live_rc6_manual(void *arg)
                rc0_power = div64_u64(NSEC_PER_SEC * rc0_power,
                                      ktime_to_ns(dt));
                if (!rc0_power) {
-                       pr_err("No power measured while in RC0\n");
+                       if (rc0_freq)
+                               pr_debug("No power measured while in RC0! GPU Freq: %u in RC0\n",
+                                        rc0_freq);
+                       else
+                               pr_err("No power and freq measured while in RC0\n");
                        err = -EINVAL;
                        goto out_unlock;
                }
@@ -90,7 +99,8 @@ int live_rc6_manual(void *arg)
        intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
        dt = ktime_get();
        rc6_power = librapl_energy_uJ();
-       msleep(100);
+       msleep(1000);
+       rc6_freq = intel_rps_read_actual_frequency_fw(rps);
        rc6_power = librapl_energy_uJ() - rc6_power;
        dt = ktime_sub(ktime_get(), dt);
        res[1] = rc6_residency(rc6);
@@ -108,7 +118,8 @@ int live_rc6_manual(void *arg)
                pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
                        rc0_power, rc6_power);
                if (2 * rc6_power > rc0_power) {
-                       pr_err("GPU leaked energy while in RC6!\n");
+                       pr_err("GPU leaked energy while in RC6! GPU Freq: %u in RC6 and %u in RC0\n",
+                              rc6_freq, rc0_freq);
                        err = -EINVAL;
                        goto out_unlock;
                }