1 // SPDX-License-Identifier: MIT
3 * Copyright © 2020 Intel Corporation
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
9 #include "gem/i915_gem_internal.h"
11 #include "intel_engine_heartbeat.h"
12 #include "intel_engine_pm.h"
13 #include "intel_engine_regs.h"
14 #include "intel_gpu_commands.h"
15 #include "intel_gt_clock_utils.h"
16 #include "intel_gt_pm.h"
17 #include "intel_rc6.h"
18 #include "selftest_engine_heartbeat.h"
19 #include "selftest_rps.h"
20 #include "selftests/igt_flush_test.h"
21 #include "selftests/igt_spinner.h"
22 #include "selftests/librapl.h"
24 /* Try to isolate the impact of cstates from determining frequency response */
25 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
27 static void dummy_rps_work(struct work_struct *wrk)
31 static int cmp_u64(const void *A, const void *B)
33 const u64 *a = A, *b = B;
43 static int cmp_u32(const void *A, const void *B)
45 const u32 *a = A, *b = B;
55 static struct i915_vma *
56 create_spin_counter(struct intel_engine_cs *engine,
57 struct i915_address_space *vm,
67 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
68 struct drm_i915_gem_object *obj;
75 obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
79 end = obj->base.size / sizeof(u32) - 1;
81 vma = i915_vma_instance(obj, vm, NULL);
87 err = i915_vma_pin(vma, 0, 0, PIN_USER);
93 base = i915_gem_object_pin_map(obj, I915_MAP_WC);
100 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
101 for (i = 0; i < __NGPR__; i++) {
102 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
104 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
108 *cs++ = MI_LOAD_REGISTER_IMM(1);
109 *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
114 /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
115 for (i = 0; i < 1024; i++) {
117 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
118 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
120 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
123 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
124 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
125 *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
126 *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
130 *cs++ = MI_BATCH_BUFFER_START_GEN8;
131 *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
132 *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
133 GEM_BUG_ON(cs - base > end);
135 i915_gem_object_flush_map(obj);
137 *cancel = base + loop;
138 *counter = srm ? memset32(base + end, 0, 1) : NULL;
144 i915_vma_unlock(vma);
146 i915_gem_object_put(obj);
150 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
157 memset(history, freq, sizeof(history));
160 /* The PCU does not change instantly, but drifts towards the goal? */
161 end = jiffies + msecs_to_jiffies(timeout_ms);
165 act = read_cagf(rps);
166 if (time_after(jiffies, end))
169 /* Target acquired */
173 /* Any change within the last N samples? */
174 if (!memchr_inv(history, act, sizeof(history)))
178 i = (i + 1) % ARRAY_SIZE(history);
180 usleep_range(sleep, 2 * sleep);
182 if (sleep > timeout_ms * 20)
183 sleep = timeout_ms * 20;
187 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
189 mutex_lock(&rps->lock);
190 GEM_BUG_ON(!intel_rps_is_active(rps));
191 if (wait_for(!intel_rps_set(rps, freq), 50)) {
192 mutex_unlock(&rps->lock);
195 GEM_BUG_ON(rps->last_freq != freq);
196 mutex_unlock(&rps->lock);
198 return wait_for_freq(rps, freq, 50);
201 static void show_pstate_limits(struct intel_rps *rps)
203 struct drm_i915_private *i915 = rps_to_i915(rps);
205 if (IS_BROXTON(i915)) {
206 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
207 i915_mmio_reg_offset(BXT_RP_STATE_CAP),
208 intel_uncore_read(rps_to_uncore(rps),
210 } else if (GRAPHICS_VER(i915) == 9) {
211 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
212 i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
213 intel_uncore_read(rps_to_uncore(rps),
214 GEN9_RP_STATE_LIMITS));
218 int live_rps_clock_interval(void *arg)
220 struct intel_gt *gt = arg;
221 struct intel_rps *rps = &gt->rps;
222 void (*saved_work)(struct work_struct *wrk);
223 struct intel_engine_cs *engine;
224 enum intel_engine_id id;
225 struct igt_spinner spin;
228 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
231 if (igt_spinner_init(&spin, gt))
234 intel_gt_pm_wait_for_idle(gt);
235 saved_work = rps->work.func;
236 rps->work.func = dummy_rps_work;
239 intel_rps_disable(&gt->rps);
241 intel_gt_check_clock_frequency(gt);
243 for_each_engine(engine, gt, id) {
244 struct i915_request *rq;
248 if (!intel_engine_can_store_dword(engine))
251 st_engine_heartbeat_disable(engine);
253 rq = igt_spinner_create_request(&spin,
254 engine->kernel_context,
257 st_engine_heartbeat_enable(engine);
262 i915_request_add(rq);
264 if (!igt_wait_for_spinner(&spin, rq)) {
265 pr_err("%s: RPS spinner did not start\n",
267 igt_spinner_end(&spin);
268 st_engine_heartbeat_enable(engine);
269 intel_gt_set_wedged(engine->gt);
274 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
276 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
278 /* Set the evaluation interval to infinity! */
279 intel_uncore_write_fw(gt->uncore,
280 GEN6_RP_UP_EI, 0xffffffff);
281 intel_uncore_write_fw(gt->uncore,
282 GEN6_RP_UP_THRESHOLD, 0xffffffff);
284 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
285 GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
287 if (wait_for(intel_uncore_read_fw(gt->uncore,
290 /* Just skip the test; assume lack of HW support */
291 pr_notice("%s: rps evaluation interval not ticking\n",
299 for (i = 0; i < 5; i++) {
302 dt_[i] = ktime_get();
303 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
307 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
308 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
313 /* Use the median of both cycle/dt; close enough */
314 sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
315 cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
316 sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
317 dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
320 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
321 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
323 igt_spinner_end(&spin);
324 st_engine_heartbeat_enable(engine);
327 u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
329 intel_gt_ns_to_pm_interval(gt, dt);
331 pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
332 engine->name, cycles, time, dt, expected,
333 gt->clock_frequency / 1000);
335 if (10 * time < 8 * dt ||
336 8 * time > 10 * dt) {
337 pr_err("%s: rps clock time does not match walltime!\n",
342 if (10 * expected < 8 * cycles ||
343 8 * expected > 10 * cycles) {
344 pr_err("%s: walltime does not match rps clock ticks!\n",
350 if (igt_flush_test(gt->i915))
353 break; /* once is enough */
356 intel_rps_enable(&gt->rps);
359 igt_spinner_fini(&spin);
361 intel_gt_pm_wait_for_idle(gt);
362 rps->work.func = saved_work;
364 if (err == -ENODEV) /* skipped, don't report a fail */
370 int live_rps_control(void *arg)
372 struct intel_gt *gt = arg;
373 struct intel_rps *rps = &gt->rps;
374 void (*saved_work)(struct work_struct *wrk);
375 struct intel_engine_cs *engine;
376 enum intel_engine_id id;
377 struct igt_spinner spin;
381 * Check that the actual frequency matches our requested frequency,
382 * to verify our control mechanism. We have to be careful that the
383 * PCU may throttle the GPU in which case the actual frequency used
384 * will be lower than requested.
387 if (!intel_rps_is_enabled(rps))
390 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
393 if (igt_spinner_init(&spin, gt))
396 intel_gt_pm_wait_for_idle(gt);
397 saved_work = rps->work.func;
398 rps->work.func = dummy_rps_work;
401 for_each_engine(engine, gt, id) {
402 struct i915_request *rq;
403 ktime_t min_dt, max_dt;
407 if (!intel_engine_can_store_dword(engine))
410 st_engine_heartbeat_disable(engine);
412 rq = igt_spinner_create_request(&spin,
413 engine->kernel_context,
420 i915_request_add(rq);
422 if (!igt_wait_for_spinner(&spin, rq)) {
423 pr_err("%s: RPS spinner did not start\n",
425 igt_spinner_end(&spin);
426 st_engine_heartbeat_enable(engine);
427 intel_gt_set_wedged(engine->gt);
432 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
433 pr_err("%s: could not set minimum frequency [%x], only %x!\n",
434 engine->name, rps->min_freq, read_cagf(rps));
435 igt_spinner_end(&spin);
436 st_engine_heartbeat_enable(engine);
437 show_pstate_limits(rps);
442 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
443 if (rps_set_check(rps, f) < f)
447 limit = rps_set_check(rps, f);
449 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
450 pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
451 engine->name, rps->min_freq, read_cagf(rps));
452 igt_spinner_end(&spin);
453 st_engine_heartbeat_enable(engine);
454 show_pstate_limits(rps);
459 max_dt = ktime_get();
460 max = rps_set_check(rps, limit);
461 max_dt = ktime_sub(ktime_get(), max_dt);
463 min_dt = ktime_get();
464 min = rps_set_check(rps, rps->min_freq);
465 min_dt = ktime_sub(ktime_get(), min_dt);
467 igt_spinner_end(&spin);
468 st_engine_heartbeat_enable(engine);
470 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
472 rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
473 rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
474 limit, intel_gpu_freq(rps, limit),
475 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
477 if (limit == rps->min_freq) {
478 pr_err("%s: GPU throttled to minimum!\n",
480 show_pstate_limits(rps);
485 if (igt_flush_test(gt->i915)) {
492 igt_spinner_fini(&spin);
494 intel_gt_pm_wait_for_idle(gt);
495 rps->work.func = saved_work;
500 static void show_pcu_config(struct intel_rps *rps)
502 struct drm_i915_private *i915 = rps_to_i915(rps);
503 unsigned int max_gpu_freq, min_gpu_freq;
504 intel_wakeref_t wakeref;
510 min_gpu_freq = rps->min_freq;
511 max_gpu_freq = rps->max_freq;
512 if (GRAPHICS_VER(i915) >= 9) {
513 /* Convert GT frequency to 50 MHz units */
514 min_gpu_freq /= GEN9_FREQ_SCALER;
515 max_gpu_freq /= GEN9_FREQ_SCALER;
518 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
520 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
521 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
522 int ia_freq = gpu_freq;
524 snb_pcode_read(i915, GEN6_PCODE_READ_MIN_FREQ_TABLE,
527 pr_info("%5d %5d %5d\n",
529 ((ia_freq >> 0) & 0xff) * 100,
530 ((ia_freq >> 8) & 0xff) * 100);
533 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
536 static u64 __measure_frequency(u32 *cntr, int duration_ms)
541 dc = READ_ONCE(*cntr);
542 usleep_range(1000 * duration_ms, 2000 * duration_ms);
543 dc = READ_ONCE(*cntr) - dc;
544 dt = ktime_get() - dt;
546 return div64_u64(1000 * 1000 * dc, dt);
549 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
554 *freq = rps_set_check(rps, *freq);
555 for (i = 0; i < 5; i++)
556 x[i] = __measure_frequency(cntr, 2);
557 *freq = (*freq + read_cagf(rps)) / 2;
559 /* A simple triangle filter for better result stability */
560 sort(x, 5, sizeof(*x), cmp_u64, NULL);
561 return div_u64(x[1] + 2 * x[2] + x[3], 4);
564 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
570 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
571 usleep_range(1000 * duration_ms, 2000 * duration_ms);
572 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
573 dt = ktime_get() - dt;
575 return div64_u64(1000 * 1000 * dc, dt);
578 static u64 measure_cs_frequency_at(struct intel_rps *rps,
579 struct intel_engine_cs *engine,
585 *freq = rps_set_check(rps, *freq);
586 for (i = 0; i < 5; i++)
587 x[i] = __measure_cs_frequency(engine, 2);
588 *freq = (*freq + read_cagf(rps)) / 2;
590 /* A simple triangle filter for better result stability */
591 sort(x, 5, sizeof(*x), cmp_u64, NULL);
592 return div_u64(x[1] + 2 * x[2] + x[3], 4);
595 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
597 return f_d * x > f_n * y && f_n * x < f_d * y;
600 int live_rps_frequency_cs(void *arg)
602 void (*saved_work)(struct work_struct *wrk);
603 struct intel_gt *gt = arg;
604 struct intel_rps *rps = &gt->rps;
605 struct intel_engine_cs *engine;
606 struct pm_qos_request qos;
607 enum intel_engine_id id;
611 * The premise is that the GPU does change frequency at our behest.
612 * Let's check there is a correspondence between the requested
613 * frequency, the actual frequency, and the observed clock rate.
616 if (!intel_rps_is_enabled(rps))
619 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
622 if (CPU_LATENCY >= 0)
623 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
625 intel_gt_pm_wait_for_idle(gt);
626 saved_work = rps->work.func;
627 rps->work.func = dummy_rps_work;
629 for_each_engine(engine, gt, id) {
630 struct i915_request *rq;
631 struct i915_vma *vma;
638 st_engine_heartbeat_disable(engine);
640 vma = create_spin_counter(engine,
641 engine->kernel_context->vm, false,
645 st_engine_heartbeat_enable(engine);
649 rq = intel_engine_create_kernel_request(engine);
655 err = i915_request_await_object(rq, vma->obj, false);
657 err = i915_vma_move_to_active(vma, rq, 0);
659 err = rq->engine->emit_bb_start(rq,
662 i915_request_add(rq);
666 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
668 pr_err("%s: timed loop did not start\n",
673 min.freq = rps->min_freq;
674 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
676 max.freq = rps->max_freq;
677 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
679 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
681 min.count, intel_gpu_freq(rps, min.freq),
682 max.count, intel_gpu_freq(rps, max.freq),
683 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
684 max.freq * min.count));
686 if (!scaled_within(max.freq * min.count,
687 min.freq * max.count,
691 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
693 max.freq * min.count,
694 min.freq * max.count);
695 show_pcu_config(rps);
697 for (f = min.freq + 1; f <= rps->max_freq; f++) {
701 count = measure_cs_frequency_at(rps, engine, &act);
705 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
707 act, intel_gpu_freq(rps, act), count,
708 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
711 f = act; /* may skip ahead [pcu granularity] */
714 err = -EINTR; /* ignore error, continue on with test */
718 *cancel = MI_BATCH_BUFFER_END;
719 i915_gem_object_flush_map(vma->obj);
720 i915_gem_object_unpin_map(vma->obj);
722 i915_vma_unlock(vma);
725 st_engine_heartbeat_enable(engine);
726 if (igt_flush_test(gt->i915))
732 intel_gt_pm_wait_for_idle(gt);
733 rps->work.func = saved_work;
735 if (CPU_LATENCY >= 0)
736 cpu_latency_qos_remove_request(&qos);
741 int live_rps_frequency_srm(void *arg)
743 void (*saved_work)(struct work_struct *wrk);
744 struct intel_gt *gt = arg;
745 struct intel_rps *rps = &gt->rps;
746 struct intel_engine_cs *engine;
747 struct pm_qos_request qos;
748 enum intel_engine_id id;
752 * The premise is that the GPU does change frequency at our behest.
753 * Let's check there is a correspondence between the requested
754 * frequency, the actual frequency, and the observed clock rate.
757 if (!intel_rps_is_enabled(rps))
760 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
763 if (CPU_LATENCY >= 0)
764 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
766 intel_gt_pm_wait_for_idle(gt);
767 saved_work = rps->work.func;
768 rps->work.func = dummy_rps_work;
770 for_each_engine(engine, gt, id) {
771 struct i915_request *rq;
772 struct i915_vma *vma;
779 st_engine_heartbeat_disable(engine);
781 vma = create_spin_counter(engine,
782 engine->kernel_context->vm, true,
786 st_engine_heartbeat_enable(engine);
790 rq = intel_engine_create_kernel_request(engine);
796 err = i915_request_await_object(rq, vma->obj, false);
798 err = i915_vma_move_to_active(vma, rq, 0);
800 err = rq->engine->emit_bb_start(rq,
803 i915_request_add(rq);
807 if (wait_for(READ_ONCE(*cntr), 10)) {
808 pr_err("%s: timed loop did not start\n",
813 min.freq = rps->min_freq;
814 min.count = measure_frequency_at(rps, cntr, &min.freq);
816 max.freq = rps->max_freq;
817 max.count = measure_frequency_at(rps, cntr, &max.freq);
819 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
821 min.count, intel_gpu_freq(rps, min.freq),
822 max.count, intel_gpu_freq(rps, max.freq),
823 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
824 max.freq * min.count));
826 if (!scaled_within(max.freq * min.count,
827 min.freq * max.count,
831 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
833 max.freq * min.count,
834 min.freq * max.count);
835 show_pcu_config(rps);
837 for (f = min.freq + 1; f <= rps->max_freq; f++) {
841 count = measure_frequency_at(rps, cntr, &act);
845 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
847 act, intel_gpu_freq(rps, act), count,
848 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
851 f = act; /* may skip ahead [pcu granularity] */
854 err = -EINTR; /* ignore error, continue on with test */
858 *cancel = MI_BATCH_BUFFER_END;
859 i915_gem_object_flush_map(vma->obj);
860 i915_gem_object_unpin_map(vma->obj);
862 i915_vma_unlock(vma);
865 st_engine_heartbeat_enable(engine);
866 if (igt_flush_test(gt->i915))
872 intel_gt_pm_wait_for_idle(gt);
873 rps->work.func = saved_work;
875 if (CPU_LATENCY >= 0)
876 cpu_latency_qos_remove_request(&qos);
881 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
883 /* Flush any previous EI */
884 usleep_range(timeout_us, 2 * timeout_us);
886 /* Reset the interrupt status */
887 rps_disable_interrupts(rps);
888 GEM_BUG_ON(rps->pm_iir);
889 rps_enable_interrupts(rps);
891 /* And then wait for the timeout, for real this time */
892 usleep_range(2 * timeout_us, 3 * timeout_us);
895 static int __rps_up_interrupt(struct intel_rps *rps,
896 struct intel_engine_cs *engine,
897 struct igt_spinner *spin)
899 struct intel_uncore *uncore = engine->uncore;
900 struct i915_request *rq;
903 if (!intel_engine_can_store_dword(engine))
906 rps_set_check(rps, rps->min_freq);
908 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
912 i915_request_get(rq);
913 i915_request_add(rq);
915 if (!igt_wait_for_spinner(spin, rq)) {
916 pr_err("%s: RPS spinner did not start\n",
918 i915_request_put(rq);
919 intel_gt_set_wedged(engine->gt);
923 if (!intel_rps_is_active(rps)) {
924 pr_err("%s: RPS not enabled on starting spinner\n",
926 igt_spinner_end(spin);
927 i915_request_put(rq);
931 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
932 pr_err("%s: RPS did not register UP interrupt\n",
934 i915_request_put(rq);
938 if (rps->last_freq != rps->min_freq) {
939 pr_err("%s: RPS did not program min frequency\n",
941 i915_request_put(rq);
945 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
946 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
947 timeout = DIV_ROUND_UP(timeout, 1000);
949 sleep_for_ei(rps, timeout);
950 GEM_BUG_ON(i915_request_completed(rq));
952 igt_spinner_end(spin);
953 i915_request_put(rq);
955 if (rps->cur_freq != rps->min_freq) {
956 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
957 engine->name, intel_rps_read_actual_frequency(rps));
961 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
962 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
963 engine->name, rps->pm_iir,
964 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
965 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
966 intel_uncore_read(uncore, GEN6_RP_UP_EI));
973 static int __rps_down_interrupt(struct intel_rps *rps,
974 struct intel_engine_cs *engine)
976 struct intel_uncore *uncore = engine->uncore;
979 rps_set_check(rps, rps->max_freq);
981 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
982 pr_err("%s: RPS did not register DOWN interrupt\n",
987 if (rps->last_freq != rps->max_freq) {
988 pr_err("%s: RPS did not program max frequency\n",
993 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
994 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
995 timeout = DIV_ROUND_UP(timeout, 1000);
997 sleep_for_ei(rps, timeout);
999 if (rps->cur_freq != rps->max_freq) {
1000 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
1002 intel_rps_read_actual_frequency(rps));
1006 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1007 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1008 engine->name, rps->pm_iir,
1009 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1010 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1011 intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1012 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1013 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1014 intel_uncore_read(uncore, GEN6_RP_UP_EI));
1021 int live_rps_interrupt(void *arg)
1023 struct intel_gt *gt = arg;
1024 struct intel_rps *rps = &gt->rps;
1025 void (*saved_work)(struct work_struct *wrk);
1026 struct intel_engine_cs *engine;
1027 enum intel_engine_id id;
1028 struct igt_spinner spin;
1033 * First, let's check whether or not we are receiving interrupts.
1036 if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
1039 intel_gt_pm_get(gt);
1040 pm_events = rps->pm_events;
1041 intel_gt_pm_put(gt);
1043 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1047 if (igt_spinner_init(&spin, gt))
1050 intel_gt_pm_wait_for_idle(gt);
1051 saved_work = rps->work.func;
1052 rps->work.func = dummy_rps_work;
1054 for_each_engine(engine, gt, id) {
1055 /* Keep the engine busy with a spinner; expect an UP! */
1056 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1057 intel_gt_pm_wait_for_idle(engine->gt);
1058 GEM_BUG_ON(intel_rps_is_active(rps));
1060 st_engine_heartbeat_disable(engine);
1062 err = __rps_up_interrupt(rps, engine, &spin);
1064 st_engine_heartbeat_enable(engine);
1068 intel_gt_pm_wait_for_idle(engine->gt);
1071 /* Keep the engine awake but idle and check for DOWN */
1072 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1073 st_engine_heartbeat_disable(engine);
1074 intel_rc6_disable(&gt->rc6);
1076 err = __rps_down_interrupt(rps, engine);
1078 intel_rc6_enable(&gt->rc6);
1079 st_engine_heartbeat_enable(engine);
1086 if (igt_flush_test(gt->i915))
1089 igt_spinner_fini(&spin);
1091 intel_gt_pm_wait_for_idle(gt);
1092 rps->work.func = saved_work;
1097 static u64 __measure_power(int duration_ms)
1102 dE = librapl_energy_uJ();
1103 usleep_range(1000 * duration_ms, 2000 * duration_ms);
1104 dE = librapl_energy_uJ() - dE;
1105 dt = ktime_get() - dt;
1107 return div64_u64(1000 * 1000 * dE, dt);
1110 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1115 *freq = rps_set_check(rps, *freq);
1116 for (i = 0; i < 5; i++)
1117 x[i] = __measure_power(5);
1118 *freq = (*freq + read_cagf(rps)) / 2;
1120 /* A simple triangle filter for better result stability */
1121 sort(x, 5, sizeof(*x), cmp_u64, NULL);
1122 return div_u64(x[1] + 2 * x[2] + x[3], 4);
1125 int live_rps_power(void *arg)
1127 struct intel_gt *gt = arg;
1128 struct intel_rps *rps = &gt->rps;
1129 void (*saved_work)(struct work_struct *wrk);
1130 struct intel_engine_cs *engine;
1131 enum intel_engine_id id;
1132 struct igt_spinner spin;
1136 * Our fundamental assumption is that running at lower frequency
1137 * actually saves power. Let's see if our RAPL measurement support
1141 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1144 if (!librapl_supported(gt->i915))
1147 if (igt_spinner_init(&spin, gt))
1150 intel_gt_pm_wait_for_idle(gt);
1151 saved_work = rps->work.func;
1152 rps->work.func = dummy_rps_work;
1154 for_each_engine(engine, gt, id) {
1155 struct i915_request *rq;
1161 if (!intel_engine_can_store_dword(engine))
1164 st_engine_heartbeat_disable(engine);
1166 rq = igt_spinner_create_request(&spin,
1167 engine->kernel_context,
1170 st_engine_heartbeat_enable(engine);
1175 i915_request_add(rq);
1177 if (!igt_wait_for_spinner(&spin, rq)) {
1178 pr_err("%s: RPS spinner did not start\n",
1180 igt_spinner_end(&spin);
1181 st_engine_heartbeat_enable(engine);
1182 intel_gt_set_wedged(engine->gt);
1187 max.freq = rps->max_freq;
1188 max.power = measure_power_at(rps, &max.freq);
1190 min.freq = rps->min_freq;
1191 min.power = measure_power_at(rps, &min.freq);
1193 igt_spinner_end(&spin);
1194 st_engine_heartbeat_enable(engine);
1196 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1198 min.power, intel_gpu_freq(rps, min.freq),
1199 max.power, intel_gpu_freq(rps, max.freq));
1201 if (10 * min.freq >= 9 * max.freq) {
1202 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1203 min.freq, intel_gpu_freq(rps, min.freq),
1204 max.freq, intel_gpu_freq(rps, max.freq));
1208 if (11 * min.power > 10 * max.power) {
1209 pr_err("%s: did not conserve power when setting lower frequency!\n",
1215 if (igt_flush_test(gt->i915)) {
1221 igt_spinner_fini(&spin);
1223 intel_gt_pm_wait_for_idle(gt);
1224 rps->work.func = saved_work;
1229 int live_rps_dynamic(void *arg)
1231 struct intel_gt *gt = arg;
1232 struct intel_rps *rps = &gt->rps;
1233 struct intel_engine_cs *engine;
1234 enum intel_engine_id id;
1235 struct igt_spinner spin;
1239 * We've looked at the basics, and have established that we
1240 * can change the clock frequency and that the HW will generate
1241 * interrupts based on load. Now we check how we integrate those
1242 * moving parts into dynamic reclocking based on load.
1245 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1248 if (igt_spinner_init(&spin, gt))
1251 if (intel_rps_has_interrupts(rps))
1252 pr_info("RPS has interrupt support\n");
1253 if (intel_rps_uses_timer(rps))
1254 pr_info("RPS has timer support\n");
1256 for_each_engine(engine, gt, id) {
1257 struct i915_request *rq;
1263 if (!intel_engine_can_store_dword(engine))
1266 intel_gt_pm_wait_for_idle(gt);
1267 GEM_BUG_ON(intel_rps_is_active(rps));
1268 rps->cur_freq = rps->min_freq;
1270 intel_engine_pm_get(engine);
1271 intel_rc6_disable(&gt->rc6);
1272 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1274 rq = igt_spinner_create_request(&spin,
1275 engine->kernel_context,
1282 i915_request_add(rq);
1284 max.dt = ktime_get();
1285 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1286 max.dt = ktime_sub(ktime_get(), max.dt);
1288 igt_spinner_end(&spin);
1290 min.dt = ktime_get();
1291 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1292 min.dt = ktime_sub(ktime_get(), min.dt);
1294 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1296 max.freq, intel_gpu_freq(rps, max.freq),
1297 ktime_to_ns(max.dt),
1298 min.freq, intel_gpu_freq(rps, min.freq),
1299 ktime_to_ns(min.dt));
1300 if (min.freq >= max.freq) {
1301 pr_err("%s: dynamic reclocking of spinner failed\n!",
1307 intel_rc6_enable(&gt->rc6);
1308 intel_engine_pm_put(engine);
1310 if (igt_flush_test(gt->i915))
1316 igt_spinner_fini(&spin);