1 // SPDX-License-Identifier: MIT
3 * Copyright © 2020 Intel Corporation
6 #include <linux/sort.h>
8 #include "intel_engine_pm.h"
9 #include "intel_gpu_commands.h"
10 #include "intel_gt_pm.h"
11 #include "intel_rc6.h"
12 #include "selftest_rps.h"
13 #include "selftests/igt_flush_test.h"
14 #include "selftests/igt_spinner.h"
15 #include "selftests/librapl.h"
/*
 * No-op stand-in for the RPS worker. The selftests below install this over
 * rps->work.func so that background RPS work items queued by interrupts
 * cannot change the frequency behind the test's back.
 */
17 static void dummy_rps_work(struct work_struct *wrk)
/*
 * sort() comparator for arrays of u64 samples (used by the triangle
 * filters in measure_frequency_at()/measure_power_at()).
 * NOTE(review): the comparison/return logic is outside this excerpt —
 * presumably ascending order; confirm against the full file.
 */
21 static int cmp_u64(const void *A, const void *B)
23 const u64 *a = A, *b = B;
/*
 * Build a self-looping batch buffer that uses the command streamer's
 * MI_MATH ALU to continuously increment a counter in memory, so that the
 * observed increment rate tracks the CS clock (and hence GPU frequency).
 *
 * NOTE(review): several lines (error unwinding, the MI_MATH ADD opcode,
 * loop bookkeeping, the out-parameter declarations) fall outside this
 * excerpt; comments below describe only what is visible here. The caller
 * appears to receive @cancel (poke to break the loop) and @counter (the
 * dword the batch keeps incrementing) — confirm against the full file.
 */
33 static struct i915_vma *
34 create_spin_counter(struct intel_engine_cs *engine,
35 struct i915_address_space *vm,
/* Shorthand for this engine's general-purpose CS registers */
44 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
45 struct drm_i915_gem_object *obj;
/* One page holds the batch plus the counter slot at dword 1000 */
51 obj = i915_gem_object_create_internal(vm->i915, 4096);
55 vma = i915_vma_instance(obj, vm, NULL);
57 i915_gem_object_put(obj);
61 err = i915_vma_pin(vma, 0, 0, PIN_USER);
/* WC map: CPU-written commands must be visible to the GPU uncached */
67 base = i915_gem_object_pin_map(obj, I915_MAP_WC);
69 i915_gem_object_put(obj);
70 return ERR_CAST(base);
/* Clear both dwords of every GPR before use */
74 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
75 for (i = 0; i < __NGPR__; i++) {
76 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
78 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
/* Load the per-iteration increment into GPR[INC] */
82 *cs++ = MI_LOAD_REGISTER_IMM(1);
83 *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
/* ALU: accumulate COUNT + INC (ADD opcode lies outside this excerpt) */
89 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
90 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
92 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
/* Flush the running count out to dword 1000 of the object */
94 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
95 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
96 *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
97 *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));
/* Branch back to the loop entry; spins until the caller pokes *cancel */
99 *cs++ = MI_BATCH_BUFFER_START_GEN8;
100 *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
101 *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
103 i915_gem_object_flush_map(obj);
/* Hand back the loop entry (for MI_BATCH_BUFFER_END injection) and the
 * zero-initialised counter location.
 */
105 *cancel = base + loop;
106 *counter = memset32(base + 1000, 0, 1);
/*
 * Sample the CS-incremented counter over ~duration_ms and return the
 * increment rate: counts * 1e6 / ns elapsed, i.e. counts-per-second
 * scaled down by 1000 (reported as KHz by the caller).
 * NOTE(review): the initial "dt = ktime_get()" presumably occurs on a
 * line outside this excerpt — confirm against the full file.
 */
110 static u64 __measure_frequency(u32 *cntr, int duration_ms)
115 dc = READ_ONCE(*cntr);
116 usleep_range(1000 * duration_ms, 2000 * duration_ms);
117 dc = READ_ONCE(*cntr) - dc;
118 dt = ktime_get() - dt;
120 return div64_u64(1000 * 1000 * dc, dt);
/*
 * Pin the GPU at *freq, let it settle, and report a filtered counter rate
 * observed at that frequency. *freq is overwritten with the actual
 * frequency read back (read_cagf) during the measurement.
 */
123 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
/* Request the target frequency under the rps lock */
128 mutex_lock(&rps->lock);
129 GEM_BUG_ON(!rps->active);
130 intel_rps_set(rps, *freq);
131 mutex_unlock(&rps->lock);
133 msleep(20); /* more than enough time to stabilise! */
/* Five ~2ms samples of the counter rate */
135 for (i = 0; i < 5; i++)
136 x[i] = __measure_frequency(cntr, 2);
137 *freq = read_cagf(rps);
139 /* A simple triangle filter for better result stability */
140 sort(x, 5, sizeof(*x), cmp_u64, NULL);
141 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * Check that x and y agree to within the tolerance ratio f_n/f_d, i.e.
 * x/y lies strictly inside (f_n/f_d, f_d/f_n). Used to confirm the CS
 * clock scales proportionally with the requested frequency.
 */
144 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
146 return f_d * x > f_n * y && f_n * x < f_d * y;
/*
 * live_rps_frequency - check that the CS clock tracks the RPS frequency.
 *
 * On each engine, run the self-incrementing spin-counter batch, measure
 * the counter rate at min and at max frequency, and verify the two rates
 * scale in proportion to the requested frequencies (scaled_within()).
 * The RPS worker is stubbed out (dummy_rps_work) for the duration.
 *
 * NOTE(review): error unwinding, the min/max/cntr/err declarations and
 * several cleanup lines fall outside this excerpt.
 */
149 int live_rps_frequency(void *arg)
151 void (*saved_work)(struct work_struct *wrk);
152 struct intel_gt *gt = arg;
153 struct intel_rps *rps = &gt->rps;
154 struct intel_engine_cs *engine;
155 enum intel_engine_id id;
159 * The premise is that the GPU does change frequency at our behest.
160 * Let's check there is a correspondence between the requested
161 * frequency, the actual frequency, and the observed clock rate.
164 if (!rps->enabled || rps->max_freq <= rps->min_freq)
167 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
/* Quiesce the GT and park the real RPS worker before fiddling */
170 intel_gt_pm_wait_for_idle(gt);
171 saved_work = rps->work.func;
172 rps->work.func = dummy_rps_work;
174 for_each_engine(engine, gt, id) {
175 struct i915_request *rq;
176 struct i915_vma *vma;
183 vma = create_spin_counter(engine,
184 engine->kernel_context->vm,
191 rq = intel_engine_create_kernel_request(engine);
198 err = i915_request_await_object(rq, vma->obj, false);
200 err = i915_vma_move_to_active(vma, rq, 0);
202 err = rq->engine->emit_bb_start(rq,
205 i915_vma_unlock(vma);
206 i915_request_add(rq);
/* The batch must start ticking before we can measure anything */
210 if (wait_for(READ_ONCE(*cntr), 10)) {
211 pr_err("%s: timed loop did not start\n",
216 min.freq = rps->min_freq;
217 min.count = measure_frequency_at(rps, cntr, &min.freq);
219 max.freq = rps->max_freq;
220 max.count = measure_frequency_at(rps, cntr, &max.freq);
222 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
224 min.count, intel_gpu_freq(rps, min.freq),
225 max.count, intel_gpu_freq(rps, max.freq),
226 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
227 max.freq * min.count));
/* Cross-multiplied comparison: count/freq ratios must agree */
229 if (!scaled_within(max.freq * min.count,
230 min.freq * max.count,
232 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
234 max.freq * min.count,
235 min.freq * max.count);
/* Terminate the spinning batch and release the WC mapping */
240 *cancel = MI_BATCH_BUFFER_END;
241 i915_gem_object_unpin_map(vma->obj);
245 if (igt_flush_test(gt->i915))
/* Restore the real RPS worker once the GT is idle again */
251 intel_gt_pm_wait_for_idle(gt);
252 rps->work.func = saved_work;
/*
 * Wait out one RPS evaluation interval (EI) with a clean interrupt slate:
 * flush any EI already in progress, clear the latched interrupt status
 * (rps->pm_iir) by cycling interrupts off/on, then sleep long enough for
 * exactly one fresh EI to elapse.
 */
257 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
259 /* Flush any previous EI */
260 usleep_range(timeout_us, 2 * timeout_us);
262 /* Reset the interrupt status */
263 rps_disable_interrupts(rps);
264 GEM_BUG_ON(rps->pm_iir);
265 rps_enable_interrupts(rps);
267 /* And then wait for the timeout, for real this time */
268 usleep_range(2 * timeout_us, 3 * timeout_us);
/*
 * __rps_up_interrupt - verify a saturated engine at min frequency raises
 * an UP-threshold interrupt within one evaluation interval.
 *
 * Pins RPS at min_freq, keeps the engine busy with a spinner, waits one
 * EI via sleep_for_ei(), then checks GEN6_PM_RP_UP_THRESHOLD was latched
 * in rps->pm_iir while the frequency itself stayed put (the RPS worker
 * is stubbed out, so nothing acts on the interrupt).
 * NOTE(review): returns and error-path lines fall outside this excerpt.
 */
271 static int __rps_up_interrupt(struct intel_rps *rps,
272 struct intel_engine_cs *engine,
273 struct igt_spinner *spin)
275 struct intel_uncore *uncore = engine->uncore;
276 struct i915_request *rq;
/* The spinner needs MI_STORE_DWORD support on this engine */
279 if (!intel_engine_can_store_dword(engine))
/* Force the minimum frequency so only an UP event is plausible */
282 mutex_lock(&rps->lock);
283 GEM_BUG_ON(!rps->active);
284 intel_rps_set(rps, rps->min_freq);
285 mutex_unlock(&rps->lock);
287 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
291 i915_request_get(rq);
292 i915_request_add(rq);
294 if (!igt_wait_for_spinner(spin, rq)) {
295 pr_err("%s: RPS spinner did not start\n",
297 i915_request_put(rq);
298 intel_gt_set_wedged(engine->gt);
303 pr_err("%s: RPS not enabled on starting spinner\n",
305 igt_spinner_end(spin);
306 i915_request_put(rq);
/* Sanity: the UP interrupt must be unmasked for this test to work */
310 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
311 pr_err("%s: RPS did not register UP interrupt\n",
313 i915_request_put(rq);
317 if (rps->last_freq != rps->min_freq) {
318 pr_err("%s: RPS did not program min frequency\n",
320 i915_request_put(rq);
/* Convert the hardware EI register into a sleep duration in us */
324 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
325 timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout);
327 sleep_for_ei(rps, timeout);
328 GEM_BUG_ON(i915_request_completed(rq));
330 igt_spinner_end(spin);
331 i915_request_put(rq);
/* The stubbed worker must not have acted: frequency unchanged */
333 if (rps->cur_freq != rps->min_freq) {
334 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
335 engine->name, intel_rps_read_actual_frequency(rps));
339 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
340 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
341 engine->name, rps->pm_iir,
342 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
343 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
344 intel_uncore_read(uncore, GEN6_RP_UP_EI));
/*
 * __rps_down_interrupt - verify an idle engine at max frequency raises a
 * DOWN threshold/timeout interrupt within one evaluation interval.
 *
 * Mirror image of __rps_up_interrupt(): pin RPS at max_freq with nothing
 * running, wait one EI, and expect GEN6_PM_RP_DOWN_THRESHOLD (or
 * DOWN_TIMEOUT) latched in rps->pm_iir with the frequency unchanged.
 * NOTE(review): returns and error-path lines fall outside this excerpt.
 */
351 static int __rps_down_interrupt(struct intel_rps *rps,
352 struct intel_engine_cs *engine)
354 struct intel_uncore *uncore = engine->uncore;
/* Force the maximum frequency so only a DOWN event is plausible */
357 mutex_lock(&rps->lock);
358 GEM_BUG_ON(!rps->active);
359 intel_rps_set(rps, rps->max_freq);
360 mutex_unlock(&rps->lock);
362 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
363 pr_err("%s: RPS did not register DOWN interrupt\n",
368 if (rps->last_freq != rps->max_freq) {
369 pr_err("%s: RPS did not program max frequency\n",
/* Convert the hardware EI register into a sleep duration in us */
374 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
375 timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout);
377 sleep_for_ei(rps, timeout);
/* The stubbed worker must not have acted: frequency unchanged */
379 if (rps->cur_freq != rps->max_freq) {
380 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
382 intel_rps_read_actual_frequency(rps));
386 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
387 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
388 engine->name, rps->pm_iir,
389 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
390 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
391 intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
392 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
393 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
394 intel_uncore_read(uncore, GEN6_RP_UP_EI));
/*
 * live_rps_interrupt - check that RPS UP/DOWN interrupts are delivered.
 *
 * For each engine: run a spinner at min frequency and expect an
 * UP-threshold interrupt (__rps_up_interrupt), then hold the engine awake
 * but idle at max frequency (rc6 disabled) and expect a DOWN interrupt
 * (__rps_down_interrupt). The RPS worker is replaced by dummy_rps_work so
 * interrupts are latched but never acted upon.
 *
 * NOTE(review): error handling and several declarations (err, pm_events)
 * fall outside this excerpt.
 */
401 int live_rps_interrupt(void *arg)
403 struct intel_gt *gt = arg;
404 struct intel_rps *rps = &gt->rps;
405 void (*saved_work)(struct work_struct *wrk);
406 struct intel_engine_cs *engine;
407 enum intel_engine_id id;
408 struct igt_spinner spin;
413 * First, let's check whether or not we are receiving interrupts.
416 if (!rps->enabled || rps->max_freq <= rps->min_freq)
/* Snapshot the unmasked events before we start fiddling */
420 pm_events = rps->pm_events;
423 pr_err("No RPS PM events registered, but RPS is enabled?\n");
427 if (igt_spinner_init(&spin, gt))
/* Quiesce the GT and park the real RPS worker */
430 intel_gt_pm_wait_for_idle(gt);
431 saved_work = rps->work.func;
432 rps->work.func = dummy_rps_work;
434 for_each_engine(engine, gt, id) {
435 /* Keep the engine busy with a spinner; expect an UP! */
436 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
437 intel_gt_pm_wait_for_idle(engine->gt);
438 GEM_BUG_ON(rps->active);
440 intel_engine_pm_get(engine);
441 err = __rps_up_interrupt(rps, engine, &spin);
442 intel_engine_pm_put(engine);
446 intel_gt_pm_wait_for_idle(engine->gt);
449 /* Keep the engine awake but idle and check for DOWN */
450 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
451 intel_engine_pm_get(engine);
452 intel_rc6_disable(&gt->rc6);
454 err = __rps_down_interrupt(rps, engine);
456 intel_rc6_enable(&gt->rc6);
457 intel_engine_pm_put(engine);
464 if (igt_flush_test(gt->i915))
467 igt_spinner_fini(&spin);
/* Restore the real RPS worker once the GT is idle again */
469 intel_gt_pm_wait_for_idle(gt);
470 rps->work.func = saved_work;
/*
 * Integrate RAPL package energy over ~duration_ms and return the average
 * power: uJ * 1e6 / ns elapsed, which works out to milliwatts (callers
 * print the result with "%llumW").
 * NOTE(review): the initial "dt = ktime_get()" presumably occurs on a
 * line outside this excerpt — confirm against the full file.
 */
475 static u64 __measure_power(int duration_ms)
480 dE = librapl_energy_uJ();
481 usleep_range(1000 * duration_ms, 2000 * duration_ms);
482 dE = librapl_energy_uJ() - dE;
483 dt = ktime_get() - dt;
485 return div64_u64(1000 * 1000 * dE, dt);
/*
 * Pin the GPU at @freq, let it settle, and report a filtered average
 * power (mW) measured via RAPL at that frequency. Warns if the readback
 * frequency does not match the requested one.
 */
488 static u64 measure_power_at(struct intel_rps *rps, int freq)
/* Request the target frequency under the rps lock */
493 mutex_lock(&rps->lock);
494 GEM_BUG_ON(!rps->active);
495 intel_rps_set(rps, freq);
496 mutex_unlock(&rps->lock);
498 msleep(20); /* more than enough time to stabilise! */
/* Not fatal, but flag when the hw settled at a different frequency */
502 pr_notice("Running at %x [%uMHz], not target %x [%uMHz]\n",
503 i, intel_gpu_freq(rps, i),
504 freq, intel_gpu_freq(rps, freq));
/* Five ~5ms power samples */
506 for (i = 0; i < 5; i++)
507 x[i] = __measure_power(5);
509 /* A simple triangle filter for better result stability */
510 sort(x, 5, sizeof(*x), cmp_u64, NULL);
511 return div_u64(x[1] + 2 * x[2] + x[3], 4);
/*
 * live_rps_power - check that a lower frequency draws less power.
 *
 * On each capable engine, run a spinner and compare RAPL power at max vs
 * min frequency; fail if min does not come in comfortably below max.
 * Skipped when RAPL is unavailable (librapl_energy_uJ() == 0). The RPS
 * worker is stubbed out (dummy_rps_work) for the duration.
 *
 * NOTE(review): error unwinding and the min/max/err declarations fall
 * outside this excerpt.
 */
514 int live_rps_power(void *arg)
516 struct intel_gt *gt = arg;
517 struct intel_rps *rps = &gt->rps;
518 void (*saved_work)(struct work_struct *wrk);
519 struct intel_engine_cs *engine;
520 enum intel_engine_id id;
521 struct igt_spinner spin;
525 * Our fundamental assumption is that running at lower frequency
526 * actually saves power. Let's see if our RAPL measurement support
530 if (!rps->enabled || rps->max_freq <= rps->min_freq)
/* No RAPL energy counter, nothing to measure against */
533 if (!librapl_energy_uJ())
536 if (igt_spinner_init(&spin, gt))
/* Quiesce the GT and park the real RPS worker */
539 intel_gt_pm_wait_for_idle(gt);
540 saved_work = rps->work.func;
541 rps->work.func = dummy_rps_work;
543 for_each_engine(engine, gt, id) {
544 struct i915_request *rq;
547 if (!intel_engine_can_store_dword(engine))
550 rq = igt_spinner_create_request(&spin,
551 engine->kernel_context,
558 i915_request_add(rq);
560 if (!igt_wait_for_spinner(&spin, rq)) {
561 pr_err("%s: RPS spinner did not start\n",
563 intel_gt_set_wedged(engine->gt);
/* Measure with the spinner load held at both frequency extremes */
568 max = measure_power_at(rps, rps->max_freq);
569 min = measure_power_at(rps, rps->min_freq);
571 igt_spinner_end(&spin);
573 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
575 min, intel_gpu_freq(rps, rps->min_freq),
576 max, intel_gpu_freq(rps, rps->max_freq));
/* Require min power to be at least ~10% below max power */
577 if (11 * min > 10 * max) {
578 pr_err("%s: did not conserve power when setting lower frequency!\n",
584 if (igt_flush_test(gt->i915)) {
590 igt_spinner_fini(&spin);
/* Restore the real RPS worker once the GT is idle again */
592 intel_gt_pm_wait_for_idle(gt);
593 rps->work.func = saved_work;