drm/i915/selftests: Verify frequency scaling with RPS
drivers/gpu/drm/i915/gt/selftest_rps.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

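/*
 * Substitute a no-op for the real RPS worker so that any interrupts
 * flagged while the selftests run do not change frequency behind our
 * back; the tests restore the saved worker before returning.
 */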
static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
        const u64 *a = A, *b = B;

        if (*a < *b)
                return -1;
        else if (*a > *b)
                return 1;
        else
                return 0;
}

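/*
 * Build a self-referential batch that increments a CS GPR as fast as
 * the command streamer can execute it, spilling the count to memory so
 * the CPU can sample it. Writing MI_BATCH_BUFFER_END over *cancel
 * breaks the loop and lets the batch complete.
 */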
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
                    struct i915_address_space *vm,
                    u32 **cancel,
                    u32 **counter)
{
        enum {
                COUNT,
                INC,
                __NGPR__,
        };
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 *base, *cs;
        int loop, i;
        int err;

        obj = i915_gem_object_create_internal(vm->i915, 4096);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                i915_gem_object_put(obj);
                return vma;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err) {
                i915_vma_put(vma);
                return ERR_PTR(err);
        }

        base = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(base)) {
                i915_vma_unpin(vma);
                i915_gem_object_put(obj);
                return ERR_CAST(base);
        }
        cs = base;

        *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
        for (i = 0; i < __NGPR__; i++) {
                *cs++ = i915_mmio_reg_offset(CS_GPR(i));
                *cs++ = 0;
                *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
                *cs++ = 0;
        }

        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
        *cs++ = 1;

        loop = cs - base;

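        /*
         * Loop body, executed entirely by the CS: COUNT += INC, report
         * COUNT to memory for the CPU to sample, then branch back here.
         */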
        *cs++ = MI_MATH(4);
        *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
        *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
        *cs++ = MI_MATH_ADD;
        *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
        *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
        *cs++ = lower_32_bits(vma->node.start + 1000 * sizeof(*cs));
        *cs++ = upper_32_bits(vma->node.start + 1000 * sizeof(*cs));

        *cs++ = MI_BATCH_BUFFER_START_GEN8;
        *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
        *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));

        i915_gem_object_flush_map(obj);

        *cancel = base + loop;
        *counter = memset32(base + 1000, 0, 1);
        return vma;
}

static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
        u64 dc, dt;

        dt = ktime_get();
        dc = READ_ONCE(*cntr);
        usleep_range(1000 * duration_ms, 2000 * duration_ms);
        dc = READ_ONCE(*cntr) - dc;
        dt = ktime_get() - dt;

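        /* dt is in ns, so dc * 1e6 / dt is increments per ms, i.e. kHz */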
        return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
        u64 x[5];
        int i;

        mutex_lock(&rps->lock);
        GEM_BUG_ON(!rps->active);
        intel_rps_set(rps, *freq);
        mutex_unlock(&rps->lock);

        msleep(20); /* more than enough time to stabilise! */

        for (i = 0; i < 5; i++)
                x[i] = __measure_frequency(cntr, 2);
        *freq = read_cagf(rps);

        /* A simple triangle filter for better result stability */
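        /* e.g. {90, 98, 100, 103, 140} -> (98 + 2 * 100 + 103) / 4 = 100 */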
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

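/* True iff x/y lies strictly within (f_n/f_d, f_d/f_n) */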
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
        return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency(void *arg)
{
        void (*saved_work)(struct work_struct *wrk);
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /*
         * The premise is that the GPU does change frequency at our behest.
         * Let's check there is a correspondence between the requested
         * frequency, the actual frequency, and the observed clock rate.
         */

        if (!rps->enabled || rps->max_freq <= rps->min_freq)
                return 0;

        if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
                return 0;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                struct i915_vma *vma;
                u32 *cancel, *cntr;
                struct {
                        u64 count;
                        int freq;
                } min, max;

                vma = create_spin_counter(engine,
                                          engine->kernel_context->vm,
                                          &cancel, &cntr);
                if (IS_ERR(vma)) {
                        err = PTR_ERR(vma);
                        break;
                }

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err_vma;
                }

                i915_vma_lock(vma);
                err = i915_request_await_object(rq, vma->obj, false);
                if (!err)
                        err = i915_vma_move_to_active(vma, rq, 0);
                if (!err)
                        err = rq->engine->emit_bb_start(rq,
                                                        vma->node.start,
                                                        PAGE_SIZE, 0);
                i915_vma_unlock(vma);
                i915_request_add(rq);
                if (err)
                        goto err_vma;

                if (wait_for(READ_ONCE(*cntr), 10)) {
                        pr_err("%s: timed loop did not start\n",
                               engine->name);
                        err = -ENODEV;
                        goto err_vma;
                }

                min.freq = rps->min_freq;
                min.count = measure_frequency_at(rps, cntr, &min.freq);

                max.freq = rps->max_freq;
                max.count = measure_frequency_at(rps, cntr, &max.freq);

                pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
                        engine->name,
                        min.count, intel_gpu_freq(rps, min.freq),
                        max.count, intel_gpu_freq(rps, max.freq),
                        (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
                                                     max.freq * min.count));

                if (!scaled_within(max.freq * min.count,
                                   min.freq * max.count,
                                   1, 2)) {
                        pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
                               engine->name,
                               max.freq * min.count,
                               min.freq * max.count);
                        err = -EINVAL;
                }

err_vma:
                *cancel = MI_BATCH_BUFFER_END;
                i915_gem_object_unpin_map(vma->obj);
                i915_vma_unpin(vma);
                i915_vma_put(vma);

                if (igt_flush_test(gt->i915))
                        err = -EIO;
                if (err)
                        break;
        }

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}

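/*
 * Wait out one whole evaluation interval (EI) with freshly reset
 * interrupt status, so that any threshold event observed afterwards
 * must have been generated within that interval.
 */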
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
        /* Flush any previous EI */
        usleep_range(timeout_us, 2 * timeout_us);

        /* Reset the interrupt status */
        rps_disable_interrupts(rps);
        GEM_BUG_ON(rps->pm_iir);
        rps_enable_interrupts(rps);

        /* And then wait for the timeout, for real this time */
        usleep_range(2 * timeout_us, 3 * timeout_us);
}

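/*
 * Pin the GPU to its minimum frequency, keep the engine saturated with
 * a spinner, and check that an UP threshold interrupt is raised.
 */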
static int __rps_up_interrupt(struct intel_rps *rps,
                              struct intel_engine_cs *engine,
                              struct igt_spinner *spin)
{
        struct intel_uncore *uncore = engine->uncore;
        struct i915_request *rq;
        u32 timeout;

        if (!intel_engine_can_store_dword(engine))
                return 0;

        mutex_lock(&rps->lock);
        GEM_BUG_ON(!rps->active);
        intel_rps_set(rps, rps->min_freq);
        mutex_unlock(&rps->lock);

        rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        i915_request_get(rq);
        i915_request_add(rq);

        if (!igt_wait_for_spinner(spin, rq)) {
                pr_err("%s: RPS spinner did not start\n",
                       engine->name);
                i915_request_put(rq);
                intel_gt_set_wedged(engine->gt);
                return -EIO;
        }

        if (!rps->active) {
                pr_err("%s: RPS not enabled on starting spinner\n",
                       engine->name);
                igt_spinner_end(spin);
                i915_request_put(rq);
                return -EINVAL;
        }

        if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
                pr_err("%s: RPS did not register UP interrupt\n",
                       engine->name);
                igt_spinner_end(spin);
                i915_request_put(rq);
                return -EINVAL;
        }

        if (rps->last_freq != rps->min_freq) {
                pr_err("%s: RPS did not program min frequency\n",
                       engine->name);
                igt_spinner_end(spin);
                i915_request_put(rq);
                return -EINVAL;
        }

        timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
        timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout);

        sleep_for_ei(rps, timeout);
        GEM_BUG_ON(i915_request_completed(rq));

        igt_spinner_end(spin);
        i915_request_put(rq);

        if (rps->cur_freq != rps->min_freq) {
                pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
                       engine->name, intel_rps_read_actual_frequency(rps));
                return -EINVAL;
        }

        if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
                pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
                       engine->name, rps->pm_iir,
                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
                return -EINVAL;
        }

        return 0;
}

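/*
 * Pin the GPU to its maximum frequency while the engine is awake but
 * idle (rc6 disabled by the caller), and check that a DOWN threshold
 * or timeout interrupt is raised.
 */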
static int __rps_down_interrupt(struct intel_rps *rps,
                                struct intel_engine_cs *engine)
{
        struct intel_uncore *uncore = engine->uncore;
        u32 timeout;

        mutex_lock(&rps->lock);
        GEM_BUG_ON(!rps->active);
        intel_rps_set(rps, rps->max_freq);
        mutex_unlock(&rps->lock);

        if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
                pr_err("%s: RPS did not register DOWN interrupt\n",
                       engine->name);
                return -EINVAL;
        }

        if (rps->last_freq != rps->max_freq) {
                pr_err("%s: RPS did not program max frequency\n",
                       engine->name);
                return -EINVAL;
        }

        timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
        timeout = GT_PM_INTERVAL_TO_US(engine->i915, timeout);

        sleep_for_ei(rps, timeout);

        if (rps->cur_freq != rps->max_freq) {
                pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
                       engine->name,
                       intel_rps_read_actual_frequency(rps));
                return -EINVAL;
        }

        if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
                pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
                       engine->name, rps->pm_iir,
                       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
                       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
                       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
                       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
                       intel_uncore_read(uncore, GEN6_RP_UP_EI));
                return -EINVAL;
        }

        return 0;
}

int live_rps_interrupt(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        u32 pm_events;
        int err = 0;

        /*
         * First, let's check whether or not we are receiving interrupts.
         */

        if (!rps->enabled || rps->max_freq <= rps->min_freq)
                return 0;

        intel_gt_pm_get(gt);
        pm_events = rps->pm_events;
        intel_gt_pm_put(gt);
        if (!pm_events) {
                pr_err("No RPS PM events registered, but RPS is enabled?\n");
                return -ENODEV;
        }

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                /* Keep the engine busy with a spinner; expect an UP! */
                if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
                        intel_gt_pm_wait_for_idle(engine->gt);
                        GEM_BUG_ON(rps->active);

                        intel_engine_pm_get(engine);
                        err = __rps_up_interrupt(rps, engine, &spin);
                        intel_engine_pm_put(engine);
                        if (err)
                                goto out;

                        intel_gt_pm_wait_for_idle(engine->gt);
                }

                /* Keep the engine awake but idle and check for DOWN */
                if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
                        intel_engine_pm_get(engine);
                        intel_rc6_disable(&gt->rc6);

                        err = __rps_down_interrupt(rps, engine);

                        intel_rc6_enable(&gt->rc6);
                        intel_engine_pm_put(engine);
                        if (err)
                                goto out;
                }
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}

static u64 __measure_power(int duration_ms)
{
        u64 dE, dt;

        dt = ktime_get();
        dE = librapl_energy_uJ();
        usleep_range(1000 * duration_ms, 2000 * duration_ms);
        dE = librapl_energy_uJ() - dE;
        dt = ktime_get() - dt;

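        /* dE is in uJ and dt in ns, so dE * 1e6 / dt is uJ/ms, i.e. mW */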
        return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int freq)
{
        u64 x[5];
        int i;

        mutex_lock(&rps->lock);
        GEM_BUG_ON(!rps->active);
        intel_rps_set(rps, freq);
        mutex_unlock(&rps->lock);

        msleep(20); /* more than enough time to stabilise! */

        i = read_cagf(rps);
        if (i != freq)
                pr_notice("Running at %x [%uMHz], not target %x [%uMHz]\n",
                          i, intel_gpu_freq(rps, i),
                          freq, intel_gpu_freq(rps, freq));

        for (i = 0; i < 5; i++)
                x[i] = __measure_power(5);

        /* A simple triangle filter for better result stability */
        sort(x, 5, sizeof(*x), cmp_u64, NULL);
        return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_rps *rps = &gt->rps;
        void (*saved_work)(struct work_struct *wrk);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct igt_spinner spin;
        int err = 0;

        /*
         * Our fundamental assumption is that running at a lower frequency
         * actually saves power. Let's see if our RAPL measurements support
         * that theory.
         */

        if (!rps->enabled || rps->max_freq <= rps->min_freq)
                return 0;

        if (!librapl_energy_uJ())
                return 0;

        if (igt_spinner_init(&spin, gt))
                return -ENOMEM;

        intel_gt_pm_wait_for_idle(gt);
        saved_work = rps->work.func;
        rps->work.func = dummy_rps_work;

        for_each_engine(engine, gt, id) {
                struct i915_request *rq;
                u64 min, max;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                rq = igt_spinner_create_request(&spin,
                                                engine->kernel_context,
                                                MI_NOOP);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        break;
                }

                i915_request_add(rq);

                if (!igt_wait_for_spinner(&spin, rq)) {
                        pr_err("%s: RPS spinner did not start\n",
                               engine->name);
                        intel_gt_set_wedged(engine->gt);
                        err = -EIO;
                        break;
                }

                max = measure_power_at(rps, rps->max_freq);
                min = measure_power_at(rps, rps->min_freq);

                igt_spinner_end(&spin);

                pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
                        engine->name,
                        min, intel_gpu_freq(rps, rps->min_freq),
                        max, intel_gpu_freq(rps, rps->max_freq));
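                /* Demand that min draws at most ~91% (10/11) of max's power */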
                if (11 * min > 10 * max) {
                        pr_err("%s: did not conserve power when setting lower frequency!\n",
                               engine->name);
                        err = -EINVAL;
                        break;
                }

                if (igt_flush_test(gt->i915)) {
                        err = -EIO;
                        break;
                }
        }

        igt_spinner_fini(&spin);

        intel_gt_pm_wait_for_idle(gt);
        rps->work.func = saved_work;

        return err;
}