// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of C-states from determining the frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable C-states */

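/*
 * Each test temporarily replaces rps->work.func with this no-op so that the
 * background RPS worker cannot reclock the GPU behind the test's back.
 */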
static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

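/*
 * Build a batch that spins incrementing CS_GPR(COUNT) with MI_MATH, looping
 * back on itself via MI_BATCH_BUFFER_START. With srm, every iteration also
 * stores the counter into the last dword of the buffer so the CPU can sample
 * it. The loop runs until MI_BATCH_BUFFER_END is written over *cancel.
 */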
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_put; /* the vma is not yet locked, only drop the object */

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

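/*
 * Poll the actual frequency (CAGF) until it hits the target, the timeout
 * expires, or the reading has been stable for the last 64 samples (i.e. the
 * PCU has settled short of the goal). The polling interval starts at 20us
 * and backs off exponentially, capped relative to the timeout.
 */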
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

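/*
 * Request a new frequency and wait for it to take effect; returns the
 * frequency actually achieved, which may be lower if the PCU intervenes.
 */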
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

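/*
 * Check that the RPS evaluation interval counter (GEN6_RP_CUR_UP_EI) ticks
 * at the GT clock frequency we report: sample it against ktime over ~1ms,
 * five times, and require the filtered result to agree with walltime to
 * within roughly 20%.
 */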
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use a triangle filter about the median of cycles/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU, in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(i915, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d  %5d  %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

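/*
 * dc is a raw counter delta and dt the elapsed ktime in ns, so scaling by
 * 10^6 converts the result to counts per millisecond, which the callers
 * report as KHz.
 */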
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

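/*
 * True if the ratio x/y lies strictly within (f_n/f_d, f_d/f_n), e.g.
 * f_n/f_d == 2/3 accepts x and y that agree to within a factor of 1.5.
 */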
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

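/*
 * librapl_energy_uJ() reports microjoules and dt is in ns, so scaling by
 * 10^6 yields uJ/ms, i.e. the milliwatts printed by live_rps_power.
 */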
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurements support
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}