/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/perf_event.h>
#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_ringbuffer.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

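/*
 * Event configs for engine events (config < __I915_PMU_OTHER(0)) encode the
 * sample type in the low I915_PMU_SAMPLE_BITS, the engine instance in the
 * next byte and the engine class above I915_PMU_CLASS_SHIFT; other events
 * map onto enable bits starting at ENGINE_SAMPLE_BITS. The helpers below
 * pick these fields apart.
 */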
static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not need
	 * the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
{
	sample->cur += mul_u32_u32(val, unit);
}

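/*
 * Per-engine sampling, called from the timer: an engine is considered busy
 * when its current seqno has not caught up with the last submitted one, and
 * wait/semaphore status is read from RING_CTL under forcewake only when an
 * engine is busy and those samplers are enabled.
 */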
static void engines_sample(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
			      PERIOD, val);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
			      PERIOD, !!(val & RING_WAIT));

		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}

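/*
 * Frequency sampling, called from the timer: accumulates the current actual
 * (CAGF, when the GPU is awake) and requested frequencies once per tick;
 * __i915_pmu_event_read() later divides these sums by FREQUENCY.
 */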
static void frequency_sample(struct drm_i915_private *dev_priv)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
			      1, intel_gpu_freq(dev_priv, val));
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
			      intel_gpu_freq(dev_priv,
					     dev_priv->gt_pm.rps.cur_freq));
	}
}

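/*
 * Sampling timer callback: returns HRTIMER_NORESTART once timer_enabled has
 * been cleared, otherwise samples engines and frequencies and re-arms itself
 * for the next PERIOD.
 */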
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	engines_sample(i915);
	frequency_sample(i915);

	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}

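/*
 * RC6 residency: __get_rc6() sums the RC6 (plus RC6p/RC6pp where present)
 * residency counters, while get_rc6() adds runtime PM handling so that a
 * monotonic value can still be reported while the device is runtime
 * suspended, estimated from the time spent suspended.
 */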
static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}

static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	unsigned long flags;
	u64 val;

	if (intel_runtime_pm_get_if_in_use(i915)) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(i915);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */

		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct pci_dev *pdev = i915->drm.pdev;
		struct device *kdev = &pdev->dev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);
		spin_lock(&kdev->power.lock);

		/*
		 * Even though intel_runtime_pm_get_if_in_use failed above to
		 * get the runtime PM reference, we cannot assume we are in
		 * runtime suspend since we can either: a) race with coming out
		 * of it before we took the power.lock, or b) there are other
		 * states than suspended which can bring us here.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and if not we cannot do better than report the
		 * last known RC6 value.
		 */
		if (kdev->power.runtime_status == RPM_SUSPENDED) {
			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_jiffies_last =
						  kdev->power.suspended_jiffies;

			val = kdev->power.suspended_jiffies -
			      i915->pmu.suspended_jiffies_last;
			val += jiffies - kdev->power.accounting_timestamp;

			val = jiffies_to_nsecs(val);
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock(&kdev->power.lock);
		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   FREQUENCY);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   FREQUENCY);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}

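/*
 * Event reads accumulate the delta between the current raw value and the
 * previously observed one into event->count, using a cmpxchg loop on
 * prev_count so that each delta is accounted exactly once.
 */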
static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

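/*
 * Build the sysfs "events" group at runtime: count the counters supported on
 * this device (via config_status() and engine_event_status()), allocate the
 * attribute arrays and emit one config attribute, plus an optional ".unit"
 * attribute, per event.
 */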
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}

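/*
 * Events are exposed to userspace on a single CPU at a time (advertised via
 * the cpumask attribute): the first CPU to come online becomes the reader,
 * and if it goes offline the perf context is migrated to a sibling CPU when
 * one is available.
 */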
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
}